I am currently running this on Python Kernel 3.11.3

In [10]:
#Import packages
from astroquery.gaia import Gaia
from astropy.coordinates import SkyCoord
import astropy.units as u
import pandas as pd
import numpy as np

In [11]:
cluster_df = pd.read_csv('cluster_members.txt', delim_whitespace = True, na_values = '---')
cluster_df.shape

(401448, 26)

In [12]:
cluster_labels = cluster_df[['Source', 'PMemb', 'Cluster']]
cluster_labels.rename(columns = {'Source': 'source_id'}, inplace = True)
cluster_labels.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cluster_labels.rename(columns = {'Source': 'source_id'}, inplace = True)


Unnamed: 0,source_id,PMemb,Cluster
0,4190669036038417152,0.7,Alessi_10
1,4190657903483144960,1.0,Alessi_10
2,4190723255705770240,0.8,Alessi_10
3,4190732120518267776,1.0,Alessi_10
4,4190774206899418880,1.0,Alessi_10


In [13]:
cluster_df['Cluster'].value_counts().head(10)

NGC_7789         3646
NGC_6259         3554
Trumpler_5       3330
Trumpler_20      3163
NGC_6705         2743
NGC_6067         2541
NGC_2437         2195
NGC_2477         2195
Collinder_261    2043
NGC_6871         2014
Name: Cluster, dtype: int64

In [14]:
dr3_cols = ['source_id', 'ra', 'dec', 'parallax', 'pmra', 'pmdec', 'phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag', 'bp_rp', 'radial_velocity', 'teff_gspphot']
dr2_cols = ['source_id', 'ra', 'dec', 'parallax', 'pmra', 'pmdec', 'phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag', 'bp_rp', 'radial_velocity', 'teff_val']

## teff_val is not available in gaia dr3 data, only available in gaia dr2

***

**NGC 7789 Star Cluster**

In [15]:
desired_cluster = 'NGC_7789'
ngc_7789 = cluster_labels[cluster_labels['Cluster'] == desired_cluster]
ngc_7789

Unnamed: 0,source_id,PMemb,Cluster
312137,420901507912889088,0.1,NGC_7789
312138,420933909146938368,0.3,NGC_7789
312139,420935798932648064,0.4,NGC_7789
312140,420909990465240320,0.1,NGC_7789
312141,420925250493059840,0.6,NGC_7789
...,...,...,...
315778,1994792136095009152,1.0,NGC_7789
315779,1994806395386313472,1.0,NGC_7789
315780,1994807907214801280,0.6,NGC_7789
315781,1994791655058682368,1.0,NGC_7789


In [16]:
ngc_7789['PMemb'].value_counts()

1.0    2097
0.9     450
0.1     243
0.8     204
0.7     146
0.2     136
0.6     122
0.3      87
0.4      81
0.5      80
Name: PMemb, dtype: int64

In [17]:
ngc_7789_sources = ngc_7789["source_id"]
ngc_7789_sources.shape

(3646,)

In [18]:
# Construct the ADQL query
adql_query = f"SELECT * FROM gaiadr3.gaia_source WHERE source_id IN ({','.join(map(str, ngc_7789_sources))})"

# Query the Gaia Archive
results = Gaia.launch_job(adql_query).get_results()

# Convert to Pandas DataFrame
ngc_7789_dr3 = results.to_pandas()
ngc_7789_dr3 = ngc_7789_dr3[dr3_cols]
ngc_7789_dr3.shape

(2000, 12)

In [19]:
ngc_7789_dr3 = pd.merge(ngc_7789_dr3, cluster_labels, on = 'source_id', how = 'inner')
ngc_7789_dr3.shape

(2000, 14)

In [20]:
ngc_7789_dr3['PMemb'].value_counts()

1.0    1165
0.9     247
0.1     147
0.8     107
0.2      79
0.7      76
0.6      50
0.4      47
0.3      42
0.5      40
Name: PMemb, dtype: int64

In [21]:
ngc_7789_dr3.head()

Unnamed: 0,source_id,ra,dec,parallax,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,bp_rp,radial_velocity,teff_gspphot,PMemb,Cluster
0,420901507912889088,0.082375,56.014056,0.320644,-0.386428,-1.528307,16.820808,17.2866,16.18437,1.10223,,5409.381836,0.1,NGC_7789
1,420906936751548288,0.23708,56.055887,0.597838,-1.651632,-2.148538,17.591908,18.355732,16.755722,1.60001,,4343.029785,0.1,NGC_7789
2,420907142909967104,0.149607,56.061129,0.211846,-1.312115,-2.421624,15.102909,15.690556,14.344952,1.345604,,5978.277344,0.3,NGC_7789
3,420907722722130816,0.003523,56.075048,0.342945,-1.708469,-2.459792,16.090326,16.448742,15.559808,0.888934,,6960.671387,0.2,NGC_7789
4,420908414210912512,0.0133,56.152271,0.56082,-1.24588,-2.100259,16.771912,,,,,,0.3,NGC_7789


In [22]:
## Creating final NGC 7789 dataset ready for use
ngc_7789_dr3.loc[ngc_7789_dr3['PMemb'] != 1.0, 'Cluster'] = 'Other'
ngc_7789_dr3 = ngc_7789_dr3.drop('PMemb', axis = 1)
ngc_7789_dr3.head()

Unnamed: 0,source_id,ra,dec,parallax,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,bp_rp,radial_velocity,teff_gspphot,Cluster
0,420901507912889088,0.082375,56.014056,0.320644,-0.386428,-1.528307,16.820808,17.2866,16.18437,1.10223,,5409.381836,Other
1,420906936751548288,0.23708,56.055887,0.597838,-1.651632,-2.148538,17.591908,18.355732,16.755722,1.60001,,4343.029785,Other
2,420907142909967104,0.149607,56.061129,0.211846,-1.312115,-2.421624,15.102909,15.690556,14.344952,1.345604,,5978.277344,Other
3,420907722722130816,0.003523,56.075048,0.342945,-1.708469,-2.459792,16.090326,16.448742,15.559808,0.888934,,6960.671387,Other
4,420908414210912512,0.0133,56.152271,0.56082,-1.24588,-2.100259,16.771912,,,,,,Other


In [23]:
ngc_7789_dr3['Cluster'].value_counts()

NGC_7789    1165
Other        835
Name: Cluster, dtype: int64

***

**Trumpler 5 Star Cluster**

In [24]:
desired_cluster = 'Trumpler_5'
trumpler_5 = cluster_labels[cluster_labels['Cluster'] == desired_cluster]
trumpler_5

Unnamed: 0,source_id,PMemb,Cluster
395195,3326189391771063424,0.4,Trumpler_5
395196,3326189662353462400,0.7,Trumpler_5
395197,3326189421835294080,0.2,Trumpler_5
395198,3326188940798945920,0.4,Trumpler_5
395199,3326188665921045120,0.3,Trumpler_5
...,...,...,...
398520,3326872184197185024,0.3,Trumpler_5
398521,3326866686638968192,1.0,Trumpler_5
398522,3326860639324729088,0.6,Trumpler_5
398523,3326885245192654848,0.5,Trumpler_5


In [25]:
trumpler_5['PMemb'].value_counts()

1.0    836
0.9    554
0.8    378
0.7    283
0.1    261
0.6    216
0.5    210
0.2    208
0.3    202
0.4    182
Name: PMemb, dtype: int64

In [26]:
trumpler_5_sources = trumpler_5["source_id"]
trumpler_5_sources.shape

(3330,)

In [27]:
# Construct the ADQL query
adql_query = f"SELECT * FROM gaiadr3.gaia_source WHERE source_id IN ({','.join(map(str, trumpler_5_sources))})"

# Query the Gaia Archive
results = Gaia.launch_job(adql_query).get_results()

# Convert to Pandas DataFrame
trumpler_5_dr3 = results.to_pandas()
trumpler_5_dr3 = trumpler_5_dr3[dr3_cols]
trumpler_5_dr3.shape

(2000, 12)

In [28]:
trumpler_5_dr3 = pd.merge(trumpler_5_dr3, cluster_labels, on = 'source_id', how = 'inner')
trumpler_5_dr3.shape

(2000, 14)

In [29]:
trumpler_5_dr3['PMemb'].value_counts()

1.0    547
0.9    354
0.8    230
0.7    175
0.1    141
0.6    126
0.5    118
0.2    112
0.3     99
0.4     98
Name: PMemb, dtype: int64

In [30]:
trumpler_5_dr3.head()

Unnamed: 0,source_id,ra,dec,parallax,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,bp_rp,radial_velocity,teff_gspphot,PMemb,Cluster
0,3326188665921045120,99.180263,8.93809,0.117289,-0.767038,0.202383,17.720343,18.314255,16.970951,1.343304,,5201.759766,0.3,Trumpler_5
1,3326188940798945920,99.123816,8.924983,-0.052569,0.351594,-0.838942,17.792179,18.782574,16.812962,1.969612,,4615.896973,0.4,Trumpler_5
2,3326189391771063424,99.161032,8.937259,0.354193,-0.229926,-0.055456,16.855852,17.416264,16.147139,1.269125,,5466.880371,0.4,Trumpler_5
3,3326189421835294080,99.175908,8.955257,0.423364,0.278428,-0.069258,17.581598,18.265062,16.802801,1.462261,,5164.189941,0.2,Trumpler_5
4,3326189593633986432,99.104969,8.958145,0.333585,-0.597968,-0.29504,17.811178,18.439911,17.031984,1.407927,,4466.015625,0.1,Trumpler_5


In [31]:
## Creating final NGC 7789 dataset ready for use
trumpler_5_dr3.loc[trumpler_5_dr3['PMemb'] != 1.0, 'Cluster'] = 'Other'
trumpler_5_dr3 = trumpler_5_dr3.drop('PMemb', axis = 1)
trumpler_5_dr3.head()

Unnamed: 0,source_id,ra,dec,parallax,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,bp_rp,radial_velocity,teff_gspphot,Cluster
0,3326188665921045120,99.180263,8.93809,0.117289,-0.767038,0.202383,17.720343,18.314255,16.970951,1.343304,,5201.759766,Other
1,3326188940798945920,99.123816,8.924983,-0.052569,0.351594,-0.838942,17.792179,18.782574,16.812962,1.969612,,4615.896973,Other
2,3326189391771063424,99.161032,8.937259,0.354193,-0.229926,-0.055456,16.855852,17.416264,16.147139,1.269125,,5466.880371,Other
3,3326189421835294080,99.175908,8.955257,0.423364,0.278428,-0.069258,17.581598,18.265062,16.802801,1.462261,,5164.189941,Other
4,3326189593633986432,99.104969,8.958145,0.333585,-0.597968,-0.29504,17.811178,18.439911,17.031984,1.407927,,4466.015625,Other


In [32]:
trumpler_5_dr3['Cluster'].value_counts()

Other         1453
Trumpler_5     547
Name: Cluster, dtype: int64

***

**NGC 2437 Star Cluster**

In [33]:
desired_cluster = 'NGC_2437'
ngc_2437 = cluster_labels[cluster_labels['Cluster'] == desired_cluster]
ngc_2437

Unnamed: 0,source_id,PMemb,Cluster
197607,3030366368135655296,0.9,NGC_2437
197608,3030382177918823552,0.8,NGC_2437
197609,3030369018139283712,0.1,NGC_2437
197610,3030361665155399808,0.7,NGC_2437
197611,3030361733874875776,0.9,NGC_2437
...,...,...,...
199797,3029625469102388096,0.1,NGC_2437
199798,3029627805561852032,0.4,NGC_2437
199799,3029587849487459328,1.0,NGC_2437
199800,3029612554139218688,0.1,NGC_2437


In [34]:
ngc_2437['PMemb'].value_counts()

1.0    1241
0.9     247
0.8     155
0.1     114
0.2      93
0.7      90
0.5      71
0.6      64
0.4      62
0.3      58
Name: PMemb, dtype: int64

In [35]:
ngc_2437_sources = ngc_2437["source_id"]
ngc_2437_sources.shape

(2195,)

In [36]:
# Construct the ADQL query
adql_query = f"SELECT * FROM gaiadr3.gaia_source WHERE source_id IN ({','.join(map(str, ngc_2437_sources))})"

# Query the Gaia Archive
results = Gaia.launch_job(adql_query).get_results()

# Convert to Pandas DataFrame
ngc_2437_dr3 = results.to_pandas()
ngc_2437_dr3 = ngc_2437_dr3[dr3_cols]
ngc_2437_dr3.shape

(2000, 12)

In [37]:
ngc_2437_dr3 = pd.merge(ngc_2437_dr3, cluster_labels, on = 'source_id', how = 'inner')
ngc_2437_dr3.shape

(2000, 14)

In [38]:
ngc_2437_dr3['PMemb'].value_counts()

1.0    1166
0.9     226
0.8     137
0.1      90
0.7      79
0.2      74
0.6      60
0.5      59
0.4      55
0.3      54
Name: PMemb, dtype: int64

In [39]:
ngc_2437_dr3.head()

Unnamed: 0,source_id,ra,dec,parallax,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,bp_rp,radial_velocity,teff_gspphot,PMemb,Cluster
0,3028907114352000000,115.526317,-15.606329,0.612704,-3.872994,0.365249,13.168623,13.362393,12.843882,0.518512,,7707.453613,1.0,NGC_2437
1,3028907664107832064,115.442723,-15.615391,0.615414,-3.872067,0.372196,17.382462,17.937426,16.681881,1.255545,,4557.757812,0.7,NGC_2437
2,3028907797242655872,115.490331,-15.610655,0.447055,-3.432289,-0.133326,17.563602,18.166182,16.813154,1.353027,,4681.834961,0.1,NGC_2437
3,3028911478038853504,115.792022,-15.538045,0.584538,-3.814146,0.389718,16.137276,16.610874,15.51557,1.095304,,5089.95166,1.0,NGC_2437
4,3028912092211579776,115.746816,-15.554148,0.613328,-3.882753,0.421553,16.254417,16.623857,15.66541,0.958447,,5201.231445,1.0,NGC_2437


In [40]:
## Creating final NGC 7789 dataset ready for use
ngc_2437_dr3.loc[ngc_2437_dr3['PMemb'] != 1.0, 'Cluster'] = 'Other'
ngc_2437_dr3 = ngc_2437_dr3.drop('PMemb', axis = 1)
ngc_2437_dr3.head()

Unnamed: 0,source_id,ra,dec,parallax,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,bp_rp,radial_velocity,teff_gspphot,Cluster
0,3028907114352000000,115.526317,-15.606329,0.612704,-3.872994,0.365249,13.168623,13.362393,12.843882,0.518512,,7707.453613,NGC_2437
1,3028907664107832064,115.442723,-15.615391,0.615414,-3.872067,0.372196,17.382462,17.937426,16.681881,1.255545,,4557.757812,Other
2,3028907797242655872,115.490331,-15.610655,0.447055,-3.432289,-0.133326,17.563602,18.166182,16.813154,1.353027,,4681.834961,Other
3,3028911478038853504,115.792022,-15.538045,0.584538,-3.814146,0.389718,16.137276,16.610874,15.51557,1.095304,,5089.95166,NGC_2437
4,3028912092211579776,115.746816,-15.554148,0.613328,-3.882753,0.421553,16.254417,16.623857,15.66541,0.958447,,5201.231445,NGC_2437


In [41]:
ngc_2437_dr3['Cluster'].value_counts()

NGC_2437    1166
Other        834
Name: Cluster, dtype: int64

***

In [None]:
tables = Gaia.load_tables(only_names = True)
for table in tables:    
    print(table.get_qualified_name())

In [138]:
main_table = queryGaia("SELECT TOP  * FROM gaiadr3.gaia_source WHERE ;")
main_table.head()

INFO: Query finished. [astroquery.utils.tap.core]


Unnamed: 0,solution_id,DESIGNATION,source_id,random_index,ref_epoch,ra,ra_error,dec,dec_error,parallax,...,azero_gspphot,azero_gspphot_lower,azero_gspphot_upper,ag_gspphot,ag_gspphot_lower,ag_gspphot_upper,ebpminrp_gspphot,ebpminrp_gspphot_lower,ebpminrp_gspphot_upper,libname_gspphot
0,1636148068921376768,Gaia DR3 5316888728975186304,5316888728975186304,480364689,2016.0,130.285076,0.072465,-55.743497,0.080178,6.700118,...,0.9059,0.7253,0.9526,0.5388,0.4298,0.5663,0.3974,0.3215,0.4172,MARCS
1,1636148068921376768,Gaia DR3 5316888759028795904,5316888759028795904,1315211472,2016.0,130.29404,0.238489,-55.736696,0.235666,0.207641,...,,,,,,,,,,
2,1636148068921376768,Gaia DR3 5316888759028816128,5316888759028816128,1196949855,2016.0,130.292894,0.266807,-55.730805,0.259652,0.946334,...,,,,,,,,,,
3,1636148068921376768,Gaia DR3 5316888759028818560,5316888759028818560,1638460320,2016.0,130.305125,0.064107,-55.730802,0.072081,0.558692,...,0.3907,0.2119,0.5661,0.3022,0.1629,0.4401,0.1617,0.0872,0.2349,PHOENIX
4,1636148068921376768,Gaia DR3 5316888759028818688,5316888759028818688,223867603,2016.0,130.305392,0.26346,-55.732275,0.267637,0.331408,...,,,,,,,,,,
