### UNSUPERVISED MACHINE LEARNING FOR THE CLASSIFICATION OF ASTROPHYSICAL X-RAY SOURCES

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min, silhouette_score
from sklearn.preprocessing import MinMaxScaler

from astroquery.simbad import Simbad
import astropy.coordinates as coord
import astropy.units as u

Simbad.TIMEOUT = 600  # query timeout in seconds: 600 s = 10 minutes

In [2]:
# Load the clustered source catalogue; the first CSV column is used as the
# DataFrame index.
CATALOGUE_CSV = "./cluster_data/astrox_knn6_src.csv"
data = pd.read_csv(CATALOGUE_CSV, index_col=0)

In [3]:
# Display the loaded catalogue (pandas abbreviates the repr of a large frame).
data

Unnamed: 0,cluster,name,ra,dec,src_area_b,hard_hm,hard_hs,hard_ms,var_prob_b,var_sigma_b,...,var_sigma_s,ks_prob_b,ks_prob_h,ks_prob_m,ks_prob_s,kp_prob_b,kp_prob_h,kp_prob_m,kp_prob_s,bb_kt
0,2,2CXO J000002.9-350332,0.012318,-35.059068,6.228827,-0.042473,-0.440974,-0.405996,0.067012,0.000041,...,0.000059,0.491601,0.724755,0.317516,0.423297,0.808309,0.669796,0.197852,0.691868,0.439048
1,2,2CXO J000010.0-501526,0.041803,-50.257400,8.978497,-0.049969,-0.277327,-0.229856,0.060476,0.000008,...,0.000093,0.441207,0.249246,0.181138,0.805236,0.614278,0.101385,0.028956,0.918830,0.534998
2,2,2CXO J000019.8-245030,0.082814,-24.841752,884.616067,0.041224,-0.278576,-0.311056,0.092881,0.000060,...,0.000058,0.798917,0.847319,0.495847,0.506963,0.430789,0.817322,0.353208,0.778730,0.414136
3,0,2CXO J000025.4-245419,0.106246,-24.905300,39.749528,0.033729,-0.329794,-0.358526,0.327994,0.000311,...,0.000019,0.932091,0.067084,0.939628,0.368044,0.924866,0.051374,0.950423,0.288221,0.456196
4,0,2CXO J000027.4-500421,0.114303,-50.072669,3.925662,0.154903,-0.126171,-0.276077,0.041535,0.000013,...,0.000006,0.816880,0.969048,0.227237,0.030124,0.766089,0.932166,0.556750,0.047099,0.628996
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37872,2,2CXO J235928.0+440317,359.866965,44.054832,32.554951,-0.161149,-0.440974,-0.298563,0.348135,0.000966,...,0.000286,0.981183,0.906643,0.336316,0.967217,0.884821,0.704248,0.628147,0.834831,0.431097
37873,0,2CXO J235932.4+181247,359.885089,18.213181,0.014842,-0.187383,-0.608370,-0.474703,0.193192,0.000232,...,0.001025,0.934161,0.583490,0.964677,0.262496,0.974790,0.494843,0.989638,0.387899,0.288484
37874,5,2CXO J235937.1+623151,359.904697,62.530855,0.156711,0.193629,0.296065,0.104934,0.069176,0.000010,...,0.000032,0.249008,0.052800,0.338171,0.093817,0.447750,0.109237,0.512777,0.100806,0.744936
37875,2,2CXO J235945.8-574927,359.940857,-57.824258,6.741334,0.084947,-0.262336,-0.338538,0.065255,0.000026,...,0.000149,0.701624,0.691945,0.910468,0.948922,0.882940,0.727829,0.955295,0.743982,0.618062


In [17]:
# Split the catalogue by cluster label (0-5) and draw a random sample of `n`
# sources from each cluster. The samples are what gets cross-matched against
# SIMBAD in the following cells.
n = 500           # sources sampled per cluster; also embedded in the output CSV names
SAMPLE_SEED = 42  # fixed seed so a fresh kernel run reproduces the same samples

# One sub-frame per cluster label.
data_0, data_1, data_2, data_3, data_4, data_5 = (
    data[data.cluster == label] for label in range(6)
)

# Sampling is without replacement, so DataFrame.sample raises ValueError if a
# cluster holds fewer than `n` rows.
(
    data_0_sample,
    data_1_sample,
    data_2_sample,
    data_3_sample,
    data_4_sample,
    data_5_sample,
) = (
    subset.sample(n=n, random_state=SAMPLE_SEED)
    for subset in (data_0, data_1, data_2, data_3, data_4, data_5)
)

In [5]:
# Request two extra columns in every SIMBAD result table: the identifier that
# was queried ('typed_id') and the object type ('otype').
Simbad.add_votable_fields('typed_id', 'otype')

In [15]:
# Cross-match the cluster-0 sample against SIMBAD and tally the object types
# of the matches.
coords = coord.SkyCoord(
    data_0_sample.ra,
    data_0_sample.dec,
    unit=(u.deg, u.deg),
    frame='icrs',
)

# Cone search around every sampled position; 0.01 deg = 36 arcsec.
result_table = Simbad.query_region(coords, radius=0.01 * u.deg)
resdf = result_table.to_pandas()

# Count matches per SIMBAD object type (OTYPE) and save the tally to CSV.
otype_labels = resdf.OTYPE.tolist()
count_obj = resdf.groupby(otype_labels, as_index=False).size()
count_obj.to_csv('0_count_obj{}.csv'.format(n))



In [19]:
# Cross-match the cluster-1 sample against SIMBAD and tally the object types
# of the matches.
coords = coord.SkyCoord(
    data_1_sample.ra,
    data_1_sample.dec,
    unit=(u.deg, u.deg),
    frame='icrs',
)

# Cone search around every sampled position; 0.01 deg = 36 arcsec.
result_table = Simbad.query_region(coords, radius=0.01 * u.deg)
resdf = result_table.to_pandas()

# Count matches per SIMBAD object type (OTYPE) and save the tally to CSV.
otype_labels = resdf.OTYPE.tolist()
count_obj = resdf.groupby(otype_labels, as_index=False).size()
count_obj.to_csv('1_count_obj{}.csv'.format(n))



In [20]:
# Cross-match the cluster-2 sample against SIMBAD and tally the object types
# of the matches.
coords = coord.SkyCoord(
    data_2_sample.ra,
    data_2_sample.dec,
    unit=(u.deg, u.deg),
    frame='icrs',
)

# Cone search around every sampled position; 0.01 deg = 36 arcsec.
result_table = Simbad.query_region(coords, radius=0.01 * u.deg)
resdf = result_table.to_pandas()

# Count matches per SIMBAD object type (OTYPE) and save the tally to CSV.
otype_labels = resdf.OTYPE.tolist()
count_obj = resdf.groupby(otype_labels, as_index=False).size()
count_obj.to_csv('2_count_obj{}.csv'.format(n))

