In [26]:
import pandas as pd
import numpy as np

In [44]:
def prepareSubtypes(path):
    """Load the subtypes catalogue CSV and shape it for merging.

    Processing order:
      1. Read ``path`` with ``pd.read_csv``.
      2. Drop the rows labelled 0 and 1.
      3. Drop rows whose ``Glon`` or ``Glat`` is missing.
      4. Sort by ``Spectral Type``, ``Sp type`` and ``Mag type``, then keep
         only the first row for each ``LAMOST`` value.
      5. Rename ``Class`` to ``Cl`` and ``LAMOST`` to ``Name``.
      6. Append ``root`` (constant ``'subtypes'``), ``plate`` and ``path``
         (NaN), ``dx`` and ``dy`` (0.0), and ``_RAJ2000`` / ``_DEJ2000``
         (float copies of ``RAJ2000`` / ``DEJ2000``).

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame
        The cleaned table. Row labels are the ones assigned by
        ``read_csv``; they are not renumbered after filtering.

    Raises
    ------
    KeyError
        If labels 0 and 1 are absent (fewer than two rows) or a required
        column is missing.
    ValueError
        If ``RAJ2000`` / ``DEJ2000`` hold values that cannot be cast to float.
    """
    catalogue = pd.read_csv(path)

    # NOTE(review): rows 0 and 1 are presumably the units / separator lines
    # of the catalogue export rather than data -- confirm against the file.
    cleaned = (
        catalogue
        .drop([0, 1], axis=0)
        .dropna(subset=['Glon', 'Glat'])
        # Sorting first decides which duplicate survives: drop_duplicates
        # keeps the first occurrence in the sorted order.
        .sort_values(by=['Spectral Type', 'Sp type', 'Mag type'])
        .drop_duplicates(subset='LAMOST')
        .rename(columns={'Class': 'Cl', 'LAMOST': 'Name'})
    )

    return cleaned.assign(
        root='subtypes',
        plate=np.nan,
        path=np.nan,
        dx=0.0,
        dy=0.0,
        _RAJ2000=lambda frame: frame['RAJ2000'].astype(float),
        _DEJ2000=lambda frame: frame['DEJ2000'].astype(float),
    )

In [45]:
# Build the cleaned subtypes table from its CSV and preview the first rows.
df = prepareSubtypes('data/DFBS_subtypes.csv')
df.head()

Unnamed: 0,No,Glon,Glat,RAJ2000,DEJ2000,Cl,Name,V,Spectral Type,Sp type,Mag type,root,plate,path,dx,dy,_RAJ2000,_DEJ2000
9,,122.74688757,-17.64003282,12.60909889,45.23135611,C,J005026.18+451352.8,12.61,Ba,C Ba,13.0,subtypes,,,0.0,0.0,12.609099,45.231356
10,,158.77834522,-32.19019519,45.06175694,21.56800806,C,J030014.82+213404.8,12.69,Ba,C Ba,13.0,subtypes,,,0.0,0.0,45.061757,21.568008
11,,15.57235887,24.42816973,253.34618889,-3.04951,C,J165323.08-030258.2,12.7,Ba,C Ba,13.0,subtypes,,,0.0,0.0,253.346189,-3.04951
12,,121.4986598,-36.05014231,11.56118111,26.80715389,C,J004614.68+264825.7,12.71,Ba,C Ba,13.0,subtypes,,,0.0,0.0,11.561181,26.807154
13,,97.64825928,63.20819967,210.412855,50.33292389,C,J140139.08+501958.5,12.71,Ba,C Ba,13.0,subtypes,,,0.0,0.0,210.412855,50.332924


In [46]:
# Count the missing values in each column of the subtypes table.
df.isna().sum()

No               3610
Glon                0
Glat                0
RAJ2000             0
DEJ2000             0
Cl                  0
Name                0
V                   0
Spectral Type      39
Sp type           545
Mag type          844
root                0
plate            3610
path             3610
dx                  0
dy                  0
_RAJ2000            0
_DEJ2000            0
dtype: int64

In [47]:
# Tally how many rows carry each class label.
df['Cl'].value_counts()

sd     1668
C      1088
Mrk     854
Name: Cl, dtype: int64

In [48]:
def prepareData(path):
    """Load the initial catalogue CSV and shape it for merging.

    The file is read with ``pd.read_csv``; the rows labelled 0 and 1 are
    dropped, the remaining rows are renumbered from 0, and only the first
    row for each ``Name`` is kept (the renumbered labels of removed
    duplicates are left as gaps). The columns ``root`` (constant
    ``'initial'``), ``plate`` and ``path`` (NaN) and ``dx`` / ``dy`` (zeros)
    are appended, and ``_RAJ2000`` / ``_DEJ2000`` are cast to float.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame
        The prepared table.
    """
    catalogue = pd.read_csv(path)

    # NOTE(review): rows 0 and 1 are presumably the units / separator lines
    # of the catalogue export rather than data -- confirm against the file.
    unique_rows = (
        catalogue
        .drop([0, 1])
        .reset_index(drop=True)
        .drop_duplicates(subset='Name')
    )

    row_count = unique_rows.shape[0]
    return unique_rows.assign(
        root='initial',
        plate=np.nan,
        path=np.nan,
        dx=np.zeros(row_count),
        dy=np.zeros(row_count),
        _RAJ2000=lambda frame: frame['_RAJ2000'].astype(float),
        _DEJ2000=lambda frame: frame['_DEJ2000'].astype(float),
    )

In [49]:
# Build the prepared initial-catalogue table from its CSV and preview the first rows.
data = prepareData('data/DFBS.csv')
data.head()

Unnamed: 0,_Glon,_Glat,_RAJ2000,_DEJ2000,Cl,Name,Vmag,z,root,plate,path,dx,dy
0,100.174423,-55.203358,0.04875,5.388056,Sy1,RXS J00001+0523,16.4,0.04,initial,,,0.0,0.0
1,99.844434,-57.30727,0.61,3.351667,Sy1,MARK 543,14.68,0.026,initial,,,0.0,0.0
2,86.112841,-70.112882,0.88375,-10.744722,Sy1,NGC 7808,15.4,0.029,initial,,,0.0,0.0
3,114.304767,-16.638006,1.039583,45.440278,Sy1,RXS J00041+4526,16.9,0.12,initial,,,0.0,0.0
4,104.972206,-50.897341,1.45625,10.376944,Sy1,RXS J00058+1022,16.7,0.095,initial,,,0.0,0.0


In [50]:
# Columns selected from both prepared tables before they are concatenated.
merge_columns = ['root', '_RAJ2000', '_DEJ2000', 'Cl', 'Name', 'plate', 'path', 'dx', 'dy']

In [51]:
# Stack the two prepared tables on their shared columns, renumber the rows,
# and keep the first row for each (_RAJ2000, _DEJ2000, Cl) combination.
# NOTE(review): reset_index() without drop=True keeps each row's previous
# label as a regular 'index' column, so it is also written out by to_csv
# later in the notebook -- confirm that column is wanted.
all_data = (
    pd.concat([data[merge_columns], df[merge_columns]])
    .reset_index()
    .drop_duplicates(subset=['_RAJ2000', '_DEJ2000', 'Cl'])
)

In [52]:
# Preview the first rows of the combined table.
all_data.head()

Unnamed: 0,index,root,_RAJ2000,_DEJ2000,Cl,Name,plate,path,dx,dy
0,0,initial,0.04875,5.388056,Sy1,RXS J00001+0523,,,0.0,0.0
1,1,initial,0.61,3.351667,Sy1,MARK 543,,,0.0,0.0
2,2,initial,0.88375,-10.744722,Sy1,NGC 7808,,,0.0,0.0
3,3,initial,1.039583,45.440278,Sy1,RXS J00041+4526,,,0.0,0.0
4,4,initial,1.45625,10.376944,Sy1,RXS J00058+1022,,,0.0,0.0


In [53]:
# Spot-check: show the row(s) whose declination equals this value.
# The comparison is exact float equality, so only a value that parses to
# exactly this literal matches.
all_data.loc[all_data['_DEJ2000'].eq(63.2425278)]

Unnamed: 0,index,root,_RAJ2000,_DEJ2000,Cl,Name,plate,path,dx,dy
4961,4963,initial,253.889333,63.242528,Mrk,503,,,0.0,0.0


In [54]:
# Write the combined table to disk; index=False omits the DataFrame's row labels.
all_data.to_csv('data/Combined.csv', index=False)