In [1]:
#Prepare the python system 
import pandas as pd  
import numpy as np 
import os   #move around in our OS

from astropy.cosmology import WMAP9 as cosmo  #Cosmology calculators
from astropy.io import fits  #Working with fits
from astropy.table import Table

import itertools as it #iteration / combination trick used

import seaborn as sb
import matplotlib.pyplot as plt


#Working directory control
cwd = os.getcwd() 
print("Initial working directory is:", cwd) 
if '/Users/users/verdult/Thesis/thesis' in cwd:
    print("Working at kapteyn, changing to data directory")
    os.chdir('/net/virgo01/data/users/verdult/Thesis')  #This is for kapteyn
if 'data' in cwd:
    print("Working in kapteyn data folder")
if 'Dropbox' in cwd:
    print("Working at home, changing to onedrive folder")
    os.chdir('D:\Onedrive\Thesis') 
if 'Onedrive' in cwd:
    print("Working in onedrive folder")
cwd = os.getcwd() 
print("Current working directory is:", cwd) 
os.chdir("D:\Onedrive\Thesis")

Initial working directory is: F:\Dropbox\Dropbox\Thesis
Working at home, changing to onedrive folder
Current working directory is: D:\Onedrive\Thesis


In [2]:
def pandafy(fits_filename):
    """
    Turns an .fits file into a pandas dataframe"""
    dat = Table.read(fits_filename, format='fits')
    df = dat.to_pandas(index = 'CATAID')
    return(df)

def contains(df, string):
    df = df[df.columns[df.columns.str.contains(string)]]
    return df
    
def endswith(df, string):
    df = df[df.columns[df.columns.str.endswith(string)]]
    return df

def startswith(df, string):
    df = df[df.columns[df.columns.str.startswith(string)]]
    return df
    
def fittify(df,filename='ThesisDB_selected.fits'):  #say which dataframe you want to turn into a fit file
    holder = []
    for i in range(df.columns.values.size):
        holder.append(fits.Column(name=df.columns.values[i], format='D', array=df.iloc[:,i]))

    cols = fits.ColDefs(holder)
    hdu = fits.BinTableHDU.from_columns(cols)

    hdu.writeto(filename,overwrite=True)
    
def save_db(db,dbname):
    db.to_hdf('support/%s/ThesisDB.h5'%(dbname), 'Dataframe')  
    db.to_csv('support/%s/ThesisDB.csv'%(dbname))  
    fittify(db,filename='support/%s/ThesisDB.fits'%(dbname))

    phot.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Photometric')  
    colour.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Colour') 
    spectral.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Spectral')
    spec_ds.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Direct_Summation')

def prep_set(dir_name):
    """Quick function to set up folders when a new empty dataset needs to be created.
    Directory name needs to be a string"""
    os.chdir("D:\Onedrive\Thesis\support")
    os.mkdir(dir_name)
    os.mkdir("%s\pics"%(dir_name))
    os.mkdir("%s\pics\lap"%(dir_name))
    os.mkdir("%s\pics\pfa"%(dir_name))
    os.mkdir("%s\pics\SR"%(dir_name))
    os.mkdir("%s\pics\BE"%(dir_name))
    os.mkdir("%s\pics\pairwise"%(dir_name))
    os.mkdir("%s\pics\eif"%(dir_name))
    os.mkdir("%s\ylist"%(dir_name))
    os.chdir("D:\Onedrive\Thesis")
#prep_set("delta2")
#os.chdir("D:\Onedrive\Thesis")

In [3]:
SDSS = pandafy('fits/SersicCatSDSS.fits')
UKIDSS = pandafy('fits/SersicCatUKIDSS.fits')

In [5]:
UKIDSS

Unnamed: 0_level_0,RA,DEC,R_PETRO,SURVEY_OLDCLASS,SURVEY_CLASS,CENFLUX_Y,BACKFLUX_Y,PSFNUM_Y,PSFCHI2_Y,PSFFWHM_Y,...,GALMUEAVG_K,GALR90_K,GALCHI2FULL_K,GALNDOF_K,GALCHI2_K,GALPLAN_K,PRICHI2FULL_K,PRINFP_K,PRINDOF_K,PRICHI2_K
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,174.005984,0.720935,18.622816,7,6,1215.502075,8.806604,21,1.20,0.91191,...,19.165457,2.158304,11045.000977,10194,1.083481,1,74.569214,7,65,1.147219
6803,174.021648,0.754885,19.219088,6,6,1626.389771,7.636289,18,1.14,0.91191,...,17.661249,2.820666,10937.115234,10191,1.073213,1,80.881653,7,72,1.123356
6804,174.092055,0.674096,19.341200,6,6,719.918030,13.045390,3,2.39,1.71534,...,18.820248,1.991406,8271.460938,10166,0.813640,1,44.391865,7,60,0.739864
6805,174.022355,0.702532,19.301096,6,6,1369.391602,8.462235,18,1.16,0.91191,...,16.985043,4.202578,11487.612305,10172,1.129337,1,74.135132,7,62,1.195728
6806,174.022807,0.705945,18.544878,7,6,964.250183,14.368077,18,1.17,0.90852,...,19.185122,3.990641,11519.376953,10180,1.131569,1,305.722656,7,169,1.809010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4345061,174.637285,-0.060815,20.909586,2,2,69.469299,6.284157,12,1.16,1.10514,...,19.871147,1.881074,10598.260742,10116,1.047673,1,28.323858,7,33,0.858299
4345978,176.321436,-0.138858,20.622234,2,2,513.365784,-0.181604,20,1.17,0.88479,...,18.950581,3.812571,10537.488281,10177,1.035422,1,146.028091,7,76,1.921422
4347483,179.090796,-0.135979,20.504171,2,2,367.497925,-6.736897,18,1.10,1.07802,...,19.374432,1.966496,10542.975586,10170,1.036674,1,66.047874,7,70,0.943541
4348349,180.767584,-0.125180,18.312876,3,4,282.031921,42.262608,15,1.13,0.98988,...,17.980354,17.798452,10946.397461,10187,1.074546,1,985.054810,7,476,2.069443


# Alpha and Beta: 
Sersic + DirectSummation, manual calculation of absmag
Alpha: complete
Beta: prichi <2

In [6]:
#setup initial dataframe
# Sersic Photometry
SDSS = pandafy('fits/SersicCatSDSS.fits')
UKIDSS = pandafy('fits/SersicCatUKIDSS.fits')
dfm = pd.merge(SDSS,UKIDSS,right_index=True, left_index=True, how='inner') #116374 entries

# Spectral lines
DS = pandafy('fits/DirectSummation.fits')
DS = DS[DS['IS_BEST'] == True]  #select only the best fits from all
DS = DS[DS['SURVEY_CODE'] == 5 | 1]   #select only GAMA (5) or SDSS (1) fits
DS = DS[DS['SN'] > 1]   # cut of point for goodness of fit
dfm = pd.merge(DS,dfm,right_index=True, left_index=True, how='inner')

# galactic extinction
extinc = pandafy('fits/GalacticExtinction.fits').loc[:,'A_u':'A_K_UKIDSS']  #select only values we need
dfm = pd.merge(dfm,extinc,right_index=True, left_index=True, how='inner')

# kcorrection
kcor = pandafy('fits/kcorr_auto_z00.fits').iloc[:,4:13]   #select only the values we need
dfm = pd.merge(dfm,kcor,right_index=True, left_index=True, how='inner')


# Set up constants:
bands = "ugrizYJHK"  #All the bands we will iterate over
arcsec = (2*np.pi)/(360*3600)  #one arcsec in radians

#initiate a new dataframe  (87123 entries)
dfm2 = dfm.loc[:,'RA':'Z']
dis = cosmo.comoving_distance(dfm['Z'])
dfm2['Distance'] = dis # units of mega parsec
#dfm2['Petrosian'] = dfm['R_PETRO_x']
dfm2


j = 0
for i in bands: 
    #Absolute magnitude, based on distance, kcorrection and galactic foreground extinction
    dfm2['absmag_%s'%(i)] = 5 + (dfm['GALMAG_%s'%(i)] -5*np.log10((dis.value*10**6))) \
    - contains(dfm,'KCORR').iloc[:,j] - dfm.loc[:,'A_u':'A_K_UKIDSS'].iloc[:,j] 
    
    #Absolute magnitute at 10 Re   #per band: Mv = mv - 2.5*log10((distance / 10 pc)**2) - kcorr
    dfm2['absmag10re_%s'%(i)] = 5 + (dfm['GALMAG10RE_%s'%(i)] -5*np.log10((dis.value*10**6))) \
    - contains(dfm,'KCORR').iloc[:,j] - dfm.loc[:,'A_u':'A_K_UKIDSS'].iloc[:,j] 
    
    #Radius (kpc) that fits 90% of the light)
    dfm2['size90_%s'%(i)] = (np.sin(dfm['GALR90_%s'%(i)]*arcsec)*dis.value)*10**3    
   
    #Radius (kpc) where light is at 50%
    dfm2['sizeRE_%s'%(i)] = (np.sin(dfm['GALRE_%s'%(i)]*arcsec)*dis.value)*10**3    
    
    #Sersic index of the galaxy 
    dfm2['SersicIndex_%s'%(i)] = dfm['GALINDEX_%s'%(i)]  
    
    #ellipticity of the galaxy,
    dfm2['Ellipticity_%s'%(i)] = dfm['GALELLIP_%s'%(i)] 
    
    #Central surface brightness in (absmag / arcsec^2)  #No sense changing this  
    dfm2['MU@0_%s'%(i)] = dfm['GALMU0_%s'%(i)]  
    
    #Effective surface brightness at effective radius (absmag / arcsec^2) #No sense changing this
    dfm2['MU@E_%s'%(i)] = dfm['GALMUE_%s'%(i)] 
    
    #Average Effective surface brightness within effective radius (absmag / arcsec^2)
    dfm2['MUEAVG_%s'%(i)] = dfm['GALMUEAVG_%s'%(i)]  
    j += 1
dfm2 = dfm2[dfm2 > -9999]  #to set some nan before we go to colours and spectral 

    
#-------------------------------------------------------------
#Add spectral information, 52 columns added
#[:,123:175]
#Exrtract some flux info some line fluxes
equivW = endswith(dfm,"EW")  #Grab all the continua
#add the 4000 A break strength 
dfm2['D4000N'] = dfm['D4000N']  
for i in range(len(equivW.columns)):
    dfm2[equivW.columns[i]] = equivW.iloc[:,i]
#-------------------------------------------------------------
#Convert the colours and add them to the dataframe, 36 in total
# Adding this to the end instead, as we are unlikely to use them
#[:,87:123]

b=np.arange(len(bands))                                  #to make an combinations series
combi = pd.Series(list(it.combinations(b,2)))   #praise to atomh33ls at stackoverflow
for i in combi:                                
    dfm2['%s-%s'%(bands[i[0]],bands[i[1]])] = \
    (dfm2['absmag_%s'%(bands[i[0]])]-dfm2['absmag_%s'%(bands[i[1]])])

tester = dfm2
dfm2 = dfm2[dfm2 > -99999].dropna() #36765 rows × 175 columns, with old method
df = dfm2   # 26214 rows × 173 columns with new method. Where is this difference from?
#Unclear! Possibly in the merging of files in topcat. Which means we had been working with faulty files for a long while! 
# Saving the initial file (Alpha)
phot = dfm2.iloc[:,4:85]
spectral = dfm2.iloc[:,85:137] 
colour = dfm2.iloc[:,137:173]






Unnamed: 0_level_0,SPECID,RA,DEC,Z,NQ,SURVEY,SURVEY_CODE,IS_BEST,IS_SBEST,SN,...,HDA_CONT,HDA_EW_ERR,HDA_EW,HDA_FLUX_ERR,HDA_FLUX,OII_CONT,OII_EW_ERR,OII_EW,OII_FLUX_ERR,OII_FLUX
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,b'G12_Y3_017_187 ',174.005997,0.72093,0.050529,4,b'GAMA ',5,True,True,10.700412,...,5.288479,7.488305,5.106353,39.425926,28.240475,2.324466,26.618259,22.621220,53.140034,52.913776
6806,b'G12_Y1_AN1_238 ',174.022790,0.70594,0.331206,4,b'GAMA ',5,True,True,8.391569,...,11.557695,1.175526,-5.741475,18.996931,-88.366577,7.823625,1.690446,7.843846,16.223980,81.656792
6808,b'G12_Y1_AN1_235 ',174.100710,0.65891,0.229329,4,b'GAMA ',5,True,True,12.075199,...,7.473270,1.776299,-0.830661,16.396040,-7.675704,5.903210,1.720988,3.648445,12.090662,26.165625
6810,b'G12_Y2_041_164 ',174.109080,0.80390,0.326380,4,b'GAMA ',5,True,True,7.795063,...,5.392679,1.362924,0.414285,9.692929,2.594831,4.366937,1.499271,7.352146,8.015382,42.585522
6813,b'G12_Y3_017_177 ',174.063830,0.64454,0.484554,4,b'GAMA ',5,True,True,-99999.000000,...,5.934527,1.842995,-4.158036,16.817881,-36.643059,3.142103,3.832169,10.646575,14.787091,49.649075
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8003087,b'G02_Y6_017_254 ',37.836750,-4.05976,0.401419,2,b'GAMA ',5,True,True,-99999.000000,...,12.399290,1.698954,-2.667766,29.744314,-44.829803,11.821866,1.218002,3.282435,19.501949,54.379017
8003092,b'G02_Y6_017_246 ',37.647420,-4.02889,0.030410,2,b'GAMA ',5,True,True,11.608665,...,10.771623,3.328188,-0.986965,37.222153,-11.564173,15.543070,4.013684,-6.309324,66.209381,-101.151070
8003098,b'G02_Y6_017_196 ',37.146250,-4.19436,0.406779,2,b'GAMA ',5,True,True,-99999.000000,...,57.405777,13.481730,-10.871365,1227.800049,-710.307739,58.634628,11.623593,10.635059,855.175720,876.774170
8003102,b'G02_Y6_012_081 ',38.665420,-4.74022,0.253869,2,b'GAMA ',5,True,True,0.397337,...,5.571387,4.350465,-2.318051,30.962255,-16.062019,7.634825,2.585687,-2.509053,25.181015,-24.938957


In [8]:
phot

Unnamed: 0_level_0,absmag_u,absmag10re_u,size90_u,sizeRE_u,SersicIndex_u,Ellipticity_u,MU@0_u,MU@E_u,MUEAVG_u,absmag_g,...,MUEAVG_H,absmag_K,absmag10re_K,size90_K,sizeRE_K,SersicIndex_K,Ellipticity_K,MU@0_K,MU@E_K,MUEAVG_K
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,-16.498930,-16.498930,1.693470,1.217394,0.089800,0.6091,21.654612,21.669508,21.656874,-17.555164,...,18.456911,-18.329572,-18.329572,2.261476,1.108027,0.718200,0.6687,18.497427,19.713715,19.165457
6830,-20.563993,-20.445307,1573.234621,151.985781,7.280300,0.3254,15.022880,30.471449,28.761940,-19.144864,...,21.384781,-19.554675,-19.554661,8.410381,3.419993,1.144900,0.3671,19.586367,21.721439,20.959873
6837,-18.530791,-18.530265,13.819605,4.843815,1.545500,0.6322,20.558409,23.560274,22.653215,-19.266915,...,19.762506,-20.007647,-20.007439,10.125538,3.729863,1.405500,0.7441,17.953623,20.652346,19.791887
6838,-17.886899,-17.877645,24.967339,6.598801,2.455100,0.8897,18.827776,23.801743,22.661787,-18.610079,...,20.511658,-18.673820,-18.673820,4.348900,2.681564,0.306300,0.5852,20.817305,21.172449,20.947062
6839,-18.298163,-18.298163,9.534485,5.777714,0.333200,0.5407,23.503447,23.912020,23.660021,-19.509874,...,20.752487,-20.330712,-20.330699,12.660867,5.149431,1.144400,0.5641,19.296329,21.430323,20.668962
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901701,-18.664665,-18.618874,25.032558,4.385122,4.128400,0.0564,16.357140,24.962646,23.553135,-19.817653,...,17.871639,-21.602422,-21.553901,13.973168,2.389463,4.239300,0.4060,10.681295,19.527546,18.104126
3901702,-19.268692,-19.268688,15.627759,6.619197,1.044400,0.6740,21.512436,23.430477,22.712326,-20.629055,...,18.073061,-21.884541,-21.826596,18.755593,2.958011,4.621800,0.5518,9.617155,19.293768,17.824934
3901768,-19.220608,-19.220547,21.666105,8.399797,1.267700,0.5981,21.735662,24.136213,23.325834,-20.199397,...,20.013775,-21.221068,-21.221068,12.165849,5.381250,0.941900,0.5615,18.764030,20.461014,19.790936
3973288,-15.955128,-15.955128,3.670465,2.476511,0.176500,0.6153,23.756243,23.875179,23.786745,-16.945159,...,16.664459,-18.233880,-17.968404,15.903862,0.385535,17.934999,0.6377,-19.329847,19.254202,17.061428


## Alpha, Full database, Direct Summation
26214 rows × 173 columns

In [5]:
# Alpha, Full database, Direct Summation
dfm2.to_hdf('support/Alpha/ThesisDB.h5', 'Dataframe')  
dfm2.to_csv('support/Alpha/ThesisDB.csv')  
fittify(dfm2,filename='support/Alpha/ThesisDB.fits')

phot.to_hdf('support/Alpha/Parts_DB.h5', 'Photometric')  
colour.to_hdf('support/Alpha/Parts_DB.h5', 'Colour') 
spectral.to_hdf('support/Alpha/Parts_DB.h5', 'Spectral') 

## Beta: Alpha with prichi^2 between 0.5 and 2
10177 rows × 173 columns

In [6]:
#Making new coumns with the prichi2 scores
SDSS = pandafy('fits/SersicCatSDSS.fits')
UKID = pandafy('fits/SersicCatUKIDSS.fits')
merged = pd.merge(UKID,SDSS,right_index=True, left_index=True, how='inner')
chi2 = startswith(merged,"PRICHI2_")

minchi = 0.5    
maxchi = 2  
chi2 = chi2[(chi2 > minchi) & (chi2 < maxchi)].dropna()
df_chi = pd.merge(dfm2,chi2, right_index = True, left_index = True, how='inner').iloc[:,0:-9]

print(' at prichi2 values between %.1f and  %.1f we have %i primarychi2 values to work with,\n \
      resulting in a final database of %i rows' %(minchi,maxchi,chi2.shape[0],df_chi.shape[0]))
print('This is a difference of %i compared to not filtering for chi2'%(dfm2.shape[0] - df_chi.shape[0]))

#create partial dataframes
phot = df_chi.iloc[:,4:85]
spectral = df_chi.iloc[:,85:137] 
colour = df_chi.iloc[:,137:173]
df_chi

 at prichi2 values between 0.5 and  2.0 we have 20626 primarychi2 values to work with,
       resulting in a final database of 10177 rows
This is a difference of 16037 compared to not filtering for chi2


Unnamed: 0_level_0,RA,DEC,Z,Distance,absmag_u,absmag10re_u,size90_u,sizeRE_u,SersicIndex_u,Ellipticity_u,...,z-Y,z-J,z-H,z-K,Y-J,Y-H,Y-K,J-H,J-K,H-K
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,174.005997,0.72093,0.050529,216.124754,-16.498930,-16.498930,1.693470,1.217394,0.0898,0.6091,...,-0.074355,0.054119,0.080334,-0.244385,0.128474,0.154689,-0.170030,0.026215,-0.298504,-0.324719
6837,174.302750,0.78998,0.074283,316.042533,-18.530791,-18.530265,13.819605,4.843815,1.5455,0.6322,...,-0.023039,-0.056638,0.314390,-0.133409,-0.033599,0.337429,-0.110370,0.371028,-0.076772,-0.447800
6838,174.305540,0.79034,0.074549,317.154217,-17.886899,-17.877645,24.967339,6.598801,2.4551,0.8897,...,-0.446172,-0.643587,-0.201072,-0.502923,-0.197415,0.245101,-0.056751,0.442516,0.140664,-0.301851
6840,174.346880,0.69645,0.193060,799.021212,-19.391659,-19.391626,14.594531,5.791895,1.2069,0.4717,...,-0.071671,-0.042171,0.552355,0.083708,0.029500,0.624026,0.155378,0.594526,0.125879,-0.468647
6846,174.439290,0.66226,0.075374,320.606235,-18.610924,-18.610924,17.420147,7.471441,1.0143,0.4178,...,0.045004,-0.274082,0.143344,-0.317933,-0.319086,0.098340,-0.362937,0.417426,-0.043850,-0.461277
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901140,132.397710,-0.90088,0.247813,1012.149719,-20.361342,-20.344532,81.870811,19.338365,2.8713,0.3272,...,0.085124,0.166319,0.463283,0.269753,0.081195,0.378159,0.184629,0.296963,0.103433,-0.193530
3901178,132.413170,-1.03312,0.217588,895.247581,-19.882443,-19.881783,38.374570,13.263538,1.5860,0.4871,...,0.452042,0.242949,0.943910,0.350133,-0.209093,0.491868,-0.101910,0.700961,0.107184,-0.593778
3901184,132.414960,-0.90883,0.177885,738.882025,-18.909851,-18.909851,11.606056,8.416164,0.0770,0.7276,...,0.314233,0.555982,0.806603,0.763355,0.241749,0.492370,0.449122,0.250621,0.207373,-0.043248
3901441,132.476290,-0.92563,0.056787,242.553587,-17.400572,-17.400572,6.448936,4.732271,0.0578,0.7700,...,0.284168,0.440234,0.450702,0.378112,0.156066,0.166534,0.093944,0.010467,-0.062122,-0.072590


In [7]:
#save partial forms
phot.to_hdf('support/beta/Parts_DB.h5', 'Photometric')  #132,233 KB
colour.to_hdf('support/beta/Parts_DB.h5', 'Colour')  #132,233 KB
spectral.to_hdf('support/beta/Parts_DB.h5', 'Spectral')  #132,233 KB

#Save the whole form:
df_chi.to_hdf('support/beta/ThesisDB.h5', 'Dataframe')
df_chi.to_csv('support/beta/ThesisDB.csv')  
fittify(df_chi,filename='support/beta/ThesisDB.fits')

In [12]:
def prep_set(dir_name):
    """Quick function to set up folders when a new empty dataset needs to be created.
    Directory name needs to be a string"""
    os.chdir("D:\Onedrive\Thesis\support")
    os.mkdir(dir_name)
    os.mkdir("%s\pics"%(dir_name))
    os.mkdir("%s\pics\hybrid"%(dir_name))
    os.mkdir("%s\pics\lap"%(dir_name))
    os.mkdir("%s\pics\pfa"%(dir_name))
    os.mkdir("%s\pics\pairwise"%(dir_name))
    os.mkdir("%s\pics\pfa2"%(dir_name))
    os.mkdir("%s\pics\eif"%(dir_name))
    os.mkdir("%s\ylist"%(dir_name))
    os.chdir("D:\Onedrive\Thesis")
#prep_set("zeta")

In [22]:
prep_set("final_chi2")

In [11]:

#setup initial dataframe
# Sersic Photometry
SDSS = pandafy('fits/SersicCatSDSS.fits')
UKIDSS = pandafy('fits/SersicCatUKIDSS.fits')
dfm = pd.merge(SDSS,UKIDSS,right_index=True, left_index=True, how='inner') #116374 entries

# Spectral lines
simple = pandafy('fits/GaussFitSimple.fits')
simple = simple[simple['IS_BEST'] == True]  #select only the best fits from all
simple = simple[simple['SURVEY_CODE'] == 5 | 1]   #select only GAMA (5) or SDSS (1) fits
simple = simple[simple['SN'] > 3]   # cut of point for goodness of fit
dfm = pd.merge(simple,dfm,right_index=True, left_index=True, how='inner')

pd.merge(dfm['D4000N'],SDSS,right_index=True, left_index=True, how='inner')



Unnamed: 0_level_0,D4000N,RA,DEC,R_PETRO,SURVEY_OLDCLASS,SURVEY_CLASS,CENFLUX_u,BACKFLUX_u,PSFNUM_u,PSFCHI2_u,...,GALMUEAVG_z,GALR90_z,GALCHI2FULL_z,GALNDOF_z,GALCHI2_z,GALPLAN_z,PRICHI2FULL_z,PRINFP_z,PRINDOF_z,PRICHI2_z
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,1.811305,174.005984,0.720935,18.622816,7,6,163.785355,3.086078,2,1.36,...,19.848860,3.227556,10736.242188,10187,1.053916,1,123.995903,7,109,1.137577
6806,1.218718,174.022807,0.705945,18.544878,7,6,51.438156,3.130469,3,1.23,...,20.299273,2.652840,11181.544922,10180,1.098384,1,264.500427,7,139,1.902881
6808,1.660480,174.100731,0.658922,18.813841,7,6,-0.134276,2.705428,3,1.08,...,19.540232,5.894922,11281.231445,10187,1.107414,1,128.886948,7,86,1.498685
6810,1.416672,174.109090,0.803909,19.116735,6,6,30.402906,2.429410,7,1.16,...,19.201540,4.154457,11528.666992,10187,1.131704,1,131.704849,7,97,1.357782
6816,6.795266,174.018971,0.665804,19.791199,6,5,40.775845,2.443867,1,0.24,...,20.982004,2.014942,10979.238281,10159,1.080740,1,15.411397,7,15,1.027426
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4308320,1.411434,140.506392,-0.930739,20.764076,2,2,-3.972553,4.019185,8,1.89,...,20.916992,2.178205,7588.776855,10187,0.744947,1,58.263245,7,50,1.165265
4319812,3.428647,217.170365,1.249318,20.719929,2,2,1.124861,-1.575818,6,1.60,...,-9999.000000,-9999.000000,-9999.000000,-9999,-9999.000000,0,-9999.000000,-9999,-9999,-9999.000000
4321280,1.989459,218.979370,1.185022,20.635006,2,2,13.811870,5.310869,8,1.12,...,8.332987,0.012094,9830.292969,10184,0.965268,1,19.913115,7,15,1.327541
4321789,2.199493,219.541754,1.203977,20.659449,2,2,5.793746,5.638339,8,1.05,...,20.707935,2.433210,9343.618164,10194,0.916580,1,38.873062,7,20,1.943653


In [15]:
df.iloc[:,1:4]

Unnamed: 0_level_0,DEC,Z,Distance
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
6802,0.72093,0.050529,216.124754
6816,0.66580,0.076399,324.889099
6821,0.81543,0.003795,16.397478
6830,0.70608,0.114022,480.743251
6837,0.78998,0.074283,316.042533
...,...,...,...
3901702,-1.04799,0.197850,817.906867
3901768,-0.97609,0.188094,779.391965
3973288,0.08631,0.040766,174.746045
4021944,0.99253,0.232831,954.432629


# Eta + Zeta
## Full Photometric + Gaussfit (+ colour)
28120 rows × 134 columns



In [14]:
#setup initial dataframe
# Sersic Photometry
SDSS = pandafy('fits/SersicCatSDSS.fits')
UKIDSS = pandafy('fits/SersicCatUKIDSS.fits')
dfm = pd.merge(SDSS,UKIDSS,right_index=True, left_index=True, how='inner') #116374 entries

# Spectral lines
simple = pandafy('fits/GaussFitSimple.fits')
simple = simple[simple['IS_BEST'] == True]  #select only the best fits from all
simple = simple[simple['SURVEY_CODE'] == 5 | 1]   #select only GAMA (5) or SDSS (1) fits
simple = simple[simple['SN'] > 3]   # cut of point for goodness of fit
dfm = pd.merge(simple,dfm,right_index=True, left_index=True, how='inner')

# Spectral lines
DS = pandafy('fits/DirectSummation.fits')
DS = DS[DS['IS_BEST'] == True]  #select only the best fits from all
DS = DS[DS['SURVEY_CODE'] == 5 | 1]   #select only GAMA (5) or SDSS (1) fits
DS = DS[DS['SN'] > 3]   # cut of point for goodness of fit
#dfm = pd.merge(DS,dfm,right_index=True, left_index=True, how='inner')


# galactic extinction
extinc = pandafy('fits/GalacticExtinction.fits').loc[:,'A_u':'A_K_UKIDSS']  #select only values we need
dfm = pd.merge(dfm,extinc,right_index=True, left_index=True, how='inner')

# kcorrection
kcor = pandafy('fits/kcorr_auto_z00.fits').iloc[:,4:13]   #select only the values we need
dfm = pd.merge(dfm,kcor,right_index=True, left_index=True, how='inner')


# Set up constants:
bands = "ugrizYJHK"  #All the bands we will iterate over
arcsec = (2*np.pi)/(360*3600)  #one arcsec in radians

#initiate a new dataframe  (87123 entries)
dfm2 = dfm.iloc[:,1:4]
dis = cosmo.comoving_distance(dfm.iloc[:,3])
dfm2['Distance'] = dis # units of mega parsec
#dfm2['Petrosian'] = dfm['R_PETRO_x']
dfm2


j = 0
for i in bands: 
    #Absolute magnitude, based on distance, kcorrection and galactic foreground extinction
    dfm2['absmag_%s'%(i)] = 5 + (dfm['GALMAG_%s'%(i)] -5*np.log10((dis.value*10**6))) \
    - contains(dfm,'KCORR').iloc[:,j] - dfm.loc[:,'A_u':'A_K_UKIDSS'].iloc[:,j] 
    
    #Absolute magnitute at 10 Re   #per band: Mv = mv - 2.5*log10((distance / 10 pc)**2) - kcorr
    dfm2['absmag10re_%s'%(i)] = 5 + (dfm['GALMAG10RE_%s'%(i)] -5*np.log10((dis.value*10**6))) \
    - contains(dfm,'KCORR').iloc[:,j] - dfm.loc[:,'A_u':'A_K_UKIDSS'].iloc[:,j] 
    
    #Radius (kpc) that fits 90% of the light)
    dfm2['size90_%s'%(i)] = (np.sin(dfm['GALR90_%s'%(i)]*arcsec)*dis.value)*10**3    
   
    #Radius (kpc) where light is at 50%
    dfm2['sizeRE_%s'%(i)] = (np.sin(dfm['GALRE_%s'%(i)]*arcsec)*dis.value)*10**3    
    
    #Sersic index of the galaxy 
    dfm2['SersicIndex_%s'%(i)] = dfm['GALINDEX_%s'%(i)]  
    
    #ellipticity of the galaxy,
    dfm2['Ellipticity_%s'%(i)] = dfm['GALELLIP_%s'%(i)] 
    
    #Central surface brightness in (absmag / arcsec^2)  #No sense changing this  
    dfm2['MU@0_%s'%(i)] = dfm['GALMU0_%s'%(i)]  
    
    #Effective surface brightness at effective radius (absmag / arcsec^2) #No sense changing this
    dfm2['MU@E_%s'%(i)] = dfm['GALMUE_%s'%(i)] 
    
    #Average Effective surface brightness within effective radius (absmag / arcsec^2)
    dfm2['MUEAVG_%s'%(i)] = dfm['GALMUEAVG_%s'%(i)]  
    j += 1
dfm2 = dfm2[dfm2 > -9999]  #to set some nan before we go to colours and spectral 

    
#-------------------------------------------------------------
#Add spectral information, 52 columns added
#[:,123:175]
#Exrtract some flux info some line fluxes
equivW = pd.merge(endswith(dfm,"EW"),endswith(DS,"EW"),right_index=True, left_index=True, how='inner')   #Grab all the continua
#add the 4000 A break strength 
dfm2['D4000N'] = dfm['D4000N']  
for i in range(len(equivW.columns)):
    dfm2[equivW.columns[i]] = equivW.iloc[:,i]
#-------------------------------------------------------------
#Convert the colours and add them to the dataframe, 36 in total
# Adding this to the end instead, as we are unlikely to use them
#[:,87:123]
"""
b=np.arange(len(bands))                                  #to make an combinations series
combi = pd.Series(list(it.combinations(b,2)))   #praise to atomh33ls at stackoverflow
for i in combi:                                
    dfm2['%s-%s'%(bands[i[0]],bands[i[1]])] = \
    (dfm2['absmag_%s'%(bands[i[0]])]-dfm2['absmag_%s'%(bands[i[1]])])
"""
dfm2 = dfm2[dfm2 > -99999].dropna() #36765 rows × 175 columns, with old method
df = dfm2   # 26214 rows × 173 columns with new method. Where is this difference from?
#Unclear! Possibly in the merging of files in topcat. Which means we had been working with faulty files for a long while! 
# Saving the initial file (Alpha)
phot = dfm2.iloc[:,4:85]
spectral = dfm2.iloc[:,85:98]
spec_ds = dfm2.iloc[:,98::]
df



Unnamed: 0_level_0,RA,DEC,Z,Distance,absmag_u,absmag10re_u,size90_u,sizeRE_u,SersicIndex_u,Ellipticity_u,...,HGVA125_EW,HGF_EW,HGA_EW,G4300_EW,CA4227_EW,CN2_EW,CN1_EW,HDF_EW,HDA_EW,OII_EW
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,174.005997,0.72093,0.050529,216.124754,-16.498930,-16.498930,1.693470,1.217394,0.089800,0.6091,...,0.196930,1.659956,2.376555,-7.079205,0.294137,0.228589,0.115305,-0.445192,5.106353,22.621220
6830,174.280500,0.70608,0.114022,480.743251,-20.563993,-20.445307,1573.234621,151.985781,7.280300,0.3254,...,0.096942,-3.308804,-0.873673,-0.850715,-1.320217,-0.030753,-0.032678,-4.939517,-3.480545,17.085100
6837,174.302750,0.78998,0.074283,316.042533,-18.530791,-18.530265,13.819605,4.843815,1.545500,0.6322,...,0.084154,-1.563378,-3.370488,4.110785,1.398956,-0.042376,-0.055087,-2.672220,-1.597525,31.437363
6838,174.305540,0.79034,0.074549,317.154217,-17.886899,-17.877645,24.967339,6.598801,2.455100,0.8897,...,0.191652,-7.249705,-5.492196,-2.449695,0.602401,-0.045479,-0.041801,-3.263513,-1.605200,4.142478
6839,174.335370,0.83656,0.106638,450.375932,-18.298163,-18.298163,9.534485,5.777714,0.333200,0.5407,...,0.055273,-0.616201,-0.107329,-6.215616,-2.410805,-0.028316,-0.021249,-4.233356,-1.318381,13.022843
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901701,132.727790,-1.04692,0.103069,435.655876,-18.664665,-18.618874,25.032558,4.385122,4.128400,0.0564,...,0.046349,3.429878,10.539323,-2.207531,-2.036244,0.202093,0.172480,-0.988949,2.130985,-2.327094
3901702,132.732080,-1.04799,0.197850,817.906867,-19.268692,-19.268688,15.627759,6.619197,1.044400,0.6740,...,0.002959,-0.860333,-1.444047,-3.095410,-1.426697,-0.150364,-0.155667,-3.683531,-5.019410,10.128104
3901768,132.706580,-0.97609,0.188094,779.391965,-19.220608,-19.220547,21.666105,8.399797,1.267700,0.5981,...,0.072656,-0.881421,-0.387746,-3.123395,-1.092533,-0.005144,-0.081116,-3.093034,-5.734492,10.064606
3973288,183.976170,0.08631,0.040766,174.746045,-15.955128,-15.955128,3.670465,2.476511,0.176500,0.6153,...,-0.045059,-1.738272,-4.616337,-0.944286,-2.863228,-0.327484,-0.274016,-4.335427,-6.318155,-52.405647


In [16]:
pandafy('fits/GaussFitSimple.fits')



Unnamed: 0_level_0,SPECID,RA,DEC,Z,NQ,SURVEY,SURVEY_CODE,IS_BEST,IS_SBEST,SN,...,POS_OIR,POS_OIR_ERR,SIG_OIR,SIG_OIR_ERR,OIR_FLUX,OIR_FLUX_ERR,OIR_EW,OIR_EW_ERR,OIB_NPEG,OIR_NPEG
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,b'G12_Y3_017_187 ',174.005997,0.72093,0.050529,4,b'GAMA ',5,True,True,10.700412,...,36.653240,7.375686,9.924352,8.385731,-10.612795,17.580296,-0.880145,1.458204,0,0
6806,b'G12_Y1_AN1_238 ',174.022790,0.70594,0.331206,4,b'GAMA ',5,True,True,8.391569,...,39.226460,0.000000,6.350993,4.300473,8.926416,9.833449,0.802825,0.884472,1,1
6808,b'G12_Y1_AN1_235 ',174.100710,0.65891,0.229329,4,b'GAMA ',5,True,True,12.075199,...,42.658298,2.583113,1.864204,2.040785,2.046775,3.783602,0.227885,0.421267,0,0
6810,b'G12_Y2_041_164 ',174.109080,0.80390,0.326380,4,b'GAMA ',5,True,True,7.795063,...,49.013462,3.900790,1.687620,0.000000,-0.058938,2.235459,-0.007522,0.285293,1,1
6813,b'G12_Y3_017_177 ',174.063830,0.64454,0.484554,4,b'GAMA ',5,True,True,-99999.000000,...,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8003087,b'G02_Y6_017_254 ',37.836750,-4.05976,0.401419,2,b'GAMA ',5,True,True,-99999.000000,...,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,0,0
8003092,b'G02_Y6_017_246 ',37.647420,-4.02889,0.030410,2,b'GAMA ',5,True,True,11.608665,...,37.550610,2.204303,1.764049,1.685131,-2.160801,3.383549,-0.268825,0.420959,0,0
8003098,b'G02_Y6_017_196 ',37.146250,-4.19436,0.406779,2,b'GAMA ',5,True,True,-99999.000000,...,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,-99999.000000,0,0
8003102,b'G02_Y6_012_081 ',38.665420,-4.74022,0.253869,2,b'GAMA ',5,True,True,0.397337,...,36.947567,0.000000,1.687620,0.000000,6.320209,5.637381,5.519176,5.176303,2,2


In [17]:
spec_ds

Unnamed: 0_level_0,BH_NAD_EW,BH_FC_EW,BH_MH_EW,BH_MGG_EW,BH_HB_EW,BH_G_EW,BH_CAI_EW,BH_HK_EW,BH_CNB_EW,SIIR_EW_y,...,HGVA125_EW,HGF_EW,HGA_EW,G4300_EW,CA4227_EW,CN2_EW,CN1_EW,HDF_EW,HDA_EW,OII_EW
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,-0.008743,0.044196,-0.033514,-0.023454,0.003366,0.437772,-0.097645,-0.017441,-0.724730,4.862064,...,0.196930,1.659956,2.376555,-7.079205,0.294137,0.228589,0.115305,-0.445192,5.106353,22.621220
6830,-0.041824,0.015372,0.023589,0.031431,-0.065708,0.059294,-0.103945,0.272424,-0.003160,3.186709,...,0.096942,-3.308804,-0.873673,-0.850715,-1.320217,-0.030753,-0.032678,-4.939517,-3.480545,17.085100
6837,0.005419,0.000799,-0.007715,0.031544,-0.051029,0.057177,-0.081960,0.233290,0.105987,5.740624,...,0.084154,-1.563378,-3.370488,4.110785,1.398956,-0.042376,-0.055087,-2.672220,-1.597525,31.437363
6838,0.001788,0.093629,0.066746,-0.060903,-0.043507,0.214525,0.072564,0.359894,0.177474,0.713500,...,0.191652,-7.249705,-5.492196,-2.449695,0.602401,-0.045479,-0.041801,-3.263513,-1.605200,4.142478
6839,0.016772,-0.018329,0.095979,-0.036622,0.056803,0.457743,0.004745,0.374186,-0.060589,3.875029,...,0.055273,-0.616201,-0.107329,-6.215616,-2.410805,-0.028316,-0.021249,-4.233356,-1.318381,13.022843
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901701,0.065068,0.096110,0.018128,0.089248,0.039265,-0.178468,0.019684,0.391961,-0.121429,0.516896,...,0.046349,3.429878,10.539323,-2.207531,-2.036244,0.202093,0.172480,-0.988949,2.130985,-2.327094
3901702,0.054728,0.038396,-0.001372,0.010328,0.069122,0.189978,-0.011098,0.284348,0.166332,3.382962,...,0.002959,-0.860333,-1.444047,-3.095410,-1.426697,-0.150364,-0.155667,-3.683531,-5.019410,10.128104
3901768,-0.057609,-0.023099,0.023584,-0.016119,0.059766,0.102978,0.037121,0.159307,0.050241,1.354023,...,0.072656,-0.881421,-0.387746,-3.123395,-1.092533,-0.005144,-0.081116,-3.093034,-5.734492,10.064606
3973288,0.021775,-0.025369,-0.033720,-0.008591,-0.001896,-0.098911,0.069140,0.180783,-0.124684,1.083954,...,-0.045059,-1.738272,-4.616337,-0.944286,-2.863228,-0.327484,-0.274016,-4.335427,-6.318155,-52.405647


In [6]:
def save_db(db,dbname):
    db.to_hdf('support/%s/ThesisDB.h5'%(dbname), 'Dataframe')  
    db.to_csv('support/%s/ThesisDB.csv'%(dbname))  
    fittify(db,filename='support/%s/ThesisDB.fits'%(dbname))

    phot.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Photometric')  
    colour.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Colour') 
    spectral.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Spectral') 
    spec_ds.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Direct_Summation')

In [42]:
save_db(df, 'eta')

In [7]:
#Making new coumns with the prichi2 scores
merged = pd.merge(UKIDSS,SDSS,right_index=True, left_index=True, how='inner')
chi2 = startswith(merged,"PRICHI2_")

minchi = 0.5    
maxchi = 2  
chi2 = chi2[(chi2 > minchi) & (chi2 < maxchi)].dropna()
df_chi = pd.merge(dfm2,chi2, right_index = True, left_index = True, how='inner').iloc[:,0:-9]

print(' at prichi2 values between %.1f and  %.1f we have %i primarychi2 values to work with,\n \
      resulting in a final database of %i rows' %(minchi,maxchi,chi2.shape[0],df_chi.shape[0]))
print('This is a difference of %i compared to not filtering for chi2'%(dfm2.shape[0] - df_chi.shape[0]))


#create partial dataframes
phot = df_chi.iloc[:,4:85]
spectral = df_chi.iloc[:,85:98] 
colour = df_chi.iloc[:,98:]
save_db(df_chi, 'zeta')
colour

 at prichi2 values between 0.5 and  2.0 we have 20626 primarychi2 values to work with,
       resulting in a final database of 10047 rows
This is a difference of 15714 compared to not filtering for chi2


Unnamed: 0_level_0,BH_NAD_EW,BH_FC_EW,BH_MH_EW,BH_MGG_EW,BH_HB_EW,BH_G_EW,BH_CAI_EW,BH_HK_EW,BH_CNB_EW,SIIR_EW_y,...,HGVA125_EW,HGF_EW,HGA_EW,G4300_EW,CA4227_EW,CN2_EW,CN1_EW,HDF_EW,HDA_EW,OII_EW
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,-0.008743,0.044196,-0.033514,-0.023454,0.003366,0.437772,-0.097645,-0.017441,-0.724730,4.862064,...,0.196930,1.659956,2.376555,-7.079205,0.294137,0.228589,0.115305,-0.445192,5.106353,22.621220
6837,0.005419,0.000799,-0.007715,0.031544,-0.051029,0.057177,-0.081960,0.233290,0.105987,5.740624,...,0.084154,-1.563378,-3.370488,4.110785,1.398956,-0.042376,-0.055087,-2.672220,-1.597525,31.437363
6838,0.001788,0.093629,0.066746,-0.060903,-0.043507,0.214525,0.072564,0.359894,0.177474,0.713500,...,0.191652,-7.249705,-5.492196,-2.449695,0.602401,-0.045479,-0.041801,-3.263513,-1.605200,4.142478
6840,-0.011384,0.069849,0.057954,0.035413,0.022118,0.311013,0.119893,-0.116209,-0.324520,0.959405,...,0.152244,-0.876695,3.178101,-5.746708,-3.650215,-0.107605,0.034390,1.990391,-0.833084,-1.854510
6846,0.045265,0.031438,0.003481,0.059650,0.008445,0.447334,0.092075,0.251449,0.015348,1.626143,...,-0.018575,-0.474186,0.391324,-1.000964,-1.134056,-0.029514,-0.060556,-2.460188,-3.570271,8.357266
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901140,0.094130,0.055315,0.034424,0.024990,0.077324,0.198662,-0.046283,0.221825,0.008978,0.259098,...,0.089189,0.479922,4.411610,1.984872,-1.751637,-0.040141,-0.077261,-3.309701,-3.840308,4.008181
3901178,0.035229,0.065539,0.043015,0.069225,0.059403,0.033743,0.004945,0.234959,0.040156,2.018534,...,0.062577,1.632197,0.099033,-2.650076,-1.143423,0.053334,0.004347,-3.426721,-3.283710,3.261348
3901184,0.014905,0.289580,-0.060386,0.047520,-0.069665,0.158187,-0.001962,0.392657,-0.142219,3.312513,...,-0.029447,-4.794718,-3.694124,7.527526,-1.669704,-0.111379,-0.199986,-2.915524,6.258958,90.554977
3901441,0.032045,0.079350,0.029012,0.005863,0.034492,0.085834,0.012338,0.191551,0.094265,3.467352,...,0.037110,-1.966722,-0.405885,-0.853234,-1.119621,-0.039646,-0.063568,-2.228087,-3.227602,18.153252


In [54]:
df_chi

Unnamed: 0_level_0,RA,DEC,Z,Distance,absmag_u,absmag10re_u,size90_u,sizeRE_u,SersicIndex_u,Ellipticity_u,...,z-Y,z-J,z-H,z-K,Y-J,Y-H,Y-K,J-H,J-K,H-K
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,174.005997,0.72093,0.050529,216.124754,-16.498930,-16.498930,1.693470,1.217394,0.0898,0.6091,...,-0.074355,0.054119,0.080334,-0.244385,0.128474,0.154689,-0.170030,0.026215,-0.298504,-0.324719
6837,174.302750,0.78998,0.074283,316.042533,-18.530791,-18.530265,13.819605,4.843815,1.5455,0.6322,...,-0.023039,-0.056638,0.314390,-0.133409,-0.033599,0.337429,-0.110370,0.371028,-0.076772,-0.447800
6838,174.305540,0.79034,0.074549,317.154217,-17.886899,-17.877645,24.967339,6.598801,2.4551,0.8897,...,-0.446172,-0.643587,-0.201072,-0.502923,-0.197415,0.245101,-0.056751,0.442516,0.140664,-0.301851
6840,174.346880,0.69645,0.193060,799.021212,-19.391659,-19.391626,14.594531,5.791895,1.2069,0.4717,...,-0.071671,-0.042171,0.552355,0.083708,0.029500,0.624026,0.155378,0.594526,0.125879,-0.468647
6846,174.439290,0.66226,0.075374,320.606235,-18.610924,-18.610924,17.420147,7.471441,1.0143,0.4178,...,0.045004,-0.274082,0.143344,-0.317933,-0.319086,0.098340,-0.362937,0.417426,-0.043850,-0.461277
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901178,132.413170,-1.03312,0.217588,895.247581,-19.882443,-19.881783,38.374570,13.263538,1.5860,0.4871,...,0.452042,0.242949,0.943910,0.350133,-0.209093,0.491868,-0.101910,0.700961,0.107184,-0.593778
3901184,132.414960,-0.90883,0.177885,738.882025,-18.909851,-18.909851,11.606056,8.416164,0.0770,0.7276,...,0.314233,0.555982,0.806603,0.763355,0.241749,0.492370,0.449122,0.250621,0.207373,-0.043248
3901441,132.476290,-0.92563,0.056787,242.553587,-17.400572,-17.400572,6.448936,4.732271,0.0578,0.7700,...,0.284168,0.440234,0.450702,0.378112,0.156066,0.166534,0.093944,0.010467,-0.062122,-0.072590
3901679,132.693960,-0.91999,0.233353,956.449874,-20.154253,-19.934767,467.687257,18.560401,13.6021,0.9761,...,0.286492,0.204674,0.686936,0.317907,-0.081818,0.400444,0.031415,0.482262,0.113233,-0.369029


In [None]:
save_db()

In [None]:
#save partial forms
phot.to_hdf('support/beta/Parts_DB.h5', 'Photometric')  #132,233 KB
colour.to_hdf('support/beta/Parts_DB.h5', 'Colour')  #132,233 KB
spectral.to_hdf('support/beta/Parts_DB.h5', 'Spectral')  #132,233 KB

#Save the whole form:
df_chi.to_hdf('support/beta/ThesisDB.h5', 'Dataframe')
df_chi.to_csv('support/beta/ThesisDB.csv')  
fittify(df_chi,filename='support/beta/ThesisDB.fits')

In [40]:
dfm2.iloc[:,85:98] 

Unnamed: 0_level_0,D4000N,OIIR_EW,OIIB_EW,HB_EW,OIIIB_EW,OIIIR_EW,HA_EW,NIIB_EW,NIIR_EW,SIIB_EW,SIIR_EW,OIB_EW,OIR_EW
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
6802,1.811305,21.553307,7.789166,5.203340,2.655742,10.126300,27.802761,0.687129,5.807402,6.208872,5.112667,-1.989135,-0.880145
6816,6.795266,-34.111183,-11.811395,3.436451,7.093371,9.310982,17.182138,-1.516095,2.877985,6.346678,0.422289,1.549534,-0.758027
6821,0.483395,77.230042,27.027168,218.790573,426.813477,1269.069824,841.512207,4.165278,10.659965,23.259602,17.565214,4.357500,1.531288
6830,1.252495,16.846029,5.898776,3.278779,5.436171,-3.971652,14.210450,1.164525,4.816380,3.464169,2.662923,1.271554,-2.997967
6837,1.385110,34.734921,12.162887,6.860115,1.697633,5.026550,38.021099,3.087252,10.577848,8.620884,5.838690,1.671351,0.462878
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901702,1.441157,5.877393,2.058462,0.980625,-0.061665,1.198282,19.152622,2.778607,8.338077,4.027606,3.370611,1.024944,-0.441419
3901768,1.210178,9.299099,3.255895,3.274175,1.561168,2.894626,24.023663,1.761152,5.314301,4.649630,2.519361,1.081602,0.360750
3973288,1.871647,-16.881485,-5.466939,2.358678,1.760166,5.767296,6.166339,-0.499680,0.451688,1.079470,1.227048,0.932151,0.407310
4021944,1.373193,2.294754,0.805568,-0.766657,-1.944655,3.100679,0.818355,0.352321,1.223661,1.704313,1.474381,1.425928,-1.308098


## Comparison file to compare differences betweeen StellarMasses and our calculations

In [24]:
#comparison!
# Function is now completely based on functions like pandafy, contains, merging etc
# No longer any topcat dependencies
# Recepi: SersicCatSDSS + UKIDSS + GFsimple + galex + Kcor, all on inner merges
# magnitude is self appointed
#setup initial dataframe
# Sersic Photometry
SDSS = pandafy('fits/SersicCatSDSS.fits')
UKIDSS = pandafy('fits/SersicCatUKIDSS.fits')
dfm = pd.merge(SDSS,UKIDSS,right_index=True, left_index=True, how='inner') #116374 entries

# Spectral lines
simple = pandafy('fits/GaussFitSimple.fits')
simple = simple[simple['IS_BEST'] == True]  #select only the best fits from all
simple = simple[simple['SURVEY_CODE'] == 5 | 1]   #select only GAMA (5) or SDSS (1) fits
simple = simple[simple['SN'] > 3]   # cut of point for goodness of fit
dfm = pd.merge(simple,dfm,right_index=True, left_index=True, how='inner')

# galactic extinction
extinc = pandafy('fits/GalacticExtinction.fits').loc[:,'A_u':'A_K_UKIDSS']  #select only values we need
dfm = pd.merge(dfm,extinc,right_index=True, left_index=True, how='inner')

# kcorrection
kcor = pandafy('fits/kcorr_auto_z00.fits').iloc[:,4:13]   #select only the values we need
dfm = pd.merge(dfm,kcor,right_index=True, left_index=True, how='inner')

# Set up constants:
bands = "ugrizYJHK"  #All the bands we will iterate over
arcsec = (2*np.pi)/(360*3600)  #one arcsec in radians

#initiate a new dataframe  (87123 entries)
dfm2 = dfm.loc[:,'RA':'Z']
dis = cosmo.comoving_distance(dfm['Z'])
dfm2['Distance'] = dis # units of mega parsec
dfm2['Petrosian'] = dfm['R_PETRO_x']
    
StellarMasses = pandafy("fits/StellarMasses.fits")
absmag = startswith(StellarMasses,"absmag").iloc[:,::2]
absmag.pop('absmag_X')
dfm = pd.merge(dfm,absmag,right_index=True, left_index=True, how='inner')


j = 0
for i in bands: 

    dfm2['absmag_%s'%(i)] = 5 + (dfm['GALMAG_%s'%(i)] -5*np.log10((dis.value*10**6))) \
    - contains(dfm,'KCORR').iloc[:,j] - dfm.loc[:,'A_u':'A_K_UKIDSS'].iloc[:,j] 

    #Absolute magnitute at 10 Re   #per band: Mv = mv - 2.5*log10((distance / 10 pc)**2) - kcorr
    dfm2['absmag10re_%s'%(i)] = 5 + (dfm['GALMAG10RE_%s'%(i)] -5*np.log10((dis.value*10**6))) \
    - contains(dfm,'KCORR').iloc[:,j] - dfm.loc[:,'A_u':'A_K_UKIDSS'].iloc[:,j] 

    # absolute magnitude from StellarMasses for comparison
    dfm2['absmagSM_%s'%(i)] = dfm.loc[:,'absmag_u':'absmag_K'].iloc[:,j]

dfm2 = dfm2[dfm2 > -99].dropna() #36765 rows × 175 columns, with old method
fittify(dfm2,filename="fits/absmag_comparison.fits")  #save it to fits
dfm2



Unnamed: 0_level_0,RA,DEC,Z,Distance,Petrosian,absmag_u,absmag10re_u,absmagSM_u,absmag_g,absmag10re_g,...,absmagSM_Y,absmag_J,absmag10re_J,absmagSM_J,absmag_H,absmag10re_H,absmagSM_H,absmag_K,absmag10re_K,absmagSM_K
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,174.005997,0.72093,0.050529,216.124754,18.622816,-16.498930,-16.498930,-16.782124,-17.607029,-17.607029,...,-16.782124,-18.793329,-18.793329,-16.782124,-18.801930,-18.801930,-16.782124,-18.606329,-18.606329,-16.782124
6806,174.022790,0.70594,0.331206,1325.118735,18.544878,-20.782284,-20.778824,-21.258411,-21.484083,-21.484083,...,-21.258411,-23.503384,-23.502935,-21.258411,-23.926083,-23.922815,-21.258411,-24.187384,-24.183153,-21.258411
6816,174.018960,0.66580,0.076399,324.889099,19.791199,-16.686041,-16.579640,-16.342297,-17.424839,-17.424622,...,-16.342297,-18.641440,-18.641440,-16.342297,-18.236540,-18.236540,-16.342297,-20.164841,-19.893921,-16.342297
6821,174.153120,0.81543,0.003795,16.397478,14.659349,-13.908197,-13.702327,-14.801155,-15.839296,-15.555927,...,-14.801155,-14.388196,-14.166390,-14.801155,-12.590797,-12.308145,-14.801155,-13.379897,-13.096559,-14.801155
6830,174.280500,0.70608,0.114022,480.743251,18.984713,-20.563993,-20.445307,-18.461985,-19.240993,-19.240993,...,-18.461985,-20.183092,-20.182190,-18.461985,-20.477793,-20.477186,-18.461985,-20.057892,-20.057878,-18.461985
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901702,132.732080,-1.04799,0.197850,817.906867,18.584669,-19.268692,-19.268688,-19.923784,-20.659691,-20.658377,...,-19.923784,-21.300590,-21.260408,-19.923784,-22.575392,-22.539322,-19.923784,-22.752691,-22.694746,-19.923784
3901768,132.706580,-0.97609,0.188094,779.391965,18.920301,-19.220608,-19.220547,-19.567530,-20.150908,-20.150908,...,-19.567530,-21.500908,-21.500908,-19.567530,-21.833508,-21.833508,-19.567530,-21.883208,-21.883208,-19.567530
3973288,183.976170,0.08631,0.040766,174.746045,18.850441,-15.955128,-15.955128,-16.169090,-17.035029,-16.752305,...,-16.169090,-18.681729,-18.403893,-16.169090,-19.132128,-18.849394,-16.169090,-18.576430,-18.310954,-16.169090
4021944,215.298500,0.99253,0.232831,954.432629,19.180992,-19.822837,-19.546590,-19.973951,-20.531837,-20.412057,...,-19.973951,-20.873636,-20.837566,-19.973951,-20.876537,-20.786132,-19.973951,-22.157037,-22.052667,-19.973951


In [25]:
dfm2.describe()

Unnamed: 0,RA,DEC,Z,Distance,Petrosian,absmag_u,absmag10re_u,absmagSM_u,absmag_g,absmag10re_g,...,absmagSM_Y,absmag_J,absmag10re_J,absmagSM_J,absmag_H,absmag10re_H,absmagSM_H,absmag_K,absmag10re_K,absmagSM_K
count,29246.0,29246.0,29246.0,29246.0,29246.0,29246.0,29246.0,29246.0,29246.0,29246.0,...,29246.0,29246.0,29246.0,29246.0,29246.0,29246.0,29246.0,29246.0,29246.0,29246.0
mean,184.541856,0.363837,0.165456,683.846446,18.585306,-19.068399,-19.023094,-19.23036,-20.051816,-20.037322,...,-19.23036,-21.611374,-21.596052,-19.23036,-21.875537,-21.858514,-19.23036,-21.892507,-21.873482,-19.23036
std,35.450955,1.324094,0.073677,292.013447,0.634421,1.233027,1.218082,1.139965,1.051772,1.043843,...,1.139965,1.461004,1.454158,1.139965,1.416295,1.408905,1.139965,1.508015,1.500699,1.139965
min,129.00008,-1.99951,0.003795,16.397478,13.644097,-30.598766,-30.598647,-26.0592,-28.96533,-28.720906,...,-26.0592,-32.596253,-32.58389,-26.0592,-31.256765,-31.25674,-26.0592,-30.510034,-30.510034,-26.0592
25%,138.958535,-0.70475,0.113582,478.936841,18.199988,-19.833515,-19.787243,-20.007983,-20.720972,-20.706515,...,-20.007983,-22.572734,-22.556534,-20.007983,-22.860607,-22.841143,-20.007983,-22.958899,-22.9349,-20.007983
50%,184.820545,0.34906,0.154687,646.054999,18.62168,-19.110139,-19.073044,-19.309752,-20.154091,-20.143848,...,-19.309752,-21.769407,-21.757477,-19.309752,-22.038475,-22.024968,-19.309752,-22.07022,-22.052768,-19.309752
75%,216.975645,1.35379,0.20879,860.872036,18.970613,-18.351115,-18.317029,-18.559257,-19.503468,-19.495547,...,-18.559257,-20.818814,-20.8071,-18.559257,-21.056537,-21.04761,-18.559257,-21.012706,-21.000161,-18.559257
max,223.49883,2.99992,0.386161,1523.708549,24.178965,-10.581596,-10.366893,-10.33178,-11.002395,-10.765853,...,-10.33178,58.872402,58.882381,-10.33178,-11.619396,-11.568588,-10.33178,-11.101909,-11.098545,-10.33178


# Delta 2
## Trimmed down database. Try to figure out what is really important

In [3]:
#setup initial dataframe
# Sersic Photometry
SDSS = pandafy('fits/SersicCatSDSS.fits')
UKIDSS = pandafy('fits/SersicCatUKIDSS.fits')
dfm = pd.merge(SDSS,UKIDSS,right_index=True, left_index=True, how='inner') #116374 entries

# Spectral lines
simple = pandafy('fits/GaussFitSimple.fits')
simple = simple[simple['IS_BEST'] == True]  #select only the best fits from all
simple = simple[simple['SURVEY_CODE'] == 5 | 1]   #select only GAMA (5) or SDSS (1) fits
simple = simple[simple['SN'] > 3]   # cut of point for goodness of fit
dfm = pd.merge(simple,dfm,right_index=True, left_index=True, how='inner')

# Spectral lines
DS = pandafy('fits/DirectSummation.fits')
DS = DS[DS['IS_BEST'] == True]  #select only the best fits from all
DS = DS[DS['SURVEY_CODE'] == 5 | 1]   #select only GAMA (5) or SDSS (1) fits
DS = DS[DS['SN'] > 3]   # cut of point for goodness of fit
#dfm = pd.merge(DS,dfm,right_index=True, left_index=True, how='inner')


# galactic extinction
extinc = pandafy('fits/GalacticExtinction.fits').loc[:,'A_u':'A_K_UKIDSS']  #select only values we need
dfm = pd.merge(dfm,extinc,right_index=True, left_index=True, how='inner')

# kcorrection
kcor = pandafy('fits/kcorr_auto_z00.fits').iloc[:,4:13]   #select only the values we need
dfm = pd.merge(dfm,kcor,right_index=True, left_index=True, how='inner')


# Set up constants:
bands = "ugrizYJHK"  #All the bands we will iterate over
arcsec = (2*np.pi)/(360*3600)  #one arcsec in radians

#initiate a new dataframe  (87123 entries)
dfm2 = dfm.iloc[:,1:4]
dis = cosmo.comoving_distance(dfm.iloc[:,3])
dfm2['Distance'] = dis # units of mega parsec
#dfm2['Petrosian'] = dfm['R_PETRO_x']
dfm2


j = 0
for i in bands: 
    #Absolute magnitude, based on distance, kcorrection and galactic foreground extinction
    dfm2['absmag_%s'%(i)] = 5 + (dfm['GALMAG_%s'%(i)] -5*np.log10((dis.value*10**6))) \
    - contains(dfm,'KCORR').iloc[:,j] - dfm.loc[:,'A_u':'A_K_UKIDSS'].iloc[:,j] 
     
    #Radius (kpc) that fits 90% of the light)
    dfm2['size90_%s'%(i)] = (np.sin(dfm['GALR90_%s'%(i)]*arcsec)*dis.value)*10**3    
   
    #Radius (kpc) where light is at 50%
    dfm2['sizeRE_%s'%(i)] = (np.sin(dfm['GALRE_%s'%(i)]*arcsec)*dis.value)*10**3    
    
    #Sersic index of the galaxy 
    dfm2['SersicIndex_%s'%(i)] = dfm['GALINDEX_%s'%(i)]  
    
    #ellipticity of the galaxy,
    dfm2['Ellipticity_%s'%(i)] = dfm['GALELLIP_%s'%(i)] 
    
    #Central surface brightness in (absmag / arcsec^2)  #No sense changing this  
    dfm2['MU@0_%s'%(i)] = dfm['GALMU0_%s'%(i)]  
    
    #Effective surface brightness at effective radius (absmag / arcsec^2) #No sense changing this
    dfm2['MU@E_%s'%(i)] = dfm['GALMUE_%s'%(i)] 
    
    j += 1
dfm2 = dfm2[dfm2 > -9999]  #to set some nan before we go to colours and spectral 

    
#-------------------------------------------------------------
#Add spectral information, 52 columns added
#[:,123:175]
#Exrtract some flux info some line fluxes
equivW = pd.merge(endswith(dfm,"EW"),endswith(DS,"EW"),right_index=True, left_index=True, how='inner')   #Grab all the continua
#add the 4000 A break strength 
dfm2['D4000N'] = dfm['D4000N']  
for i in range(len(equivW.columns)):
    dfm2[equivW.columns[i]] = equivW.iloc[:,i]
#-------------------------------------------------------------
#Convert the colours and add them to the dataframe, 36 in total
# Adding this to the end instead, as we are unlikely to use them
#[:,87:123]

b=np.arange(len(bands))                                  #to make an combinations series
combi = pd.Series(list(it.combinations(b,2)))   #praise to atomh33ls at stackoverflow
for i in combi:                                
    dfm2['%s-%s'%(bands[i[0]],bands[i[1]])] = \
    (dfm2['absmag_%s'%(bands[i[0]])]-dfm2['absmag_%s'%(bands[i[1]])])

dfm2 = dfm2[dfm2 > -99999].dropna() #36765 rows × 175 columns, with old method
df = dfm2   # 26214 rows × 173 columns with new method. Where is this difference from?
#Unclear! Possibly in the merging of files in topcat. Which means we had been working with faulty files for a long while! 
# Saving the initial file (Alpha)




In [44]:
phot_feat = 7

phot = dfm2.iloc[:,4:9*phot_feat+4]  #9*phot feature number
spectral = dfm2.iloc[:,9*phot_feat+4:9*phot_feat+4+13] #13
spec_ds = dfm2.iloc[:,9*phot_feat+4+13:9*phot_feat+4+13+51] #51
colour = dfm2.iloc[:,9*phot_feat+4+13+51:]  #36
spec_ds

Unnamed: 0_level_0,BH_NAD_EW,BH_FC_EW,BH_MH_EW,BH_MGG_EW,BH_HB_EW,BH_G_EW,BH_CAI_EW,BH_HK_EW,BH_CNB_EW,SIIR_EW_y,...,HGVA125_EW,HGF_EW,HGA_EW,G4300_EW,CA4227_EW,CN2_EW,CN1_EW,HDF_EW,HDA_EW,OII_EW
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,-0.008743,0.044196,-0.033514,-0.023454,0.003366,0.437772,-0.097645,-0.017441,-0.724730,4.862064,...,0.196930,1.659956,2.376555,-7.079205,0.294137,0.228589,0.115305,-0.445192,5.106353,22.621220
6830,-0.041824,0.015372,0.023589,0.031431,-0.065708,0.059294,-0.103945,0.272424,-0.003160,3.186709,...,0.096942,-3.308804,-0.873673,-0.850715,-1.320217,-0.030753,-0.032678,-4.939517,-3.480545,17.085100
6837,0.005419,0.000799,-0.007715,0.031544,-0.051029,0.057177,-0.081960,0.233290,0.105987,5.740624,...,0.084154,-1.563378,-3.370488,4.110785,1.398956,-0.042376,-0.055087,-2.672220,-1.597525,31.437363
6838,0.001788,0.093629,0.066746,-0.060903,-0.043507,0.214525,0.072564,0.359894,0.177474,0.713500,...,0.191652,-7.249705,-5.492196,-2.449695,0.602401,-0.045479,-0.041801,-3.263513,-1.605200,4.142478
6839,0.016772,-0.018329,0.095979,-0.036622,0.056803,0.457743,0.004745,0.374186,-0.060589,3.875029,...,0.055273,-0.616201,-0.107329,-6.215616,-2.410805,-0.028316,-0.021249,-4.233356,-1.318381,13.022843
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901701,0.065068,0.096110,0.018128,0.089248,0.039265,-0.178468,0.019684,0.391961,-0.121429,0.516896,...,0.046349,3.429878,10.539323,-2.207531,-2.036244,0.202093,0.172480,-0.988949,2.130985,-2.327094
3901702,0.054728,0.038396,-0.001372,0.010328,0.069122,0.189978,-0.011098,0.284348,0.166332,3.382962,...,0.002959,-0.860333,-1.444047,-3.095410,-1.426697,-0.150364,-0.155667,-3.683531,-5.019410,10.128104
3901768,-0.057609,-0.023099,0.023584,-0.016119,0.059766,0.102978,0.037121,0.159307,0.050241,1.354023,...,0.072656,-0.881421,-0.387746,-3.123395,-1.092533,-0.005144,-0.081116,-3.093034,-5.734492,10.064606
3973288,0.021775,-0.025369,-0.033720,-0.008591,-0.001896,-0.098911,0.069140,0.180783,-0.124684,1.083954,...,-0.045059,-1.738272,-4.616337,-0.944286,-2.863228,-0.327484,-0.274016,-4.335427,-6.318155,-52.405647


In [45]:
save_db(dfm2,'delta2')

In [7]:

def save_db(db,dbname):
    db.to_hdf('support/%s/ThesisDB.h5'%(dbname), 'Dataframe')  
    db.to_csv('support/%s/ThesisDB.csv'%(dbname))  
    fittify(db,filename='support/%s/ThesisDB.fits'%(dbname))

    phot.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Photometric')  
    colour.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Colour') 
    spectral.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Spectral')
    spec_ds.to_hdf('support/%s/Parts_DB.h5'%(dbname), 'Direct_Summation')

Unnamed: 0_level_0,OIIR_EW,OIIB_EW,HB_EW_x,OIIIB_EW_x,OIIIR_EW_x,HA_EW_x,NIIB_EW_x,NIIR_EW_x,SIIB_EW_x,SIIR_EW_x,...,HGVA125_EW,HGF_EW,HGA_EW,G4300_EW,CA4227_EW,CN2_EW,CN1_EW,HDF_EW,HDA_EW,OII_EW
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,21.553307,7.789166,5.203340,2.655742,10.126300,27.802761,0.687129,5.807402e+00,6.208872,5.112667,...,0.196930,1.659956,2.376555,-7.079205,0.294137,0.228589,0.115305,-0.445192,5.106353,22.621220
6806,7.365941,2.578125,1.909329,0.494267,0.753895,21.207283,3.821027,7.517714e+00,-99999.000000,-99999.000000,...,0.099774,-1.991010,-3.752701,0.457309,-0.766861,-0.037088,-0.092805,-4.209662,-5.741475,7.843846
6808,2.161983,0.756938,-0.434862,1.035163,-1.017726,0.110285,0.000012,8.330440e-07,0.571462,0.092501,...,-0.079656,0.886773,1.500855,-2.506638,-1.119810,-0.026407,-0.036579,-1.666878,-0.830661,3.648445
6810,6.066661,2.122922,0.205869,0.785484,1.310187,10.588799,2.938406,9.549119e+00,-99999.000000,-99999.000000,...,0.054843,0.799809,4.086884,-2.943739,-0.077908,0.113761,0.017161,-3.087701,0.414285,7.352146
6816,-34.111183,-11.811395,3.436451,7.093371,9.310982,17.182138,-1.516095,2.877985e+00,6.346678,0.422289,...,0.233495,62.945965,166.231918,-8.896704,-0.844083,0.315648,0.739713,1.438660,28.454725,-57.799805
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4308320,11.459296,4.007777,2.837507,-0.345922,0.573980,15.157228,3.671342,7.358073e+00,3.923401,3.260149,...,0.007765,-2.007984,-0.636263,-0.063137,-1.887590,0.075467,-0.005248,-0.018997,-0.287735,8.711409
4319812,31.332191,23.520430,0.507694,4.493574,1.272802,-99999.000000,-99999.000000,-9.999900e+04,-99999.000000,-99999.000000,...,0.366260,-5.503741,-7.335265,5.553703,1.725188,0.302364,0.242033,-4.300540,4.825197,33.909561
4321280,16.611036,56.898228,2.896986,2.263741,2.638978,8.887829,0.526345,7.861805e+00,4.743418,-0.054453,...,-0.620583,14.274184,7.964872,1399.420044,2.865519,-0.852766,-0.788883,13.462683,52.568886,-129.242676
4321789,-12.789153,-4.361274,2.650465,-1.000902,4.989584,13.593804,2.774072,4.359515e+00,-0.474219,-1.572092,...,-99999.000000,-47.072559,-47.866295,18.413559,-16.010683,-4.594132,-1.249743,-33.287228,-25.643772,-7.239183


# Old Delta and Epsilon

In [28]:
# Saving the initial files (delta)
phot = df.iloc[:,5:50]
spectral = df.iloc[:,50:63]
colour = df.iloc[:,63:]


#Full database
df.to_hdf('support/delta/ThesisDB.h5', 'Dataframe')  
df.to_csv('support/delta/ThesisDB.csv')  
fittify(df,filename='support/delta/ThesisDB.fits')

phot.to_hdf('support/delta/Parts_DB.h5', 'Photometric')  
colour.to_hdf('support/delta/Parts_DB.h5', 'Colour') 
spectral.to_hdf('support/delta/Parts_DB.h5', 'Spectral') 

In [29]:
spectral

Unnamed: 0_level_0,D4000N,OIIR_EW,OIIB_EW,HB_EW,OIIIB_EW,OIIIR_EW,HA_EW,NIIB_EW,NIIR_EW,SIIB_EW,SIIR_EW,OIB_EW,OIR_EW
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
6802,1.811305,21.553307,7.789166,5.203340,2.655742,10.126300,27.802761,0.687129,5.807402,6.208872,5.112667,-1.989135,-0.880145
6816,6.795266,-34.111183,-11.811395,3.436451,7.093371,9.310982,17.182138,-1.516095,2.877985,6.346678,0.422289,1.549534,-0.758027
6821,0.483395,77.230042,27.027168,218.790573,426.813477,1269.069824,841.512207,4.165278,10.659965,23.259602,17.565214,4.357500,1.531288
6830,1.252495,16.846029,5.898776,3.278779,5.436171,-3.971652,14.210450,1.164525,4.816380,3.464169,2.662923,1.271554,-2.997967
6837,1.385110,34.734921,12.162887,6.860115,1.697633,5.026550,38.021099,3.087252,10.577848,8.620884,5.838690,1.671351,0.462878
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901702,1.441157,5.877393,2.058462,0.980625,-0.061665,1.198282,19.152622,2.778607,8.338077,4.027606,3.370611,1.024944,-0.441419
3901768,1.210178,9.299099,3.255895,3.274175,1.561168,2.894626,24.023663,1.761152,5.314301,4.649630,2.519361,1.081602,0.360750
3973288,1.871647,-16.881485,-5.466939,2.358678,1.760166,5.767296,6.166339,-0.499680,0.451688,1.079470,1.227048,0.932151,0.407310
4021944,1.373193,2.294754,0.805568,-0.766657,-1.944655,3.100679,0.818355,0.352321,1.223661,1.704313,1.474381,1.425928,-1.308098


# Epsilon
## One final dataset, filtered for 0.5 < prichi < 2, based on the new creation method

In [30]:
#Making new coumns with the prichi2 scores
merged = pd.merge(UKIDSS,SDSS,right_index=True, left_index=True, how='inner')
chi2 = startswith(merged,"PRICHI2_")

minchi = 0.5    
maxchi = 2  
chi2 = chi2[(chi2 > minchi) & (chi2 < maxchi)].dropna()
df_chi = pd.merge(df,chi2, right_index = True, left_index = True, how='inner').iloc[:,0:-9]

print(' at prichi2 values between %.1f and  %.1f we have %i primarychi2 values to work with,\n \
      resulting in a final database of %i rows' %(minchi,maxchi,chi2.shape[0],df_chi.shape[0]))
print('This is a difference of %i compared to not filtering for chi2'%(df.shape[0] - df_chi.shape[0]))

 at prichi2 values between 0.5 and  2.0 we have 20626 primarychi2 values to work with,
       resulting in a final database of 10941 rows
This is a difference of 17187 compared to not filtering for chi2


In [31]:
# Saving the initial files (epsilon)
df = df_chi
phot = df.iloc[:,5:50]
spectral = df.iloc[:,50:63]
colour = df.iloc[:,63:]

#Full database
df.to_hdf('support/epsilon/ThesisDB.h5', 'Dataframe')  
df.to_csv('support/epsilon/ThesisDB.csv')  
fittify(df,filename='support/epsilon/ThesisDB.fits')

phot.to_hdf('support/epsilon/Parts_DB.h5', 'Photometric')  
colour.to_hdf('support/epsilon/Parts_DB.h5', 'Colour') 
spectral.to_hdf('support/epsilon/Parts_DB.h5', 'Spectral') 

In [32]:
spectral

Unnamed: 0_level_0,D4000N,OIIR_EW,OIIB_EW,HB_EW,OIIIB_EW,OIIIR_EW,HA_EW,NIIB_EW,NIIR_EW,SIIB_EW,SIIR_EW,OIB_EW,OIR_EW
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
6802,1.811305,21.553307,7.789166,5.203340,2.655742,10.126300,27.802761,0.687129,5.807402,6.208872,5.112667,-1.989135,-0.880145
6837,1.385110,34.734921,12.162887,6.860115,1.697633,5.026550,38.021099,3.087252,10.577848,8.620884,5.838690,1.671351,0.462878
6838,1.349538,10.899780,3.815868,2.464753,1.263155,5.960536,13.821626,0.055579,3.648290,3.776059,2.119236,1.636801,0.542869
6840,1.672063,6.409152,2.362531,0.115800,-0.259979,0.501845,1.573605,5.874560,7.832719,0.974257,0.945084,0.188303,0.072725
6846,1.404212,9.112012,3.187400,1.720885,0.000390,0.000234,11.966150,2.106978,6.216982,2.646373,1.568011,0.460498,-0.019597
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901178,1.482973,3.496359,1.224071,0.941938,2.954195,-0.913251,7.438535,1.510611,4.464102,1.133396,1.672038,0.907764,0.303785
3901184,1.122860,76.967346,27.053129,9.862450,3.676971,12.371185,39.961021,1.548970,11.018708,6.118247,3.645648,2.623581,0.868077
3901441,1.259529,14.125132,4.942568,2.773408,0.739347,3.117004,18.666033,1.843453,6.023756,4.886065,3.294517,0.722539,-0.125520
3901679,1.316518,7.426164,2.600290,0.224837,0.115570,0.483832,6.959416,1.252558,4.049732,1.862679,2.077888,1.376938,0.414553


In [257]:
contains(dfm,'GALRE_')[contains(dfm,'GALRE_') != -3389.660889].dropna()

Unnamed: 0_level_0,GALRE_u,GALRE_g,GALRE_r,GALRE_i,GALRE_z,GALRE_Y,GALRE_J,GALRE_H,GALRE_K
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
6802,1.161855,1.039883,1.107750,1.144532,1.321591,1.043476,1.161651,1.018661,1.057477
6806,2.829396,1.743070,1.690017,1.583740,1.217891,1.338067,1.090699,1.229011,1.174838
6816,3.047441,0.953743,0.944047,1.782699,0.833533,1.497736,0.401952,1.044730,1.173889
6821,2.449953,16.283424,7.375216,6.154240,4.491275,1.802226,11.620242,0.325304,4.003115
6830,65.210106,2.128886,1.943114,2.055560,1.909553,3.729814,1.770122,1.738155,1.467361
...,...,...,...,...,...,...,...,...,...
3901702,1.669270,1.642624,1.527195,1.479667,1.264334,0.422699,0.468532,0.809566,0.745969
3901768,2.222992,1.968878,1.860940,1.864093,1.968200,1.467972,1.587062,1.640489,1.424139
3973288,2.923197,1.901349,3.083849,0.719765,0.432666,0.324220,0.369341,0.367442,0.455074
4021944,0.105260,0.005695,0.017865,0.003424,0.002687,0.206654,0.016204,0.010373,1.278030


# Final and Final_chi2.
Removed u and Y band  
Removed absmag10RE and MUAVG  
New database:  57594 rows   
Prichi2: 10047 rows (unchanged)  

In [5]:
#setup initial dataframe
# Sersic Photometry
SDSS = pandafy('fits/SersicCatSDSS.fits')
UKIDSS = pandafy('fits/SersicCatUKIDSS.fits')
dfm = pd.merge(SDSS,UKIDSS,right_index=True, left_index=True, how='inner') #116374 entries

# Spectral lines
simple = pandafy('fits/GaussFitSimple.fits')
simple = simple[simple['IS_BEST'] == True]  #select only the best fits from all
simple = simple[simple['SURVEY_CODE'] == 5 | 1]   #select only GAMA (5) or SDSS (1) fits
simple = simple[simple['SN'] > 3]   # cut of point for goodness of fit
dfm = pd.merge(simple,dfm,right_index=True, left_index=True, how='inner')

# Spectral lines
DS = pandafy('fits/DirectSummation.fits')
DS = DS[DS['IS_BEST'] == True]  #select only the best fits from all
DS = DS[DS['SURVEY_CODE'] == 5 | 1]   #select only GAMA (5) or SDSS (1) fits
DS = DS[DS['SN'] > 3]   # cut of point for goodness of fit
#dfm = pd.merge(DS,dfm,right_index=True, left_index=True, how='inner')


# galactic extinction
extinc = pandafy('fits/GalacticExtinction.fits').loc[:,'A_u':'A_K_UKIDSS']  #select only values we need
dfm = pd.merge(dfm,extinc,right_index=True, left_index=True, how='inner')

# kcorrection
kcor = pandafy('fits/kcorr_auto_z00.fits').iloc[:,4:13]   #select only the values we need
dfm = pd.merge(dfm,kcor,right_index=True, left_index=True, how='inner')


# Set up constants:
bands = "grizJHK"  #All the bands we will iterate over
arcsec = (2*np.pi)/(360*3600)  #one arcsec in radians

#initiate a new dataframe  (87123 entries)
dfm2 = dfm.iloc[:,1:4]
dis = cosmo.comoving_distance(dfm.iloc[:,3])
dfm2['Distance'] = dis # units of mega parsec
#dfm2['Petrosian'] = dfm['R_PETRO_x']
dfm2


j = 0
for i in bands: 
    #Absolute magnitude, based on distance, kcorrection and galactic foreground extinction
    dfm2['absmag_%s'%(i)] = 5 + (dfm['GALMAG_%s'%(i)] -5*np.log10((dis.value*10**6))) \
    - contains(dfm,'KCORR').iloc[:,j] - dfm.loc[:,'A_u':'A_K_UKIDSS'].iloc[:,j] 
     
    #Radius (kpc) that fits 90% of the light)
    dfm2['size90_%s'%(i)] = (np.sin(dfm['GALR90_%s'%(i)]*arcsec)*dis.value)*10**3    
   
    #Radius (kpc) where light is at 50%
    dfm2['sizeRE_%s'%(i)] = (np.sin(dfm['GALRE_%s'%(i)]*arcsec)*dis.value)*10**3    
    
    #Sersic index of the galaxy 
    dfm2['SersicIndex_%s'%(i)] = dfm['GALINDEX_%s'%(i)]  
    
    #ellipticity of the galaxy,
    dfm2['Ellipticity_%s'%(i)] = dfm['GALELLIP_%s'%(i)] 
    
    #Central surface brightness in (absmag / arcsec^2)  #No sense changing this  
    dfm2['MU@0_%s'%(i)] = dfm['GALMU0_%s'%(i)]  
    
    #Effective surface brightness at effective radius (absmag / arcsec^2) #No sense changing this
    dfm2['MU@E_%s'%(i)] = dfm['GALMUE_%s'%(i)] 
    
    j += 1
dfm2 = dfm2[dfm2 > -9999]  #to set some nan before we go to colours and spectral 

    
#-------------------------------------------------------------
#Add spectral information, 52 columns added
#[:,123:175]
#Exrtract some flux info some line fluxes
equivW = pd.merge(endswith(dfm,"EW"),endswith(DS,"EW"),right_index=True, left_index=True, how='inner')   #Grab all the continua
#add the 4000 A break strength 
dfm2['D4000N'] = dfm['D4000N']  
for i in range(len(equivW.columns)):
    dfm2[equivW.columns[i]] = equivW.iloc[:,i]
#-------------------------------------------------------------
#Convert the colours and add them to the dataframe, 36 in total
# Adding this to the end instead, as we are unlikely to use them
#[:,87:123]

b=np.arange(len(bands))                                  #to make an combinations series
combi = pd.Series(list(it.combinations(b,2)))   #praise to atomh33ls at stackoverflow
for i in combi:                                
    dfm2['%s-%s'%(bands[i[0]],bands[i[1]])] = \
    (dfm2['absmag_%s'%(bands[i[0]])]-dfm2['absmag_%s'%(bands[i[1]])])

dfm2 = dfm2[dfm2 > -99999].dropna() #36765 rows × 175 columns, with old method
df = dfm2   # 26214 rows × 173 columns with new method. Where is this difference from?
#Unclear! Possibly in the merging of files in topcat. Which means we had been working with faulty files for a long while! 
# Saving the initial file (Alpha)




In [8]:
phot_feat = 7

phot = dfm2.iloc[:,4:7*phot_feat+4]  #9*phot feature number
spectral = dfm2.iloc[:,7*phot_feat+4:7*phot_feat+4+13] #13
spec_ds = dfm2.iloc[:,7*phot_feat+4+13:7*phot_feat+4+13+51] #51
colour = dfm2.iloc[:,7*phot_feat+4+13+51:]  #36
spec_ds

Unnamed: 0_level_0,BH_NAD_EW,BH_FC_EW,BH_MH_EW,BH_MGG_EW,BH_HB_EW,BH_G_EW,BH_CAI_EW,BH_HK_EW,BH_CNB_EW,SIIR_EW_y,...,HGVA125_EW,HGF_EW,HGA_EW,G4300_EW,CA4227_EW,CN2_EW,CN1_EW,HDF_EW,HDA_EW,OII_EW
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,-0.008743,0.044196,-0.033514,-0.023454,0.003366,0.437772,-0.097645,-0.017441,-0.724730,4.862064,...,0.196930,1.659956,2.376555,-7.079205,0.294137,0.228589,0.115305,-0.445192,5.106353,22.621220
6808,0.007116,0.034142,0.033207,0.008400,0.057128,0.131597,-0.072069,0.314184,0.116886,-0.292471,...,-0.079656,0.886773,1.500855,-2.506638,-1.119810,-0.026407,-0.036579,-1.666878,-0.830661,3.648445
6830,-0.041824,0.015372,0.023589,0.031431,-0.065708,0.059294,-0.103945,0.272424,-0.003160,3.186709,...,0.096942,-3.308804,-0.873673,-0.850715,-1.320217,-0.030753,-0.032678,-4.939517,-3.480545,17.085100
6835,-0.015647,-0.152090,-0.008213,0.198214,-0.146607,0.001324,0.061417,0.276546,0.219312,-0.647188,...,-0.263698,2.675990,3.353907,13.489329,-3.213815,0.304724,-0.067429,1.178801,2.598574,14.400934
6837,0.005419,0.000799,-0.007715,0.031544,-0.051029,0.057177,-0.081960,0.233290,0.105987,5.740624,...,0.084154,-1.563378,-3.370488,4.110785,1.398956,-0.042376,-0.055087,-2.672220,-1.597525,31.437363
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901768,-0.057609,-0.023099,0.023584,-0.016119,0.059766,0.102978,0.037121,0.159307,0.050241,1.354023,...,0.072656,-0.881421,-0.387746,-3.123395,-1.092533,-0.005144,-0.081116,-3.093034,-5.734492,10.064606
3973288,0.021775,-0.025369,-0.033720,-0.008591,-0.001896,-0.098911,0.069140,0.180783,-0.124684,1.083954,...,-0.045059,-1.738272,-4.616337,-0.944286,-2.863228,-0.327484,-0.274016,-4.335427,-6.318155,-52.405647
4085058,0.145775,-0.046811,0.078473,0.006829,0.041559,0.226586,-0.494108,1.052881,-4.042395,6.052217,...,-0.587518,17.095642,4.478552,0.783282,3.953585,-0.224233,-0.258292,-10.952732,-16.951679,-31.066132
4321280,0.033678,0.119293,-0.098914,0.057388,-0.009421,0.578582,-2.648538,0.285573,0.815203,-0.257172,...,-0.620583,14.274184,7.964872,1399.420044,2.865519,-0.852766,-0.788883,13.462683,52.568886,-129.242676


In [14]:
save_db(df, 'final')

In [15]:
colour

Unnamed: 0_level_0,g-r,g-i,g-z,g-J,g-H,g-K,r-i,r-z,r-J,r-H,...,i-z,i-J,i-H,i-K,z-J,z-H,z-K,J-H,J-K,H-K
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,0.460136,0.756982,0.963199,1.057528,1.058473,0.834047,0.296846,0.503063,0.597392,0.598337,...,0.206217,0.300545,0.301491,0.077065,0.094329,0.095274,-0.129151,0.000945,-0.223480,-0.224425
6808,1.537980,1.617781,1.470795,1.648487,2.328021,2.044914,0.079801,-0.067185,0.110507,0.790041,...,-0.146986,0.030706,0.710239,0.427133,0.177692,0.857225,0.574119,0.679534,0.396427,-0.283106
6830,0.293670,0.610579,0.493401,0.680246,0.955129,0.518495,0.316909,0.199731,0.386576,0.661458,...,-0.117178,0.069667,0.344550,-0.092084,0.186845,0.461728,0.025094,0.274882,-0.161751,-0.436634
6835,0.981132,1.229935,1.527130,2.030056,2.512154,2.488694,0.248803,0.545998,1.048923,1.531022,...,0.297195,0.800120,1.282218,1.258759,0.502925,0.985024,0.961564,0.482098,0.458639,-0.023460
6837,0.458025,0.630515,0.695809,0.751165,1.075825,0.778627,0.172490,0.237784,0.293140,0.617801,...,0.065294,0.120650,0.445311,0.148113,0.055356,0.380017,0.082819,0.324661,0.027463,-0.297198
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901768,0.721388,0.986654,0.954879,1.122536,1.489356,1.397846,0.265266,0.233491,0.401148,0.767968,...,-0.031775,0.135882,0.502703,0.411192,0.167657,0.534477,0.442967,0.366820,0.275310,-0.091510
3973288,1.321330,1.400969,0.967060,1.441474,1.872856,1.297539,0.079639,-0.354270,0.120144,0.551526,...,-0.433909,0.040505,0.471887,-0.103431,0.474414,0.905796,0.330479,0.431382,-0.143935,-0.575318
4085058,1.237941,1.535714,1.758121,2.249316,2.446348,2.971519,0.297773,0.520180,1.011375,1.208407,...,0.222406,0.713602,0.910633,1.435805,0.491195,0.688227,1.213398,0.197032,0.722203,0.525171
4321280,0.644223,0.384240,0.510355,0.594448,0.969209,1.195032,-0.259983,-0.133868,-0.049775,0.324986,...,0.126115,0.210208,0.584969,0.810793,0.084094,0.458854,0.684678,0.374761,0.600584,0.225823


In [19]:
#Making new coumns with the prichi2 scores
merged = pd.merge(UKIDSS,SDSS,right_index=True, left_index=True, how='inner')
chi2 = startswith(merged,"PRICHI2_")

minchi = 0.5    
maxchi = 2  
chi2 = chi2[(chi2 > minchi) & (chi2 < maxchi)].dropna()
df_chi = pd.merge(df,chi2, right_index = True, left_index = True, how='inner').iloc[:,0:-9]

print(' at prichi2 values between %.1f and  %.1f we have %i primarychi2 values to work with,\n \
      resulting in a final database of %i rows' %(minchi,maxchi,chi2.shape[0],df_chi.shape[0]))
print('This is a difference of %i compared to not filtering for chi2'%(df.shape[0] - df_chi.shape[0]))

dfm2 = df_chi

 at prichi2 values between 0.5 and  2.0 we have 20626 primarychi2 values to work with,
       resulting in a final database of 10047 rows
This is a difference of 47547 compared to not filtering for chi2


In [20]:
phot_feat = 7

phot = dfm2.iloc[:,4:7*phot_feat+4]  #9*phot feature number
spectral = dfm2.iloc[:,7*phot_feat+4:7*phot_feat+4+13] #13
spec_ds = dfm2.iloc[:,7*phot_feat+4+13:7*phot_feat+4+13+51] #51
colour = dfm2.iloc[:,7*phot_feat+4+13+51:]  #36
spec_ds

Unnamed: 0_level_0,BH_NAD_EW,BH_FC_EW,BH_MH_EW,BH_MGG_EW,BH_HB_EW,BH_G_EW,BH_CAI_EW,BH_HK_EW,BH_CNB_EW,SIIR_EW_y,...,HGVA125_EW,HGF_EW,HGA_EW,G4300_EW,CA4227_EW,CN2_EW,CN1_EW,HDF_EW,HDA_EW,OII_EW
CATAID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6802,-0.008743,0.044196,-0.033514,-0.023454,0.003366,0.437772,-0.097645,-0.017441,-0.724730,4.862064,...,0.196930,1.659956,2.376555,-7.079205,0.294137,0.228589,0.115305,-0.445192,5.106353,22.621220
6837,0.005419,0.000799,-0.007715,0.031544,-0.051029,0.057177,-0.081960,0.233290,0.105987,5.740624,...,0.084154,-1.563378,-3.370488,4.110785,1.398956,-0.042376,-0.055087,-2.672220,-1.597525,31.437363
6838,0.001788,0.093629,0.066746,-0.060903,-0.043507,0.214525,0.072564,0.359894,0.177474,0.713500,...,0.191652,-7.249705,-5.492196,-2.449695,0.602401,-0.045479,-0.041801,-3.263513,-1.605200,4.142478
6840,-0.011384,0.069849,0.057954,0.035413,0.022118,0.311013,0.119893,-0.116209,-0.324520,0.959405,...,0.152244,-0.876695,3.178101,-5.746708,-3.650215,-0.107605,0.034390,1.990391,-0.833084,-1.854510
6846,0.045265,0.031438,0.003481,0.059650,0.008445,0.447334,0.092075,0.251449,0.015348,1.626143,...,-0.018575,-0.474186,0.391324,-1.000964,-1.134056,-0.029514,-0.060556,-2.460188,-3.570271,8.357266
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3901140,0.094130,0.055315,0.034424,0.024990,0.077324,0.198662,-0.046283,0.221825,0.008978,0.259098,...,0.089189,0.479922,4.411610,1.984872,-1.751637,-0.040141,-0.077261,-3.309701,-3.840308,4.008181
3901178,0.035229,0.065539,0.043015,0.069225,0.059403,0.033743,0.004945,0.234959,0.040156,2.018534,...,0.062577,1.632197,0.099033,-2.650076,-1.143423,0.053334,0.004347,-3.426721,-3.283710,3.261348
3901184,0.014905,0.289580,-0.060386,0.047520,-0.069665,0.158187,-0.001962,0.392657,-0.142219,3.312513,...,-0.029447,-4.794718,-3.694124,7.527526,-1.669704,-0.111379,-0.199986,-2.915524,6.258958,90.554977
3901441,0.032045,0.079350,0.029012,0.005863,0.034492,0.085834,0.012338,0.191551,0.094265,3.467352,...,0.037110,-1.966722,-0.405885,-0.853234,-1.119621,-0.039646,-0.063568,-2.228087,-3.227602,18.153252


In [23]:
save_db(df, 'final_chi2')