In [53]:
### import libraries ###
from astropy.io import fits
from astropy import table
import os
import pandas as pd
from func_stat import statResid

In [2]:
### Input catalogue location ###
# Name of the CSV holding the test sample with duplicated objects kept in;
# it is opened by relative name, so the working directory is switched below.
testDuplInFileName=r"Test_MSpecOrig_duplicatesIn.csv"

# Directory containing the catalogue; made the current working directory.
dirCatalogs=r'/mnt/Data/Work/Sources/COSMOS/Catalogs/DuplicateCheck'
os.chdir(dirCatalogs)

In [3]:
# Load the catalogue (path is relative to the current working directory).
test = pd.read_csv(filepath_or_buffer=testDuplInFileName)

In [12]:
# Number of catalogue rows per object ID ('Seq'), for every object.
# Objects measured more than once are the entries with a count above 1.
seq_column = test['Seq']
test_uniq = seq_column.value_counts()

In [23]:
# Placeholder column for the spread (max - min) of z_spec within each object.
# Initialised with a float (0.0, not 0): the spreads written into it later are
# fractional, and assigning floats into an integer column through .loc is
# deprecated upcasting in recent pandas.
test['SpecScatter']=0.0

In [34]:
# Convert every object ID in 'Seq' to a Python int, element by element.
test['Seq'] = test['Seq'].map(int)

In [43]:
# Spread of the spectroscopic redshift per object: max(z_spec) - min(z_spec) over all
# rows sharing a 'Seq', written to every row of that object.
# Only objects that test_uniq reports with more than one row are updated; every other
# row keeps the SpecScatter value it already has.
multi_ids = test_uniq[test_uniq > 1].index
is_multi = test['Seq'].isin(multi_ids)

# The spread is fractional, so make sure the target column is float before writing;
# assigning floats into an integer column through .loc is deprecated upcasting.
test['SpecScatter'] = test['SpecScatter'].astype(float)

# One grouped pass instead of re-scanning the whole table for every repeated object.
# transform() returns one value per input row in the original row order, so the
# positional values line up with the rows selected by is_multi.
z_by_object = test.loc[is_multi].groupby('Seq')['z_spec']
spread = z_by_object.transform('max') - z_by_object.transform('min')
test.loc[is_multi, 'SpecScatter'] = spread.to_numpy()

In [65]:
# Number of distinct objects whose z_spec spread reaches the 0.1 threshold,
# compared with the total number of distinct objects.
# NOTE(review): the filter is inclusive (>= 0.1) while the printed label says ">0.1".
n_big_scatter = test.loc[test['SpecScatter'] >= 0.1, 'Seq'].nunique()
n_objects = test['Seq'].nunique()
print(n_big_scatter, 'objects with SpecScatter>0.1 out of', n_objects)

461 objects with SpecScatter>0.1 out of 5967


In [51]:
# Show the column names currently present in the table.
test.columns

Index(['RAJ2000', 'DEJ2000', 'Seq', 'Ksmagap3', 'Ymagap3', 'Hmagap3',
       'Jmagap3', 'Bmagap3', 'Vmagap3', 'ipmagap3', 'rmagap3', 'umagap3',
       'zppmagap3', 'IB574ap3', 'photoZ_SED', 'specZ', 'Instr_1', 'Q_f_1',
       'sc_Ksmagap3', 'sc_Ymagap3', 'sc_Hmagap3', 'sc_Jmagap3', 'sc_Bmagap3',
       'sc_Vmagap3', 'sc_ipmagap3', 'sc_rmagap3', 'sc_umagap3', 'sc_zppmagap3',
       'photoZ_ML', 'resid_ML', 'resid_SED', 'residML_SED', 'w_sc_Ksmagap3',
       'w_sc_Ymagap3', 'w_sc_Hmagap3', 'w_sc_Jmagap3', 'w_sc_Bmagap3',
       'w_sc_Vmagap3', 'w_sc_ipmagap3', 'w_sc_rmagap3', 'w_sc_umagap3',
       'w_sc_zppmagap3', 'quantErr', 'cellID_TrainSOM', 'cellID_RunSOM',
       'cellID_RunBigSOM', 'specZ_outlCoeff', 'photoZ_ML_outlCoeff',
       'photoZ_SED_outlCoeff', 'quantErr_outlCoeff', 'trainMapOccupation',
       'L_ID', 'Instr_2', 'ORI_RA', 'ORI_Dec', 'ORI_ID', 'z_spec', 'Q_f_2',
       'RA_corr', 'Dec_corr', 'ID', 'ALPHA_J2000', 'DELTA_J2000', 'GroupID_2',
       'GroupSize_2', 'Contact'

In [64]:
# Residual statistics for objects whose z_spec spread is at least 0.1.
# Each object contributes one row (the first row found for its 'Seq').
for heading, resid_col in (
    ('Stats for resids for ML photo-z for objects with big scatter', 'resid_ML'),
    ('Stats for resids for SED photo-z for objects with big scatter', 'resid_SED'),
):
    print(heading)
    # A fresh subset is built for every call so statResid always gets its own frame.
    big_scatter_objects = test.loc[test['SpecScatter'] >= 0.1].drop_duplicates(subset='Seq')
    print(statResid(big_scatter_objects, resid_col))

Stats for resids for ML photo-z for objects with big scatter
{'Num objects': 461, 'Std': 0.096, 'NMAD': 0.028, 'Mean': -0.0088, '% outl_15': 7.81}
Stats for resids for SED photo-z for objects with big scatter
{'Num objects': 461, 'Std': 0.121, 'NMAD': 0.018, 'Mean': -0.0129, '% outl_15': 7.16}


In [66]:
# Residual statistics for objects with small scatter: z_spec spread strictly
# between 0 and 0.1. Each object contributes one row (the first for its 'Seq').
for heading, resid_col in (
    ('Stats for resids for ML photo-z for objects with small scatter', 'resid_ML'),
    ('Stats for resids for SED photo-z for objects with small scatter', 'resid_SED'),
):
    print(heading)
    # A fresh subset is built for every call so statResid always gets its own frame.
    in_small_range = (test['SpecScatter'] > 0) & (test['SpecScatter'] < 0.1)
    small_scatter_objects = test.loc[in_small_range].drop_duplicates(subset='Seq')
    print(statResid(small_scatter_objects, resid_col))

Stats for resids for ML photo-z for objects with small scatter
{'Num objects': 1879, 'Std': 0.024, 'NMAD': 0.016, 'Mean': 0.0015, '% outl_15': 0.21}
Stats for resids for SED photo-z for objects with small scatter
{'Num objects': 1879, 'Std': 0.057, 'NMAD': 0.009, 'Mean': 0.0014, '% outl_15': 0.27}


In [67]:
# Residual statistics for objects whose SpecScatter is exactly 0, one row per object.
# NOTE(review): the labels say "only one measurement", but SpecScatter == 0 also
# matches repeated objects whose z_spec values are all identical (max - min = 0).
for heading, resid_col in (
    ('Stats for resids for ML photo-z for objects with only one measurement', 'resid_ML'),
    ('Stats for resids for SED photo-z for objects with only one measurement', 'resid_SED'),
):
    print(heading)
    # A fresh subset is built for every call so statResid always gets its own frame.
    zero_scatter_objects = test.loc[test['SpecScatter'] == 0].drop_duplicates(subset='Seq')
    print(statResid(zero_scatter_objects, resid_col))

Stats for resids for ML photo-z for objects with only one measurement
{'Num objects': 3627, 'Std': 0.048, 'NMAD': 0.02, 'Mean': -0.0034, '% outl_15': 1.6}
Stats for resids for SED photo-z for objects with only one measurement
{'Num objects': 3627, 'Std': 0.105, 'NMAD': 0.012, 'Mean': -0.0058, '% outl_15': 2.62}


In [88]:
# Median, minimum and maximum of |specZ_outlCoeff| for objects with big scatter
# (z_spec spread >= 0.1), using one row per object.
# NOTE(review): the printed label says "Mean", but the first value is the median.
print('Mean specz_outlCoeff for objects with big scatter')
big_scatter_coeff = (
    test.loc[test['SpecScatter'] >= 0.1]
    .drop_duplicates(subset='Seq')['specZ_outlCoeff']
    .astype(float)
    .abs()
)
print(big_scatter_coeff.median(), big_scatter_coeff.min(), big_scatter_coeff.max())

Mean specz_outlCoeff for objects with big scatter
0.6854191113268312 0.0010838265519637525 17.44538274914747


In [89]:
# Median, minimum and maximum of |specZ_outlCoeff| for objects with small scatter
# (z_spec spread strictly between 0 and 0.1), using one row per object.
# NOTE(review): the printed label says "Mean", but the first value is the median.
print('Mean specz_outlCoeff for objects with small scatter')
in_small_range = (test['SpecScatter'] > 0) & (test['SpecScatter'] < 0.1)
small_scatter_coeff = (
    test.loc[in_small_range]
    .drop_duplicates(subset='Seq')['specZ_outlCoeff']
    .astype(float)
    .abs()
)
print(small_scatter_coeff.median(), small_scatter_coeff.min(), small_scatter_coeff.max())

Mean specz_outlCoeff for objects with small scatter
0.5628471633129158 0.0004121590188503939 9.801720873893373


In [90]:
# Median, minimum and maximum of |specZ_outlCoeff| for objects whose SpecScatter is
# exactly 0, using one row per object.
# NOTE(review): the printed label says "Mean", but the first value is the median;
# SpecScatter == 0 also matches repeated objects with identical z_spec values.
print('Mean specz_outlCoeff for objects with only one measurement')
zero_scatter_coeff = (
    test.loc[test['SpecScatter'] == 0]
    .drop_duplicates(subset='Seq')['specZ_outlCoeff']
    .astype(float)
    .abs()
)
print(zero_scatter_coeff.median(), zero_scatter_coeff.min(), zero_scatter_coeff.max())

Mean specz_outlCoeff for objects with only one measurement
0.5795997366004416 2.3515171905236832e-05 inf
