### Table of contents
[Imports](#funcs)

[Parameters, filenames and directories](#params)

[Reading and normalizing datasets](#readCatalogs)

In [5]:
from func_dataset_preprocessing import residCol, scaleMags, mergeCatalogs
import pandas as pd
import os

### Parameters, filenames and directories
<a id='params'>#params</a>

In [2]:
### Files and directories ###
# Root directories. The defaults are the original local Windows paths; set the
# COSMOS_DIR_CATALOGS / COSMOS_DIR_MLPQNA environment variables to point the
# notebook at another location without editing this cell.
dirCatalogs=os.environ.get('COSMOS_DIR_CATALOGS',
                           r'D:\Sources\COSMOS_photoZ\COSMOS_v3.0\catalogs\forPaper\v06')
dirMLPQNA=os.environ.get('COSMOS_DIR_MLPQNA',
                         r'D:\Sources\COSMOS_photoZ\COSMOS_v3.0\experimentsArticle\exp022')

# Catalog file names. Train/run/DEIMOS are read from dirCatalogs; all four
# names are reused for the files written to the 'SOMprepared' subfolder.
nameTrain=r'05_COSMOS_SpectrZ_Zlim_1p2.csv'
nameTest=r'05_COSMOS_SpectrZ_Zlim_1p2_Test.csv'
nameRun=r'04_COSMOS2015_run_WorkBands.csv'
nameDeimos=r'04_COSMOS_DEIMOS_QfFilter.csv'

# METAPHOR output file names, read from dirMLPQNA.
nameTrainOutput=r'METAPHOR_trainout_05_COSMOS_SpectrZ_Zlim_1p2.csv'
nameTestOutput=r'METAPHOR_testout_05_COSMOS_SpectrZ_Zlim_1p2.csv'
nameRunOutput=r'METAPHOR_runout_04_COSMOS2015_run_WorkBands.csv'

In [3]:
### Column names for training and filtering ###
# Magnitude columns handed to scaleMags, and the matching output column
# names (same names with an 'sc_' prefix).
mags=[
    'Ksmagap3','Ymagap3','Hmagap3','Jmagap3','Bmagap3',
    'Vmagap3','ipmagap3','rmagap3','umagap3','zppmagap3',
]
magsScaled=['sc_{}'.format(col) for col in mags]

# Identifier column.
idCol='Seq'

# Redshift column names (targets of the colRename mappings used when merging).
specZ,photoZ_ML,photoZ_SED='specZ','photoZ_ML','photoZ_SED'

# Residual column names.
residML,residSED,residML_SED='resid_ML','resid_SED','residML_SED'

### Reading and normalizing datasets
<a id='readCatalogs'>#readCatalogs</a>

In [6]:
### Read catalogs ###
# Original catalogs (train, run, DEIMOS), all located in dirCatalogs.
dataTrainOrig,dataRunOrig,dataDeimosOrig=(
    pd.read_csv(os.path.join(dirCatalogs,catalogName))
    for catalogName in (nameTrain,nameRun,nameDeimos)
)

# METAPHOR outputs (train, test, run), all located in dirMLPQNA.
dataTrainOutput,dataTestOutput,dataRunOutput=(
    pd.read_csv(os.path.join(dirMLPQNA,outputName))
    for outputName in (nameTrainOutput,nameTestOutput,nameRunOutput)
)

In [7]:
### Normalization of magnitudes ###
# scaleMags comes from func_dataset_preprocessing; presumably it writes the
# scaled versions of `mags` into the `magsScaled` columns -- confirm there.
# NOTE(review): each call rebinds its own input name, so re-running this cell
# without re-running the read cell feeds already-processed frames back in.

# Training catalog: no reference data set is passed.
dataTrainOrig=scaleMags(
    dataTrainOrig,mags,magsScaled,
)

# Run catalog: scaled relative to the (already processed) training catalog.
dataRunOrig=scaleMags(
    dataRunOrig,mags,magsScaled,
    dataScaleTo=dataTrainOrig,
)

# DEIMOS catalog: scaled relative to the (already processed) run catalog.
# NOTE(review): the reference here is the run catalog, not the training
# catalog -- confirm this is intended.
dataDeimosOrig=scaleMags(
    dataDeimosOrig,mags,magsScaled,
    dataScaleTo=dataRunOrig,
)

In [8]:
##### Merging #####
# mergeCatalogs / residCol come from func_dataset_preprocessing. Each merge
# renames the METAPHOR and SED photo-z columns to the names defined in the
# column-names cell.

### Train ###
# Original training catalog + METAPHOR training output.
dataTrain=mergeCatalogs(
    dataTrainOrig,
    dataTrainOutput,
    colRename={
        'zphoto':photoZ_ML,
        'zphot':photoZ_SED,
        'z_spec_x':specZ,
    },
    colDelete='z_spec_y',
)

### Test ###
# Same original catalog as for train, merged with the METAPHOR test output.
# NOTE(review): no separate test catalog is read in this notebook; confirm the
# training catalog is the intended left-hand side here.
dataTest=mergeCatalogs(
    dataTrainOrig,
    dataTestOutput,
    colRename={
        'zphoto':photoZ_ML,
        'zphot':photoZ_SED,
        'z_spec_x':specZ,
    },
    colDelete='z_spec_y',
)

### DEIMOS ###
# Here the METAPHOR run output is the first argument and the catalog second.
dataDeimos=mergeCatalogs(
    dataRunOutput,
    dataDeimosOrig,
    colRename={
        'zphoto':photoZ_ML,
        'zphot':photoZ_SED,
        'z_spec':specZ,
    },
)

### Run ###
# Merged with resids=False; a single residual column is then added by
# residCol, with the SED photo-z passed in the colSpectrZ slot.
dataRun=mergeCatalogs(
    dataRunOutput,
    dataRunOrig,
    colRename={
        'zphoto':photoZ_ML,
        'zphot':photoZ_SED,
    },
    resids=False,
)
dataRun=residCol(
    dataRun,
    colSpectrZ=photoZ_SED,
    colPhotoZ=photoZ_ML,
    colResid=residML_SED,
)

In [10]:
### Save the merged catalogs ###
# Output folder inside the catalogs directory. makedirs(exist_ok=True) is used
# instead of mkdir so that re-running the notebook does not raise
# FileExistsError once the folder has been created.
dirSOMprepared=os.path.join(dirCatalogs,'SOMprepared')
os.makedirs(dirSOMprepared,exist_ok=True)

# Each frame is written under its catalog file name, without the index column.
dataTrain.to_csv(os.path.join(dirSOMprepared,nameTrain),index=False)
dataTest.to_csv(os.path.join(dirSOMprepared,nameTest),index=False)
dataDeimos.to_csv(os.path.join(dirSOMprepared,nameDeimos),index=False)
dataRun.to_csv(os.path.join(dirSOMprepared,nameRun),index=False)