### Table of contents
[Imports](#funcs)

[Parameters, filenames and directories](#params)

[Reading datasets](#readCatalogs)

[Training SOMs](#SOM_Train)

[Calculating SOM_info for dataset](#SOM_info)

[Building and saving maps](#SOM_maps)

[Adding outlier coefficients for specZ, ML and SED photoZ and quantErr](#outlCoeff)

[Saving datasets with SOM_info](#save)

In [3]:
### import libraries ###
import time
import numpy as np
import minisom
import os
import pandas as pd
from func_SOM_read_write import createSOM, writeSOM, loadSOM
from func_add_SOM_info import addBMUWeights, addQuantErr, addCellAddress, calcOutlCoeff, addOccupation
from func_maps import buildMaps, writeMaps,readMaps

### Parameters, filenames and directories
<a id='params'>#params</a>

In [4]:
### Parameters of SOM experiment ###
# Name of the sub-folder (under dirSOM) that collects this experiment's outputs
nameOfExperiment = 'ex017'

# Grid dimensions handed to createSOM for the regular-sized SOMs
width = 25
height = 28

# Grid dimensions intended for the larger SOM trained on the run catalog
widthBig = 64
heightBig = 67

# Training settings forwarded to createSOM (and, partly, to loadSOM)
num_features = 10
epochs = 6000
sigma = 5
learning_rate = 0.5
neighborhood_function = 'bubble'

In [5]:
### Files and directories ###
# NOTE: these are absolute, machine-specific Windows paths; adjust them
# before running the notebook on another machine.
dirCatalogs = r'D:\Sources\COSMOS_photoZ\COSMOS_v3.0\catalogs\forPaper\v06\SOMprepared'
dirSOM = r'D:\Sources\COSMOS_photoZ\COSMOS_v3.0\SOM\forArticle'
# dirMLPQNA is not referenced by any of the cells visible in this notebook
dirMLPQNA = r'D:\Sources\COSMOS_photoZ\COSMOS_v3.0\experimentsArticle\exp022'

# Catalog file names, resolved relative to dirCatalogs when read
nameTrain = r'05_COSMOS_SpectrZ_Zlim_1p2.csv'
nameTest = r'05_COSMOS_SpectrZ_Zlim_1p2_Test.csv'
nameRun = r'04_COSMOS2015_run_WorkBands.csv'
nameDeimos = r'04_COSMOS_DEIMOS_QfFilter.csv'

In [7]:
### Create experiment folder ###
# os.makedirs creates any missing parent directories, and exist_ok=True keeps
# the cell re-runnable: the previous os.mkdir branch raised FileExistsError
# whenever the experiment folder was already there.
# NOTE: re-running into an existing folder means later cells may overwrite
# files saved by an earlier run of the same experiment.
os.makedirs(os.path.join(dirSOM,nameOfExperiment), exist_ok=True)
# UTC timestamp used as the prefix of this run's log file name
timeName=time.strftime("%y%m%d_%H%M%S_", time.gmtime())

In [8]:
### Write parameters on log file ###
logName=timeName+'_log.txt'
# (label, value) pairs in the order they appear in the log; each becomes one
# "label=value" line
logEntries=[
    ('Catalog',nameTrain),
    ('width',width),
    ('height',height),
    ('widthBig',widthBig),
    ('heightBig',heightBig),
    ('num_features',num_features),
    ('epochs',epochs),
    ('sigma',sigma),
    ('learning rate',learning_rate),
    ('neighborhood_function',neighborhood_function),
]
# Opened in append mode, so an existing log with the same name is extended
with open(os.path.join(dirSOM,nameOfExperiment,logName), 'a+') as logFile:
    logFile.writelines(label+'='+str(value)+'\n' for label,value in logEntries)

In [9]:
### Column names for training and filtering ###
# Magnitude columns, and the same columns with the 'sc_' prefix that are fed
# to the SOMs
mags=[
    'Ksmagap3','Ymagap3','Hmagap3','Jmagap3','Bmagap3',
    'Vmagap3','ipmagap3','rmagap3','umagap3','zppmagap3',
]
magsScaled=list(map('sc_{}'.format, mags))

# Identifier column
idCol='Seq'

# Redshift columns
specZ='specZ'
photoZ_ML='photoZ_ML'
photoZ_SED='photoZ_SED'

# Residual columns
residML='resid_ML'
residSED='resid_SED'
residML_SED='residML_SED'

### Reading datasets
<a id='readCatalogs'>#readCatalogs</a>

In [10]:
### Read catalogs ###
# Each catalog is a CSV file located in dirCatalogs; they are read in the
# order train, test, run, deimos
dataTrain, dataTest, dataRun, dataDeimos = [
    pd.read_csv(os.path.join(dirCatalogs, catalogName))
    for catalogName in (nameTrain, nameTest, nameRun, nameDeimos)
]

### Training SOMs
<a id='SOM_Train'>#SOM_Train</a>

In [11]:
# Train a SOM on the scaled magnitudes of the training catalog and save its
# weights inside the experiment folder
somTrainWeightsPath=os.path.join(dirSOM,nameOfExperiment,'somTrainWeights.txt')
data=np.array(dataTrain[magsScaled])
somTrain=createSOM(
    data, epochs, height, width, num_features, sigma, learning_rate,
    neighborhood_function, saveWeightsName=somTrainWeightsPath,
)

In [12]:
# Train a SOM of the same grid size on the scaled magnitudes of the run
# catalog and save its weights inside the experiment folder
somRunWeightsPath=os.path.join(dirSOM,nameOfExperiment,'somRunWeights.txt')
data=np.array(dataRun[magsScaled])
somRun=createSOM(
    data, epochs, height, width, num_features, sigma, learning_rate,
    neighborhood_function, saveWeightsName=somRunWeightsPath,
)

In [13]:
# Train the large SOM on the run catalog. The grid is passed as
# (heightBig, widthBig), the same positional order the other createSOM calls
# use for (height, width). Previously widthBig was passed twice, so heightBig
# (which is written to the log) was never actually used.
# The input array is rebuilt here so this cell does not rely on a `data`
# variable left over from an earlier cell.
data=np.array(dataRun[magsScaled])
somRunBig=createSOM(data, epochs, heightBig, widthBig, num_features, sigma, learning_rate, 
                neighborhood_function, saveWeightsName=os.path.join(dirSOM,nameOfExperiment,'somRunBigWeights.txt'))

In [22]:
### Checking that after writing/loading cycle SOM weights were the same ###
soms=[somTrain,somRun,somRunBig]
somNames=['somTrainWeights.txt','somRunWeights.txt','somRunBigWeights.txt']
for idx,som in enumerate(soms):
    # Reload the weights that were saved for this SOM during training
    weightsPath=os.path.join(dirSOM,nameOfExperiment,somNames[idx])
    somCheck=loadSOM(
        weightsFile=weightsPath,
        sigma=sigma,
        learning_rate=learning_rate,
        neighborhood_function=neighborhood_function,
        random_seed=10,
    )
    # Largest absolute difference between in-memory and reloaded weights
    maxDiff=np.abs(som.get_weights()-somCheck.get_weights()).max()
    print(maxDiff)

4.998483404961007e-09
4.9994364204053454e-09
4.999866021204724e-09


### Calculating SOM_info for dataset
<a id='SOM_info'>#SOM_info</a>

In [36]:
### Adding weights of BMU for each galaxy in a dataset ###
# Every catalog (including the run catalog) is processed against somTrain
dataTrain, dataTest, dataDeimos, dataRun = [
    addBMUWeights(somTrain, catalog, magsScaled)
    for catalog in (dataTrain, dataTest, dataDeimos, dataRun)
]

In [37]:
### Adding quantization errors for each galaxy ###
dataTrain=addQuantErr(dataTrain,magsScaled,colWeights=None)
dataTest=addQuantErr(dataTest,magsScaled,colWeights=None)
dataDeimos=addQuantErr(dataDeimos,magsScaled,colWeights=None)
dataRun=addQuantErr(dataRun,magsScaled,colWeights=None)

In [None]:
### Adding cellIDs for each galaxy ###
# NOTE(review): the prefixes are not uniform ('_TrainSOM' and '_RunSOM' versus
# 'cellID_RunBigSOM'), while later cells look up the columns 'cellID_TrainSOM',
# 'cellID_RunSOM' and 'cellID_RunBigSOM'. Depending on how addCellAddress
# builds the column name from cellIDPrefix, either the first two or the last
# entry is presumably wrong - confirm against func_add_SOM_info and align them.
soms=[somTrain,somRun,somRunBig]
prefixes=['_TrainSOM','_RunSOM','cellID_RunBigSOM']
for som,prefix in zip(soms,prefixes):
    # Same SOM and prefix for all four catalogs, processed in the order
    # train, test, deimos, run
    dataTrain, dataTest, dataDeimos, dataRun = [
        addCellAddress(som, catalog, magsScaled, idCol, cellIDPrefix=prefix)
        for catalog in (dataTrain, dataTest, dataDeimos, dataRun)
    ]

### Building and saving maps
<a id='SOM_maps'>#SOM_maps</a>

In [35]:
# Display the columns currently present in the training catalog
dataTrain.columns

Index(['RAJ2000', 'DEJ2000', 'Seq', 'Ksmagap3', 'Ymagap3', 'Hmagap3',
       'Jmagap3', 'Bmagap3', 'Vmagap3', 'ipmagap3', 'rmagap3', 'umagap3',
       'zppmagap3', 'IB574ap3', 'photoZ_SED', 'specZ', 'Instr', 'Q_f',
       'sc_Ksmagap3', 'sc_Ymagap3', 'sc_Hmagap3', 'sc_Jmagap3', 'sc_Bmagap3',
       'sc_Vmagap3', 'sc_ipmagap3', 'sc_rmagap3', 'sc_umagap3', 'sc_zppmagap3',
       'photoZ_ML', 'resid_ML', 'resid_SED', 'residML_SED'],
      dtype='object')

In [34]:
# Build maps of every catalog on every SOM. Keys follow the pattern
# 'maps<Catalog>On<SOM>'.
# NOTE(review): the KeyError: 'cellID_TrainSOM' recorded for this cell
# presumably comes from running it before the cellID columns had been added
# to the catalogs - confirm by executing the notebook top to bottom.
mapsAll={}

# (SOM label used in the key, cellID column name, SOM object)
somSetups=[
    ('Train','cellID_TrainSOM',somTrain),
    ('Run','cellID_RunSOM',somRun),
    ('RunBig','cellID_RunBigSOM',somRunBig),
]

# Train, Deimos and Test catalogs: all seven columns are mapped
mapCols=[photoZ_SED,photoZ_ML,specZ,residSED,residML,residML_SED,'quantErr']
for somLabel,cellID,som in somSetups:
    for dataLabel,catalog in (('Train',dataTrain),('Deimos',dataDeimos),('Test',dataTest)):
        mapsAll['maps'+dataLabel+'On'+somLabel]=buildMaps(som,catalog,mapCols,idCol,cellID=cellID)

# Run catalog: only the reduced column list below is mapped
mapCols=[photoZ_SED,photoZ_ML,residML_SED,'quantErr']
for somLabel,cellID,som in somSetups:
    mapsAll['mapsRunOn'+somLabel]=buildMaps(som,dataRun,mapCols,idCol,cellID=cellID)

KeyError: 'cellID_TrainSOM'

In [None]:
### Saving maps ###
dirMaps='maps'
# exist_ok=True keeps the cell re-runnable; the previous os.mkdir raised
# FileExistsError when the maps folder already existed (and failed outright
# if the experiment folder itself was missing).
os.makedirs(os.path.join(dirSOM,nameOfExperiment,dirMaps), exist_ok=True)
# Each entry of mapsAll is written with its dictionary key as the file prefix
for key,val in mapsAll.items():
    writeMaps(val,prefixName=os.path.join(dirSOM,nameOfExperiment,dirMaps,key))

### Adding outlier coefficients for specZ, ML and SED photoZ and quantErr
<a id='outlCoeff'>#outlCoeff</a>

In [None]:
# Outlier coefficients for the train, test and deimos catalogs, all computed
# from the maps of the training catalog on the Train SOM
filterCols=[specZ,photoZ_ML,photoZ_SED,'quantErr']
maps=mapsAll['mapsTrainOnTrain']
cellID='cellID_TrainSOM'
for col in filterCols:
    # Per-column std and mean maps shared by the three catalogs
    stdMap=maps['stdMaps'][col]
    meanMap=maps['meanMaps'][col]
    dataTrain=calcOutlCoeff(dataTrain,stdMap,meanMap,col,cellID,prefix='')
    dataTest=calcOutlCoeff(dataTest,stdMap,meanMap,col,cellID,prefix='')
    dataDeimos=calcOutlCoeff(dataDeimos,stdMap,meanMap,col,cellID,prefix='')

In [None]:
# Outlier coefficients for the run catalog, computed from its own maps on the
# Train SOM; specZ is not in this column list
filterCols=[photoZ_ML,photoZ_SED,'quantErr']
maps=mapsAll['mapsRunOnTrain']
cellID='cellID_TrainSOM'
for col in filterCols:
    stdMap=maps['stdMaps'][col]
    meanMap=maps['meanMaps'][col]
    dataRun=calcOutlCoeff(dataRun,stdMap,meanMap,col,cellID,prefix='')

### Saving datasets with SOM_info
<a id='save'>#save</a>

In [None]:
### Saving datasets with added data ###
dirDatasets='datasets'
# The output folder is not created anywhere else in the notebook and to_csv
# does not create missing directories, so make sure it exists first.
datasetsPath=os.path.join(dirSOM,nameOfExperiment,dirDatasets)
os.makedirs(datasetsPath, exist_ok=True)
datasets=[dataTrain,dataTest,dataDeimos,dataRun]
names=[nameTrain,nameTest,nameDeimos,nameRun]
for dataset,name in zip(datasets,names):
    # Output name is the input catalog name with '.csv' -> '_SOMinfo.csv'
    dataset.to_csv(os.path.join(datasetsPath,name.replace('.csv','_SOMinfo.csv')),index=False)