### Table of contents
[Imports](#funcs)

[Parameters, filenames and directories](#params)

[Reading datasets](#readCatalogs)

[Training SOMs](#SOM_Train)

[Calculating SOM_info for dataset](#SOM_info)

[Building and saving maps](#SOM_maps)

[Adding occupation info](#addOccupation)

[Adding outlier coefficients for specZ, ML and SED photoZ and quantErr](#outlCoeff)

[Saving datasets with SOM_info](#save)

In [2]:
### import libraries ###
import time
import numpy as np
import minisom
import os
import pandas as pd
from func_SOM_read_write import createSOM, writeSOM, loadSOM
from func_add_SOM_info import addBMUWeights, addQuantErr, addCellAddress, calcOutlCoeff, addOccupation
from func_maps import buildMaps, writeMaps,readMaps

### Parameters, filenames and directories
<a id='params'>#params</a>

In [3]:
### Parameters of SOM experiment ###
# Experiment label; it also names the output sub-folder created inside dirSOM.
nameOfExperiment = 'ex020'

# Grid dimensions handed to createSOM for the trained map.
width = 25
height = 28

# Dimensions of a larger grid; in the visible part of this notebook they are
# only recorded in the log file.
widthBig = 64
heightBig = 67

# Training settings forwarded to createSOM / loadSOM.
# num_features equals the number of magnitude columns listed in `mags`.
num_features = 10
epochs = 6000
sigma = 5
learning_rate = 0.5
neighborhood_function = 'bubble'

In [4]:
### Files and directories ###
# NOTE(review): these are absolute, machine-specific Windows paths; adjust them
# before running the notebook on another machine.
dirCatalogs = r'D:\Sources\COSMOS_photoZ\COSMOS_v3.0\catalogs\forPaper\v06\SOMprepared'  # input catalogs
dirSOM = r'D:\Sources\COSMOS_photoZ\COSMOS_v3.0\SOM\forArticle'  # root folder for SOM outputs
dirMLPQNA = r'D:\Sources\COSMOS_photoZ\COSMOS_v3.0\experimentsArticle\exp022'  # not used in the visible cells

# Catalog file names, resolved relative to dirCatalogs.
nameTrain = r'05_COSMOS_SpectrZ_Zlim_1p2.csv'
nameTest = r'05_COSMOS_SpectrZ_Zlim_1p2_Test.csv'
nameRun = r'04_COSMOS2015_run_WorkBands.csv'  # not read by readCats in the visible cells
nameDeimos = r'04_COSMOS_DEIMOS_QfFilter.csv'

In [5]:
### Create experiment folder ###
# os.makedirs creates dirSOM and the experiment sub-folder as needed.
# exist_ok=True keeps the cell re-runnable: the previous os.mkdir branch raised
# FileExistsError whenever the experiment folder was already present.
os.makedirs(os.path.join(dirSOM,nameOfExperiment), exist_ok=True)
# UTC timestamp prefix (yymmdd_HHMMSS_) used to name this run's log file.
timeName=time.strftime("%y%m%d_%H%M%S_", time.gmtime())

In [6]:
### Write parameters on log file ###
logName=timeName+'_log.txt'
# (label, value) pairs in the order in which they are written to the log.
logEntries=[
    ('Catalog',nameTrain),
    ('width',width),
    ('height',height),
    ('widthBig',widthBig),
    ('heightBig',heightBig),
    ('num_features',num_features),
    ('epochs',epochs),
    ('sigma',sigma),
    ('learning rate',learning_rate),
    ('neighborhood_function',neighborhood_function),
]
# Opened in append mode, one "label=value" line per entry.
with open(os.path.join(dirSOM,nameOfExperiment,logName), 'a+') as f:
    f.writelines(entryLabel+'='+str(entryValue)+'\n' for entryLabel,entryValue in logEntries)

In [7]:
### Column names for training and filtering ###
# Magnitude columns; their 'sc_'-prefixed counterparts are the SOM input features.
mags = [
    'Ksmagap3', 'Ymagap3', 'Hmagap3', 'Jmagap3', 'Bmagap3',
    'Vmagap3', 'ipmagap3', 'rmagap3', 'umagap3', 'zppmagap3',
]
magsScaled = ['sc_{}'.format(band) for band in mags]

# Identifier column handed to the SOM helper functions.
idCol = 'Seq'

# Redshift columns.
specZ = 'specZ'
photoZ_ML = 'photoZ_ML'
photoZ_SED = 'photoZ_SED'

# Residual columns.
residML = 'resid_ML'
residSED = 'resid_SED'
residML_SED = 'residML_SED'

quantErr = 'quantErr'

### Reading datasets
<a id='readCatalogs'>#readCatalogs</a>

In [28]:
### Read catalogs ###
def readCats():
    """Load the train, test and DEIMOS catalogs from dirCatalogs.

    Returns:
        Tuple (dataTrain, dataTest, dataDeimos) of pandas DataFrames read from
        nameTrain, nameTest and nameDeimos respectively.
    """
    fileNames=(nameTrain,nameTest,nameDeimos)
    catalogs=[pd.read_csv(os.path.join(dirCatalogs,fileName)) for fileName in fileNames]
    return tuple(catalogs)

### Training SOMs
<a id='SOM_Train'>#SOM_Train</a>

In [46]:
def TrainSOM(mapID,dataTrain,mapIDD):
    """Create a SOM from the scaled magnitudes and check the saved weights.

    Args:
        mapID: map number; zero-padded to three digits in the weights file name.
        dataTrain: DataFrame containing the `magsScaled` columns.
        mapIDD: offset added to 10 to form the random seed.

    Returns:
        The SOM object returned by createSOM.
    """
    weightsName='somTrainWeights_'+str(mapID).zfill(3)+'.txt'
    weightsPath=os.path.join(dirSOM,nameOfExperiment,weightsName)
    seed=10+mapIDD
    features=np.array(dataTrain[magsScaled])
    somTrain=createSOM(features, epochs, height, width, num_features, sigma, learning_rate,
                       neighborhood_function, random_seed=seed,
                       saveWeightsName=weightsPath)
    ### Checking that after writing/loading cycle SOM weights were the same ###
    somCheck=loadSOM(weightsFile=weightsPath,
                     sigma=sigma,learning_rate=learning_rate,
                     neighborhood_function=neighborhood_function,random_seed=seed)
    # Largest absolute difference between in-memory and reloaded weights.
    maxDiff=abs(somTrain.get_weights()-somCheck.get_weights()).max()
    print(maxDiff)
    return somTrain

### Calculating SOM_info for dataset
<a id='SOM_info'>#SOM_info</a>

In [32]:
### Adding cellIDs for each galaxy ###
def addCellID(somTrain,dataTrain,dataTest,dataDeimos):
    """Run addCellAddress with the trained SOM on each of the three catalogs.

    Args:
        somTrain: SOM passed to addCellAddress.
        dataTrain, dataTest, dataDeimos: catalogs containing the `magsScaled`
            columns and the `idCol` column.

    Returns:
        Tuple (dataTrain, dataTest, dataDeimos) holding the values returned by
        addCellAddress for each catalog.
    """
    prefix='_TrainSOM'
    tagged=[addCellAddress(somTrain, catalog, magsScaled, idCol,cellIDPrefix=prefix)
            for catalog in (dataTrain,dataTest,dataDeimos)]
    return tuple(tagged)

### Building and saving maps
<a id='SOM_maps'>#SOM_maps</a>

In [43]:
def allMaps(somTrain,mapID,dataTrain,dataTest,dataDeimos):
    """Build maps for the three catalogs on the trained SOM and write them to disk.

    Args:
        somTrain: SOM passed to buildMaps.
        mapID: map number; zero-padded to three digits in the output file prefix.
        dataTrain, dataTest, dataDeimos: catalogs holding the mapped columns and
            the 'cellID_TrainSOM' column (presumably added by addCellID - verify).

    Returns:
        Dict with keys 'mapsTrainOnTrain', 'mapsDeimosOnTrain' and
        'mapsTestOnTrain', each holding the value returned by buildMaps.

    Raises:
        OSError: if the 'maps' output folder cannot be created.
    """
    mapCols=[photoZ_SED,photoZ_ML,specZ,residSED,residML,residML_SED]
    cellID='cellID_TrainSOM'

    mapsAll={}
    mapsAll['mapsTrainOnTrain']=buildMaps(somTrain,dataTrain,mapCols,idCol,cellID=cellID)
    mapsAll['mapsDeimosOnTrain']=buildMaps(somTrain,dataDeimos,mapCols,idCol,cellID=cellID)
    mapsAll['mapsTestOnTrain']=buildMaps(somTrain,dataTest,mapCols,idCol,cellID=cellID)

    ### Saving maps ###
    dirMaps='maps'
    mapsDir=os.path.join(dirSOM,nameOfExperiment,dirMaps)
    # exist_ok=True tolerates an existing folder without hiding real failures
    # (e.g. permission errors) the way the former bare `except: pass` did.
    os.makedirs(mapsDir,exist_ok=True)
    for key,val in mapsAll.items():
        writeMaps(val,prefixName=os.path.join(mapsDir,key+'_'+str(mapID).zfill(3)))
    return mapsAll

### Adding occupation info
<a id='addOccupation'>#addOccupation</a>

In [48]:
def addOccupationInfo(mapsAll,dataTrain,dataTest,dataDeimos):
    """Run addOccupation on each catalog using the train-on-train activation map.

    Args:
        mapsAll: dict containing mapsAll['mapsTrainOnTrain']['activMap'].
        dataTrain, dataTest, dataDeimos: catalogs holding the 'cellID_TrainSOM'
            column.

    Returns:
        Tuple (dataTrain, dataTest, dataDeimos) of the catalogs as returned by
        addOccupation.
    """
    cellID='cellID_TrainSOM'
    activMap=mapsAll['mapsTrainOnTrain']['activMap']
    updated=[]
    for dataset in (dataTrain,dataTest,dataDeimos):
        result=addOccupation(dataset,cellID,activMap)
        # The previous loop only rebound its local variable, so the returned
        # catalogs were thrown away and the original inputs were returned.
        # Keep the input if addOccupation works in place and returns None
        # (NOTE(review): confirm addOccupation's return contract).
        updated.append(dataset if result is None else result)
    return tuple(updated)

### Adding outlier coefficients for specZ, ML and SED photoZ and quantErr
<a id='outlCoeff'>#outlCoeff</a>

In [34]:
def outlCoeffs(somTrain,mapID,dataTrain,dataTest,dataDeimos,mapsAll=None):
    """Run calcOutlCoeff for specZ, photoZ_ML and photoZ_SED on each catalog.

    Args:
        somTrain: unused; kept so existing calls keep working.
        mapID: unused; kept so existing calls keep working.
        dataTrain, dataTest, dataDeimos: catalogs holding the 'cellID_TrainSOM'
            column and the three redshift columns.
        mapsAll: dict as returned by allMaps. When omitted, the module-level
            `mapsAll` is used, which is what this function always did before
            the parameter was added.

    Returns:
        Tuple (dataTrain, dataTest, dataDeimos) as returned by calcOutlCoeff.

    Raises:
        NameError: if mapsAll is omitted and no module-level `mapsAll` exists.
    """
    if mapsAll is None:
        # Backward-compatible fallback to the hidden global this function used to read.
        try:
            mapsAll=globals()['mapsAll']
        except KeyError:
            raise NameError("name 'mapsAll' is not defined") from None
    filterCols=[specZ,photoZ_ML,photoZ_SED]
    maps=mapsAll['mapsTrainOnTrain']
    cellID='cellID_TrainSOM'
    for col in filterCols:
        stdMap=maps['stdMaps'][col]
        meanMap=maps['meanMaps'][col]
        dataTrain=calcOutlCoeff(dataTrain,stdMap,meanMap,col,cellID,prefix='')
        dataTest=calcOutlCoeff(dataTest,stdMap,meanMap,col,cellID,prefix='')
        dataDeimos=calcOutlCoeff(dataDeimos,stdMap,meanMap,col,cellID,prefix='')
    return dataTrain,dataTest,dataDeimos

### Saving datasets with SOM_info
<a id='save'>#save</a>

In [44]:
### Saving datasets with added data ###
def saveDataWithSOMInfo(mapID,dataTrain,dataTest,dataDeimos):
    """Write the three catalogs as CSV files into the experiment's 'datasets' folder.

    Each output name is the input catalog name with '.csv' replaced by
    '_SOMinfo_<mapID zero-padded to 3 digits>.csv'. The index is not written.

    Args:
        mapID: map number used in the output file names.
        dataTrain, dataTest, dataDeimos: DataFrames to save.

    Raises:
        OSError: if the output folder cannot be created.
    """
    dirDatasets='datasets'
    outDir=os.path.join(dirSOM,nameOfExperiment,dirDatasets)
    # exist_ok=True tolerates an existing folder without hiding real failures
    # the way the former bare `except: pass` did.
    os.makedirs(outDir,exist_ok=True)
    suffix='_SOMinfo_'+str(mapID).zfill(3)+'.csv'
    datasets=(dataTrain,dataTest,dataDeimos)
    names=(nameTrain,nameTest,nameDeimos)
    for dataset,name in zip(datasets,names):
        dataset.to_csv(os.path.join(outDir,name.replace('.csv',suffix)),index=False)

In [51]:
# Run the full pipeline once per map ID (4..10): read catalogs, train a SOM,
# assign cell IDs, build and save maps, add occupation info and outlier
# coefficients, then save the augmented catalogs.
# mapID is also passed as mapIDD, so each map is trained with random_seed=10+mapID.
for mapID in range(4,11):
    print(mapID)
    # Catalogs are re-read from disk on every iteration, so each map starts
    # from the unmodified input files.
    dataTrain,dataTest,dataDeimos=readCats()
    print('Catalogues read')
    somTrain=TrainSOM(mapID,dataTrain,mapIDD=mapID)
    print('SOM trained')
    dataTrain,dataTest,dataDeimos=addCellID(somTrain,dataTrain,dataTest,dataDeimos)
    print('Cell IDs added')
    # NOTE: mapsAll must remain a module-level name; the outlCoeffs call below
    # does not pass it, so outlCoeffs picks it up from the global namespace.
    mapsAll=allMaps(somTrain,mapID,dataTrain,dataTest,dataDeimos)
    print('Maps built')
    dataTrain,dataTest,dataDeimos=addOccupationInfo(mapsAll,dataTrain,dataTest,dataDeimos)
    print('Occupation info added')
    dataTrain,dataTest,dataDeimos=outlCoeffs(somTrain,mapID,dataTrain,dataTest,dataDeimos)
    print('Outl coeffs calculated')
    saveDataWithSOMInfo(mapID,dataTrain,dataTest,dataDeimos)
    print('Catalogues saved')

4
Catalogues read
4.99933427988708e-13
SOM trained
Cell IDs added
Maps built
Occupation info added
Outl coeffs calculated
Catalogues saved
5
Catalogues read
4.999889391399392e-13
SOM trained
Cell IDs added
Maps built
Occupation info added
Outl coeffs calculated
Catalogues saved
6
Catalogues read
4.999889391399392e-13
SOM trained
Cell IDs added
Maps built
Occupation info added
Outl coeffs calculated
Catalogues saved
7
Catalogues read
4.998224056862455e-13
SOM trained
Cell IDs added
Maps built
Occupation info added
Outl coeffs calculated
Catalogues saved
8
Catalogues read
4.997668945350142e-13
SOM trained
Cell IDs added
Maps built
Occupation info added
Outl coeffs calculated
Catalogues saved
9
Catalogues read
4.99933427988708e-13
SOM trained
Cell IDs added
Maps built
Occupation info added
Outl coeffs calculated
Catalogues saved
10
Catalogues read
4.99933427988708e-13
SOM trained
Cell IDs added
Maps built
Occupation info added
Outl coeffs calculated
Catalogues saved
