In [7]:
from typing import Dict, Any
import os
from load_ini import *
import numpy as np

In [8]:
# Locate the CSV file holding the list of mappings; it is looked up in the current working directory
FILE_NAME = "metadataNaming.csv"    # lists all the parameters that go into the new model name -- please don't change
DIR_NAME = os.getcwd()
FILE_PATH = os.path.join(DIR_NAME, FILE_NAME)
data, header = read_csv(FILE_PATH)

In [3]:
# Create the dictionary of mappings from `data` and `header` (copied from the database read-in script)
mappings = create_mapping(data, header)

In [9]:
"""
"settings" is technically not needed if we are working on a simple local host, but can be changed to optimise search performance on a database that is hosted on a cluster and searched by multiple users.
"mappings" is required if you wish to explicitly map fields to specific values
"""

# Index settings plus the explicit per-field mappings created from the CSV file
index_definition = {
    "settings": {"number_of_shards": 1},
    "mappings": {"properties": mappings},
}

In [4]:
# Parameter names used in <prefix>.setup -> abbreviations used in the model name
_SETUP_ABBREVIATIONS = {
    "primary_mass": "m1",
    "secondary_mass": "m2",
    "secondary_racc": "racc2",
    "semi_major_axis": "a",
    "eccentricity": "e",
    "wind_gamma": "gamma",
    "temp_exponent": "gamma",
    # For triples
    "binary2_a": "aIn",
    "binary2_e": "eIn",
}

# Parameter names used in <prefix>.in -> abbreviations used in the model name
_IN_ABBREVIATIONS = {
    "wind_mass_rate": "mlr",
    "wind_velocity": "vw",
    "wind_inject_radius": "Rinj",
    "wind_temperature": "Tw",
    "ieos": "eos",
    "outer_boundary": "bound",
    "iwind_resolution": "iwr",
    "wind_shell_spacing": "wss",
    "excitation_HI": "HIexcit",
    "icool_method": "icoolmeth",
}


def _split_parameter_line(line):
    """Return (label, value) taken from the 1st and 3rd token of a line, or None.

    None is returned for lines that carry no parameter: empty or whitespace-only
    lines, lines whose first token starts with "#", and lines with fewer than
    three whitespace-separated tokens.
    """
    tokens = line.split()
    if len(tokens) < 3 or tokens[0].startswith("#"):
        return None
    return tokens[0], tokens[2]


def _store_typed(setup, label, value, properties, unrounded=()):
    """Store value in setup[label], converted to the type declared in the mapping.

    Labels that are absent from `properties`, or whose declared type is not
    "float", "integer" or "string", are ignored. Floats are rounded to 3 decimals
    unless the label is listed in `unrounded`.
    """
    if label not in properties:
        return
    kind = properties[label]["type"]
    if kind == "float":
        number = float(value)
        setup[label] = number if label in unrounded else np.round(number, 3)
    elif kind == "integer":
        setup[label] = int(value)
    elif kind == "string":
        setup[label] = str(value)


def _require_triple_values(prefix, **values):
    """Raise a ValueError naming every triple quantity that was not read from the .setup file."""
    missing = sorted(name for name, found in values.items() if found is None)
    if missing:
        raise ValueError(
            "%s.setup describes a triple system but does not define: %s"
            % (prefix, ", ".join(missing))
        )


def makeModelName(directory: str, prefix: str, index_definition) -> Dict[str, Any]:
    """Load the prefix.setup and prefix.in files to get the required information about the phantom model

    Every parameter line is split on whitespace; the first token is the label and
    the third token is the value. Known labels are abbreviated, and a value is
    kept only when its (abbreviated) label appears in
    index_definition["mappings"]["properties"], converted to the declared type.
    Lines without a parameter (blank, whitespace-only, comments, fewer than three
    tokens) are skipped.

    Args:
        directory (str): directory of the simulation
        prefix (str): prefix used for the files
        index_definition (dict): dictionary containing the mappings

    Returns:
        dict: a dictionary containing the info from the setup and .in files -- that we will use to make the new model name
        (!! check units, they are not all in SI or cgs)

    Raises:
        SystemExit: if prefix.setup or prefix.in does not exist (after printing an error).
        ValueError: if the setup describes a triple (icompstar == 2 with subst 11 or 12)
            but q2 / racc2a / racc2b needed to split the masses were not found.
    """
    import os
    import sys

    properties = index_definition["mappings"]["properties"]
    setup = {}
    checkTriple = False
    # Quantities needed for triples; kept out of the label renaming because the
    # masses first have to be recomputed from them
    q2 = racc2a = racc2b = None

    # load the prefix.setup file
    try:
        with open(os.path.join(directory, "%s.setup" % prefix), "r") as data:
            for line in data:
                parsed = _split_parameter_line(line)
                if parsed is None:
                    continue
                label, value = parsed

                if label == "q2":
                    q2 = float(value)
                elif label in ("racc2b", "accr2b"):
                    racc2b = float(value)
                elif label in ("racc2a", "accr2a"):  # for subst=12
                    racc2a = float(value)
                # parameters that we usually don't change
                elif label == "inclination" and float(value) > 0:
                    label = "incl"
                elif label == "icompanion_star" and float(value) != 1:
                    label = "icompstar"
                    checkTriple = True
                else:
                    label = _SETUP_ABBREVIATIONS.get(label, label)

                # Store variable with the type stored in the index definition
                _store_typed(setup, label, value, properties)

    except FileNotFoundError:
        print("")
        print(" ERROR: No %s.setup file found!" % prefix)
        print(directory)
        print("")
        sys.exit()

    if checkTriple and setup["icompstar"] == 2:
        if setup["subst"] == 11:
            _require_triple_values(prefix, q2=q2, racc2b=racc2b)
            # primary mass Mp is divided into m1 and m2, with Mp=m1+m2 and q=m2/m1, so m1=Mp/(1+q)
            setup["m1"] = np.round(float(setup["m1"] / (1 + q2)), 3)
            # tertiary mass is the original secondary
            setup["m3"] = setup["m2"]
            setup["racc3"] = setup["racc2"]
            # secondary mass is m1*q
            setup["m2"] = np.round(float(setup["m1"] * q2), 3)
            setup["racc2"] = racc2b
        elif setup["subst"] == 12:
            _require_triple_values(prefix, q2=q2, racc2a=racc2a, racc2b=racc2b)
            # primary mass is original primary mass, original secondary is divided into m2 and m3
            setup["m2"] = np.round(float(setup["m2"] / (1 + q2)), 3)
            # uses the already reduced m2, so m3 = q2 * m2
            setup["m3"] = np.round(float(setup["m2"] * q2), 3)
            setup["racc2"] = racc2a
            setup["racc3"] = racc2b

    # load the prefix.in file
    try:
        with open(os.path.join(directory, "%s.in" % prefix), "r") as data:
            for line in data:
                parsed = _split_parameter_line(line)
                if parsed is None:
                    continue
                label, value = parsed

                # Parameters we generally don't change: only recorded when they
                # differ from the reference values 0.05 / 0.25
                if label == "C_cool" and float(value) != 0.05:
                    label = "Ccool"
                    setup[label] = np.round(float(value), 3)
                elif label == "C_force" and float(value) != 0.25:
                    label = "Cforce"
                    setup[label] = np.round(float(value), 3)
                else:
                    # Change labels to make them easier to read for the human eye
                    label = _IN_ABBREVIATIONS.get(label, label)

                # Booleans
                # NOTE(review): only "F" is translated; a "T" value reaches the
                # conversion below unchanged -- confirm whether "T" -> 1 is wanted
                if value == "F":
                    value = 0

                # Store variable with the type stored in the index definition
                # (the mass-loss rate "mlr" is kept unrounded)
                _store_typed(setup, label, value, properties, unrounded=("mlr",))

    except FileNotFoundError:
        print("")
        print(" ERROR: No %s.in file found!" % prefix)
        print("")
        sys.exit()

    return setup


In [6]:
def makeModelDirDatabase(DIR,MODELS,PREFIX,DATABASEDIR):
    """Copy every model folder into the database under a name built from its metadata.

    For each entry of MODELS the metadata is read with makeModelName (using the
    module-level `index_definition`), the model name is built as
    key_value_key_value..., the directory DATABASEDIR + modelName is created when
    missing and the model folder is copied to it with `cp -r -p`.

    Args:
        DIR (str): directory the entries of MODELS are relative to; it is
            concatenated directly with each entry to form the copy source
        MODELS (iterable of str): model folders, relative to DIR
        PREFIX (str): prefix of the .setup / .in files of the models
        DATABASEDIR (str): destination directory; it is concatenated directly
            with the model name

    Returns:
        tuple: (modelNames, duplicates, modelDict) where modelNames is the set of
        generated names, duplicates maps a model folder to the earlier folder
        that produced the same name, and modelDict maps each name to the last
        folder that produced it.
    """
    import subprocess

    modelNames = set()
    modelDict = {}
    duplicates = {}
    for modelDir in MODELS:
        if 'modelsDatabase' in modelDir:
            # Model is already in the database, don't try to copy or move it again
            continue

        # Make the modelName: key_value pairs joined with underscores, without
        # leading or trailing underscores
        metaData = makeModelName(os.path.join(DIR, modelDir), PREFIX, index_definition)
        modelName = '_'.join(
            str(key) + '_' + str(metaData[key]) for key in metaData
        ).strip('_')

        # If there is a model with the same modelName, add it to a duplicates dictionary so you can check it
        if modelName in modelNames:
            duplicates[modelDir] = modelDict[modelName]

        # Duplicates are still registered and copied
        modelNames.add(modelName)
        modelDict[modelName] = modelDir

        # Create the folder at the very path that is checked and copied to
        # (previously the folder was created under a different, hard-coded path)
        target = DATABASEDIR + modelName
        if not os.path.isdir(target):
            os.makedirs(target)
            print('made directory',DATABASEDIR,modelName)

        print('Copy model',modelDir)
        # Argument list instead of a shell line, so names containing spaces or
        # shell metacharacters are passed through unchanged
        copied = subprocess.run(
            ["cp", "-r", "-p", DIR + modelDir + "/", target],
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        if copied.returncode != 0:
            # cp failures do not stop the loop, but they are reported
            print(copied.stderr)

    if len(duplicates)>0:
        print('Check the duplicates dictionary, it is not empty!')
    else:
        print('There are no duplicates :) ')
    return modelNames,duplicates,modelDict

In [7]:
def searchModels(loc, prefix, min_dumps=15):
    """Find all model directories below `loc`.

    A directory counts as a model when it contains a file whose name includes
    prefix + '.setup', a file whose name includes prefix + '.in', and more than
    `min_dumps` dump files. A dump file is a file whose name includes
    prefix + '_' and none of '.tmp', '.txt', '.png'.

    Args:
        loc (str): directory to search (walked recursively)
        prefix (str): prefix used for the model files
        min_dumps (int): a model needs strictly more dump files than this;
            models with fewer dumps can be thrown away

    Returns:
        list of str: sorted paths of the model directories with the leading
        `loc` removed
    """
    excluded = ('.tmp', '.txt', '.png')
    result = []
    for path, directories, files in os.walk(loc):
        dumpFiles = [
            name for name in files
            if prefix + '_' in name and not any(ext in name for ext in excluded)
        ]
        hasSetup = any(prefix + '.setup' in name for name in files)
        hasIn = any(prefix + '.in' in name for name in files)
        if len(dumpFiles) > min_dumps and hasSetup and hasIn:
            # Strip only the leading `loc`; replacing every occurrence would
            # mangle paths in which `loc` appears again further down
            relative = path[len(loc):] if path.startswith(loc) else path
            result.append(relative)
    return sorted(result)

In [8]:
# Prefix of the model files (PREFIX.in, PREFIX.setup, PREFIX_... dump files)
PREFIX = "wind"
# Directory where you want to look for models
DIR='/STER/silkem/'
# Directory where you want to copy or move your models to
DATABASEDIR = '/STER/hydroModels/modelsDatabase/'
# All models in DIR that have a PREFIX.in, PREFIX.setup and >15 PREFIX_... (dump) files
MODELS = searchModels(DIR, PREFIX)


In [None]:
# print(MODELS) #OG 940 - 816 - 709 - 400 with correct filter - 341 - 334 - 320 - 313 - 309 

In [10]:
# Copy (or move) the model folders found in DIR to the database directory
modelNames, duplicates, modelDict = makeModelDirDatabase(DIR, MODELS, PREFIX, DATABASEDIR)


Copy model ThesisMaster/Mastrodemos_test_full
Copy model ThesisMaster/phantom_Masterthesis/P40slow_Leen_20210402
Copy model ThesisMaster/phantom_Masterthesis/desktop_run20
Copy model ThesisMaster/phantom_Masterthesis/desktop_run21
Copy model ThesisMaster/phantom_Masterthesis/desktop_run22
Copy model ThesisMaster/phantom_Masterthesis/desktop_run23
Copy model ThesisMaster/phantom_Masterthesis/desktop_run24
Copy model ThesisMaster/phantom_Masterthesis/desktop_run25
Copy model ThesisMaster/phantom_Masterthesis/desktop_run26
Copy model ThesisMaster/phantom_Masterthesis/desktop_run27
Copy model ThesisMaster/phantom_Masterthesis/desktop_run28
Copy model ThesisMaster/phantom_Masterthesis/desktop_run29
Copy model ThesisMaster/phantom_Masterthesis/desktop_run30
Copy model ThesisMaster/phantom_Masterthesis/desktop_run31
Copy model ThesisMaster/phantom_Masterthesis/desktop_run32
Copy model ThesisMaster/phantom_Masterthesis/desktop_run33
Copy model ThesisMaster/phantom_Masterthesis/desktop_run34
Co

In [11]:
# Show which model folders produced a model name that another folder already had
print(duplicates)

{'ThesisMaster/phantom_Masterthesis/desktop_run37': 'ThesisMaster/phantom_Masterthesis/desktop_run31'}


In [35]:
# def fixDatabase(DIR,MODELS,PREFIX):
#     # modelNames=[]
#     i=1
#     modelNames=set()
#     modelDict={}
#     duplicates={}
#     for modelDir in MODELS:
       
#         metaData=makeModelName(os.path.join(DIR, modelDir),PREFIX,index_definition)
#         modelName =""
#         for key in metaData:
#             modelName+=(str(key)+'_'+str(metaData[key])+'_')
#         modelName=modelName.strip('_')
#             # continue


#         modelNames.add(modelName)
#         modelDict[modelName]=modelDir
        
#         # To copy and move files
#         !cd $DIR
#         if os.path.isdir(modelName)==False:
#             print('Directory ',modelName,' didnt exist?')
#             !mkdir $modelName
            
#         print('moving model ',modelDir)
#         print('to ',modelName)
#         !mv $DIR$modelDir/ $modelName
#         # print('model' ,i)
#         i+=1

#         # modelNames.append(modelName)

#     print('there are ',i,' different models')
#     return modelNames,duplicates,modelDict

In [46]:
# Because I broke something
# PREFIX = "wind"
# DIR='/STER/hydroModels/jolienm/modelsDatabase/'
# MODELSindatabase = searchModels(DIR,PREFIX)
# # print(MODELS)
# databaseNames,databaseDupl,databseDict=fixDatabase(DIR,MODELSindatabase,PREFIX)