In [None]:
import pickle
import itertools
import pandas as pd

# 1) Setup

In [None]:
# Grid of values to sweep over. Every entry is a list; the cells below take the
# Cartesian product of the lists within each group, so key order matters.

# Sampling-side settings.
sample_grid = {
    'task': ['var-one'],  # alternatives: ['var-one', 'kmmd-iso', 'mean-shift', 't-one', 'var-all']
    'v': [1.4],           # alternatives: [2.4, 1.6, 3, 1.4, 1.2]
    'd': [2, 4, 8, 16],
    'k': [1, 3],
    'nP': [512],
    'nQ': [512],
    #---
    's_seed': [1234],
}

# Optimisation-side settings.
optim_grid = {
    'optim': ['ADAM'],
    'lr': [0.5],
    'N': [2, 10],
    'tB': [100],
    'tI': [12],
    'lamb': [1],
    'optimizer': ['ADAM'],
    'device': ['cpu'],
    #---
    'o_seed': [1234],
    #---
    'rep_sample': [20],
    'rep_optim': [3],
    #---
    'smpl_type': ['box_2'],
}

config_dict = {'sample': sample_grid, 'optim': optim_grid}

In [None]:
# config_dict = {
#     'sample': {'task': ['var-one', 'kmmd-iso', 'mean-shift', 't-one', 'var-all'], 
#                'v': [2.4, 1.6, 3, 1.4, 1.2], 
#                'd': [2, 4, 8, 16],
#                'k': [0, 1, 2, 3],
#                'nP': [512],
#                'nQ': [512],
#                #---
#                's_seed': [1234]
#                },
#     'optim': {'optim': ['ADAM'],
#               'lr': [0.5], 
#               'N': [10],
#               'tB': [100],
#               'tI': [12],
#               'lamb': [1],
#               'optimizer': ['ADAM'],
#               'device': ['cpu'],
#               #---
#               'o_seed': [1234],
#               #---
#               'rep_sample': [100],
#               'rep_optim': [3],
#               #---
#               'smpl_type': ['box_2']
#              }
# }

In [None]:
# config_dict = {
#     'sample': {'task': ['var-one'],  #['var-one', 'kmmd-iso', 'mean-shift', 't-one', 'var-all'], 
#                'v': [1.4], #[2.4, 1.6, 3, 1.4, 1.2], 
#                'd': [2, 4, 8, 16],
#                'k': [1, 2, 3, 4],
#                'nP': [512],
#                'nQ': [512],
#                #---
#                's_seed': [1234]
#                },
#     'optim': {'optim': ['ADAM'],
#               'lr': [0.01, 0.5, 1, 5, 10], #[0.5],
#               'N': [1, 2, 5, 10],
#               'tB': [100],
#               'tI': [12],
#               'lamb': [1],
#               'optimizer': ['ADAM'],
#               'device': ['cpu'],
#               #---
#               'o_seed': [1234],
#               #---
#               'rep_sample': [20],
#               'rep_optim': [3],
#               #---
#               'smpl_type': ['box_2']
#              }
# }

## 1-1) Runtime estimation

In [None]:
# Rough runtime estimate for the whole grid, scaled up from one observed run.

# parallelization
cores = 36
one_rep_time = 130  # sec
rep_sample_adj = 5  # number of rep_sample used in the `one_rep_time` observation
rep_optim_adj = 3   # number of rep_optim used in the `one_rep_time` observation

# optim
# NOTE(review): 9 is hard-coded and is not derived from config_dict['optim'];
# confirm it still matches the number of optim settings being run.
setting = 9

# sample
# Multiplies in the number of values listed for every sample key. This counts
# combinations before the (task, v) filtering applied when the config table is
# generated, so it can overestimate when several tasks / v values are listed.
for values in config_dict['sample'].values():
    setting *= len(values)

# calculate
rep_sample = config_dict['optim']['rep_sample'][0]
rep_optim = config_dict['optim']['rep_optim'][0]
sec = one_rep_time * setting * (rep_sample / rep_sample_adj) * (rep_optim / rep_optim_adj)
h = sec / 3600
d = h / 24
print("Estimated:", d, "days.", end = " // ")

# Divide the unrounded totals by the core count and round each figure only for
# display; deriving hours from the already-rounded days loses precision.
h = round(h / cores, 3)
d = round(d / cores, 3)
print(f"With {cores} cores, {d} days = {h} hours.")

## 1-2) Generate `config_all`

In [None]:
# Build one DataFrame per config group ('sample', 'optim'). Each row is one
# combination of the values listed in config_dict[group], tagged with a
# '<group>_key' column holding its position in the full Cartesian product
# (so keys of filtered-out combinations are simply missing).

# Only this value of 'v' is kept for each task; tasks not listed here keep
# every 'v'.
required_v_by_task = {
    'var-one': 1.4,
    'var-all': 1.2,
    'mean-shift': 1.6,
    'kmmd-iso': 2.4,
    't-one': 3,
}

config_df_dict = {}
for t in ['sample', 'optim']:
    print(t)
    type_key = t+"_key"
    config = config_dict[t]
    keys = list(config.keys())

    # Rows are collected as plain dicts and turned into a DataFrame once after
    # the loop. Growing a frame with pd.concat per row is quadratic, forces
    # every column to object dtype, and concatenating onto an empty frame is
    # deprecated in recent pandas.
    records = []
    for idx, combination in enumerate(itertools.product(*(config[key] for key in keys))):
        one_config = dict(zip(keys, combination))
        one_config[type_key] = idx # Add sample_key or optim_key

        ### --- Manual adjustment -- Start (if you need any adjustment, change this part)--- ###
        if t == 'sample':
            if one_config['nP'] != one_config['nQ']:
                continue

            # control v
            required_v = required_v_by_task.get(one_config['task'])
            if required_v is not None and one_config['v'] != required_v:
                continue

            print(one_config['task'], one_config['v'])
        ### --- Manual adjustment -- End --- ###

        records.append(one_config)

    # Key column first, then the config columns in their declared order. The
    # row labels are the combination indices, as before. Passing `columns`
    # explicitly keeps the frame well-formed even when every combination was
    # filtered out.
    output = pd.DataFrame(
        records,
        index=[record[type_key] for record in records],
        columns=[type_key] + keys,
    )
    output[type_key] = output[type_key].astype(int)

    # Assign to 'config_df_dict'
    config_df_dict[t] = output
    print(output.shape)
    print(output, "\n")

In [None]:
# Pair every sample configuration with every optim configuration.
sample_configs = config_df_dict['sample']
optim_configs = config_df_dict['optim']
config_all = sample_configs.join(optim_configs, how = 'cross')
config_all

In [None]:
# For k == 0, keep only the rows with lr == 0.5 and N == 10; rows with any
# other k are kept unconditionally.
mask_k      = config_all['k'].eq(0)
mask_other  = config_all['lr'].ne(0.5) | config_all['N'].ne(10)
mask_for_k0 = ~mask_k | ~mask_other
config_all  = config_all.loc[mask_for_k0]

config_all

## 1-3) Generate `array_dict`

In [None]:
# Each distinct combination of these columns found in config_all becomes one
# row of array_dict.
# change this based on how many parallelization cores you want
group_by_list = ['N', 'k', 'd']
#---
grouped = config_all.groupby(group_by_list).size().reset_index()
array_dict = grouped.loc[:, group_by_list]
array_dict

# 2) Save into two files

In [None]:
# Write the full configuration table to disk, then read it back for display.
file_name = "config_all.csv"
config_all.to_csv(file_name, index=False)
retrieve = pd.read_csv(file_name)
retrieve

In [None]:
# Write the array table to disk, then read it back for display.
# `retrieve` is rebound here, so from this point on it holds the array table.
file_name = "array_dict.csv"
array_dict.to_csv(file_name, index=False)
retrieve = pd.read_csv(file_name)
retrieve

## 2-1) Sanity check (optional)

In [None]:
# Column names of the table read back above, joined with underscores.
"_".join(list(retrieve.columns))

In [None]:
# Take the first row of `retrieve` and keep only the rows of config_all whose
# values match that row in every one of its columns.
array_this_time = retrieve.loc[[0]]
print(array_this_time)
array_keys = list(array_this_time.columns)
ARRAY_KEYS = "_".join(array_keys) # will be used at the end (saving file)

config_this_time = config_all
for key in array_keys:
    wanted = array_this_time[key].item()
    mask = config_this_time[key] == wanted
    config_this_time = config_this_time.loc[mask]

In [None]:
# Display the configurations selected by the check above.
config_this_time