In [75]:
import numpy as np
import pandas as pd
import random2
import os #os의 경우 기본적으로 주어지기 때문에 setup.py에 하지 않는다.

## data

In [76]:
# Make the relative sample-data paths below resolve next to this file.
# __file__ only exists when the code runs as a script/module (the published
# form); it is normally undefined inside a notebook kernel, so fall back to the
# current working directory there instead of failing with NameError.
try:
    current_directory = os.path.dirname(os.path.abspath(__file__))
except NameError:
    current_directory = os.getcwd()
os.chdir(current_directory)

# Table of stored simulation runs that simple_Simulation() samples from.
path = "./sampleData/concatenated_df.csv"
simul_data = pd.read_csv(path)

# O1..O3 text files, read from the same sample-data folder.
oPath = "./sampleData/"
O1 = pd.read_csv(oPath + 'O1.txt')
O2 = pd.read_csv(oPath + 'O2.txt')
O3 = pd.read_csv(oPath + 'O3.txt')

## simulation code

In [83]:
def simple_Simulation(p1: int, p2: int, p3: int, n: int = 10) -> pd.DataFrame:
    '''
    Draw ``n`` stored simulation runs for one parameter combination.

    Rows of the module-level ``simul_data`` frame whose ``p1``, ``p2`` and
    ``p3`` columns equal the given values are selected. For each draw one
    ``uniq_num`` is picked at random (with replacement) from that selection,
    and all rows sharing it are collapsed into a single output row.

    Parameters
    ----------
    p1 : parameter 1. range: 1 to 5
    p2 : parameter 2. range: 1 to 5
    p3 : parameter 3. range: 1 to 5
    n : the number of simulation runs to draw (default 10)

    Returns
    -------
    DataFrame
        One row per draw with columns ``p1``, ``p2``, ``p3`` (taken from the
        chosen rows) and ``y1``, ``y2``, ``y3`` (each cell is the list of that
        run's values sorted in ascending order). ``n = 0`` yields an empty
        frame with the same columns.

    Raises
    ------
    IndexError
        If no row of ``simul_data`` matches the requested parameters.

    Examples
    --------
    >>> simple_Simulation(p1 = 1, p2 = 3, p3 = 2, n = 11)
    '''
    # simul_data is only read here, so no ``global`` declaration is needed.
    condition = (simul_data['p1'] == p1) & (simul_data['p2'] == p2) & (simul_data['p3'] == p3)
    filtered_df = simul_data[condition]

    # The candidate run ids do not change between draws; compute them once.
    run_ids = pd.unique(filtered_df['uniq_num'])
    if len(run_ids) == 0:
        # Kept as IndexError so callers that handled the failure of choosing
        # from an empty sequence keep working, but with a usable message.
        raise IndexError(
            'no simulation data for p1={}, p2={}, p3={}'.format(p1, p2, p3)
        )

    records = []
    for _ in range(n):
        uniq_num = random2.choice(run_ids)
        chosen_df = filtered_df[filtered_df['uniq_num'] == uniq_num]

        # One record per draw; the y-lists are sorted once, right here.
        records.append({
            'p1': chosen_df['p1'].iloc[0],
            'p2': chosen_df['p2'].iloc[0],
            'p3': chosen_df['p3'].iloc[0],
            'y1': sorted(chosen_df['y1'].tolist()),
            'y2': sorted(chosen_df['y2'].tolist()),
            'y3': sorted(chosen_df['y3'].tolist()),
        })

    # Build the frame in one go instead of concatenating n one-row frames;
    # passing the columns keeps the layout stable even when n is 0.
    return pd.DataFrame(records, columns=['p1', 'p2', 'p3', 'y1', 'y2', 'y3'])

## 1) Preprocessing (1) - Determine a criterion for calibration

In [90]:
# run multiple simulations

def multiple_simple_simulation(p1_list, p2_list, p3_list, M = 150, u = 0.1, k = 3):
    '''
    Run the simple simulation at randomly drawn parameter combinations.

    The simulation is executed ``M * (2 * k + 2)`` times. Before every run one
    value is drawn at random from each candidate list, and a single run
    (``n = 1``) of ``simple_Simulation`` is made with that combination.

    Parameters
    ----------
    p1_list: candidate values for parameter 1
    p2_list: candidate values for parameter 2
    p3_list: candidate values for parameter 3
    M: MonteCarlo index (default: 150, too low: low accuracy, too high: computational intensity)
    u: leniency index (default: 0.1, too low: overfit, too high: uncertainty);
       accepted but not used inside this function
    k: the number of parameters (default: 3); only enters the run count

    Returns
    -------
    DataFrame
        The rows returned by every ``simple_Simulation`` call, stacked in run
        order and re-indexed from 0.

    Examples
    --------
    >>> multi_simul_df = multiple_simple_simulation(p1_list, p2_list, p3_list, M = 150, u = 0.1, k = 3)
    '''
    total_runs = M * (2 * k + 2)  # 1200 with the default M and k
    candidate_lists = (p1_list, p2_list, p3_list)

    run_frames = []
    for _ in range(total_runs):
        # draw one value per parameter, in p1, p2, p3 order
        drawn = [random2.choice(candidates) for candidates in candidate_lists]

        # run the model once for this combination and keep its result
        run_frames.append(
            simple_Simulation(p1 = drawn[0], p2 = drawn[1], p3 = drawn[2], n = 1)
        )

    return pd.concat(run_frames, axis=0, ignore_index=True)

In [None]:
# Preprocessing (1): determining a criterion for calibration



In [89]:
# User step:
# run the simulation at randomly drawn parameter combinations.

# candidate values for each of the three parameters
p1_list = list(range(1, 6))
p2_list = list(range(1, 6))
p3_list = list(range(1, 6))


# ---  run simulations for M(2k+2) times ---
multi_simul_df = multiple_simple_simulation(
    p1_list,
    p2_list,
    p3_list,
    M = 150,  # set hyper parameters
    u = 0.1,
    k = 3,
)



In [91]:
# Display the stacked results returned by multiple_simple_simulation().
multi_simul_df

Unnamed: 0,p1,p2,p3,y1,y2,y3
0,2,3,2,"[37.9, 85.2, 371.8, 409.3, 420.9, 484.7, 513.5...","[30.0, 60.0, 109.0, 112.0, 158.0, 178.0, 180.0...","[7.0, 10.0, 11.0, 13.0, 13.0, 15.0, 15.0, 16.0..."
1,4,2,3,"[21.9, 44.3, 56.3, 83.0, 110.3, 152.4, 175.7, ...","[15.0, 22.0, 23.0, 66.0, 91.0, 95.0, 111.0, 11...","[4.0, 5.0, 6.0, 9.0, 10.0, 10.0, 12.0, 13.0, 1..."
2,5,5,3,"[36.8, 40.6, 44.6, 79.4, 101.6, 139.0, 145.5, ...","[29.0, 33.0, 61.0, 65.0, 88.0, 127.0, 131.0, 1...","[6.0, 8.0, 8.0, 10.0, 11.0, 11.0, 12.0, 13.0, ..."
3,4,2,2,"[17.2, 23.8, 27.0, 27.3, 38.6, 72.9, 96.4, 120...","[4.0, 8.0, 15.0, 17.0, 21.0, 46.0, 65.0, 100.0...","[7.0, 8.0, 8.0, 8.0, 9.0, 10.0, 10.0, 13.0, 13..."
4,3,5,2,"[12.0, 13.49134262, 70.56986831, 125.9689488, ...","[5.2, 62.13213842, 73.97208761, 90.4286195, 12...","[5.281409106, 5.9, 16.43369918, 19.22397063, 1..."
...,...,...,...,...,...,...
1195,2,4,1,"[207.4, 271.7, 307.0, 537.9, 747.3, 829.1, 940...","[38.0, 58.0, 111.0, 156.0, 157.0, 179.0, 179.0...","[15.0, 18.0, 18.0, 20.0, 20.0, 20.0, 21.0, 21...."
1196,1,1,4,"[7.5, 8.3, 19.6, 25.0, 26.3, 29.2, 33.5, 34.3,...","[9.0, 14.0, 20.0, 21.0, 24.0, 28.0, 29.0, 30.0...","[2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 7.0, 8.0, 8.0, ..."
1197,1,2,4,"[1.5, 10.4, 11.1, 13.9, 15.4, 20.0, 21.7, 30.4...","[3.0, 11.0, 13.0, 14.0, 16.0, 17.0, 21.0, 22.0...","[1.0, 3.0, 3.0, 5.0, 6.0, 6.0, 6.0, 6.0, 8.0, ..."
1198,2,1,2,"[50.9, 75.7, 144.4, 203.1, 231.5, 232.3, 358.8...","[40.0, 44.0, 61.0, 63.0, 80.0, 125.0, 137.0, 1...","[7.0, 12.0, 13.0, 14.0, 14.0, 14.0, 14.0, 15.0..."


In [84]:


# Hyper-parameters for this cell. They are defined here so the loop does not
# depend on values left over in the kernel; 150 and 3 give the 1200 runs noted
# on the loop below and match the multiple_simple_simulation() call above.
M = 150  # Monte Carlo index
k = 3    # number of parameters

# list for saving the result frame of every run
prep1_dfs = []

for i in range(M*(2*k + 2)): # 150 * (2*3 + 2) = 1200 times
    # set parameter space: draw one value per parameter
    p_1 = random2.choice(p1_list)
    p_2 = random2.choice(p2_list)
    p_3 = random2.choice(p3_list)

    # run model once for the drawn combination and save
    tem_prep1_data = simple_Simulation(p1 = p_1, p2 = p_2, p3 = p_3, n = 1)
    
    # append temporal result to list
    prep1_dfs.append(tem_prep1_data)
    
# stack all single-run frames and re-index from 0
result_df = pd.concat(prep1_dfs, axis=0, ignore_index=True)


In [85]:
# Display the stacked results built by the loop in the previous cell.
result_df

Unnamed: 0,p1,p2,p3,y1,y2,y3
0,2,3,3,"[20.9, 22.5, 25.1, 38.2, 100.1, 122.9, 128.4, ...","[11.0, 20.0, 20.0, 24.0, 50.0, 64.0, 78.0, 105...","[6.0, 7.0, 7.0, 8.0, 8.0, 9.0, 10.0, 11.0, 11...."
1,5,1,2,"[149.5, 189.9, 222.4, 230.9, 268.8, 277.7, 297...","[66.0, 69.0, 73.0, 78.0, 79.0, 80.0, 86.0, 109...","[12.0, 14.0, 15.0, 16.0, 16.0, 16.0, 16.0, 17...."
2,1,5,5,"[0.0, 11.2, 12.8, 13.3, 14.8, 16.4, 24.0, 26.5...","[0.0, 16.0, 16.0, 17.0, 17.0, 19.0, 30.0, 31.0...","[0.0, 3.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, ..."
3,5,4,1,"[53.8, 56.1, 176.3, 274.9, 416.7, 481.4, 526.9...","[27.0, 28.0, 61.0, 99.0, 112.0, 148.0, 227.0, ...","[6.0, 7.0, 13.0, 14.0, 17.0, 17.0, 22.0, 23.0,..."
4,4,2,2,"[29.6, 58.0, 114.1, 127.7, 130.2, 139.0, 179.4...","[28.0, 38.0, 55.0, 61.0, 62.0, 95.0, 106.0, 11...","[7.0, 9.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0,..."
...,...,...,...,...,...,...
1195,3,3,5,"[17.3, 22.6, 23.1, 27.3, 33.3, 40.9, 45.3, 45....","[18.0, 19.0, 19.0, 21.0, 26.0, 26.0, 35.0, 36....","[5.0, 6.0, 6.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, ..."
1196,3,1,4,"[3.6, 4.0, 11.9, 17.0, 18.8, 19.1, 21.1, 24.4,...","[4.0, 4.0, 9.0, 11.0, 19.0, 21.0, 21.0, 21.0, ...","[1.0, 2.0, 5.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, ..."
1197,5,5,5,"[92.1, 102.3, 154.9, 202.8, 261.0, 286.3, 310....","[65.0, 82.0, 84.0, 86.0, 94.0, 97.0, 148.0, 14...","[13.0, 16.0, 17.0, 19.0, 20.0, 23.0, 23.0, 23...."
1198,4,5,4,"[23.0, 47.6, 50.5, 51.0, 77.3, 91.4, 112.8, 11...","[25.0, 44.0, 44.0, 46.0, 78.0, 85.0, 98.0, 116...","[4.0, 6.0, 7.0, 7.0, 9.0, 9.0, 9.0, 10.0, 11.0..."


In [78]:
# Add a 'uniq' key column: the p1, p2 and p3 values written side by side as
# text and then read back as one integer (e.g. 1, 2, 3 -> 123).
simul_data['uniq'] = (
    simul_data['p1'].astype(str)
    + simul_data['p2'].astype(str)
    + simul_data['p3'].astype(str)
).astype(int)

In [71]:
# NOTE(review): all_list is not defined in any visible cell - presumably left
# over from an earlier kernel session; define it before this line or drop it.
all_list[0]

111