In [8]:
import ast
import numpy as np
import pandas as pd
import scipy.stats as stats


class DataGenerator:
    def __init__(self,
                 block_data_path,
                 bay_data_path,
                 num_blocks=50,
                 time_horizon=30,
                 iat_avg=0.1,
                 buffer_avg=1.5,
                 weight_factor=0.7,
                 fix_time_horizon=False):

        self.block_data_path = block_data_path
        self.bay_data_path = bay_data_path
        self.num_blocks = num_blocks
        self.time_horizon = time_horizon
        self.iat_avg = iat_avg
        self.buffer_avg = buffer_avg
        self.weight_factor = weight_factor
        self.fix_time_horizon = fix_time_horizon

        self.df_bay = pd.read_excel(bay_data_path, sheet_name="bays", engine="openpyxl")
        self.df_count = pd.read_excel(block_data_path, sheet_name="count", engine="openpyxl") # 그룹 개수에 대한 데이터프레임
        self.df_length = pd.read_excel(block_data_path, sheet_name="length", engine="openpyxl")  # 그룹 별 블록 길이 분포에 대한 데이터프레임
        self.df_breadth = pd.read_excel(block_data_path, sheet_name="breadth", engine="openpyxl")  # 그룹 별 블록 폭 분포에 대한 데이터프레임
        self.df_height = pd.read_excel(block_data_path, sheet_name="height", engine="openpyxl")  # 그룹 별 블록 높이 분포에 대한 데이터프레임
        self.df_weight =  pd.read_excel(block_data_path, sheet_name="weight", engine="openpyxl") # 그룹 별 중량 모델에 대한 데이터프레임
        self.df_h1 = pd.read_excel(block_data_path, sheet_name="h1", engine="openpyxl") # 그룹 별 H01 모델에 대한 데이터프레임
        self.df_h2 = pd.read_excel(block_data_path, sheet_name="h2", engine="openpyxl") # 그룹 별 H02 모델에 대한 데이터프레임
        self.df_duration = pd.read_excel(block_data_path, sheet_name="duration", engine="openpyxl") # 그룹 별 duration 모델에 대한 데이터프레임
        self.df_sample = pd.read_excel(block_data_path, sheet_name="sample", engine="openpyxl")

    def generate_group(self):  # 그룹을 선택한 후 선종과 블록 종류로 나누기 위한 함수
        # 그룹을 랜덤으로 선택->선종과 블록 타입으로 분리
        group_code = np.random.choice(self.df_count['group'], p=self.df_count['proportion'])
        ship_type = group_code[0:2]
        block_type = group_code[-1]

        return group_code, ship_type, block_type

    def generate_process(self, group_code):         # 공종 명칭 생성 함수, 공정이 나오는 비율에 맞춰서 데이터 생성
        df_process_count = self.df_count[self.df_count['group'] == group_code]

        # 각 count 값을 올바르게 추출
        count = df_process_count['count'].values[0]
        panel_proportion = df_process_count['panel_count'].values[0] / count
        curve_proportion = df_process_count['curve_count'].values[0] / count
        big_proportion = df_process_count['big_count'].values[0] / count
        final_proportion = df_process_count['final_count'].values[0] / count

        proportion_list = [panel_proportion, curve_proportion, big_proportion, final_proportion]
        process_type = np.random.choice(['평중조', '곡중조', '대조중조', 'Final조립'], p=proportion_list)

        return process_type

    def generate_property(self, group_code, process_type, property='L'):
        if property == 'L':
            df_property = self.df_length.copy(deep=False)
        elif property == 'B':
            df_property = self.df_breadth.copy(deep=False)
        elif property == 'H':
            df_property = self.df_height.copy(deep=False)
        else:
            raise Exception("Invalid property")

        df_property['best_params'] = df_property['best_params'].apply(ast.literal_eval)
        df_property = df_property[(df_property['group'] == group_code)
                                  & (df_property['process_type'] == process_type)]
        best_distribution_name = df_property['best_distribution_name'].values[0]
        best_params = df_property['best_params'].values[0]
        min_value = df_property['min'].values[0]
        max_value = df_property['max'].values[0]

        if best_distribution_name == 'cauchy':
            property_value = stats.cauchy.rvs(*best_params)
        elif best_distribution_name == 'expon':
            property_value = stats.expon.rvs(*best_params)
        elif best_distribution_name == 'gamma':
            property_value = stats.gamma.rvs(*best_params)
        elif best_distribution_name == 'norm':
            property_value = stats.norm.rvs(*best_params)
        elif best_distribution_name == 'exponpow':
            property_value = stats.exponpow.rvs(*best_params)
        elif best_distribution_name == 'lognorm':
            property_value = stats.lognorm.rvs(*best_params)
        elif best_distribution_name == 'powerlaw':
            property_value = stats.powerlaw.rvs(*best_params)
        elif best_distribution_name == 'reyleigh':
            property_value = stats.reyleigh.rvs(*best_params)
        elif best_distribution_name == 'uniform':
            property_value = stats.uniform.rvs(*best_params)
        else:
            property_value = 0
            # raise Exception("Invalid distriution")

        if property_value > max_value:
            property_value = max_value
        elif property_value < min_value:
            property_value = min_value

        property_value = np.floor(property_value * 10) / 10

        return property_value

    def generate_weight(self, group_code, process_type, length, breadth, height):
        if group_code not in ['CN_T', 'LN_D', 'VL_D']:
            df_weight = self.df_weight[self.df_weight['group'] == group_code]
        else:
            if group_code == 'CN_T': # CN_T: CN_D의 모델 사용
                df_weight = self.df_weight[self.df_weight['group'] == 'CN_D']
            elif group_code == 'LN_D': # LN_D: LN_E의 모델 사용
                df_weight = self.df_weight[self.df_weight['group'] == 'LN_E']
            elif group_code == 'VL_D': # VL_D: VL_B의 모델 사용
                df_weight = self.df_weight[self.df_weight['group'] == 'VL_B']

        reg_coef = df_weight['coef'].values[0]
        noise = df_weight['std'].values[0]
        min_value = df_weight['min'].values[0]
        max_value = df_weight['max'].values[0]

        volume = length * breadth * height

        if process_type == 'Final조립':
            weight = reg_coef * volume + np.random.normal(0, noise)
        else: # 중조 무게 피팅
            weight = reg_coef * volume * self.weight_factor + np.random.normal(0, noise)

        if weight < min_value:
            weight = min_value
        elif weight > max_value:
            weight = max_value

        weight = np.int64(weight)

        return weight

    def generate_workload_h1(self, group_code, length, breadth):
        df_h1 = self.df_h1[self.df_h1['group'] == group_code]

        reg_coef = [df_h1['coef_0'].values[0],
                    df_h1['coef_1'].values[0],
                    df_h1['coef_2'].values[0]]
        noise = df_h1['std'].values[0]
        min_value = df_h1['min'].values[0]

        workload_h1 = (reg_coef[0] * length + reg_coef[1] * breadth
                       + reg_coef[2] * (length * breadth) + np.random.normal(0, noise))

        if workload_h1 < min_value:
            workload_h1 = min_value

        workload_h1 = np.int64(workload_h1)

        return workload_h1

    def generate_workload_h2(self, group_code, workload_h1):
        df_h2 = self.df_h2[self.df_h2['group'] == group_code]

        reg_coef = df_h2['coef'].values[0]
        noise = df_h2['std'].values[0]
        min_value = df_h2['min'].values[0]
        max_value = df_h2['max'].values[0]

        workload_h2 = reg_coef * workload_h1 + np.random.normal(0, noise)

        if workload_h2 < min_value:
            workload_h2 = min_value
        elif workload_h2 > max_value:
            workload_h2 = max_value

        workload_h2 = np.int64(workload_h2)

        return workload_h2

    def generate_duration(self, group_code, workload_h1, workload_h2, weight):
        df_duration = self.df_duration[self.df_duration['group'] == group_code]

        reg_coef = [df_duration['coef_0'].values[0],
                    df_duration['coef_1'].values[0],
                    df_duration['coef_2'].values[0]]
        noise = df_duration['std'].values[0]
        min_value = df_duration['min'].values[0]

        duration = (reg_coef[0] * workload_h1 + reg_coef[1] * workload_h2
                    + reg_coef[2] * weight + np.random.normal(0, noise))

        if duration < min_value:
            duration = min_value

        duration = np.int64(duration)

        return duration

    def calculate_buffer(self, process_type):  # column에 들어가는 값은 아님
        if process_type == 'Final조립':
            buffer = 2
        else:
            p = 1 / (1 + self.buffer_avg)
            buffer = stats.geom.rvs(p) - 1

        return buffer

    def check_eligibility(self, group_code, breadth, height, weight):
        df_eligible_bay = self.df_bay[(breadth <= self.df_bay["block_breadth"]) &
                                      (height <= self.df_bay["block_height"]) &
                                      (weight <= self.df_bay["block_weight"])]

        if len(df_eligible_bay) == 0:
            df_possible_bay = self.df_breadth.copy()
            df_possible_bay['bay'] = df_possible_bay['bay'].apply(ast.literal_eval)
            df_possible_bay = df_possible_bay[df_possible_bay['group'] == group_code]

            possible_properties = []
            for bay in df_possible_bay['bay'].values[0]:
                bay_breadth = self.df_bay[self.df_bay['bay_name'] == bay]['block_breadth'].values[0]
                bay_height = self.df_bay[self.df_bay['bay_name'] == bay]['block_height'].values[0]
                bay_properties = (bay_breadth, bay_height)
                possible_properties.append(bay_properties)

            idx = np.random.choice(len(possible_properties))
            possible_property = possible_properties[idx]
            breadth = possible_property[0]
            height = possible_property[1]

            df_weight = self.df_bay["block_weight"][(breadth <= self.df_bay["block_breadth"]) &
                                                    (height <= self.df_bay["block_height"])]

            weight = df_weight.max()

        return breadth, height, weight

    def generate(self, file_path=None):
        columns = ["block_name", "block_id", "ship_type", "block_type", "process_type",
                   "length", "breadth", "height", "weight", "workload_h1", "workload_h2",
                   "start_date", "duration", "due_date", "pre_buffer", "post_buffer"]

        df_blocks = []

        num_blocks = 0
        start_date = 0

        while True:
            if self.fix_time_horizon:
                if start_date >= self.time_horizon:
                    flag = True
                    del df_blocks[-1]
                else:
                    flag = False
            else:
                if num_blocks == self.num_blocks:
                    flag = True
                else:
                    flag = False

            if flag:
                break

            name = "J-%d" % num_blocks
            id = num_blocks

            # 데이터 생성
            group_code, ship_type, block_type = self.generate_group()
            process_type = self.generate_process(group_code)

            if num_blocks == 0:
                start_date = 0  # 첫번째 착수일은 0으로 고정
            else:
                p = 1 / (1 + self.iat_avg)
                start_date += stats.geom.rvs(p) - 1  # 이전 착수일에 interval을 더하는 형식으로 계산

            if group_code not in ['BC_A', 'BC_S', 'PT_D', 'PT_L', 'PT_R',
                                  'VL_A', 'VL_B', 'VL_D', 'VL_E', 'VL_F', 'VL_S']:

                length = self.generate_property(group_code, process_type, 'L')
                breadth = self.generate_property(group_code, process_type, 'B')
                height = self.generate_property(group_code, process_type, 'H')

                weight = self.generate_weight(group_code, process_type, length, breadth, height)

                breadth, height, weight = self.check_eligibility(group_code, breadth, height, weight)

                workload_h1 = self.generate_workload_h1(group_code, length, breadth)
                workload_h2 = self.generate_workload_h2(group_code, workload_h1)

                duration = self.generate_duration(group_code, workload_h1, workload_h2, weight)

            else: # 샘플링된 그룹에 대한 처리, 한 행의 데이터를 그대로 가져오는 식으로 구현
                df_sample = self.df_sample[self.df_sample['group'] == group_code]
                df_sample = df_sample[df_sample['process_type'] == process_type]
                df_sample.reset_index(inplace=True)
                idx = np.random.choice(range(df_sample.shape[0]))

                length = df_sample.loc[idx, 'length']
                breadth = df_sample.loc[idx, 'breadth']
                height = df_sample.loc[idx, 'height']

                if process_type == 'Final조립':
                    weight = df_sample.loc[idx, 'weight']
                else:
                    weight = self.generate_weight(group_code, process_type, length, breadth, height)

                workload_h1 = df_sample.loc[idx, 'workload_h1']
                workload_h2 = df_sample.loc[idx, 'workload_h2']

                duration = df_sample.loc[idx, 'duration']

            pre_buffer = 5
            post_buffer = self.calculate_buffer(process_type)

            due_date = start_date + duration + post_buffer - 1

            row = [name, id, ship_type, block_type, process_type,
                   length, breadth, height, weight, workload_h1, workload_h2,
                   start_date, duration, due_date, pre_buffer, post_buffer]

            df_blocks.append(row)
            num_blocks += 1

        df_blocks = pd.DataFrame(df_blocks, columns=columns)

        if file_path is not None:
            writer = pd.ExcelWriter(file_path)
            df_blocks.to_excel(writer, sheet_name="blocks", index=False)
            writer.close()

        return df_blocks


if __name__ == '__main__':
    import os

    # validation data generation
    block_data_path = "../input/configurations/block_data.xlsx"
    bay_data_path = "../input/configurations/bay_data.xlsx"
    # num_blocks = 50
    time_horizon = 30

    data_src = DataGenerator(block_data_path,
                             bay_data_path,
                             time_horizon=time_horizon,
                             fix_time_horizon=True)

    file_dir = "../input/validation/"
    if not os.path.exists(file_dir):
        os.makedirs(file_dir)

    iteration = 20
    for i in range(1, iteration + 1):
        file_path = file_dir + "instance-{0}.xlsx".format(i)
        df_blocks = data_src.generate(file_path)

In [92]:
block_data_path = "../input/configurations/block_data.xlsx"
bay_data_path = "../input/configurations/bay_data.xlsx"
data_gen = DataGenerator(block_data_path, bay_data_path)
weight_ex = data_gen.generate_weight('CN_S', 'Final조립', 17.4, 22.5, 10.7)

In [93]:
data_gen.df_length

Unnamed: 0,group,process_type,best_distribution_name,best_params,min,max,bay
0,BC_A,Final조립,chi2,"(1.1039935775373761, 16.499999999999996, 0.795...",16.5,16.5,"('M033', 'M031')"
1,BC_A,대조중조,chi2,"(1.0339940476928495, 17.999999999999996, 0.962...",18.0,18.0,"('M032',)"
2,BC_B,Final조립,chi2,"(1.0339996837908543, 18.499999999999996, 0.962...",18.5,18.5,"('M025-2',)"
3,BC_B,곡중조,exponpow,"(0.15267893342443767, 8.999999999999998, 1.645...",9.0,20.0,"('M222', 'M012', 'M011', 'M023', 'M221')"
4,BC_E,Final조립,chi2,"(0.8591294242278575, 10.499999999999998, 1.164...",10.5,21.0,"('M212', 'M211', 'M031')"
...,...,...,...,...,...,...,...
121,VL_F,Final조립,powerlaw,"(0.13762224637431697, 9.499999999999998, 6.000...",9.5,15.5,"('M032', 'M025-2', 'M033', 'M031')"
122,VL_F,곡중조,chi2,"(1.103993593407678, 6.999999999999999, 0.79518...",7.0,7.0,"('M022', 'M011', 'M012')"
123,VL_F,대조중조,chi2,"(1.1039935968961743, 17.499999999999996, 0.795...",17.5,17.5,"('M032', 'M031')"
124,VL_S,Final조립,gamma,"(0.27551004048270705, 17.499999999999996, 1.34...",17.5,20.0,"('M033', 'M025-2', 'M025-1', 'M031')"


In [48]:
weight_ex

234

In [16]:
H01_ex = data_gen.generate_workload_h1('CN_S', 21.5, 22.5)
H02_ex = data_gen.generate_workload_h2('CN_S', H01_ex)

In [17]:
H01_ex

1540

In [18]:
H02_ex

1556

In [19]:
duration_ex = data_gen.generate_duration('CN_S', H01_ex, H02_ex, weight_ex)

In [20]:
duration_ex

14

In [33]:
column = ['iat_avg', 'buffer_avg', 'num_instance',
          'block_max_avg', 'area_max_avg', 'workload_h1_max_avg', 'workload_h2_max_avg',
          'block_avg_avg', 'area_avg_avg', 'workload_h1_avg_avg', 'workload_h2_avg_avg']

df_capacity = []

buffer_avg_list = []

for i in range(1, 16):
    buffer_avg = i * 0.1
    buffer_avg = np.floor(buffer_avg * 10) / 10
    buffer_avg_list.append(buffer_avg)

for buffer in buffer_avg_list:
    iat = 0.1
    file_path = f'../data/validation_result/validation_result_iat{iat}_buffer{buffer}.xlsx'
    validation_df = pd.read_excel(file_path)


    block_max_avg = validation_df['block_max'].mean()
    area_max_avg = validation_df['area_max'].mean()
    workload_h1_max_avg = validation_df['workload_h1_max'].mean()
    workload_h2_max_avg = validation_df['workload_h2_max'].mean()

    block_avg_avg = validation_df['block_avg'].mean()
    area_avg_avg = validation_df['area_avg'].mean()
    workload_h1_avg_avg = validation_df['workload_h1_avg'].mean()
    workload_h2_avg_avg = validation_df['workload_h2_avg'].mean()

    row = [iat, buffer, 20,
           block_max_avg, area_max_avg, workload_h1_max_avg, workload_h2_max_avg,
           block_avg_avg, area_avg_avg, workload_h1_avg_avg, workload_h2_avg_avg]

    df_capacity.append(row)

df_capacity = pd.DataFrame(df_capacity, columns=column)
df_capacity.to_excel('../data/validation_result/avg_result/validation_iat_0.1_buffer.xlsx', index=False)

In [49]:
breadth = 22.5
height = 10.7
df_weight = data_gen.df_bay["block_weight"][(breadth <= data_gen.df_bay["block_breadth"]) &
                                                    (height <= data_gen.df_bay["block_height"])]

In [51]:
data_gen.check_eligibility(breadth, height, weight_ex)

(22.5, 10.7, nan)

In [50]:
df_weight

Series([], Name: block_weight, dtype: int64)

In [34]:
data_gen.df_bay

Unnamed: 0,bay_name,bay_id,team_name,capacity_h1,capacity_h2,bay_breadth,bay_length,block_breadth,block_height,block_weight,block_turnover_weight
0,M012,0,A03+A04,1900.0,1300,22,40,22.0,6.5,130,50
1,M011,1,B07,1700.0,1800,22,50,22.0,6.5,130,50
2,M010,2,A11,8200.0,12800,25,180,20.5,100.0,52,500
3,M021,3,B04,4500.0,3500,16,18,39.0,8.0,90,30
4,M022,4,A01+A02,900.0,900,17,80,39.0,8.0,70,30
5,M023,5,B04,1300.0,1200,17,35,39.0,8.0,70,30
6,M024,6,B04,1200.0,1300,17,30,39.0,8.0,313,200
7,M025-1,7,A05+B06,1700.0,1900,18,90,39.0,8.0,313,200
8,M025-2,8,B01,1799.85,1900,18,90,39.0,8.0,313,200
9,M031,9,B05,1200.0,1200,23,64,25.2,10.5,436,200


In [35]:
df_bay = data_gen.df_bay

bay_area_sum = 0

for i in range(df_bay.shape[0]):
    bay_area = df_bay.loc[i, 'bay_breadth'] * df_bay.loc[i, 'bay_length']
    bay_area_sum += bay_area

In [36]:
file_path = '../data/validation_result/avg_result/validation_avg_result.xlsx'
df_iat_fix = pd.read_excel(file_path, sheet_name='iat_0.1_fix')
area_max_proportion_list = []
area_avg_proportion_list = []

for i in range(df_iat_fix.shape[0]):
    area_max_proportion = df_iat_fix.loc[i, 'area_max_avg'] / bay_area_sum
    area_avg_proportion = df_iat_fix.loc[i, 'area_avg_avg'] / bay_area_sum
    area_max_proportion_list.append(area_max_proportion)
    area_avg_proportion_list.append(area_avg_proportion)

df_iat_fix.insert(7, 'area_max_proportion', area_max_proportion_list)
df_iat_fix['area_avg_proportion'] = area_avg_proportion_list

In [37]:
df_iat_fix

Unnamed: 0,iat_avg,buffer_avg,num_instance,block_max_avg,area_max_avg,workload_h1_max_avg,workload_h2_max_avg,area_max_proportion,block_avg_avg,area_avg_avg,workload_h1_avg_avg,workload_h2_avg_avg,area_avg_proportion
0,0.1,0.1,20,77.7,14024.75,7284.667574,6693.000039,0.492252,33.286887,6466.602548,2928.764488,2671.821942,0.22697
1,0.1,0.2,20,79.5,14398.8,7426.869911,6995.101697,0.505381,35.044148,6690.746071,3030.97145,2771.36372,0.234837
2,0.1,0.3,20,77.6,13682.3,7257.227737,7110.318733,0.480232,33.592386,6389.450962,2934.84946,2663.550395,0.224262
3,0.1,0.4,20,79.35,14490.3,7451.289443,6918.084444,0.508592,33.645043,6551.303372,2916.411959,2641.763748,0.229943
4,0.1,0.5,20,73.3,13120.65,6742.93624,6391.346363,0.460519,32.161974,6164.653359,2821.572853,2559.776381,0.216372
5,0.1,0.6,20,73.55,13078.2,7007.970454,6662.020293,0.459029,31.282571,6062.602385,2726.471657,2519.364233,0.21279
6,0.1,0.7,20,77.35,14229.1,7531.487495,6820.453826,0.499424,32.640224,6236.754535,2892.572802,2650.898824,0.218903
7,0.1,0.8,20,77.2,14048.15,7229.420607,6629.261858,0.493073,30.80422,5928.553055,2690.222919,2445.765496,0.208085
8,0.1,0.9,20,79.1,14257.35,7492.324719,6921.683828,0.500416,33.315753,6394.437087,2897.228126,2658.965845,0.224437
9,0.1,1.0,20,76.5,13993.5,7175.264694,6716.169872,0.491155,32.23383,6240.330748,2833.275706,2553.534037,0.219028


In [38]:
df_case_list = []

for sheet in ['buffer_1.5_fix', 'iat+buffer_1.6']:
    df_case = pd.read_excel(file_path, sheet_name=sheet)

    area_max_proportion_list = []
    area_avg_proportion_list = []

    for i in range(df_case.shape[0]):
        area_max_proportion = df_case.loc[i, 'area_max_avg'] / bay_area_sum
        area_avg_proportion = df_case.loc[i, 'area_avg_avg'] / bay_area_sum
        area_max_proportion_list.append(area_max_proportion)
        area_avg_proportion_list.append(area_avg_proportion)

    df_case.insert(7, 'area_max_proportion', area_max_proportion_list)
    df_case['area_avg_proportion'] = area_avg_proportion_list

    df_case_list.append(df_case)

In [43]:
df_case_list.pop(0)
df_case_list.pop(0)
df_case_list.pop(0)

Unnamed: 0,iat_avg,buffer_avg,num_instance,block_max_avg,area_max_avg,workload_h1_max_avg,workload_h2_max_avg,area_max_proportion,block_avg_avg,area_avg_avg,workload_h1_avg_avg,workload_h2_avg_avg,area_avg_proportion
0,0.1,0.1,20,77.7,14024.75,7284.667574,6693.000039,0.492252,33.286887,6466.602548,2928.764488,2671.821942,0.22697
1,0.1,0.2,20,79.5,14398.8,7426.869911,6995.101697,0.505381,35.044148,6690.746071,3030.97145,2771.36372,0.234837
2,0.1,0.3,20,77.6,13682.3,7257.227737,7110.318733,0.480232,33.592386,6389.450962,2934.84946,2663.550395,0.224262
3,0.1,0.4,20,79.35,14490.3,7451.289443,6918.084444,0.508592,33.645043,6551.303372,2916.411959,2641.763748,0.229943
4,0.1,0.5,20,73.3,13120.65,6742.93624,6391.346363,0.460519,32.161974,6164.653359,2821.572853,2559.776381,0.216372
5,0.1,0.6,20,73.55,13078.2,7007.970454,6662.020293,0.459029,31.282571,6062.602385,2726.471657,2519.364233,0.21279
6,0.1,0.7,20,77.35,14229.1,7531.487495,6820.453826,0.499424,32.640224,6236.754535,2892.572802,2650.898824,0.218903
7,0.1,0.8,20,77.2,14048.15,7229.420607,6629.261858,0.493073,30.80422,5928.553055,2690.222919,2445.765496,0.208085
8,0.1,0.9,20,79.1,14257.35,7492.324719,6921.683828,0.500416,33.315753,6394.437087,2897.228126,2658.965845,0.224437
9,0.1,1.0,20,76.5,13993.5,7175.264694,6716.169872,0.491155,32.23383,6240.330748,2833.275706,2553.534037,0.219028


In [44]:
df_case_list

[    iat_avg  buffer_avg  num_instance  block_max_avg  area_max_avg  \
 0       0.1         0.1            20          77.70      14024.75   
 1       0.1         0.2            20          79.50      14398.80   
 2       0.1         0.3            20          77.60      13682.30   
 3       0.1         0.4            20          79.35      14490.30   
 4       0.1         0.5            20          73.30      13120.65   
 5       0.1         0.6            20          73.55      13078.20   
 6       0.1         0.7            20          77.35      14229.10   
 7       0.1         0.8            20          77.20      14048.15   
 8       0.1         0.9            20          79.10      14257.35   
 9       0.1         1.0            20          76.50      13993.50   
 10      0.1         1.1            20          74.60      13665.80   
 11      0.1         1.2            20          76.65      14087.10   
 12      0.1         1.3            20          79.55      14209.50   
 13   

In [46]:
sheet = ['iat_0.1_fix', 'buffer_1.5_fix', 'iat+buffer_1.6']

for i in range(3):
    save_path = f'../data/validation_result/avg_result/validation_avg_result_{sheet[i]}.xlsx'

    df_case_list[i].to_excel(save_path, sheet_name=sheet[i], index=False)

In [52]:
df_length = data_gen.df_length

df_length

Unnamed: 0,group,process_type,best_distribution_name,best_params,min,max
0,BC_A,Final조립,chi2,"(1.1039935775373761, 16.499999999999996, 0.795...",16.5,16.5
1,BC_A,대조중조,chi2,"(1.0339940476928495, 17.999999999999996, 0.962...",18.0,18.0
2,BC_B,Final조립,chi2,"(1.0339996837908543, 18.499999999999996, 0.962...",18.5,18.5
3,BC_B,곡중조,exponpow,"(0.15267893342443767, 8.999999999999998, 1.645...",9.0,20.0
4,BC_E,Final조립,chi2,"(0.8591294242278575, 10.499999999999998, 1.164...",10.5,21.0
...,...,...,...,...,...,...
121,VL_F,Final조립,powerlaw,"(0.13762224637431697, 9.499999999999998, 6.000...",9.5,15.5
122,VL_F,곡중조,chi2,"(1.103993593407678, 6.999999999999999, 0.79518...",7.0,7.0
123,VL_F,대조중조,chi2,"(1.1039935968961743, 17.499999999999996, 0.795...",17.5,17.5
124,VL_S,Final조립,gamma,"(0.27551004048270705, 17.499999999999996, 1.34...",17.5,20.0


In [53]:
df = pd.read_excel('../data/블록-계획데이터(예제)_수정_선종블록 추가.xlsx')

In [54]:
df

Unnamed: 0.1,Unnamed: 0,index,정반_코드,호선_코드,블록,단위블록_CODE,공종_명칭,stage_코드,선종_코드,취부팀_코드,...,실적공기,H00,H01,H02,W,B,L,H,A,선종_블록
0,0,2,M212,CN047,S22S0,11A0,Final조립,S032,CN,B02,...,22,0,1469,766,150,17.5,21.0,8.0,367.5,CN_S
1,1,3,M024,CN011,E22P0,11A0,곡중조,S021,CN,B04,...,7,17,475,914,0,13.0,18.0,4.5,234.0,CN_E
2,2,4,M211,BC001,F51P0,05A0,Final조립,S032,BC,B02,...,15,0,623,386,126,17.5,23.0,6.0,402.5,BC_F
3,3,9,M012,TK011,F51S0,11B0,곡중조,S021,VL,A03,...,7,40,267,145,0,6.5,7.0,9.5,45.5,VL_F
4,4,12,M025,LN017,B19S0,03A0,Final조립,S032,LN,A05,...,11,70,492,243,124,18.0,19.0,3.0,342.0,LN_B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6933,6933,11955,M221,CN034,B20P0,03B0,대조중조,S031,CN,B09,...,7,0,521,593,0,8.0,19.5,2.5,156.0,CN_B
6934,6934,11959,M011,LP004,S13P0,03A0,곡중조,S221,LP,B07,...,6,0,318,0,0,5.0,19.5,6.5,97.5,LP_S
6935,6935,11960,M011,LP004,S13S0,03A0,곡중조,S221,LP,B07,...,5,0,318,0,0,5.0,19.5,6.5,97.5,LP_S
6936,6936,11971,M033,LN004,S11P0,11A0,Final조립,S032,LN,B08,...,5,0,427,435,121,24.5,19.0,9.5,465.5,LN_S


In [55]:
df_count = data_gen.df_count

df_count

Unnamed: 0,group,count,proportion,panel_count,curve_count,big_count,final_count
0,BC_A,3,0.000432,0,0,1,2
1,BC_B,11,0.001585,0,10,0,1
2,BC_E,14,0.002018,1,6,1,6
3,BC_F,14,0.002018,0,8,2,4
4,BC_S,10,0.001441,1,0,5,4
5,CN_A,249,0.035889,8,60,76,105
6,CN_B,808,0.11646,8,446,293,61
7,CN_D,119,0.017152,43,0,40,36
8,CN_E,1192,0.171807,110,241,317,524
9,CN_F,397,0.057221,3,217,46,131


In [78]:
for i in range(df.shape[0]):
    if df.loc[i, '정반_코드'] == 'M025':
        if df.loc[i, '취부팀_코드'] == 'B01':
            df.loc[i, '정반_코드'] = 'M025-2'
        elif (df.loc[i, '취부팀_코드'] == 'A05') or (df.loc[i, '취부팀_코드'] == 'B06'):
            df.loc[i, '정반_코드'] = 'M025-1'

In [79]:
df

Unnamed: 0.1,Unnamed: 0,index,정반_코드,호선_코드,블록,단위블록_CODE,공종_명칭,stage_코드,선종_코드,취부팀_코드,...,실적공기,H00,H01,H02,W,B,L,H,A,선종_블록
0,0,2,M212,CN047,S22S0,11A0,Final조립,S032,CN,B02,...,22,0,1469,766,150,17.5,21.0,8.0,367.5,CN_S
1,1,3,M024,CN011,E22P0,11A0,곡중조,S021,CN,B04,...,7,17,475,914,0,13.0,18.0,4.5,234.0,CN_E
2,2,4,M211,BC001,F51P0,05A0,Final조립,S032,BC,B02,...,15,0,623,386,126,17.5,23.0,6.0,402.5,BC_F
3,3,9,M012,TK011,F51S0,11B0,곡중조,S021,VL,A03,...,7,40,267,145,0,6.5,7.0,9.5,45.5,VL_F
4,4,12,M025-1,LN017,B19S0,03A0,Final조립,S032,LN,A05,...,11,70,492,243,124,18.0,19.0,3.0,342.0,LN_B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6933,6933,11955,M221,CN034,B20P0,03B0,대조중조,S031,CN,B09,...,7,0,521,593,0,8.0,19.5,2.5,156.0,CN_B
6934,6934,11959,M011,LP004,S13P0,03A0,곡중조,S221,LP,B07,...,6,0,318,0,0,5.0,19.5,6.5,97.5,LP_S
6935,6935,11960,M011,LP004,S13S0,03A0,곡중조,S221,LP,B07,...,5,0,318,0,0,5.0,19.5,6.5,97.5,LP_S
6936,6936,11971,M033,LN004,S11P0,11A0,Final조립,S032,LN,B08,...,5,0,427,435,121,24.5,19.0,9.5,465.5,LN_S


In [90]:
df_breadth = data_gen.df_breadth

df_breadth['bay'] = None

for group in df_count['group'].values:
    for process in ['Final조립', '평중조', '곡중조', '대조중조']:
        df_group = df[df['선종_블록'] == group]
        df_group = df_group[df_group['공종_명칭'] == process]
        idx = df_breadth[(df_breadth['group'] == group) & (df_breadth['process_type'] == process)].index
        bay_list = []
        for bay in df_bay['bay_name'].values:
            if bay in df_group['정반_코드'].values:
                bay_list.append(bay)

        bay_set = set(bay_list)
        bay_tuple = tuple(bay_set)
        df_length.loc[idx, 'bay'] = str(bay_tuple)

In [91]:
df_breadth.to_excel('../data/breadth data.xlsx', index=False)

In [83]:
df_length

Unnamed: 0,group,process_type,best_distribution_name,best_params,min,max,bay
0,BC_A,Final조립,chi2,"(1.1039935775373761, 16.499999999999996, 0.795...",16.5,16.5,"('M033', 'M031')"
1,BC_A,대조중조,chi2,"(1.0339940476928495, 17.999999999999996, 0.962...",18.0,18.0,"('M032',)"
2,BC_B,Final조립,chi2,"(1.0339996837908543, 18.499999999999996, 0.962...",18.5,18.5,"('M025-2',)"
3,BC_B,곡중조,exponpow,"(0.15267893342443767, 8.999999999999998, 1.645...",9.0,20.0,"('M222', 'M012', 'M011', 'M023', 'M221')"
4,BC_E,Final조립,chi2,"(0.8591294242278575, 10.499999999999998, 1.164...",10.5,21.0,"('M212', 'M211', 'M031')"
...,...,...,...,...,...,...,...
121,VL_F,Final조립,powerlaw,"(0.13762224637431697, 9.499999999999998, 6.000...",9.5,15.5,"('M032', 'M025-2', 'M033', 'M031')"
122,VL_F,곡중조,chi2,"(1.103993593407678, 6.999999999999999, 0.79518...",7.0,7.0,"('M022', 'M011', 'M012')"
123,VL_F,대조중조,chi2,"(1.1039935968961743, 17.499999999999996, 0.795...",17.5,17.5,"('M032', 'M031')"
124,VL_S,Final조립,gamma,"(0.27551004048270705, 17.499999999999996, 1.34...",17.5,20.0,"('M033', 'M025-2', 'M025-1', 'M031')"


In [84]:
df_length['bay'] = df_length['bay'].apply(ast.literal_eval)

In [87]:
bay_ex = df_length.loc[1, 'bay']

In [113]:
df_length = data_gen.df_length

In [114]:
df_length

Unnamed: 0,group,process_type,best_distribution_name,best_params,min,max,bay
0,BC_A,Final조립,chi2,"(1.1039935775373761, 16.499999999999996, 0.795...",16.5,16.5,"(M033, M031)"
1,BC_A,대조중조,chi2,"(1.0339940476928495, 17.999999999999996, 0.962...",18.0,18.0,"(M032,)"
2,BC_B,Final조립,chi2,"(1.0339996837908543, 18.499999999999996, 0.962...",18.5,18.5,"(M025-2,)"
3,BC_B,곡중조,exponpow,"(0.15267893342443767, 8.999999999999998, 1.645...",9.0,20.0,"(M222, M012, M011, M023, M221)"
4,BC_E,Final조립,chi2,"(0.8591294242278575, 10.499999999999998, 1.164...",10.5,21.0,"(M212, M211, M031)"
...,...,...,...,...,...,...,...
121,VL_F,Final조립,powerlaw,"(0.13762224637431697, 9.499999999999998, 6.000...",9.5,15.5,"(M032, M025-2, M033, M031)"
122,VL_F,곡중조,chi2,"(1.103993593407678, 6.999999999999999, 0.79518...",7.0,7.0,"(M022, M011, M012)"
123,VL_F,대조중조,chi2,"(1.1039935968961743, 17.499999999999996, 0.795...",17.5,17.5,"(M032, M031)"
124,VL_S,Final조립,gamma,"(0.27551004048270705, 17.499999999999996, 1.34...",17.5,20.0,"(M033, M025-2, M025-1, M031)"


In [115]:
# df_length['bay'] = df_length['bay'].apply(ast.literal_eval)
df_length = df_length[(df_length['group'] == 'CN_S') & (df_length['process_type'] == 'Final조립')]

In [116]:
df_length

Unnamed: 0,group,process_type,best_distribution_name,best_params,min,max,bay
33,CN_S,Final조립,norm,"(19.11007025761124, 1.8949746792177762)",15.0,25.5,"(M025-1, M033, M212, M012, M211, M011, M221, M..."


In [117]:
df_length['bay']

33    (M025-1, M033, M212, M012, M211, M011, M221, M...
Name: bay, dtype: object

In [118]:
if 'M033' in df_length['bay'].values[0]:
    print('good')

good


In [119]:
for bay_name in df_length['bay'].values[0]:
    print(bay_name)

M025-1
M033
M212
M012
M211
M011
M221
M112
M032
M025-2
M024
M031


In [120]:
df_bay = data_gen.df_bay
df_bay

Unnamed: 0,bay_name,bay_id,team_name,capacity_h1,capacity_h2,bay_breadth,bay_length,block_breadth,block_height,block_weight,block_turnover_weight
0,M012,0,A03+A04,1900.0,1300,22,40,22.0,6.5,130,50
1,M011,1,B07,1700.0,1800,22,50,22.0,6.5,130,50
2,M010,2,A11,8200.0,12800,25,180,20.5,100.0,52,500
3,M021,3,B04,4500.0,3500,16,18,39.0,8.0,90,30
4,M022,4,A01+A02,900.0,900,17,80,39.0,8.0,70,30
5,M023,5,B04,1300.0,1200,17,35,39.0,8.0,70,30
6,M024,6,B04,1200.0,1300,17,30,39.0,8.0,313,200
7,M025-1,7,A05+B06,1700.0,1900,18,90,39.0,8.0,313,200
8,M025-2,8,B01,1799.85,1900,18,90,39.0,8.0,313,200
9,M031,9,B05,1200.0,1200,23,64,25.2,10.5,436,200


In [122]:
bay_ex = list(df_length['bay'].values[0])
bay_ex

['M025-1',
 'M033',
 'M212',
 'M012',
 'M211',
 'M011',
 'M221',
 'M112',
 'M032',
 'M025-2',
 'M024',
 'M031']

In [125]:
breadth_list = []
for bay_name in df_length['bay'].values[0]:
    df_bay_breadth = df_bay[(df_bay['bay_name'] == bay_name)]
    breadth_list.append(df_bay_breadth['block_breadth'].values[0])

In [126]:
breadth_list

[39.0, 26.0, 8.0, 22.0, 8.0, 22.0, 6.0, 13.8, 25.0, 39.0, 39.0, 25.2]

In [128]:
min(breadth_list)

6.0

In [141]:
df_height = data_gen.df_height

In [142]:
df_height

Unnamed: 0,group,process_type,best_distribution_name,best_params,min,max,bay
0,BC_A,Final조립,chi2,"(1.1039935858859617, 8.999999999999998, 0.7951...",9.0,9.0,"('M033', 'M031')"
1,BC_A,대조중조,chi2,"(1.0339940476928495, 8.999999999999998, 0.9623...",9.0,9.0,"('M032',)"
2,BC_B,Final조립,chi2,"(1.0339913498082178, 4.999999999999999, 0.9623...",5.0,5.0,"('M025-2',)"
3,BC_B,곡중조,powerlaw,"(0.13872067700855417, 2.9999999999999996, 2.00...",3.0,5.0,"('M222', 'M012', 'M011', 'M023', 'M221')"
4,BC_E,Final조립,exponpow,"(0.1307347434500824, 5.999999999999998, 0.7376...",6.0,6.5,"('M212', 'M211', 'M031')"
...,...,...,...,...,...,...,...
121,VL_F,Final조립,powerlaw,"(0.1335828741563855, 5.499999999999999, 4.0000...",5.5,9.5,"('M032', 'M025-2', 'M033', 'M031')"
122,VL_F,곡중조,chi2,"(1.1039935904231943, 9.499999999999998, 0.7951...",9.5,9.5,"('M022', 'M011', 'M012')"
123,VL_F,대조중조,exponpow,"(0.5936103414678744, 7.499999999999999, 0.0082...",7.5,7.5,"('M032', 'M031')"
124,VL_S,Final조립,powerlaw,"(0.11035519569851655, 4.999999999999999, 3.500...",5.0,8.5,"('M033', 'M025-2', 'M025-1', 'M031')"


In [143]:
df_height.loc[0, 'bay']

"('M033', 'M031')"

In [144]:
df_height['bay'] = df_height['bay'].apply(ast.literal_eval)

In [145]:
df_bay = data_gen.df_bay

In [146]:
df_height['min_bay'] = 0
df_height['max_bay'] = 0

for i in range(df_height.shape[0]):
    height_list = []
    for bay_name in df_height.loc[i, 'bay']:
        df_bay_height = df_bay[(df_bay['bay_name'] == bay_name)]
        height_list.append(df_bay_height['block_height'].values[0])

    min_bay = min(height_list)
    max_bay = max(height_list)

    df_height.loc[i, 'min_bay'] = min_bay
    df_height.loc[i, 'max_bay'] = max_bay

  df_height.loc[i, 'min_bay'] = min_bay
  df_height.loc[i, 'max_bay'] = max_bay


In [147]:
df_height

Unnamed: 0,group,process_type,best_distribution_name,best_params,min,max,bay,min_bay,max_bay
0,BC_A,Final조립,chi2,"(1.1039935858859617, 8.999999999999998, 0.7951...",9.0,9.0,"(M033, M031)",10.5,10.5
1,BC_A,대조중조,chi2,"(1.0339940476928495, 8.999999999999998, 0.9623...",9.0,9.0,"(M032,)",10.5,10.5
2,BC_B,Final조립,chi2,"(1.0339913498082178, 4.999999999999999, 0.9623...",5.0,5.0,"(M025-2,)",8.0,8.0
3,BC_B,곡중조,powerlaw,"(0.13872067700855417, 2.9999999999999996, 2.00...",3.0,5.0,"(M222, M012, M011, M023, M221)",6.5,21.1
4,BC_E,Final조립,exponpow,"(0.1307347434500824, 5.999999999999998, 0.7376...",6.0,6.5,"(M212, M211, M031)",10.5,23.0
...,...,...,...,...,...,...,...,...,...
121,VL_F,Final조립,powerlaw,"(0.1335828741563855, 5.499999999999999, 4.0000...",5.5,9.5,"(M032, M025-2, M033, M031)",8.0,10.5
122,VL_F,곡중조,chi2,"(1.1039935904231943, 9.499999999999998, 0.7951...",9.5,9.5,"(M022, M011, M012)",6.5,8.0
123,VL_F,대조중조,exponpow,"(0.5936103414678744, 7.499999999999999, 0.0082...",7.5,7.5,"(M032, M031)",10.5,10.5
124,VL_S,Final조립,powerlaw,"(0.11035519569851655, 4.999999999999999, 3.500...",5.0,8.5,"(M033, M025-2, M025-1, M031)",8.0,10.5


In [148]:
df_height.to_excel('../data/height data.xlsx', index=False)

In [149]:
df_bay = data_gen.df_bay

In [151]:
df_bay[df_bay['bay_name'] == 'M012']['block_breadth'].values[0]

22.0

In [9]:
for bay in df_breadth['bay'].values:
    print(bay)

NameError: name 'df_breadth' is not defined

In [11]:
data_gen = DataGenerator(block_data_path = "../input/configurations/block_data.xlsx",
bay_data_path = "../input/configurations/bay_data.xlsx")

In [12]:
df_breadth = data_gen.df_breadth

In [13]:
df_breadth

Unnamed: 0,group,process_type,best_distribution_name,best_params,min,max,bay,min_bay,max_bay
0,BC_A,Final조립,chi2,"(1.1039935858859617, 17.999999999999996, 0.795...",18.0,18.0,"('M033', 'M031')",25.2,26.0
1,BC_A,대조중조,chi2,"(1.0339913498082178, 19.999999999999996, 0.962...",20.0,20.0,"('M032',)",25.0,25.0
2,BC_B,Final조립,chi2,"(1.033993969224635, 12.999999999999996, 0.9623...",13.0,13.0,"('M025-2',)",39.0,39.0
3,BC_B,곡중조,powerlaw,"(0.1341769528585874, 4.499999999999999, 5.5000...",4.5,10.0,"('M222', 'M012', 'M011', 'M023', 'M221')",6.0,39.0
4,BC_E,Final조립,chi2,"(0.8953473888546646, 11.499999999999998, 1.181...",11.5,22.5,"('M212', 'M211', 'M031')",8.0,25.2
...,...,...,...,...,...,...,...,...,...
121,VL_F,Final조립,powerlaw,"(0.0731264518954659, 17.499999999999996, 2.500...",17.5,20.0,"('M032', 'M025-2', 'M033', 'M031')",25.0,39.0
122,VL_F,곡중조,exponpow,"(0.27510909882854395, 3.9999999999999996, 2.23...",4.0,6.5,"('M022', 'M011', 'M012')",22.0,39.0
123,VL_F,대조중조,chi2,"(1.1039935858859617, 17.999999999999996, 0.795...",18.0,18.0,"('M032', 'M031')",25.0,25.2
124,VL_S,Final조립,powerlaw,"(0.12022458674074693, 19.499999999999996, 1.00...",19.5,20.5,"('M033', 'M025-2', 'M025-1', 'M031')",25.2,39.0


In [14]:
df_breadth['bay'] = df_breadth['bay'].apply(ast.literal_eval)

In [15]:
breadth_ex = df_breadth.loc[0]

In [16]:
breadth_ex

group                                                                  BC_A
process_type                                                        Final조립
best_distribution_name                                                 chi2
best_params               (1.1039935858859617, 17.999999999999996, 0.795...
min                                                                    18.0
max                                                                    18.0
bay                                                            (M033, M031)
min_bay                                                                25.2
max_bay                                                                26.0
Name: 0, dtype: object

In [18]:
for bay_name in breadth_ex['bay']:
    print(bay_name)

M033
M031


In [27]:
df_possible_bay = df_breadth[df_breadth['group'] == 'BC_A']
df_possible_bay = df_possible_bay[df_possible_bay['process_type'] == '대조중조']

In [28]:
df_possible_bay

Unnamed: 0,group,process_type,best_distribution_name,best_params,min,max,bay,min_bay,max_bay
1,BC_A,대조중조,chi2,"(1.0339913498082178, 19.999999999999996, 0.962...",20.0,20.0,"(M032,)",25.0,25.0


In [29]:
for bay in df_possible_bay['bay'].values[0]:
    print(bay)

M032
