In [19]:
import pandas as pd
import numpy as np
from dotenv import load_dotenv

load_dotenv()
CATEGORY_CODES = ['100', '200', '300', '400']

In [42]:
product_cost_2017 = pd.read_csv('data/product_cost_2017.csv', encoding='utf-8')
product_cost_2018 = pd.read_csv('data/product_cost_2018.csv', encoding='utf-8')
product_cost_2019 = pd.read_csv('data/product_cost_2019.csv', encoding='utf-8')
product_cost_2020 = pd.read_csv('data/product_cost_2020.csv', encoding='utf-8')
product_cost_2021 = pd.read_csv('data/product_cost_2021.csv', encoding='utf-8')
product_cost_2022 = pd.read_csv('data/product_cost_2022.csv', encoding='utf-8')
product_cost_2023 = pd.read_csv('data/product_cost_2023.csv', encoding='utf-8')

data = pd.concat(
    [product_cost_2017, product_cost_2018, product_cost_2019, product_cost_2020, product_cost_2021,
     product_cost_2022, product_cost_2023], axis=0, ignore_index=True)

In [44]:
result = {}
fluctuation = pd.DataFrame(
    columns=['COUNT_PER_TEN_DAYS', 'PRODUCT_CODE', 'MIN_PRODUCT_COST',
             'MAX_PRODUCT_COST', 'AVG_PRODUCT_COST', 'MIN_FLUCTUATION_RATE', 'MAX_FLUCTUATION_RATE']
).astype({
    'COUNT_PER_TEN_DAYS': 'int',
    'PRODUCT_CODE': 'str',
    'MIN_PRODUCT_COST': 'float',
    'MAX_PRODUCT_COST': 'float',
    'AVG_PRODUCT_COST': 'float',
    'MIN_FLUCTUATION_RATE': 'float',
    'MAX_FLUCTUATION_RATE': 'float'
})
product_code_list = data['PRODUCT_CODE'].unique()

for product_code in product_code_list:
    result[product_code] = [{
        'TOTAL_COUNT': 0,
        'TOTAL_MIN_COST': float('inf'),
        'TOTAL_MAX_COST': 0,
        'TOTAL_AVG_COST': 0,
        'TOTAL_MIN_RATIO': 0,
        'TOTAL_MAX_RATIO': 0
    } for _ in range(36)]

    product_data = data.loc[data['PRODUCT_CODE'] == product_code, :].reset_index(drop=True)
    for index, row in product_data.iloc[1:].iterrows():
        prev_avg_cost = product_data.iloc[index]['AVG_PRODUCT_COST']
        min_ratio = 1 + (row['MIN_PRODUCT_COST'] - prev_avg_cost) / prev_avg_cost
        max_ratio = 1 + (row['MAX_PRODUCT_COST'] - prev_avg_cost) / prev_avg_cost
        acc_result = result[product_code][int(row['COUNT_PER_TEN_DAYS'])]
        acc_result['TOTAL_COUNT'] += 1
        acc_result['TOTAL_MIN_COST'] = np.min([acc_result['TOTAL_MIN_COST'], row['MIN_PRODUCT_COST']])
        acc_result['TOTAL_MAX_COST'] = np.max([acc_result['TOTAL_MAX_COST'], row['MAX_PRODUCT_COST']])
        acc_result['TOTAL_AVG_COST'] += row['AVG_PRODUCT_COST']
        acc_result['TOTAL_MIN_RATIO'] += min_ratio
        acc_result['TOTAL_MAX_RATIO'] += max_ratio
        
    for index, info in enumerate(result[product_code]):
        value = {
            'COUNT_PER_TEN_DAYS': index,
            'PRODUCT_CODE': product_code,
            'MIN_PRODUCT_COST': 0,
            'MAX_PRODUCT_COST': 0,
            'AVG_PRODUCT_COST': 0,
            'MIN_FLUCTUATION_RATE': 0,
            'MAX_FLUCTUATION_RATE': 0
        }
        if info['TOTAL_COUNT'] != 0:
            value['MIN_PRODUCT_COST'] = info['TOTAL_MIN_COST']
            value['MAX_PRODUCT_COST'] = info['TOTAL_MAX_COST']
            value['AVG_PRODUCT_COST'] = info['TOTAL_AVG_COST'] / info['TOTAL_COUNT']
            value['MIN_FLUCTUATION_RATE'] = info['TOTAL_MIN_RATIO'] / info['TOTAL_COUNT']
            value['MAX_FLUCTUATION_RATE'] = info['TOTAL_MAX_RATIO'] / info['TOTAL_COUNT']
            
        fluctuation = pd.concat([fluctuation, pd.DataFrame([value])], ignore_index=True)
        
        

In [45]:
fluctuation

Unnamed: 0,COUNT_PER_TEN_DAYS,PRODUCT_CODE,MIN_PRODUCT_COST,MAX_PRODUCT_COST,AVG_PRODUCT_COST,MIN_FLUCTUATION_RATE,MAX_FLUCTUATION_RATE
0,0,111,38300.0,55240.0,47586.190476,0.998526,1.000882
1,1,111,38400.0,55750.0,47557.440476,0.998788,1.001827
2,2,111,38700.0,56050.0,47788.214286,0.996377,1.003267
3,3,111,40120.0,56220.0,47938.757440,0.998356,1.002956
4,4,111,40800.0,56620.0,48026.527778,0.999378,1.001582
...,...,...,...,...,...,...,...
2047,31,280,24110.0,26100.0,25436.666667,0.947844,1.026078
2048,32,280,19930.0,24100.0,22020.000000,0.905086,1.094460
2049,33,280,20860.0,22840.0,21898.000000,0.952598,1.043018
2050,34,280,20100.0,23240.0,21265.000000,0.945215,1.092876


In [54]:
selected_basic_product_info = pd.read_csv("data/selected_basic_product_info.csv", encoding='utf-8')

In [56]:
selected_basic_product_info['M_DISTCTNS'] = ['ALL' for _ in range(len(selected_basic_product_info))]

In [58]:
selected_basic_product_info

Unnamed: 0,PRODUCT_ID,PRODUCT_CODE,PRODUCT_NAME,M_DISTCTNS,PRDCTN__ERA
0,0,111,쌀,ALL,1536
1,1,112,찹쌀,ALL,1536
2,2,141,콩,ALL,0
3,3,142,팥,ALL,1536
4,4,143,녹두,ALL,1536
5,5,144,메밀,ALL,3072
6,6,151,고구마,ALL,1536
7,7,152,감자,ALL,1984
8,14,211,배추,ALL,6144
9,15,212,양배추,ALL,224


In [30]:
season = {}

for product_code in product_code_list:
    season[product_code] = {
        'ALL':0,
        'SPRING':0,
        'SUMMER':0,
        'FALL':0,
        'WINTER':0
    }
    
    all_filter = 0
    for i in range(12):
        bit = (1 < i)
        all_filter += bit
        
    

In [59]:
a = [1,2,3,4]
b = ['a','b','c','d']

for i, j in zip(a, b):
    print(i, j)

1 a
2 b
3 c
4 d
