In [42]:
import pandas as pd
import numpy as np
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): none of the cells shown here read an environment variable --
# confirm this call is still needed.
load_dotenv()

# Category codes, kept as strings.
# NOTE(review): not referenced by the cells shown here.
CATEGORY_CODES = [
    '100',
    '200',
    '300',
    '400',
]

In [43]:
# One product-cost CSV per year. The paths are listed once so that the same
# read options are applied to every file.
yearly_cost_paths = [
    'data/product_cost_2017.csv',
    'data/product_cost_2018.csv',
    'data/product_cost_2019.csv',
    'data/product_cost_2020.csv',
    'data/product_cost_2021.csv',
    'data/product_cost_2022.csv',
    'data/product_cost_2023.csv',
]

# The files are read in the order listed; each yearly frame stays available
# under its own name in addition to the combined frame built below.
(product_cost_2017, product_cost_2018, product_cost_2019, product_cost_2020,
 product_cost_2021, product_cost_2022, product_cost_2023) = (
    pd.read_csv(csv_path, encoding='utf-8') for csv_path in yearly_cost_paths
)

# Stack the yearly frames row-wise, in the order listed, under a fresh
# 0..n-1 index.
data = pd.concat(
    [
        product_cost_2017,
        product_cost_2018,
        product_cost_2019,
        product_cost_2020,
        product_cost_2021,
        product_cost_2022,
        product_cost_2023,
    ],
    axis=0,
    ignore_index=True,
)

In [44]:
# ---------------------------------------------------------------------------
# Per-product, per-ten-day-period price statistics and season assignment
# ---------------------------------------------------------------------------

# Number of accumulator slots kept per product. COUNT_PER_TEN_DAYS is used
# directly as an index into a list of this length, so a value of 36 or more
# raises IndexError.
PERIODS_PER_YEAR = 36

# Exclusive upper period index of each season block, checked in order. Period
# indexes at or beyond the last bound are counted as WINTER again.
# NOTE(review): this reads as Jan-Feb / Mar-May / Jun-Aug / Sep-Nov / Dec with
# period 0 being the first ten days of January -- confirm against the data.
SEASON_UPPER_BOUNDS = (
    (6, 'WINTER'),
    (15, 'SPRING'),
    (24, 'SUMMER'),
    (33, 'FALL'),
)

# A product is given a fixed season when it has observations in at most this
# many periods (or in no period at all of some season).
SEASONAL_MAX_ACTIVE_PERIODS = 27

# Column order and dtypes of the `fluctuation` frame.
FLUCTUATION_DTYPES = {
    'COUNT_PER_TEN_DAYS': 'int',
    'PRODUCT_CODE': 'str',
    'MIN_PRODUCT_COST': 'float',
    'MAX_PRODUCT_COST': 'float',
    'AVG_PRODUCT_COST': 'float',
    'MIN_FLUCTUATION_RATE': 'float',
    'MAX_FLUCTUATION_RATE': 'float',
}


def season_of_period(period_index):
    """Return the season name under which a ten-day period index is counted."""
    for upper_bound, season_name in SEASON_UPPER_BOUNDS:
        if period_index < upper_bound:
            return season_name
    return 'WINTER'


def accumulate_period_stats(product_data):
    """Accumulate running totals per ten-day period for a single product.

    Args:
        product_data: Rows of one product, in their original order. The
            columns COUNT_PER_TEN_DAYS, MIN_PRODUCT_COST, MAX_PRODUCT_COST
            and AVG_PRODUCT_COST are read.

    Returns:
        A list of PERIODS_PER_YEAR dicts, indexed by COUNT_PER_TEN_DAYS, with
        the keys TOTAL_COUNT, TOTAL_MIN_COST, TOTAL_MAX_COST, TOTAL_AVG_COST,
        TOTAL_MIN_RATIO and TOTAL_MAX_RATIO.

    Every row except the first contributes: the first row has no preceding
    row to compare against. Ratios are the row's min/max cost relative to the
    preceding row's average cost (1.0 means "unchanged").
    """
    totals_by_period = [
        {
            'TOTAL_COUNT': 0,
            'TOTAL_MIN_COST': float('inf'),
            'TOTAL_MAX_COST': 0,
            'TOTAL_AVG_COST': 0,
            'TOTAL_MIN_RATIO': 0,
            'TOTAL_MAX_RATIO': 0,
        }
        for _ in range(PERIODS_PER_YEAR)
    ]

    # Plain column arrays avoid building a Series per row and keep each
    # column's own dtype.
    period_indexes = product_data['COUNT_PER_TEN_DAYS'].to_numpy()
    min_costs = product_data['MIN_PRODUCT_COST'].to_numpy()
    max_costs = product_data['MAX_PRODUCT_COST'].to_numpy()
    avg_costs = product_data['AVG_PRODUCT_COST'].to_numpy()

    for position in range(1, len(product_data)):
        # A preceding average of 0 produces inf/nan ratios here.
        prev_avg_cost = avg_costs[position - 1]
        min_cost = min_costs[position]
        max_cost = max_costs[position]
        min_ratio = 1 + (min_cost - prev_avg_cost) / prev_avg_cost
        max_ratio = 1 + (max_cost - prev_avg_cost) / prev_avg_cost

        totals = totals_by_period[int(period_indexes[position])]
        totals['TOTAL_COUNT'] += 1
        totals['TOTAL_MIN_COST'] = np.min([totals['TOTAL_MIN_COST'], min_cost])
        totals['TOTAL_MAX_COST'] = np.max([totals['TOTAL_MAX_COST'], max_cost])
        totals['TOTAL_AVG_COST'] += avg_costs[position]
        totals['TOTAL_MIN_RATIO'] += min_ratio
        totals['TOTAL_MAX_RATIO'] += max_ratio

    return totals_by_period


def summarize_periods(product_code, totals_by_period):
    """Turn one product's accumulated totals into output rows and season counts.

    Args:
        product_code: Value written to the PRODUCT_CODE field of every row.
        totals_by_period: The list returned by `accumulate_period_stats`.

    Returns:
        A `(records, season_counts)` tuple. `records` holds one dict per
        period (all statistics 0 for periods without observations);
        `season_counts` maps each season name to the number of periods in
        that season that had at least one observation.
    """
    records = []
    season_counts = {'SPRING': 0, 'SUMMER': 0, 'FALL': 0, 'WINTER': 0}

    for period_index, totals in enumerate(totals_by_period):
        record = {
            'COUNT_PER_TEN_DAYS': period_index,
            'PRODUCT_CODE': product_code,
            'MIN_PRODUCT_COST': 0,
            'MAX_PRODUCT_COST': 0,
            'AVG_PRODUCT_COST': 0,
            'MIN_FLUCTUATION_RATE': 0,
            'MAX_FLUCTUATION_RATE': 0,
        }

        observations = totals['TOTAL_COUNT']
        if observations != 0:
            record['MIN_PRODUCT_COST'] = totals['TOTAL_MIN_COST']
            record['MAX_PRODUCT_COST'] = totals['TOTAL_MAX_COST']
            record['AVG_PRODUCT_COST'] = totals['TOTAL_AVG_COST'] / observations
            record['MIN_FLUCTUATION_RATE'] = totals['TOTAL_MIN_RATIO'] / observations
            record['MAX_FLUCTUATION_RATE'] = totals['TOTAL_MAX_RATIO'] / observations
            season_counts[season_of_period(period_index)] += 1

        records.append(record)

    return records, season_counts


result = {}
season_fixed = []
# Rows are collected in a plain list and turned into a DataFrame once at the
# end; concatenating onto the frame inside the loop copies it on every row.
fluctuation_records = []
product_code_list = data['PRODUCT_CODE'].unique()

# Per product: accumulate the count, min/max price and the rate of change
# relative to the preceding row, for each of the 36 ten-day periods across all
# loaded years.
for product_code in product_code_list:
    product_data = data.loc[data['PRODUCT_CODE'] == product_code, :].reset_index(drop=True)
    result[product_code] = accumulate_period_stats(product_data)

    period_records, season_counts = summarize_periods(product_code, result[product_code])
    fluctuation_records.extend(period_records)

    max_count = max(season_counts.values())
    total = sum(season_counts.values())
    if 0 in season_counts.values() or total <= SEASONAL_MAX_ACTIVE_PERIODS:
        # Every season that ties for the maximum is printed and recorded; the
        # assignment loop below then leaves the last recorded one in place.
        for season_name, active_periods in season_counts.items():
            if active_periods == max_count:
                print(f'{product_code}:{season_name}')
                season_fixed.append((product_code, season_name))

# Build the frame in one step and enforce the declared column order and dtypes.
fluctuation = pd.DataFrame(
    fluctuation_records, columns=list(FLUCTUATION_DTYPES)
).astype(FLUCTUATION_DTYPES)

# Overwrite M_DISTCTNS for every product that was assigned a fixed season.
selected_basic_product_info = pd.read_csv("data/selected_basic_product_info.csv", encoding='utf-8')
for product_code, season_name in season_fixed:
    selected_basic_product_info.loc[
        selected_basic_product_info['PRODUCT_CODE'] == product_code, 'M_DISTCTNS'
    ] = season_name
    

226:WINTER
416:WINTER
222:SPRING
222:SUMMER
425:SUMMER
413:SUMMER
279:FALL
280:SUMMER


In [45]:
# Display the assembled per-product, per-period statistics as the cell output.
fluctuation

Unnamed: 0,COUNT_PER_TEN_DAYS,PRODUCT_CODE,MIN_PRODUCT_COST,MAX_PRODUCT_COST,AVG_PRODUCT_COST,MIN_FLUCTUATION_RATE,MAX_FLUCTUATION_RATE
0,0,111,1915.00,2762.00,2379.309524,0.998565,1.000921
1,1,111,1920.00,2787.50,2377.872024,0.998142,1.001187
2,2,111,1935.00,2802.50,2389.410714,1.001804,1.008860
3,3,111,2006.00,2811.00,2396.937872,1.001984,1.006645
4,4,111,2040.00,2831.00,2401.326389,1.001417,1.003635
...,...,...,...,...,...,...,...
2047,31,280,3013.75,3262.50,3179.583333,1.083170,1.172573
2048,32,280,2491.25,3012.50,2752.500000,0.783515,0.947451
2049,33,280,2607.50,2855.00,2737.250000,0.947321,1.037239
2050,34,280,2512.50,2905.00,2658.125000,0.917892,1.061284


In [46]:
# Write the product info table (with its M_DISTCTNS overrides) to a new CSV,
# leaving out the DataFrame index column.
selected_basic_product_info.to_csv(
    'data/fixed_selected_basic_product_info.csv',
    encoding='utf-8',
    index=False,
)

In [47]:
# Write the per-product, per-period statistics to CSV, leaving out the
# DataFrame index column.
fluctuation.to_csv(
    'data/product_cost_fluctuation.csv',
    encoding='utf-8',
    index=False,
)