In [132]:
import os

import requests
import json
import pandas as pd
import numpy as np
import aiohttp
import asyncio
from dotenv import load_dotenv

# Load environment variables (expected to come from a local .env file) before
# the credentials below are read.
load_dotenv()

# KAMIS API credentials; both are None when the variables are not set.
KAMIS_CERT_KEY = os.getenv('KAMIS_CERT_KEY')
KAMIS_CERT_ID = os.getenv('KAMIS_CERT_ID')
# Item category codes queried for every date.
# NOTE(review): presumably KAMIS top-level categories (e.g. grains, vegetables,
# special crops, fruits) - confirm against the KAMIS code table.
CATEGORY_CODES = ['100', '200', '300', '400']

In [133]:
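# Collection period: 2018-01-01 through 2023-12-31
# For each crop in use, compute the average price of each day
# Minimum and maximum price for each 10-day period
# Overall average price for each 10-day period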

In [134]:
# Call the KAMIS daily-price-by-category API
async def get_daily_price(date: str, category_code: str, session: "aiohttp.ClientSession",
                          max_retries: int = 10, retry_delay: float = 0.5) -> dict:
    """Fetch the daily price list of one item category for one date.

    Args:
        date: Value sent as ``p_regday`` (callers in this notebook pass
            ``YYYY-MM-DD`` strings).
        category_code: Value sent as ``p_item_category_code``.
        session: Open aiohttp client session used to issue the GET request.
        max_retries: How many times a failed request is retried before the
            last error is re-raised (total attempts = ``max_retries + 1``).
        retry_delay: Base delay in seconds; the wait grows linearly with the
            attempt number so a struggling server is not hammered.

    Returns:
        The decoded JSON body of the response.

    Raises:
        aiohttp.ClientError, asyncio.TimeoutError, ValueError: when the
            request still fails after ``max_retries`` retries. Any other
            exception propagates immediately without a retry.
    """
    request_url = 'http://www.kamis.or.kr/service/price/xml.do'
    # Passed via ``params=`` so the client library encodes the query string
    # instead of it being concatenated by hand.
    params = {
        'action': 'dailyPriceByCategoryList',
        'p_cert_key': KAMIS_CERT_KEY,
        'p_cert_id': KAMIS_CERT_ID,
        'p_item_category_code': category_code,
        'p_regday': date,
        'p_returntype': 'json'
    }

    attempt = 0
    while True:
        try:
            async with session.get(request_url, params=params) as response:
                # The body is parsed as JSON while accepting a text/plain
                # content type.
                return await response.json(content_type='text/plain')
        except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
            # Transport errors, timeouts and undecodable bodies are retried a
            # bounded number of times (the previous version recursed without
            # any limit or delay).
            attempt += 1
            if attempt > max_retries:
                raise
            await asyncio.sleep(retry_delay * attempt)

In [135]:
def _average_cost_by_item(responses: list) -> dict:
    """Average the ``dpr1`` prices of every item across the given API responses."""
    totals = {}  # item_code -> [sum of prices, number of prices]

    for res in responses:
        data = res['data']

        # Abnormal responses (error_code 001, 200, 900) carry a list instead
        # of a dict with an 'item' entry, so they are skipped.
        if isinstance(data, list):
            continue

        for item in data['item']:
            price = item['dpr1'].replace(',', '')

            # '-' marks a missing price for that entry.
            if price == '-':
                continue

            acc = totals.setdefault(int(item['item_code']), [0, 0])
            acc[0] += int(price)
            acc[1] += 1

    return {code: total / count for code, (total, count) in totals.items()}


# Average price, for one day, over every entry of each crop in all categories
async def get_daily_product_cost(date: str) -> dict:
    """Compute the per-item average price for one date.

    One request per code in ``CATEGORY_CODES`` is issued concurrently over a
    shared session with a 20-second timeout; the ``dpr1`` prices of every
    returned entry are then averaged per ``item_code``.

    Args:
        date: Date string forwarded to ``get_daily_price``.

    Returns:
        ``{'date': date, 'cost': {item_code (int): average price (float)}}``.
        ``cost`` is empty when no response contains a usable price.
    """
    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(20)) as session:
        tasks = [asyncio.create_task(get_daily_price(date, cat, session))
                 for cat in CATEGORY_CODES]
        responses = await asyncio.gather(*tasks)

    return {'date': date, 'cost': _average_cost_by_item(responses)}

In [136]:
def _ten_day_period_rows(daily_product_cost_list: list, year: int, month: int) -> list:
    """Fold daily per-product averages into one record per (ten-day period, product).

    A month is split into three periods: days 1-10, 11-20 and 21 to the end of
    the month. Records are returned period by period, products in the order
    they were first seen inside that period.
    """
    thirds = [{}, {}, {}]

    for daily in daily_product_cost_list:
        day = pd.to_datetime(daily['date']).day
        # (day - 1) // 10 is 0, 1, 2 for days 1-30; day 31 is clamped into
        # the last period.
        third = thirds[min((day - 1) // 10, 2)]

        for product_code, avg_cost in daily['cost'].items():
            stats = third.get(product_code)
            if stats is None:
                # Seed min/max with the first observation rather than
                # sentinel values.
                third[product_code] = {'min': avg_cost, 'max': avg_cost,
                                       'total': avg_cost, 'count': 1}
            else:
                stats['min'] = min(stats['min'], avg_cost)
                stats['max'] = max(stats['max'], avg_cost)
                stats['total'] += avg_cost
                stats['count'] += 1

    rows = []
    for offset, third in enumerate(thirds):
        for product_code, stats in third.items():
            rows.append({
                'YEAR': year,
                # Zero-based index of the ten-day period within the year.
                'COUNT_PER_TEN_DAYS': (month - 1) * 3 + offset,
                'PRODUCT_CODE': product_code,
                'MIN_PRODUCT_COST': float(stats['min']),
                'MAX_PRODUCT_COST': float(stats['max']),
                'AVG_PRODUCT_COST': stats['total'] / stats['count'],
            })
    return rows


async def get_monthly_product_cost_df(year: str, month: str) -> pd.DataFrame:
    """Build the ten-day price summary of one month.

    Every day of the month is fetched concurrently through
    ``get_daily_product_cost``; the daily per-product averages are then
    reduced to minimum, maximum and mean per ten-day period.

    Args:
        year: Four-digit year as a string, e.g. ``'2019'``.
        month: Month number as a string, ``'1'`` to ``'12'``.

    Returns:
        DataFrame with columns YEAR, COUNT_PER_TEN_DAYS, PRODUCT_CODE,
        MIN_PRODUCT_COST, MAX_PRODUCT_COST and AVG_PRODUCT_COST, one row per
        (ten-day period, product). An empty frame with the same columns is
        returned when no day produced any price.
    """
    # The exclusive end of the range is the first day of the following month;
    # divmod rolls December over into January of the next year.
    div, mod = divmod(int(month), 12)
    dates = pd.date_range(f'{year}-{month}-01', f'{int(year)+div}-{mod+1}-01',
                          inclusive='left').strftime('%Y-%m-%d')

    tasks = [asyncio.create_task(get_daily_product_cost(date)) for date in dates]
    daily_product_cost_list = await asyncio.gather(*tasks)

    schema = {
        'YEAR': 'int',
        'COUNT_PER_TEN_DAYS': 'int',
        'PRODUCT_CODE': 'str',
        'MIN_PRODUCT_COST': 'float',
        'MAX_PRODUCT_COST': 'float',
        'AVG_PRODUCT_COST': 'float'
    }

    rows = _ten_day_period_rows(daily_product_cost_list, int(year), int(month))
    if rows:
        # Build the frame once from all records instead of concatenating one
        # single-row frame per product.
        result = pd.DataFrame(rows, columns=list(schema))
    else:
        result = pd.DataFrame(columns=list(schema)).astype(schema)

    print(f'{year}-{month} done')
    return result
    

In [139]:
async def get_yearly_product_cost_df(year: str) -> pd.DataFrame:
    """Collect the ten-day price summaries of all twelve months of a year.

    Args:
        year: Four-digit year as a string, e.g. ``'2019'``.

    Returns:
        The monthly frames from ``get_monthly_product_cost_df`` stacked in
        month order with a fresh 0..n-1 index. If no month has any rows, an
        empty frame with the expected columns is returned.
    """
    # Months are awaited one after another on purpose; each month already
    # fans out one request batch per day.
    # NOTE(review): presumably sequential to limit concurrent load on the
    # API - confirm before parallelising.
    monthly_product_cost_list = []
    for month in range(1, 13):
        monthly_product_cost = await get_monthly_product_cost_df(year, str(month))
        monthly_product_cost_list.append(monthly_product_cost)

    # Empty frames are left out of the concat: pandas has deprecated letting
    # empty entries take part in it.
    populated = [frame for frame in monthly_product_cost_list if not frame.empty]
    if not populated:
        schema = {
            'YEAR': 'int',
            'COUNT_PER_TEN_DAYS': 'int',
            'PRODUCT_CODE': 'str',
            'MIN_PRODUCT_COST': 'float',
            'MAX_PRODUCT_COST': 'float',
            'AVG_PRODUCT_COST': 'float'
        }
        return pd.DataFrame(columns=list(schema)).astype(schema)

    return pd.concat(populated, ignore_index=True)

In [140]:
# Fetch one summary frame per year, one year at a time (top-level await, as
# supported in notebook cells). Each call prints "<year>-<month> done" once a
# month has been processed.
product_cost_2018 = await get_yearly_product_cost_df('2018')
product_cost_2019 = await get_yearly_product_cost_df('2019')
product_cost_2020 = await get_yearly_product_cost_df('2020')
product_cost_2021 = await get_yearly_product_cost_df('2021')
product_cost_2022 = await get_yearly_product_cost_df('2022')
product_cost_2023 = await get_yearly_product_cost_df('2023')


2019-1 done
2019-2 done
2019-3 done
2019-4 done
2019-5 done
2019-6 done
2019-7 done
2019-8 done
2019-9 done
2019-10 done
2019-11 done
2019-12 done
2020-1 done
2020-2 done
2020-3 done
2020-4 done
2020-5 done
2020-6 done
2020-7 done
2020-8 done
2020-9 done
2020-10 done
2020-11 done
2020-12 done
2021-1 done
2021-2 done
2021-3 done
2021-4 done
2021-5 done
2021-6 done
2021-7 done
2021-8 done
2021-9 done
2021-10 done
2021-11 done
2021-12 done
2022-1 done
2022-2 done
2022-3 done
2022-4 done
2022-5 done
2022-6 done
2022-7 done
2022-8 done
2022-9 done
2022-10 done
2022-11 done
2022-12 done
2023-1 done
2023-2 done
2023-3 done
2023-4 done
2023-5 done
2023-6 done
2023-7 done
2023-8 done
2023-9 done
2023-10 done
2023-11 done
2023-12 done


In [143]:
# Persist every yearly summary as data/product_cost_<year>.csv.
# The target directory is created first so the export does not fail on a
# fresh checkout; the frame index is still written, as before.
os.makedirs('data', exist_ok=True)

for _year, _yearly_frame in {
    2018: product_cost_2018,
    2019: product_cost_2019,
    2020: product_cost_2020,
    2021: product_cost_2021,
    2022: product_cost_2022,
    2023: product_cost_2023,
}.items():
    _yearly_frame.to_csv(f"data/product_cost_{_year}.csv", encoding='utf-8')

In [142]:
# Display the 2019 ten-day summary (last expression of the cell is rendered).
product_cost_2019

Unnamed: 0,YEAR,COUNT_PER_TEN_DAYS,PRODUCT_CODE,MIN_PRODUCT_COST,MAX_PRODUCT_COST,AVG_PRODUCT_COST
0,2019,0,111,48550.0,48750.000000,48630.000000
1,2019,0,112,133000.0,133000.000000,133000.000000
2,2019,0,141,177800.0,178133.333333,178002.714286
3,2019,0,142,348400.0,349733.333333,348590.476190
4,2019,0,143,337600.0,338933.333333,338552.380952
...,...,...,...,...,...,...
1830,2019,35,418,22200.0,22400.000000,22300.000000
1831,2019,35,419,31500.0,33500.000000,32566.666667
1832,2019,35,420,20300.0,21300.000000,20733.333333
1833,2019,35,424,54900.0,59600.000000,57833.333333


In [141]:
# Display the 2018 ten-day summary (last expression of the cell is rendered).
product_cost_2018

Unnamed: 0,YEAR,COUNT_PER_TEN_DAYS,PRODUCT_CODE,MIN_PRODUCT_COST,MAX_PRODUCT_COST,AVG_PRODUCT_COST
0,2018,0,111,38300.0,38400.0,38385.714286
1,2018,0,112,97400.0,97400.0,97400.000000
2,2018,0,141,158476.0,158476.0,158476.000000
3,2018,0,142,272800.0,272800.0,272800.000000
4,2018,0,143,342800.0,342800.0,342800.000000
...,...,...,...,...,...,...
1829,2018,35,419,34150.0,34550.0,34400.000000
1830,2018,35,420,19600.0,20900.0,19866.666667
1831,2018,35,421,48437.5,53250.0,50565.972222
1832,2018,35,424,66400.0,66400.0,66400.000000
