In [125]:
import numpy as np
import pandas as pd
import json
# Notebook-wide display settings: render floats with thousands separators
# and never truncate the number of rows shown for a DataFrame.
pd.set_option('display.float_format', '{:,}'.format)
pd.options.display.max_rows = None

In [126]:
# Load the raw JSON response and flatten the nested record list into a table.
with open("data/response.json", 'rt', encoding='utf-8') as f:
  data = json.load(f)

product_info_data = pd.json_normalize(data, record_path=['Grid_20171128000000000572_1','row'])

# Drop the descriptive columns that the later cells do not use.
product_info_data = product_info_data.drop(
  columns=['M_DISTCTNS_ITM', 'PRDLST_CL', 'MTC_NM', 'MAIN_SPCIES_NM', 'EFFECT', 'PURCHASE_MTH', 'COOK_MTH', 'TRT_MTH', 'URL', 'IMG_URL', 'REGIST_DE']
)

# Strip the "월" (month) suffix from both month columns. The .str accessor
# propagates missing values, whereas calling x.replace() per element raises
# AttributeError as soon as one record has a null in the column.
product_info_data['M_DISTCTNS'] = product_info_data['M_DISTCTNS'].str.replace("월", "", regex=False)
product_info_data['PRDCTN__ERA'] = product_info_data['PRDCTN__ERA'].str.replace("월", "", regex=False)

# Keep only values shaped like "start~end"; everything else becomes NaN.
# na=False makes the mask strictly boolean even when the column has nulls.
# NOTE(review): a single month such as "7" is also discarded here although
# era_to_bitmask can parse it - confirm that this is intended.
is_month_range = product_info_data['PRDCTN__ERA'].str.contains(r'^\d+~\d+$', na=False)
product_info_data['PRDCTN__ERA'] = product_info_data['PRDCTN__ERA'].where(is_month_range, np.nan)

In [127]:
# Inspect the cleaned table (months as digit strings, non-range periods as NaN).
display(product_info_data)

Unnamed: 0,ROW_NUM,IDNTFC_NO,PRDLST_NM,M_DISTCTNS,PRDCTN__ERA
0,1,227366,산마늘,4,3~5
1,2,227365,울외,6,3~7
2,3,227364,밤콩,7,
3,4,227363,노각,4,4~7
4,5,227362,삼채,6,
5,6,227361,방아잎,8,8~11
6,7,227360,돼지감자,12,
7,8,227358,섬초,11,11~3
8,9,227354,아마란스,10,
9,10,227351,솔부추,1,12~1


In [128]:
# Load the product code table; it is the left side of the merge further down,
# joined on its PRODUCT_NAME column.
product_code_data = pd.read_csv('data/productname.csv')

In [129]:
# Inspect the product code table that was just loaded.
display(product_code_data)

Unnamed: 0,PRODUCT_CODE,PRODUCT_NAME
0,111,쌀
1,112,찹쌀
2,141,콩
3,142,팥
4,143,녹두
5,144,메밀
6,151,고구마
7,152,감자
8,161,귀리
9,113,혼합곡


In [130]:
def month_to_Season(x: int) -> str:
  """Map a month number to a season label.

  Args:
    x: Month number. A plain ``str`` holding digits is converted with
      ``int``; a missing value (``None`` / NaN) is accepted as well.

  Returns:
    ``'ALL'`` for a missing value, ``'WINTER'`` for 12, 1, 2, ``'SPRING'``
    for 3-5, ``'SUMMER'`` for 6-8, and ``'FALL'`` for every other value
    (including numbers outside 1-12).
  """
  if pd.isna(x):
    return 'ALL'

  month = int(x) if type(x) is str else x

  season_table = (
    ('WINTER', (12, 1, 2)),
    ('SPRING', (3, 4, 5)),
    ('SUMMER', (6, 7, 8)),
  )
  for season, months in season_table:
    if month in months:
      return season

  # Anything that did not match above is treated as fall.
  return 'FALL'
  
def era_to_bitmask(x: str) -> int:
  """Encode a production period such as ``"3~5"`` as a month bitmask.

  Month ``m`` is stored in bit ``m`` (value ``1 << m``), so bit 0 stays
  unused for 1-based months; e.g. ``"9~10"`` -> ``(1 << 9) | (1 << 10)``.

  A period whose start is later than its end (``"11~3"``, ``"12~1"``) wraps
  around the year end: it covers start..12 followed by 1..end. Previously
  such periods produced an empty range and were silently encoded as 0.

  Args:
    x: ``"start~end"`` or a single month ``"m"``; a missing value
      (``None`` / NaN) is accepted as well.

  Returns:
    The bitmask as an int, or 0 for a missing value.

  Raises:
    ValueError: If a part of ``x`` cannot be parsed by ``int``.
  """
  if pd.isna(x):
    return 0

  bounds = [int(part) for part in x.split('~')]
  start = bounds[0]
  # A single month is treated as a one-month period.
  end = bounds[1] if len(bounds) > 1 else bounds[0]

  if start <= end:
    months = list(range(start, end + 1))
  else:
    # The period crosses the year boundary: start..December, then January..end.
    months = list(range(start, 13)) + list(range(1, end + 1))

  return sum(1 << month for month in months)

In [131]:
# Left-join the cleaned product info onto the code table by product name,
# drop the join/bookkeeping columns, then encode the month columns:
# M_DISTCTNS becomes a season label and PRDCTN__ERA a month bitmask.
# Codes without a matching product keep NaN from the join and are encoded
# as 'ALL' / 0 by the two helper functions.
product_info = (
  product_code_data
  .merge(product_info_data, how="left", left_on='PRODUCT_NAME', right_on='PRDLST_NM')
  .drop(columns=['IDNTFC_NO', 'PRDLST_NM', 'ROW_NUM'])
  .assign(
    M_DISTCTNS=lambda frame: frame['M_DISTCTNS'].apply(month_to_Season),
    PRDCTN__ERA=lambda frame: frame['PRDCTN__ERA'].apply(era_to_bitmask),
  )
)

In [132]:
# Inspect the merged table: one season label and one month bitmask per product code.
display(product_info)

Unnamed: 0,PRODUCT_CODE,PRODUCT_NAME,M_DISTCTNS,PRDCTN__ERA
0,111,쌀,FALL,1536
1,112,찹쌀,FALL,1536
2,141,콩,ALL,0
3,142,팥,FALL,1536
4,143,녹두,FALL,1536
5,144,메밀,FALL,3072
6,151,고구마,FALL,1536
7,152,감자,SUMMER,1984
8,161,귀리,FALL,1536
9,113,혼합곡,ALL,0


In [133]:
# Persist the merged table as UTF-8 CSV; index=False leaves the row index out of the file.
product_info.to_csv('data/basic_product_info.csv', encoding='utf-8', index=False)