In [None]:
##########################################################################
#########################                        #########################
#########################   데이터 전처리 파트   #########################
#########################                        #########################
##########################################################################

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from datetime import datetime, timedelta

'''
포스 데이터 처리
'''
def pos_data(pos_path):
    """Build a day-by-product sales-amount table from a POS export.

    Args:
        pos_path: Path to a cp949-encoded CSV containing at least the columns
            ``STORE_OPEN_YMD``, ``SALE_UID``, ``PDT_UID``, ``PDT_CNT`` and
            ``PDT_AMT``.

    Returns:
        DataFrame indexed by ``STORE_OPEN_YMD`` with one column per
        ``PDT_UID``. Each cell is the summed ``PDT_CNT * PDT_AMT`` for that
        day and product, cast to int; combinations with no rows are 0.
    """
    raw = pd.read_csv(pos_path, encoding='cp949')

    # Work on a copy of the relevant columns so the derived column below
    # is not written into a view of the raw frame.
    wanted = ['STORE_OPEN_YMD', 'SALE_UID', 'PDT_UID', 'PDT_CNT', 'PDT_AMT']
    sales = raw[wanted].copy()
    sales['td_amt'] = sales['PDT_CNT'] * sales['PDT_AMT']

    # Total amount per (day, product) pair.
    per_day_product = sales.groupby(['STORE_OPEN_YMD', 'PDT_UID'])['td_amt'].sum()

    # Spread products across columns; absent pairs become 0.
    amount_table = per_day_product.unstack('PDT_UID').fillna(0)
    return amount_table.astype(int)

'''
날씨, 연휴 데이터 처리
'''
def _rain_to_mm(value):
    """Convert one raw ``weather_rain_mm`` reading to a float."""
    if pd.isna(value):
        return 0.0
    # str() first: the column arrives as floats when the CSV holds only numbers.
    text = str(value).strip()
    if text == '강수없음':  # marker the source data uses instead of a number
        return 0.0
    if text.endswith('mm'):
        text = text[:-2]
    return float(text)


def _replace_below_with_mean(values, lower):
    """Replace readings below *lower* with the mean of the other readings."""
    values = values.astype(float)
    too_low = values < lower
    if not too_low.any():
        return values
    # The mean is taken over the in-range readings only, so the rejected
    # values cannot distort their own replacement.
    return values.mask(too_low, values[~too_low].mean())


def weather_holiday(weather_path, holiday_path):
    """Load weather readings, aggregate them per day and attach holiday names.

    Args:
        weather_path: Path to a CSV with the columns ``date``,
            ``weather_tmp``, ``weather_hum`` and ``weather_rain_mm``.
        holiday_path: Path to a cp949-encoded CSV with the columns ``date``,
            ``holiday_name`` and ``isholiday``.

    Returns:
        DataFrame with one row per ``date`` and the columns ``date``,
        ``weather_tmp`` (daily mean), ``weather_hum`` (daily mean),
        ``weather_rain_mm`` (daily sum) and ``holiday_name`` (``'N'`` for
        dates absent from the holiday file).

    Raises:
        KeyError: If an expected column is missing from either file.
        ValueError: If a rain reading cannot be parsed as a number.

    Notes:
        Temperature readings below -30 and humidity readings below 0 are
        replaced with the mean of the remaining readings of that column.
        Negative rain amounts are set to 0.
    """
    weather = pd.read_csv(weather_path)

    # Missing values: carry the previous reading forward. Assigning the
    # result back (instead of inplace on a selected column) keeps this
    # working under pandas Copy-on-Write.
    weather['weather_tmp'] = weather['weather_tmp'].ffill()
    weather['weather_hum'] = weather['weather_hum'].ffill()

    # Rain: missing -> 0, strip the "mm" suffix, then drop negative amounts.
    rain = weather['weather_rain_mm'].map(_rain_to_mm).astype(float)
    weather['weather_rain_mm'] = rain.clip(lower=0)

    # Outliers.
    weather['weather_tmp'] = _replace_below_with_mean(weather['weather_tmp'], -30)
    weather['weather_hum'] = _replace_below_with_mean(weather['weather_hum'], 0)

    # Daily aggregates: mean temperature/humidity, total rain.
    daily = (
        weather.groupby('date')
        .agg({'weather_tmp': 'mean', 'weather_hum': 'mean', 'weather_rain_mm': 'sum'})
        .reset_index()
    )

    holiday = pd.read_csv(holiday_path, encoding='cp949').drop(columns='isholiday')

    # Left join keeps every weather day; days without a holiday get 'N'.
    merged = pd.merge(daily, holiday, on='date', how='left')
    merged['holiday_name'] = merged['holiday_name'].fillna('N')

    return merged

'''
머지 pos / weather / holiday data  
'''
def merge_data(pos_data, weather_holiday_data):
    """Join the POS table with the weather/holiday table on the date.

    Args:
        pos_data: DataFrame whose index holds the date key.
        weather_holiday_data: DataFrame with a ``date`` column holding the
            same key.

    Returns:
        Inner join of both frames. ``date`` is converted to text and
        re-formatted by inserting ``-`` after the 4th and 6th characters
        (e.g. ``20200102`` becomes ``'2020-01-02'``).
    """
    def _dashed(value):
        # Slice the textual form positionally: first 4 / next 2 / remainder.
        text = str(value)
        return '-'.join((text[:4], text[4:6], text[6:]))

    combined = pd.merge(
        pos_data,
        weather_holiday_data,
        how='inner',
        left_index=True,
        right_on='date',
    )
    combined['date'] = combined['date'].apply(_dashed)
    return combined