In [33]:
# !pip install holidays
# !pip install matplotlib

In [34]:
import json
import os
import pickle

import holidays
import numpy as np
import pandas as pd

## AWS
import boto3
import sagemaker

### Data Load

In [35]:
# Resolve the AWS region of the current boto3 session
boto_session = boto3.session.Session()
region = boto_session.region_name

# The hourly power-demand CSV is read from the SageMaker default bucket
sagemaker_session = sagemaker.session.Session()
bucket_name = sagemaker_session.default_bucket()

# Keep the S3 URI under its own name so `data` only ever refers to the DataFrame
demand_csv_uri = "s3://{}/한국전력거래소_시간별 전력수요량_20211231.csv".format(bucket_name)
data = pd.read_csv(demand_csv_uri)

## Data Preprocessing

In [36]:
## Rename the date column to an ASCII name
renamed = data.rename(columns={'날짜': 'date'})

## Reshape wide -> long: every column after the first becomes one (date, time, MWh) row
hour_columns = list(renamed.columns[1:])
long_form = renamed.melt(
    id_vars='date',
    value_vars=hour_columns,
    var_name='time',
    value_name='MWh',
)

## Convert the hour labels: strip the '시' suffix, shift to zero-based hours (N - 1),
## zero-pad to two digits, then parse "<date> <HH>" into a timestamp
hour_start = long_form['time'].str.replace('시', '').astype('int') - 1
hour_text = hour_start.astype('str').str.zfill(2)
timestamps = pd.to_datetime(long_form['date'] + ' ' + hour_text)

## Keep only timestamp + demand, ordered chronologically with a fresh 0..N-1 index
data = (
    pd.DataFrame({'date_time': timestamps, 'MWh': long_form['MWh']})
    .sort_values('date_time')
    .reset_index(drop=True)
)

## Holiday / Weekend Flags

In [37]:
## Flag days off

## Public holidays: membership test against the Korean holiday calendar
kr_holidays = holidays.KR()
is_holiday = data['date_time'].map(lambda ts: ts in kr_holidays)

## Weekends: weekday 5 (Saturday) or 6 (Sunday), computed with the vectorized .dt accessor
is_weekend = data['date_time'].dt.weekday.isin([5, 6])

## Add the two label columns plus a combined 0/1 off-day indicator.
## assign() returns a new frame, so re-running this cell simply overwrites the same columns.
data = data.assign(
    holiday=np.where(is_holiday, 'holiday', 'non-holiday'),
    weekend=np.where(is_weekend, 'weekend', 'weekday'),
    off_day=np.where(is_weekend | is_holiday, 1, 0),
)

## to_csv does not create missing directories, so make sure data/ exists first.
## The default index=True is kept so the file layout is unchanged.
os.makedirs('data', exist_ok=True)
data.to_csv('data/raw_data.csv')

In [38]:
# Preview the frame with the holiday / weekend / off_day columns (pandas truncates the middle rows).
data

Unnamed: 0,date_time,MWh,holiday,weekend,off_day
0,2021-01-01 00:00:00,64942,holiday,weekday,1
1,2021-01-01 01:00:00,62593,holiday,weekday,1
2,2021-01-01 02:00:00,60905,holiday,weekday,1
3,2021-01-01 03:00:00,59889,holiday,weekday,1
4,2021-01-01 04:00:00,59638,holiday,weekday,1
...,...,...,...,...,...
8755,2021-12-31 19:00:00,72976,non-holiday,weekday,0
8756,2021-12-31 20:00:00,71602,non-holiday,weekday,0
8757,2021-12-31 21:00:00,69383,non-holiday,weekday,0
8758,2021-12-31 22:00:00,68874,non-holiday,weekday,0


### Custom Script Result Data Load

In [22]:
# S3 location of the batch-inference output file (plain string; no interpolation needed)
batch_output_uri = 's3://implementation-data/batch-inference/test_x_500.json.out'
batch_frame = pd.read_json(batch_output_uri)

# Flatten the frame row by row into a single list of cells.
# A comprehension is linear; sum(list_of_lists, []) re-copies the accumulator on every step.
cells = [cell for row in batch_frame.values.tolist() for cell in row]
predictions = np.array(cells)

# Keep only the first two axes; reshape succeeds only if any trailing axes have length 1.
batch_output = np.reshape(predictions, predictions.shape[:2])

In [23]:
## Undo the scaling (inverse_transform) on the batch predictions
# NOTE(review): pickle.load can execute arbitrary code from the file; only load this
# scaler artifact from a trusted location.
with open('AWS/Implementation/minmax_scl.pickle', "rb") as fr:
    scl_model = pickle.load(fr)

batch_output = pd.DataFrame(data=scl_model.inverse_transform(batch_output))

# NOTE(review): `test_data` is not defined in any cell visible in this notebook, so this
# line raises NameError on a fresh kernel unless an earlier cell creates it.
# The assignment aligns on index — confirm test_data uses the same 0..N-1 index.
batch_output['date_time'] = test_data['date_time']

## Put the timestamp first, followed by the 24 prediction columns labelled 0..23
ordered_columns = ['date_time', *range(24)]
batch_output = batch_output[ordered_columns]

## to_csv does not create missing directories, so make sure data/ exists first
os.makedirs('data', exist_ok=True)
batch_output.to_csv('data/custom_result.csv')

### Forecast Result Data Load

In [31]:
# Read the forecast export CSV from S3 and keep a local copy under data/
forecast_export_uri = "s3://implementation-data/batch-inference/forecast_export_2022-06-10T02-42-30Z_part1.csv"
forecast_result = pd.read_csv(forecast_export_uri)

# to_csv does not create missing directories, so make sure data/ exists first.
# The default index=True is kept so the file layout is unchanged.
os.makedirs('data', exist_ok=True)
forecast_result.to_csv('data/forecast_result.csv')

### Upload data

In [32]:
# NOTE(review): scratch arithmetic — the result is not assigned or used by any visible cell; consider removing.
700*17

11900

In [27]:
# Manifest-style dict: lists the S3 URI of custom_result.csv and declares the upload format as CSV
quicksight_dataset = {
    "fileLocations": [
        {
            "URIs": [
                "s3://implementation-data/quicksight_test/custom_result.csv"
            ]
        }
    ],
    "globalUploadSettings": {
        "format": "CSV"
    },
}

# open(..., "w") does not create missing directories, so make sure data/ exists first
os.makedirs("data", exist_ok=True)

# Serialize the manifest next to the CSVs so it is uploaded together with them
with open("data/custom_result.json", "w") as json_file:
    json.dump(quicksight_dataset, json_file)

In [29]:
# Upload the local data/ path to the quicksight_test prefix of the implementation-data bucket
upload_session = sagemaker.Session()
uploaded_uri = upload_session.upload_data(
    path='data',
    bucket='implementation-data',
    key_prefix='quicksight_test',
)
# Last expression: show the returned S3 URI as the cell output
uploaded_uri

's3://implementation-data/quicksight_test'