In [None]:
# !pip install IPython
from IPython.display import display, HTML, Image

In [None]:
!git clone https://github.com/KU-DIC/LG_time_series_day05.git # for Colab: fetches the course repository whose data/ and image/ files are used below

# [머신러닝 기반 시계열 분석 2 실습]
# ANN
## [ANN - Regression (수치예측) 1]

##### jupyter notebook 단축키

- ctrl+enter: 셀 실행   
- shift+enter: 셀 실행 및 다음 셀 이동   
- alt+enter: 셀 실행, 다음 셀 이동, 새로운 셀 생성
- a: 상단에 새로운 셀 만들기
- b: 하단에 새로운 셀 만들기
- dd: 셀 삭제 (x: 셀 잘라내기)
- 함수 ( ) 안에서 shift+tab: arguments description. shift+tab+tab은 길게 볼 수 있도록

## 1. 모듈 불러오기

In [None]:
''' 기본 모듈 및 시각화 모듈 '''
from IPython.display import display, HTML
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

''' 데이터 전처리 모듈 '''
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

''' Neural Network Regressor 모듈 '''
from sklearn.neural_network import MLPRegressor

''' 결과 평가용 모듈 '''
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

''' 기타 optional'''
import warnings, itertools
# Silence every Python warning to keep cell output short.
# NOTE(review): this presumably also hides scikit-learn convergence warnings from the MLP fit — confirm that is intended.
warnings.filterwarnings(action='ignore')
# Show all DataFrame columns instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

## 2. 분석 데이터셋: Toyota Corolla 
<center><a href='https://www.kaggle.com/klkwak/toyotacorollacsv'> https://www.kaggle.com/klkwak/toyotacorollacsv </a></center>

### 설명변수 (X)
- Mfg_Month
- Mfg_Year
- KM
- Fuel_Type
...
- Radio_cassette
- Tow_Bar

### 반응변수 (Y)
- Price

### 데이터 불러오기

In [None]:
# Colab: read the CSV from the repository cloned under /content.
data = pd.read_csv('/content/LG_time_series_day05/data/ToyotaCorolla.csv')
# Local run: use the relative path below instead.
# data = pd.read_csv('./data/ToyotaCorolla.csv') # local

## 2-1. 데이터 전처리 및 탐색적 데이터 분석

### 데이터 확인

In [None]:
# Report the (rows, columns) shape, then preview the first rows.
print('Data shape: {}'.format(data.shape))
data.head()

### 불필요한 변수 제거

In [None]:
# Drop the 'Id' and 'Model' columns, which are not used in the analysis.
# errors='ignore' keeps this cell safe to re-run: a second execution is a
# no-op instead of raising KeyError for the already-removed columns.
data = data.drop(columns=['Id', 'Model'], errors='ignore')

### 설명변수(X)와 반응변수(Y) 정의

In [None]:
# Response variable: the Price column.
y = data['Price']
# Explanatory variables: every remaining column.
x = data.drop(columns='Price')

In [None]:
# Preview the first rows of the features and of the target.
display(x.head(), y.head())

### 범주형 설명변수에 대한 Dummy 변수 생성

In [None]:
# Inspect the raw Fuel_Type column before it is dummy-encoded.
data['Fuel_Type']

In [None]:
# Count the rows in each Fuel_Type category.
data.groupby('Fuel_Type')['Fuel_Type'].count()

In [None]:
# Show an illustration from the cloned repository (the path assumes the Colab clone under /content).
Image('/content/LG_time_series_day05/image/intro7.png')

In [None]:
# Remove the raw Fuel_Type column from the features; its dummy-encoded
# form is built separately from data['Fuel_Type'].
# errors='ignore' makes re-running this cell a no-op rather than a KeyError.
x = x.drop(columns='Fuel_Type', errors='ignore')

In [None]:
# Dummy-encode Fuel_Type; drop_first=True omits the first category so it serves as the baseline level.
x_dummy = pd.get_dummies(data=data['Fuel_Type'], prefix='Fuel_Type', drop_first=True)

In [None]:
# Display the generated dummy columns.
x_dummy

In [None]:
# Append the dummy columns to the remaining features column-wise (axis=1).
x_dummied = pd.concat(objs=[x,x_dummy], axis=1)

In [None]:
# Preview the combined feature table.
x_dummied.head()

### 학습데이터(Training Dataset)와 테스트 데이터(Testing Dataset) 분리

In [None]:
# Show an illustration from the cloned repository (the path assumes the Colab clone under /content).
Image('/content/LG_time_series_day05/image/intro8.png')

In [None]:
# Hold out 30% of the rows for testing.
# The split is seeded (same seed value as the MLPRegressor defined later) so
# that every Restart & Run All yields the same partition and the same metrics.
train_x, test_x, train_y, test_y = train_test_split(
    x_dummied, y, test_size=0.3, random_state=2022
)

### 정규화 : Standardization(표준정규화) with Standard Scaler

In [None]:
# Summary statistics of the features in x, before any scaling is applied.
x.describe()

In [None]:
# Fit the scaler on the training split only, then apply that same
# transform to both the training and the test features.
scaler = StandardScaler().fit(train_x)
train_x, test_x = scaler.transform(train_x), scaler.transform(test_x)

In [None]:
# Wrap the scaled training features in a DataFrame with the original column names and summarize them.
pd.DataFrame(train_x, columns = x_dummied.columns).describe()

## 2-2. Multilayer Perceptron (MLP) Regressor

### MLP Regressor 구조

In [None]:
# Show an illustration from the cloned repository (the path assumes the Colab clone under /content).
Image('/content/LG_time_series_day05/image/intro9.png')

### MLP Regressor 정의
<a href='http://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html'> Scikit-Learn MLP Regressor Manual </a>

- <b>activation</b> - activation function 타입 (identity, logistic, tanh, <font color='red'>relu</font>) <br>
- <b>batch_size</b> - stochastic optimizer가 사용할 minibatch 크기 <br>
- <b>max_iter  </b> - stochastic optimizer의 최대 iteration 횟수 ( = Epochs )<br>
- <b>alpha     </b> - L2 정규화(regularization) 항의 강도 (과적합 방지용, learning rate가 아님) <br>
- <b>solver    </b> - 가중치 최적화 알고리즘의 종류 (<font color='red'>adam</font>, sgd, lbfgs) <br>

In [None]:
# MLP with two hidden layers (25 and 15 units), ReLU activations and the
# Adam solver, using mini-batches of 100 and at most 1000 iterations.
reg_mlp = MLPRegressor(
    hidden_layer_sizes=(25, 15),
    activation='relu',
    solver='adam',
    alpha=0.001,        # L2 regularization strength
    batch_size=100,
    max_iter=1000,
    random_state=2022,  # fixed seed for reproducible training
    verbose=True,       # print progress messages while fitting
)

### MLP Regressor 학습

In [None]:
# Fit the network on the scaled training features and the training targets.
reg_mlp.fit(train_x, train_y)

### 학습 상태 확인 (learning curve)

In [None]:
# Plot the training loss recorded by the fitted MLP at each iteration.
plt.figure(figsize=(20,10))

train_loss_values = reg_mlp.loss_curve_
plt.plot(train_loss_values, label='Train Loss')

# Label both axes so the figure can be read on its own.
plt.xlabel('Epoch', fontsize=16)
plt.ylabel('Loss', fontsize=16)
plt.legend(fontsize=20)
plt.title("Learning Curve of trained MLP Regressor", fontsize=18)
plt.show()

## 2-3. Multilayer Perceptron 모델 성능 평가

### 학습된 MLP Regressor 결과 확인 및 성능 평가 : Training Data

In [None]:
# Predict on the same scaled features the model was trained on.
train_y_pred = reg_mlp.predict(train_x)

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Both inputs are converted to NumPy arrays. The absolute error of each
    element is divided by the corresponding true value, the ratios are
    averaged, and the mean is multiplied by 100.

    Args:
        y_true: Array-like of actual values (used as the denominator).
        y_pred: Array-like of predicted values.

    Returns:
        The MAPE as a percentage.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_errors = np.abs((actual - predicted) / actual)
    return np.mean(relative_errors) * 100

In [None]:
# Compute each training-set metric once, then report them.
train_mse = mean_squared_error(train_y, train_y_pred)
train_rmse = np.sqrt(train_mse)
train_mae = mean_absolute_error(train_y, train_y_pred)
train_mape = mean_absolute_percentage_error(train_y, train_y_pred)
train_r2 = r2_score(train_y, train_y_pred)

print("Training MSE : {:.3f}".format(train_mse))
print("Training RMSE : {:.3f}".format(train_rmse))
print("Training MAE : {:.3f}".format(train_mae))
print("Training MAPE : {:.3f}".format(train_mape))
print("Training R2 : {:.3f}".format(train_r2))

In [None]:
# Scatter plot of actual vs. predicted values on the training data.
# The dashed diagonal marks perfect predictions; its endpoints are the
# pooled min/max of both series scaled by 0.95 and 1.05.
fig_values = np.concatenate((train_y.squeeze(), train_y_pred.squeeze()))
vmin, vmax = fig_values.min() * 0.95, fig_values.max() * 1.05

plt.figure(figsize=(8, 8))
plt.scatter(train_y, train_y_pred)
plt.plot((vmin, vmax), (vmin, vmax), linestyle='dashed', color='grey')
plt.title('Actual values vs. Predicted values (Training Data)', size=18)
plt.xlabel('Actual', size=16)
plt.ylabel('Predicted', size=16)
plt.show()

### 학습된 MLP Regressor 결과 확인 및 성능 평가 : Testing Data

In [None]:
# Predict on the held-out, scaled test features.
test_y_pred = reg_mlp.predict(test_x)

In [None]:
# Compute each test-set metric once, then report them.
test_mse = mean_squared_error(test_y, test_y_pred)
test_rmse = np.sqrt(test_mse)
test_mae = mean_absolute_error(test_y, test_y_pred)
test_mape = mean_absolute_percentage_error(test_y, test_y_pred)
test_r2 = r2_score(test_y, test_y_pred)

print("Testing MSE : {:.3f}".format(test_mse))
print("Testing RMSE : {:.3f}".format(test_rmse))
print("Testing MAE : {:.3f}".format(test_mae))
print("Testing MAPE : {:.3f}".format(test_mape))
print("Testing R2 : {:.3f}".format(test_r2))

In [None]:
# Scatter plot of actual vs. predicted values on the testing data.
# The dashed diagonal marks perfect predictions; its endpoints are the
# pooled min/max of both series scaled by 0.95 and 1.05.
fig_values = np.concatenate((test_y.squeeze(), test_y_pred.squeeze()))
vmin, vmax = fig_values.min() * 0.95, fig_values.max() * 1.05

plt.figure(figsize=(8, 8))
plt.scatter(test_y, test_y_pred)
plt.plot((vmin, vmax), (vmin, vmax), linestyle='dashed', color='grey')
plt.title('Actual values vs. Predicted values (Testing Data)', size=18)
plt.xlabel('Actual', size=16)
plt.ylabel('Predicted', size=16)
plt.show()