# 장애인콜택시 대기시간 예측
## 단계3. 모델링

In [None]:
!pip install -r requirements.txt

#### 2) 라이브러리 로딩

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import joblib

In [2]:
file1 = 'data2.pkl'

In [3]:
df=joblib.load(file1)

In [4]:
df=df.set_index('Date')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2921 entries, 2015-01-01 to 2022-12-30
Data columns (total 24 columns):
 #   Column           Non-Null Count  Dtype   
---  ------           --------------  -----   
 0   car_cnt          2921 non-null   int64   
 1   request_cnt      2921 non-null   int64   
 2   ride_cnt         2921 non-null   int64   
 3   waiting_time     2921 non-null   float64 
 4   fare             2921 non-null   int64   
 5   distance         2921 non-null   int64   
 6   temp_max         2921 non-null   float64 
 7   temp_min         2921 non-null   float64 
 8   rain(mm)         2921 non-null   float64 
 9   humidity_max(%)  2921 non-null   float64 
 10  humidity_min(%)  2921 non-null   float64 
 11  sunshine(MJ/m2)  2921 non-null   float64 
 12  target           2921 non-null   float64 
 13  weekday          2921 non-null   category
 14  month            2921 non-null   category
 15  week             2921 non-null   category
 16  year             2921 no


* set1: [car_cnt,  waiting_time, distance, temp,humidity, weekday, year,season,is_holiday,ride_pro,is_rainy]
* set2: ['car_cnt','request_cnt','ride_cnt','ride_pro', 'waiting_time', 'fare','distance', 'temp',
          'humidity','weekday','season','is_holiday','is_rainy','covid,'target']

## 2.데이터 준비
* **세부요구사항**
    * NaN에 대한 조치를 수행하시오.
        * rolling 혹은 shift로 발생된 초기 행의 NaN은 삭제해도 무방합니다.
    * 가변수화 : 범주형에 대해서 가변수화를 수행합니다.
    * 데이터분할
        * 시계열 데이터 특성에 맞게 분할합니다.
        * 마지막 91일(3개월) 데이터를 검증셋으로 사용합니다.

In [6]:
df.columns

Index(['car_cnt', 'request_cnt', 'ride_cnt', 'waiting_time', 'fare',
       'distance', 'temp_max', 'temp_min', 'rain(mm)', 'humidity_max(%)',
       'humidity_min(%)', 'sunshine(MJ/m2)', 'target', 'weekday', 'month',
       'week', 'year', 'season', 'is_holiday', 'ride_pro', 'humidity', 'temp',
       'is_rainy', 'covid'],
      dtype='object')

In [7]:
#컬럼선택
#set2
set_col= ['car_cnt','request_cnt','ride_cnt','ride_pro', 'waiting_time', 'temp',
          'humidity','weekday','season','is_holiday','is_rainy','covid','target']
df1=df[set_col]

In [8]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2921 entries, 2015-01-01 to 2022-12-30
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   car_cnt       2921 non-null   int64   
 1   request_cnt   2921 non-null   int64   
 2   ride_cnt      2921 non-null   int64   
 3   ride_pro      2921 non-null   float64 
 4   waiting_time  2921 non-null   float64 
 5   temp          2921 non-null   float64 
 6   humidity      2921 non-null   float64 
 7   weekday       2921 non-null   category
 8   season        2921 non-null   category
 9   is_holiday    2921 non-null   category
 10  is_rainy      2921 non-null   category
 11  covid         2921 non-null   int64   
 12  target        2921 non-null   float64 
dtypes: category(4), float64(5), int64(4)
memory usage: 239.7 KB


### (1) NA 조치

In [9]:
# df1.isna().sum()

### (2) 가변수화

In [10]:
# 가변수화
dumm_cols1 = ['weekday', 'season','is_holiday','covid','is_rainy']
df1 = pd.get_dummies(df1, columns=dumm_cols1, drop_first=True)
df1.head()

Unnamed: 0_level_0,car_cnt,request_cnt,ride_cnt,ride_pro,waiting_time,temp,humidity,target,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,weekday_6,season_spring,season_summer,season_winter,is_holiday_1,covid_1,is_rainy_1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2015-01-01,213,1023,924,90.32,23.2,-5.45,45.5,17.2,0,0,1,0,0,0,0,0,1,1,0,0
2015-01-02,420,3158,2839,89.9,17.2,-3.4,55.0,26.2,0,0,0,1,0,0,0,0,1,0,0,0
2015-01-03,209,1648,1514,91.87,26.2,4.2,73.5,24.5,0,0,0,0,1,0,0,0,1,1,0,0
2015-01-04,196,1646,1526,92.71,24.5,3.5,73.5,26.2,0,0,0,0,0,1,0,0,1,1,0,0
2015-01-05,421,4250,3730,87.76,26.2,-1.65,63.5,23.6,0,0,0,0,0,0,0,0,1,0,0,1


### (3) 데이터분할
* **세부요구사항**
    * 마지막 91일 간의 데이터를 검증 셋으로 만듭니다. (2022-10-01 ~ )
    * 이 기간의 날짜 리스트를 별도로 저장하여, 모델 검증시 시각화할 때 활용합니다.

In [11]:
df1=df1.reset_index()

#### 1) x, y 나누기

In [137]:
# train_x=df1.loc[ df1['Date']<'2022-10-01'].drop(columns='target')
# train_y=df1['target'].loc[ df1['Date']<'2022-10-01']
# test_x=df1.loc['2022-10-01'<=df1['Date']].drop(columns='target')
# test_y=df1['target'].loc['2022-10-01'<=df1['Date']]

In [12]:
# target 확인
target = 'target'

# 데이터 분리
x = df1.drop(target, axis=1)
y = df1.loc[:, target]

In [13]:
x_date=x['Date']

In [14]:
x=x.drop(columns='Date')

#### 2) train : validation 나누기
* 힌트 : train_test_split(  ,   ,  test_size = 91, shuffle = False) 

In [15]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=91, shuffle=False)

In [16]:
x_train

Unnamed: 0,car_cnt,request_cnt,ride_cnt,ride_pro,waiting_time,temp,humidity,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,weekday_6,season_spring,season_summer,season_winter,is_holiday_1,covid_1,is_rainy_1
0,213,1023,924,90.32,23.2,-5.45,45.5,0,0,1,0,0,0,0,0,1,1,0,0
1,420,3158,2839,89.90,17.2,-3.40,55.0,0,0,0,1,0,0,0,0,1,0,0,0
2,209,1648,1514,91.87,26.2,4.20,73.5,0,0,0,0,1,0,0,0,1,1,0,0
3,196,1646,1526,92.71,24.5,3.50,73.5,0,0,0,0,0,1,0,0,1,1,0,0
4,421,4250,3730,87.76,26.2,-1.65,63.5,0,0,0,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2825,642,5947,5039,84.73,45.5,20.90,59.5,0,0,0,0,0,0,0,0,0,0,1,0
2826,667,6044,5087,84.17,43.6,21.05,66.0,1,0,0,0,0,0,0,0,0,0,1,0
2827,670,6182,5176,83.73,44.7,21.40,64.0,0,1,0,0,0,0,0,0,0,0,1,0
2828,655,5981,5008,83.73,42.9,21.30,63.0,0,0,1,0,0,0,0,0,0,0,1,0


### (4) Scaling
* KNN, SVM 알고리즘 및 DL을 적용하기 위해서는 스케일링을 해야 합니다.

In [68]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(x_train)
x_train_mm = scaler.transform(x_train)
x_test_mm = scaler.transform(x_test)

In [73]:
print(x_train_mm.shape,y_train.shape)
print(x_test_mm.shape,y_test.shape)

(2830, 19) (2830,)
(91, 19) (91,)


## 3.모델링
* **세부요구사항**
    * 머신러닝 알고리즘 중 3가지 이상을 사용하여 모델을 만들고 튜닝을 수행합니다.
    * 딥러닝 모델 구조 2가지 이상을 설계하고 모델을 생성합니다.
    * 성능 측정은 MAE, MAPE로 수행합니다.
    * 모델링 후 실제값과 예측값을 시각화(라인차트)하여 분석합니다.

In [19]:
# 불러오기
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import mean_absolute_error, r2_score

### (1) 머신러닝

#### 1) 모델1

### LinearRegression

In [20]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt

# 선형 회귀 모델 생성
linear_reg = LinearRegression()

# 모델 훈련
linear_reg.fit(x_train_mm, y_train)

# 예측
y_pred = linear_reg.predict(x_train_mm)

# 성능 측정
mae = mean_absolute_error(y_train, y_pred)
mape = mean_absolute_percentage_error(y_train, y_pred)

# 결과 출력
print("선형 회귀 MAE:", mae)
print("선형 회귀 MAPE:", mape)

선형 회귀 MAE: 6.088531252545338
선형 회귀 MAPE: 0.1598164312874149


In [21]:
from sklearn.model_selection import cross_val_predict
model = LinearRegression()

y_pred_cv = cross_val_predict(model, x_train_mm, y_train, cv=5)

mae_cv = mean_absolute_error(y_train, y_pred_cv)
mape_cv = mean_absolute_percentage_error(y_train, y_pred_cv)

# 교차 검증 MAE 출력
print("교차 검증 MAE:", mae_cv)
print("교차 검증 MAPE:", mape_cv)

교차 검증 MAE: 6.697777328708821
교차 검증 MAPE: 0.17478797146119907


In [22]:
#성능평가
y_pred = linear_reg.predict(x_test_mm)

mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
print("선형 회귀 MAE:", mae)
print("선형 회귀 MAPE:", mape)

선형 회귀 MAE: 4.661405273549796
선형 회귀 MAPE: 0.11588120152168127


In [266]:

# # 실제값과 예측값 시각화
# plt.plot(y_train, label='실제값', marker='o')
# plt.plot(y_pred, label='예측값', marker='x')
# plt.legend()
# plt.xlabel('샘플 인덱스')
# plt.ylabel('값')
# plt.title('선형 회귀 결과 시각화')
# plt.show()


#### 2) 모델2

### RandomForestRegressor

In [23]:
from sklearn.ensemble import RandomForestRegressor

random_forest = RandomForestRegressor(n_estimators=300, min_samples_split=2 ,random_state=42)
random_forest.fit(x_train_mm, y_train)

y_pred_rf = random_forest.predict(x_train_mm)

mae_rf = mean_absolute_error(y_train, y_pred_rf)
mape_rf = mean_absolute_percentage_error(y_train, y_pred_rf)

print("랜덤 포레스트 MAE:", mae_rf)
print("랜덤 포레스트 MAPE:", mape_rf)

랜덤 포레스트 MAE: 1.9829982332155456
랜덤 포레스트 MAPE: 0.05069176656006825


In [543]:
# from sklearn.ensemble import RandomForestRegressor

# random_forest = RandomForestRegressor(n_estimators=600, min_samples_split=2,random_state=42)
# random_forest.fit(x_train_mm, y_train)

# y_pred_cv = cross_val_predict(model, x_train, y_train, cv=5)

# mae_cv = mean_absolute_error(y_train, y_pred_cv)
# mape_cv = mean_absolute_percentage_error(y_train, y_pred_cv)

# # 교차 검증 MAE 출력
# print("교차 검증 MAE:", mae_cv)
# print("교차 검증 MAPE:", mape_cv)

In [24]:
#성능평가
y_pred = random_forest.predict(x_test_mm)

mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
print("random_forest MAE:", mae)
print("random_forest MAPE:", mape)

random_forest MAE: 4.617249084249083
random_forest MAPE: 0.11415560926071218


#### 3) 모델3

### GradientBoostingRegressor

In [28]:
from sklearn.ensemble import GradientBoostingRegressor

gradient_boosting = GradientBoostingRegressor(random_state=1)
gradient_boosting.fit(x_train_mm, y_train)

y_pred_gb = gradient_boosting.predict(x_train_mm)

mae_gb = mean_absolute_error(y_train, y_pred_gb)
mape_gb = mean_absolute_percentage_error(y_train, y_pred_gb)

print("Gradient Boosting MAE:", mae_gb)
print("Gradient Boosting MAPE:", mape_gb)

Gradient Boosting MAE: 5.024564278322307
Gradient Boosting MAPE: 0.12989506518434207


In [26]:
y_pred_cv = cross_val_predict(gradient_boosting, x_train_mm, y_train, cv=5)

mae_cv = mean_absolute_error(y_train, y_pred_cv)
mape_cv = mean_absolute_percentage_error(y_train, y_pred_cv)

# 교차 검증 MAE 출력
print("교차 검증 MAE:", mae_cv)
print("교차 검증 MAPE:", mape_cv)

교차 검증 MAE: 6.635269827285739
교차 검증 MAPE: 0.17442999295777317


In [29]:
#성능평가
y_pred = gradient_boosting.predict(x_test_mm)

mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
print("Gradient Boosting MAE:", mae)
print("Gradient Boosting MAPE:", mape)

Gradient Boosting MAE: 4.747481698302151
Gradient Boosting MAPE: 0.11583633630803808


#### 4) 모델4

### XGBRegressor

In [30]:
xgb_model = XGBRegressor(n_estimators=400,learning_rate=0.03,max_depth=4, colsample_bytree=0.7, random_state=1)

# 모델 훈련
xgb_model.fit(x_train_mm, y_train)

# 예측
y_pred_xgb = xgb_model.predict(x_train_mm)

# 성능 측정 (MAE 및 MAPE)
mae_xgb = mean_absolute_error(y_train, y_pred_xgb)
mape_xgb = mean_absolute_percentage_error(y_train, y_pred_xgb)

# 결과 출력
print("XGBoost MAE:", mae_xgb)
print("XGBoost MAPE:", mape_xgb)

XGBoost MAE: 4.374048570289207
XGBoost MAPE: 0.11345968856126082


In [31]:
# xgb_model = XGBRegressor(n_estimators=200,learning_rate=0.1,colsample_bytree=0.8, random_state=1)
# y_pred_cv = cross_val_predict(xgb_model, x_train_mm, y_train, cv=20)

# mae_cv = mean_absolute_error(y_train, y_pred_cv)
# mape_cv = mean_absolute_percentage_error(y_train, y_pred_cv)

# # 교차 검증 MAE 출력
# print("교차 검증 MAE:", mae_cv)
# print("교차 검증 MAPE:", mape_cv)

In [336]:
# param_grid = {
#     'n_estimators': [100, 200, 300],  # Number of boosting stages to be used
#     'learning_rate': [0.01, 0.1, 0.2],  # Step size shrinking to prevent overfitting
#     'max_depth': [3, 4, 5],  # Maximum depth of the individual trees
#     'min_samples_split': [2, 5, 10],  # Minimum number of samples required to split an internal node
# }

In [32]:
# Make predictions on the test data
y_pred = xgb_model.predict(x_test_mm)

# Calculate performance metrics
mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
print("XGBoost MAE:", mae)
print("XGBoost MAPE:", mape)

XGBoost MAE: 4.688563671740857
XGBoost MAPE: 0.11550955499651087


#### 4) 모델5

### LGBMRegressor

In [33]:
lgb_model = LGBMRegressor(n_estimators=300,learning_rate=0.005, random_state=1)

# 모델 훈련
lgb_model.fit(x_train_mm, y_train)

# 예측
y_pred_lgb = lgb_model.predict(x_train_mm)

# 성능 측정 (MAE 및 MAPE)
mae_lgb = mean_absolute_error(y_train, y_pred_lgb)
mape_lgb = mean_absolute_percentage_error(y_train, y_pred_lgb)

# 결과 출력
print("LightGBM MAE:", mae_lgb)
print("LightGBM MAPE:", mape_lgb)

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1693
[LightGBM] [Info] Number of data points in the train set: 2830, number of used features: 19
[LightGBM] [Info] Start training from score 40.208127
LightGBM MAE: 5.83609696488935
LightGBM MAPE: 0.1582688498751461


In [34]:
#성능평가
y_pred = lgb_model.predict(x_test_mm)

mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
print("LightGBM MAE:", mae)
print("LightGBM MAPE:", mape)

LightGBM MAE: 4.854484891443208
LightGBM MAPE: 0.11541850126598355


### (2) 딥러닝

In [130]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


#### 1) 모델1

In [35]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
import matplotlib.pyplot as plt

In [36]:
model = Sequential()
model.add(Dense(128, input_dim=x_train_mm.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))

model.compile(loss='mean_squared_error', optimizer=Adam(lr=0.001))

history = model.fit(x_train_mm, y_train, epochs=100, batch_size=32, verbose=1, validation_split=0.2)

y_pred_nn = model.predict(x_train_mm)

mae_nn = mean_absolute_error(y_train, y_pred_nn)
mape_nn = mean_absolute_percentage_error(y_train, y_pred_nn)



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [37]:
# 결과 출력
print("MLP MAE:", mae_nn)
print("MLP MAPE:", mape_nn)

MLP MAE: 5.347209016644913
MLP MAPE: 0.13952068293667338


In [38]:
test_loss = model.evaluate(x_test_mm, y_test)

# 테스트 데이터의 MAE 및 MAPE 계산
y_pred_test = model.predict(x_test_mm)
mae_test = mean_absolute_error(y_test, y_pred_test)
mape_test = mean_absolute_percentage_error(y_test, y_pred_test)

# 결과 출력
print("Test Loss:", test_loss)
print("Test MAE:", mae_test)
print("Test MAPE:", mape_test)

Test Loss: 46.57362747192383
Test MAE: 5.206529910747822
Test MAPE: 0.12783942209599447


---

#### 2) 모델2

In [39]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error


In [46]:

model = Sequential()

model.add(Dense(128, input_dim=x_train_mm.shape[1], activation='relu'))
model.add(BatchNormalization())

model.add(Dense(32, activation='swish'))
model.add(Dropout(0.5))  

model.add(Dense(1, activation='swish'))

optimizer = Adam(learning_rate=0.002)
model.compile(loss='mean_squared_error', optimizer=optimizer)

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    x_train_mm, y_train, 
    epochs=300, batch_size=32, verbose=1, 
    validation_split=0.2,
    callbacks=[early_stopping]
)


y_pred_nn = model.predict(x_train_mm)


mae_nn = mean_absolute_error(y_train, y_pred_nn)
mape_nn = mean_absolute_percentage_error(y_train, y_pred_nn)
print("Neural Network MAE:", mae_nn)
print("Neural Network MAPE:", mape_nn)


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Neural Network MAE: 5.899692762125507
Neural Network MAPE: 0.15417152751701216


In [47]:
test_loss = model.evaluate(x_test_mm, y_test)

# 테스트 데이터의 MAE 및 MAPE 계산
y_pred_test = model.predict(x_test_mm)
mae_test = mean_absolute_error(y_test, y_pred_test)
mape_test = mean_absolute_percentage_error(y_test, y_pred_test)

# 결과 출력
print("Test Loss:", test_loss)
print("Test MAE:", mae_test)
print("Test MAPE:", mape_test)

Test Loss: 50.18401336669922
Test MAE: 5.3835516290350265
Test MAPE: 0.12460078655301454


In [461]:
# # 실제값과 예측값 시각화
# plt.plot(y_train, label='실제값', marker='o')
# plt.plot(y_pred_nn, label='예측값', marker='x')
# plt.legend()
# plt.xlabel('샘플 인덱스')
# plt.ylabel('값')
# plt.title('MLP 결과 시각화')
# plt.show()

___

#### 3) 모델3

In [51]:
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.backend import clear_session
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPool2D, BatchNormalization, Dropout

from keras.layers import Input, Conv2D, MaxPool2D, Flatten, Dense, BatchNormalization, Dropout


In [78]:
print(x_train_mm.shape, y_train.shape)

(2830, 19) (2830,)


In [123]:
X = tf.keras.Input(shape=[19])
H = tf.keras.layers.Flatten()(X)
H = tf.keras.layers.Dense(256, activation='swish')(H)
H = Dropout(0.25)(H)
H = tf.keras.layers.Dense(128, activation='swish')(H)
H = Dropout(0.25)(H)
H = tf.keras.layers.Dense(64, activation='swish')(H)
H = Dropout(0.25)(H)
H = tf.keras.layers.Dense(32, activation='swish')(H)
H = Dropout(0.25)(H)
Y = tf.keras.layers.Dense(1)(H)

model = tf.keras.Model(X, Y)
model.compile(loss='mse')
model.summary()

Model: "model_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_13 (InputLayer)       [(None, 19)]              0         
                                                                 
 flatten_12 (Flatten)        (None, 19)                0         
                                                                 
 dense_50 (Dense)            (None, 256)               5120      
                                                                 
 dropout_32 (Dropout)        (None, 256)               0         
                                                                 
 dense_51 (Dense)            (None, 128)               32896     
                                                                 
 dropout_33 (Dropout)        (None, 128)               0         
                                                                 
 dense_52 (Dense)            (None, 64)                825

In [124]:
es = EarlyStopping(monitor='val_loss',       # 얼리스토핑을 적용할 관측 대상
                   min_delta=0,              # Threshold. 설정 값 이상으로 변해야 성능 개선!
                   patience=10,               # 성능 개선이 없을 때, 몇 epochs 더 볼 것인가.
                   verbose=1,
                   restore_best_weights=True # 가장 성능이 좋은 epoch의 가중치로 돌려줌!
                   )

In [125]:
hist = model.fit(x_train_mm, y_train, epochs=10000, verbose=1,
                 validation_split=0.2, # training set에서 20%를 validation set으로 만듬!
                 callbacks=[es]        # 얼리스토핑 적용!
                 )

Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
Epoch 24/10000
Epoch 25/10000
Epoch 26/10000
Epoch 27/10000
Epoch 28/10000
Epoch 29/10000
Epoch 30/10000
Epoch 31/10000
Epoch 32/10000
Epoch 33/10000
Epoch 34/10000
Epoch 35/10000
Epoch 36/10000
Epoch 37/10000
Epoch 38/10000
Epoch 39/10000
Epoch 40/10000
Epoch 40: early stopping


In [126]:
import numpy as np
y_pred = model.predict(x_test_mm) 

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)

print("평균 제곱 오차 (MSE):", mse)
print("평균 절대 오차 (MAE):", mae)
print("평균 절대 백분율 오차 (MAPE):", mape)

평균 제곱 오차 (MSE): 38.82440784422278
평균 절대 오차 (MAE): 4.641844768314571
평균 절대 백분율 오차 (MAPE): 0.11009650776251766


In [None]:
model.get_weights()

---

## 4.모델 비교
* **세부요구사항**
    * 모델링 단계에서 생성한 모든 모델의 성능을 하나로 모아서 비교합니다.
    * 가장 성능이 높은 모델을 선정합니다.