---
# 본 ipynb 파일은 CNN 예측 모델 성능 평가 전용 파일입니다.

###  * train_data.csv와 test_data.csv의 전처리 과정과 모델 평가 함수를 담고 있습니다.

###  * 현재 폴더의 train_data.csv, test_data.csv와 ./model/0.7947479486465454_cnn.h5 파일을 요구합니다.

###  * CNN 모델 구축 과정은 'cnn_model.ipynb'에 담겨있습니다.
---

In [1]:
import pandas as pd
from tensorflow import keras
import tensorflow as tf
import numpy as np
import os
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from datetime import datetime, timedelta

In [2]:
# Load the training and test tables from the working directory; both files
# are decoded as cp949.
train, test = (
    pd.read_csv(csv_path, encoding='cp949')
    for csv_path in ("./train_data.csv", "./test_data.csv")
)

In [3]:
# Parse the timestamp column, then patch rows whose timestamp is missing.
# A synthetic clock starts at the earliest observed timestamp, advances one
# minute per row (whether or not that row was missing), and wraps back to the
# start once it passes the latest observed timestamp.
train['regdate'] = pd.to_datetime(train['regdate'])
start_date = train['regdate'].min()
end_date = train['regdate'].max()
step = timedelta(minutes=1)
current_date = start_date
for row_label in range(len(train)):
    if pd.isna(train.loc[row_label, 'regdate']):
        train.loc[row_label, 'regdate'] = current_date
    next_date = current_date + step
    current_date = start_date if next_date > end_date else next_date

In [4]:
# Same timestamp repair as for the training table: parse the column, then
# give every row with a missing timestamp the value of a synthetic clock that
# ticks one minute per row and wraps from the latest observed timestamp back
# to the earliest one.
test['regdate'] = pd.to_datetime(test['regdate'])
start_date = test['regdate'].min()
end_date = test['regdate'].max()
step = timedelta(minutes=1)
current_date = start_date
for row_label in range(len(test)):
    if pd.isna(test.loc[row_label, 'regdate']):
        test.loc[row_label, 'regdate'] = current_date
    next_date = current_date + step
    current_date = start_date if next_date > end_date else next_date

In [5]:
# Day of the week of each timestamp (pandas convention: Monday=0 ... Sunday=6).
# Computed with the vectorised .dt accessor instead of a per-row .loc lookup,
# which is faster and does not depend on the index being 0..n-1.
train['dayofweek'] = train['regdate'].dt.dayofweek

In [6]:
# Day of the week of each timestamp (pandas convention: Monday=0 ... Sunday=6).
# Computed with the vectorised .dt accessor instead of a per-row .loc lookup,
# which is faster and does not depend on the index being 0..n-1.
test['dayofweek'] = test['regdate'].dt.dayofweek

In [7]:
# 'time' is the number of minutes elapsed since midnight (hour * 60 + minute).
# It is derived directly from the datetime components rather than by
# formatting each timestamp as "HH:MM" text and parsing the text back to ints.
train['time'] = train['regdate'].dt.hour * 60 + train['regdate'].dt.minute
test['time'] = test['regdate'].dt.hour * 60 + test['regdate'].dt.minute

In [8]:
def make_area(df) :
    """Add a 1-based ``area`` group id to ``df`` in place.

    Rows are split, in their current order, into consecutive groups of
    ``(days spanned by 'regdate' + 1) * 1440`` rows, i.e. one row per minute
    for every calendar day covered. Presumably each group corresponds to one
    measurement location whose records were concatenated -- confirm against
    the data source.

    The group size and the number of groups are both derived from ``df``
    itself, so the function gives the right labels for any frame it is called
    with. Rows left over after the last full group form one final, shorter
    group instead of being left unlabelled.

    Args:
        df: DataFrame with a datetime ``regdate`` column. Modified in place.

    Returns:
        None.
    """
    date_range = df['regdate'].max() - df['regdate'].min()
    rows_per_area = (date_range.days + 1) * 1440
    # Positional integer division assigns rows 0..k-1 to area 1, k..2k-1 to
    # area 2, and so on, without overlapping boundaries between groups.
    df['area'] = np.arange(len(df)) // rows_per_area + 1

In [9]:
# Work on copies so the loaded tables stay as they were, then tag every row
# of each copy with its 'area' group.
train_nona = train.copy()
test_nona = test.copy()
for frame in (train_nona, test_nona):
    make_area(frame)

In [10]:
def replace_missing_with_mean(dataframe, dependent_col, independent_cols):
    """Fill missing values with the mean of the row's group, in place.

    For every column in ``independent_cols``, each missing entry is replaced
    by the mean of that column over the rows sharing the same value of
    ``dependent_col``. Entries whose group mean is itself missing stay missing.

    Args:
        dataframe: DataFrame to impute. Modified in place.
        dependent_col: Name of the column that defines the groups.
        independent_cols: Iterable of column names to impute.

    Returns:
        None.
    """
    for col in independent_cols:
        mean = dataframe.groupby(dependent_col)[col].transform('mean')
        # Assign the filled column back explicitly. The chained form
        # ``dataframe[col].fillna(..., inplace=True)`` operates on a temporary
        # object and does not update ``dataframe`` under pandas Copy-on-Write.
        dataframe[col] = dataframe[col].fillna(mean)

In [11]:
# Columns whose missing values are filled with the mean of their 'area' group.
f_list = [
    'temp', 'humi', 'co2',
    'dust_pm1', 'dust_pm_25', 'dust_pm_10',
    'illuminance', 'voc', 'eco2', 'PIR',
]
for frame in (train_nona, test_nona):
    replace_missing_with_mean(frame, 'area', f_list)

In [12]:
# Separate the target column '재실인원' from the remaining columns.
y_train = train_nona['재실인원']
X_train = train_nona.drop(columns='재실인원')
y_test = test_nona['재실인원']
X_test = test_nona.drop(columns='재실인원')

In [13]:
# Hold out 25% of the training rows as a validation set; the fixed
# random_state keeps the split reproducible so the scaler below is fitted on
# the same rows on every run.
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

In [14]:
# Remove the columns that are not passed to the model.
dropped_cols = ['regdate','area','dayofweek']
X_train, X_valid, X_test = (
    part.drop(columns=dropped_cols) for part in (X_train, X_valid, X_test)
)

In [15]:
# Learn the per-feature min/max from the training split only, then apply the
# same scaling to all three splits.
scaler = MinMaxScaler().fit(X_train)
X_train, X_valid, X_test = (
    scaler.transform(part) for part in (X_train, X_valid, X_test)
)

In [16]:
# Convert the target values of every split to NumPy arrays.
y_train, y_valid, y_test = (
    np.array(target) for target in (y_train, y_valid, y_test)
)

In [17]:
# Give each sample the shape (11, 1): 11 feature values with one trailing
# axis of size 1. The reshape fails if a split does not have exactly 11
# values per row.
X_train = X_train.reshape((len(X_train), 11, 1)).astype(float)
X_valid = X_valid.reshape((len(X_valid), 11, 1)).astype(float)
X_test = X_test.reshape((len(X_test), 11, 1)).astype(float)

In [18]:
# Restore the saved Keras model from the HDF5 file in ./model/ (path is
# relative to the working directory).
reconstructed_model = keras.models.load_model("./model/0.7947479486465454_cnn.h5")

## MSE 값

In [19]:
# Evaluate the restored model on the test split. The recorded output below
# lists two identical values -- presumably the loss and one metric, both MSE;
# confirm against the compile settings in cnn_model.ipynb.
reconstructed_model.evaluate(X_test, y_test)



[0.7947479486465454, 0.7947479486465454]

## RMSE 값

In [20]:
# Element-wise square root of the evaluate() results (RMSE, assuming those
# values are MSE). Note that this runs the evaluation on the test split again.
np.sqrt(reconstructed_model.evaluate(X_test, y_test))



array([0.89148637, 0.89148637])