# 제주도 교통량 예측

In [1]:
# 모듈 로딩
from keras import Sequential
from keras.layers import Dense, Dropout
import numpy as np

## [1] 데이터 로딩 및 전처리

In [2]:
# Load the three saved NumPy arrays in one pass: the training inputs, the
# training targets, and the inputs later passed to model.predict().
train, target, test = (
    np.load(npy_path)
    for npy_path in ('./train_save.npy', './target_save.npy', './test_save.npy')
)

In [3]:
# Display the shapes of the training inputs and targets as the cell output.
train.shape, target.shape

((4701217, 16), (4701217, 1))

In [4]:
from sklearn.model_selection import train_test_split

# First split: hold out a test set. test_size=0.25 is scikit-learn's default
# when neither test_size nor train_size is given; it is only spelled out here.
x_train, x_test, y_train, y_test = train_test_split(
    train, target, test_size=0.25, random_state=42
)

# Second split: carve a validation set out of the remaining training rows,
# using the same proportion and seed.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.25, random_state=42
)

In [5]:
# Display the shapes of the train / test / validation input arrays.
x_train.shape, x_test.shape, x_val.shape

((2644434, 16), (1175305, 16), (881478, 16))

## [2] 모델 만들기

### [2-1] 모델 구상
- 입력 데이터 -> 16개 Features
- 출력 데이터 -> 1개 평균 속도(교통량)
- 학습 방법: 회귀


In [6]:
# Build the model: a fully connected network taking 16 input features.
# The hidden widths grow 16 -> 32 -> 64 -> 128 and shrink back to 16, with
# Dropout(0.2) after each of the 32/64/128/64/32-unit layers.
model = Sequential([
    Dense(16, activation='relu', input_shape=(16,)),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    # Regression output layer: a single unit with linear activation.
    Dense(1, activation='linear'),
])

# Print the layer-by-layer summary to check the architecture.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 16)                272       
                                                                 
 dense_1 (Dense)             (None, 32)                544       
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense_2 (Dense)             (None, 64)                2112      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_3 (Dense)             (None, 128)               8320      
                                                                 
 dropout_2 (Dropout)         (None, 128)               0

In [7]:
# Compile for regression: mean squared error as the loss, Adam as optimizer.
# `metrics` is given as a list, which is the documented form; a bare string is
# not accepted by every Keras version. The metric name is still 'mse', so the
# validation value is still logged as `val_mse`.
model.compile(loss='mse', optimizer='adam', metrics=['mse'])

In [8]:
# Create the model checkpoint callback
from keras.callbacks import ModelCheckpoint

# File name template: the placeholders are the epoch number (two digits) and
# the validation MSE (four decimals).
path = './model/{epoch:02d}-{val_mse:.4f}.hdf5'

# Only the file path is configured; all other callback options keep their
# defaults.
model_ckpt = ModelCheckpoint(filepath=path)

In [10]:
# Train the model for 10 epochs, validating on the validation split and
# passing the checkpoint callback. Left as the last expression of the cell so
# the returned History object is still displayed.
model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_val, y_val),
    callbacks=[model_ckpt],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x270e8fb5d30>

## [3] 테스트 및 평가

In [11]:
import os

# Collect the saved model files to evaluate.
# os.listdir() returns entries in arbitrary order and includes everything in
# the directory, so keep only HDF5 model files (.hdf5 / .h5) and sort the names
# to get a reproducible evaluation order.
model_list = sorted(
    entry for entry in os.listdir('./model')
    if entry.lower().endswith(('.hdf5', '.h5'))
)

In [12]:
from keras.models import load_model

# Evaluate every saved model file on the held-out test split.
# The value returned by evaluate() is stored per file name so the checkpoints
# can be compared programmatically, not only by reading the progress output.
# NOTE: the loop rebinds the global `model`; after it finishes, `model` is the
# last file that was loaded.
eval_results = {}
for m in model_list:
    model = load_model(f'./model/{m}')
    print(f'{m}:')
    eval_results[m] = model.evaluate(x_test, y_test)

01-0.5753.hdf5:
01-0.5849.hdf5:
02-0.6351.hdf5:
03-1.2532.hdf5:
04-1.2596.hdf5:
05-1.2256.hdf5:
06-1.1105.hdf5:
07-1.6822.hdf5:
08-2.2283.hdf5:
09-1.8009.hdf5:
10-1.7144.hdf5:
best 02-0.5720.hdf5:
best 1.0930 model.h5:


### [데이콘]

In [13]:
# Reload one specific saved checkpoint; it is the model used for the
# submission predictions below.
model = load_model(filepath='./model/01-0.5753.hdf5')

In [14]:
# Run the reloaded model on the test inputs.
pre = model.predict(x=test)



In [15]:
# Display the first prediction as a quick sanity check.
pre[0]

array([1.1066463], dtype=float32)

In [16]:
import pandas as pd

# Read the sample submission file; its `target` column is overwritten with the
# model predictions further down.
test_df = pd.read_csv(filepath_or_buffer='./Data/sample_submission.csv')

In [17]:
# Print the column names, non-null counts and dtypes of the submission frame.
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 291241 entries, 0 to 291240
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   id      291241 non-null  object
 1   target  291241 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 4.4+ MB


In [18]:
# Display the shape of the prediction array to compare against the row count
# of the submission frame.
pre.shape

(291241, 1)

In [19]:
# Write the predictions into the `target` column.
# `pre` is a 2-D array with a single column; flatten it to 1-D so the
# assignment does not depend on how pandas handles a 2-D value for one column.
test_df['target'] = pre.ravel()

In [20]:
# Display the first rows to confirm the predictions were written to `target`.
test_df.head()

Unnamed: 0,id,target
0,TEST_000000,1.106646
1,TEST_000001,5.97167
2,TEST_000002,0.006869
3,TEST_000003,1.157146
4,TEST_000004,1.89408


In [21]:
# Save the submission frame as CSV without the index column.
test_df.to_csv(path_or_buf='sample_submission3.csv', index=False)