# 제주도 교통량 예측

In [1]:
# 모듈 로딩
from keras import Sequential
from keras.layers import Dense, Dropout
import numpy as np

## [1] 데이터 로딩 및 전처리

In [2]:
# Load the pre-saved NumPy arrays: `train` and `target` are split into
# train/validation/test sets below, and `test` is what the final model
# predicts on for the submission.
train = np.load(file='./train_save.npy')
target = np.load(file='./target_save.npy')
test = np.load(file='./test_save.npy')

In [3]:
# Display the shapes of the loaded `train` and `target` arrays.
train.shape, target.shape

((4701217, 16), (4701217, 1))

In [4]:
from sklearn.model_selection import train_test_split

# Two-stage split: first hold out a test set, then carve a validation set out
# of the remaining rows. 0.25 is train_test_split's default test fraction,
# written out here so the split sizes are visible at a glance.
x_train, x_test, y_train, y_test = train_test_split(
    train, target, test_size=0.25, random_state=42
)
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.25, random_state=42
)

In [5]:
# Display the feature-matrix shapes of the train, test and validation splits.
x_train.shape, x_test.shape, x_val.shape

((2644434, 16), (1175305, 16), (881478, 16))

## [2] 모델 만들기

### [2-1] 모델 구상
- 입력 데이터 -> 16개 Features
- 출력 데이터 -> 1개 평균 속도(교통량)
- 학습 방법: 회귀  


In [28]:
# Build the model: a fully connected regression network whose hidden layers
# widen from 16 to 128 units and narrow back to 16, with Dropout(0.2) after
# each of the five middle hidden layers.
model = Sequential([
    Dense(16, activation='relu', input_shape=(16,)),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    # Regression output layer: a single unit with linear activation.
    Dense(1, activation='linear'),
])

# Print the layer-by-layer model summary.
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_44 (Dense)            (None, 16)                272       
                                                                 
 dense_45 (Dense)            (None, 32)                544       
                                                                 
 dropout_7 (Dropout)         (None, 32)                0         
                                                                 
 dense_46 (Dense)            (None, 64)                2112      
                                                                 
 dropout_8 (Dropout)         (None, 64)                0         
                                                                 
 dense_47 (Dense)            (None, 128)               8320      
                                                                 
 dropout_9 (Dropout)         (None, 128)              

In [29]:
# Compile for regression with MSE loss and the Adam optimizer. `metrics` is
# passed as a list, which is the form the Keras compile API documents (a bare
# string is not accepted by every Keras version). The 'mse' metric is also
# what yields the `val_mse` value referenced by the checkpoint filename
# template.
model.compile(loss='mse', optimizer='adam', metrics=['mse'])

In [30]:
# Create the model checkpoint callback.
from keras.callbacks import ModelCheckpoint

# Filename template: filled in with the zero-padded epoch number and the
# validation MSE formatted to four decimals.
path = './model/{epoch:02d}-{val_mse:.4f}.hdf5'
model_ckpt = ModelCheckpoint(filepath=path)

In [None]:
# Train the model for 10 epochs, validating on the validation split and
# passing the checkpoint callback.
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_val, y_val),
    callbacks=[model_ckpt],
)

## [3] 테스트 및 평가

In [32]:
import os

# os.listdir returns entries in arbitrary order; sort the names so the saved
# models are always listed (and evaluated) in the same, name-sorted order.
# Checkpoint names start with the zero-padded epoch number, so this also puts
# them in epoch order.
model_list = sorted(os.listdir('./model'))

In [33]:
from keras.models import load_model

# Load each saved file from ./model in turn, print its name, and evaluate it
# on the held-out test split. `model` is rebound on every iteration.
for ckpt_name in model_list:
    model = load_model(filepath=f'./model/{ckpt_name}')
    print(f'{ckpt_name}:')
    model.evaluate(x=x_test, y=y_test)

01-0.6082.hdf5:
01-0.9780.hdf5:
02-0.5720.hdf5:
02-0.9517.hdf5:
03-0.5902.hdf5:
03-0.8167.hdf5:
04-0.5838.hdf5:
05-0.7630.hdf5:
06-1.3808.hdf5:
07-1.1703.hdf5:
08-1.1624.hdf5:
09-0.9461.hdf5:
09-1.0903.hdf5:
10-0.9615.hdf5:
best 1.0930 model.h5:


### [데이콘]

In [34]:
# Reload the checkpoint selected for the submission (per its filename:
# epoch 02, val_mse 0.5720).
model = load_model(filepath='./model/02-0.5720.hdf5')

In [35]:
# Run the selected model on the competition test features.
pre = model.predict(x=test)



In [36]:
# Display the first row of the prediction array.
pre[0]

array([0.9747555], dtype=float32)

In [37]:
import pandas as pd

# Read the sample submission file; its `target` column is overwritten with
# the model's predictions further down.
test_df = pd.read_csv(filepath_or_buffer='./Data/sample_submission.csv')

In [38]:
# Print the column names, dtypes and non-null counts of the submission frame.
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 291241 entries, 0 to 291240
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   id      291241 non-null  object
 1   target  291241 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 4.4+ MB


In [39]:
# Display the prediction array's shape to compare against the submission
# frame's row count.
pre.shape

(291241, 1)

In [40]:
# `pre` is a 2-D (n, 1) array; flatten it so the single 'target' column is
# given a 1-D array rather than relying on pandas to accept a 2-D one.
test_df['target'] = pre.ravel()

In [41]:
# Display the first rows of the submission frame after filling in `target`.
test_df.head()

Unnamed: 0,id,target
0,TEST_000000,0.974756
1,TEST_000001,5.979795
2,TEST_000002,0.05471
3,TEST_000003,0.975721
4,TEST_000004,2.003477


In [42]:
# Write the submission file without the DataFrame index column.
test_df.to_csv(path_or_buf='sample_submission2.csv', index=False)