In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split

In [2]:
# csvファイルからPandas DataFrameへ読み込み
train = pd.read_csv('train.csv', delimiter=',', low_memory=False)
test = pd.read_csv('test.csv', delimiter=',', low_memory=False)

# 冒頭を表示して確認
train.head()

Unnamed: 0,date_time,deg_C,relative_humidity,absolute_humidity,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,target_carbon_monoxide,target_benzene,target_nitrogen_oxides
0,2010-03-10 18:00:00,13.1,46.0,0.7578,1387.2,1087.8,1056.0,1742.8,1293.4,2.5,12.0,167.7
1,2010-03-10 19:00:00,13.2,45.3,0.7255,1279.1,888.2,1197.5,1449.9,1010.9,2.1,9.9,98.9
2,2010-03-10 20:00:00,12.6,56.2,0.7502,1331.9,929.6,1060.2,1586.1,1117.0,2.2,9.2,127.1
3,2010-03-10 21:00:00,11.0,62.4,0.7867,1321.0,929.0,1102.9,1536.5,1263.2,2.2,9.7,177.2
4,2010-03-10 22:00:00,11.9,59.0,0.7888,1272.0,852.7,1180.9,1415.5,1132.2,1.5,6.4,121.8


In [3]:
# trainデータを入力データとラベルに分割する
X = train.drop(['date_time'], axis=1).drop(['target_carbon_monoxide'], axis=1).drop(['target_benzene'], axis=1).drop(['target_nitrogen_oxides'], axis=1).values
Y_1 = train.target_carbon_monoxide.values  # carbon_monoxide(一酸化炭素)
Y_2 = train.target_benzene.values          # benzene(ベンゼン)
Y_3 = train.target_nitrogen_oxides.values  # nitrogen_oxides(窒素酸化物)

# 表示して確認
X, Y_1

(array([[1.3100e+01, 4.6000e+01, 7.5780e-01, ..., 1.0560e+03, 1.7428e+03,
         1.2934e+03],
        [1.3200e+01, 4.5300e+01, 7.2550e-01, ..., 1.1975e+03, 1.4499e+03,
         1.0109e+03],
        [1.2600e+01, 5.6200e+01, 7.5020e-01, ..., 1.0602e+03, 1.5861e+03,
         1.1170e+03],
        ...,
        [9.6000e+00, 3.4600e+01, 4.3100e-01, ..., 8.6190e+02, 8.8920e+02,
         1.1591e+03],
        [8.0000e+00, 4.0700e+01, 4.0850e-01, ..., 9.0850e+02, 9.1700e+02,
         1.2063e+03],
        [8.0000e+00, 4.1300e+01, 4.3750e-01, ..., 7.9710e+02, 8.8000e+02,
         1.2731e+03]]),
 array([2.5, 2.1, 2.2, ..., 1.6, 1.5, 1.4]))

In [4]:
# データを訓練データと検証データに分割する
def split_data(X, Y):
    # trainデータを訓練データと検証データに分割する
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
    
    return x_train, x_test, y_train, y_test

In [5]:
# RMSLE カスタム評価関数 #####################
from keras import backend as K
msle = keras.metrics.MeanSquaredLogarithmicError()

def root_mean_squared_logarithmic_error(y_true, y_pred):
    return K.sqrt(msle(y_true, y_pred))

In [6]:
# RNN(LSTM)モデルをセット
def model_set():
    # RNN(LSTM)モデルを構築
    model = keras.Sequential([
            keras.layers.LSTM(1, batch_input_shape=(None, 8, 1), return_sequences=True),
    ])
    
    # モデルをコンパイルして構造を表示
    model.compile(optimizer='adam', #rmsprop
                  loss='mean_squared_error')
    model.summary()
    
    return model

# carbon_monoxide(一酸化炭素)について------------------------

In [7]:
# データを訓練データと検証データに分割する
x_train, x_test, y_train, y_test = split_data(X, Y_1)

# 形を表示して確認
((x_train.shape, x_test.shape), (y_train.shape, y_test.shape))

(((5688, 8), (1423, 8)), ((5688,), (1423,)))

In [8]:
# RNN(LSTM)モデルをセット
model_1 = model_set()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 8, 1)              12        
Total params: 12
Trainable params: 12
Non-trainable params: 0
_________________________________________________________________


In [9]:
# 学習させる
model_1.fit(x_train.reshape(-1, 8, 1), y_train, batch_size=256, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7fcd6387f2e0>

In [10]:
# 検証データへの精度
y_test_pred = model_1.predict(x_test.reshape(-1, 8, 1))[:,0]
print('RMSLE: %.3f' % root_mean_squared_logarithmic_error(y_test, y_test_pred))

RMSLE: 0.660


# benzene(ベンゼン)について---------------------------------

In [11]:
# データを訓練データと検証データに分割する
x_train, x_test, y_train, y_test = split_data(X, Y_2)

# 形を表示して確認
((x_train.shape, x_test.shape), (y_train.shape, y_test.shape))

(((5688, 8), (1423, 8)), ((5688,), (1423,)))

In [12]:
# RNN(LSTM)モデルをセット
model_2 = model_set()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 8, 1)              12        
Total params: 12
Trainable params: 12
Non-trainable params: 0
_________________________________________________________________


In [13]:
# 学習させる
model_2.fit(x_train.reshape(-1, 8, 1), y_train, batch_size=256, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7fcd663f64c0>

In [14]:
# 検証データへの精度
y_test_pred = model_2.predict(x_test.reshape(-1, 8, 1))[:,0]
print('RMSLE: %.3f' % root_mean_squared_logarithmic_error(y_test, y_test_pred))

RMSLE: 1.360


# nitrogen_oxides(窒素酸化物)について-------------------------

In [15]:
# データを訓練データと検証データに分割する
x_train, x_test, y_train, y_test = split_data(X, Y_3)

# 形を表示して確認
((x_train.shape, x_test.shape), (y_train.shape, y_test.shape))

(((5688, 8), (1423, 8)), ((5688,), (1423,)))

In [16]:
# RNN(LSTM)モデルをセット
model_3 = model_set()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 8, 1)              12        
Total params: 12
Trainable params: 12
Non-trainable params: 0
_________________________________________________________________


In [17]:
# 学習させる
model_3.fit(x_train.reshape(-1, 8, 1), y_train, batch_size=256, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7fcd634205e0>

In [18]:
# 検証データへの精度
y_test_pred = model_3.predict(x_test.reshape(-1, 8, 1))[:,0]
print('RMSLE: %.3f' % root_mean_squared_logarithmic_error(y_test, y_test_pred))

RMSLE: 2.831


# 提出データへの適用

In [22]:
# testデータを適用できる形に変換する
X_sb = test.drop(['date_time'], axis=1).values

# 表示して確認
X_sb

array([[8.0000e+00, 4.1300e+01, 4.3750e-01, ..., 7.9710e+02, 8.8000e+02,
        1.2731e+03],
       [5.1000e+00, 5.1700e+01, 4.5640e-01, ..., 6.8790e+02, 9.7280e+02,
        1.7140e+03],
       [5.8000e+00, 5.1500e+01, 4.6890e-01, ..., 6.9370e+02, 9.4190e+02,
        1.3008e+03],
       ...,
       [2.6600e+01, 1.9000e+01, 6.4060e-01, ..., 5.7280e+02, 1.2534e+03,
        1.0811e+03],
       [2.9100e+01, 1.2700e+01, 5.1390e-01, ..., 7.0200e+02, 1.0098e+03,
        8.0850e+02],
       [2.7900e+01, 1.3500e+01, 5.0280e-01, ..., 6.0820e+02, 1.0613e+03,
        8.1600e+02]])

In [37]:
# 各目的変数を回帰
predict_1 = pd.DataFrame(model_1.predict(X_sb.reshape(-1, 8, 1))[:,0])
predict_2 = pd.DataFrame(model_2.predict(X_sb.reshape(-1, 8, 1))[:,0])
predict_3 = pd.DataFrame(model_3.predict(X_sb.reshape(-1, 8, 1))[:,0])

# データの形を確認
predict_1.shape, predict_2.shape, predict_3.shape

((2247, 1), (2247, 1), (2247, 1))

In [43]:
# 結果を提出形式に変形
df_predictions = pd.concat([predict_1, predict_2, predict_3],axis=1)
df_predictions.columns = ['target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides']
submit_data = pd.concat([test.date_time, df_predictions],axis=1)
submit_data = submit_data.set_index('date_time')
submit_data.head()

Unnamed: 0_level_0,target_carbon_monoxide,target_benzene,target_nitrogen_oxides
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011-01-01 00:00:00,0.688739,0.700819,0.700616
2011-01-01 01:00:00,0.66769,0.66636,0.672191
2011-01-01 02:00:00,0.673271,0.67746,0.681488
2011-01-01 03:00:00,0.666857,0.664558,0.670657
2011-01-01 04:00:00,0.662507,0.654549,0.661999


In [44]:
# CSVファイルとして出力
submit_data.to_csv("submission_RNN_LSTM.csv")