# AutoKerasを使った時系列予測のチュートリアル

In [1]:
import pandas as pd
import tensorflow as tf
import autokeras as ak

## データロード

In [3]:
# Load the semicolon-separated CSV and tidy it in one pass:
#   1. drop the last two columns,
#   2. drop every row that contains a missing value,
#   3. turn decimal commas into decimal points in all text cells.
dataset = (
    pd.read_csv("AirQualityUCI.csv", sep=";")
    .iloc[:, :-2]
    .dropna()
    .replace(",", ".", regex=True)
)
print(dataset)

            Date      Time CO(GT)  PT08.S1(CO)  NMHC(GT) C6H6(GT)  \
0     10/03/2004  18.00.00    2.6       1360.0     150.0     11.9   
1     10/03/2004  19.00.00      2       1292.0     112.0      9.4   
2     10/03/2004  20.00.00    2.2       1402.0      88.0      9.0   
3     10/03/2004  21.00.00    2.2       1376.0      80.0      9.2   
4     10/03/2004  22.00.00    1.6       1272.0      51.0      6.5   
...          ...       ...    ...          ...       ...      ...   
9352  04/04/2005  10.00.00    3.1       1314.0    -200.0     13.5   
9353  04/04/2005  11.00.00    2.4       1163.0    -200.0     11.4   
9354  04/04/2005  12.00.00    2.4       1142.0    -200.0     12.4   
9355  04/04/2005  13.00.00    2.1       1003.0    -200.0      9.5   
9356  04/04/2005  14.00.00    2.2       1071.0    -200.0     11.9   

      PT08.S2(NMHC)  NOx(GT)  PT08.S3(NOx)  NO2(GT)  PT08.S4(NO2)  \
0            1046.0    166.0        1056.0    113.0        1692.0   
1             955.0    103.0     

In [5]:
# Split into training and validation sets in row order (no shuffling):
# the first 70% of the rows are used for training, the rest for validation.
val_split = int(0.7 * len(dataset))
data_train = dataset.iloc[:val_split]
validation_data = dataset.iloc[val_split:]




In [10]:
# Columns fed to the model as input features.
feature_columns = [
    "CO(GT)",
    "PT08.S1(CO)",
    "NMHC(GT)",
    "C6H6(GT)",
    "PT08.S2(NMHC)",
    "NOx(GT)",
    "PT08.S3(NOx)",
    "NO2(GT)",
    "PT08.S4(NO2)",
    "PT08.S5(O3)",
    "T",
    "RH",
]

# Training inputs: the selected columns of the training split, cast to float64.
data_x = data_train.loc[:, feature_columns].astype("float64")

display(data_x)

Unnamed: 0,CO(GT),PT08.S1(CO),NMHC(GT),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH
0,2.6,1360.0,150.0,11.9,1046.0,166.0,1056.0,113.0,1692.0,1268.0,13.6,48.9
1,2.0,1292.0,112.0,9.4,955.0,103.0,1174.0,92.0,1559.0,972.0,13.3,47.7
2,2.2,1402.0,88.0,9.0,939.0,131.0,1140.0,114.0,1555.0,1074.0,11.9,54.0
3,2.2,1376.0,80.0,9.2,948.0,172.0,1092.0,122.0,1584.0,1203.0,11.0,60.0
4,1.6,1272.0,51.0,6.5,836.0,131.0,1205.0,116.0,1490.0,1110.0,11.2,59.6
...,...,...,...,...,...,...,...,...,...,...,...,...
6544,1.7,1111.0,-200.0,7.6,884.0,310.0,751.0,81.0,1280.0,1025.0,13.1,66.2
6545,2.3,1206.0,-200.0,9.8,969.0,363.0,701.0,95.0,1365.0,1178.0,14.3,63.3
6546,2.1,1202.0,-200.0,9.5,958.0,302.0,732.0,92.0,1330.0,1181.0,16.0,58.1
6547,2.7,1261.0,-200.0,11.2,1022.0,424.0,635.0,113.0,1407.0,1250.0,17.3,53.9


In [8]:
# Columns fed to the model as input features.
feature_columns = [
    "CO(GT)",
    "PT08.S1(CO)",
    "NMHC(GT)",
    "C6H6(GT)",
    "PT08.S2(NMHC)",
    "NOx(GT)",
    "PT08.S3(NOx)",
    "NO2(GT)",
    "PT08.S4(NO2)",
    "PT08.S5(O3)",
    "T",
    "RH",
]

# Validation inputs: the selected columns of the validation split, cast to float64.
data_x_val = validation_data.loc[:, feature_columns].astype("float64")

display(data_x_val)

Unnamed: 0,CO(GT),PT08.S1(CO),NMHC(GT),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH
6549,1.2,997.0,-200.0,4.6,742.0,179.0,911.0,82.0,1180.0,899.0,16.7,54.7
6550,1.4,1049.0,-200.0,5.5,790.0,221.0,870.0,96.0,1183.0,1019.0,15.4,58.9
6551,1.9,1082.0,-200.0,6.5,834.0,303.0,800.0,107.0,1207.0,1057.0,15.2,58.0
6552,2.6,1177.0,-200.0,10.3,987.0,429.0,692.0,119.0,1324.0,1229.0,16.2,54.5
6553,2.4,1166.0,-200.0,8.9,935.0,357.0,756.0,114.0,1301.0,1135.0,16.5,55.3
...,...,...,...,...,...,...,...,...,...,...,...,...
9352,3.1,1314.0,-200.0,13.5,1101.0,472.0,539.0,190.0,1374.0,1729.0,21.9,29.3
9353,2.4,1163.0,-200.0,11.4,1027.0,353.0,604.0,179.0,1264.0,1269.0,24.3,23.7
9354,2.4,1142.0,-200.0,12.4,1063.0,293.0,603.0,175.0,1241.0,1092.0,26.9,18.3
9355,2.1,1003.0,-200.0,9.5,961.0,235.0,702.0,156.0,1041.0,770.0,28.3,13.5


## テストデータ

In [9]:
# Columns fed to the model as input features.
feature_columns = [
    "CO(GT)",
    "PT08.S1(CO)",
    "NMHC(GT)",
    "C6H6(GT)",
    "PT08.S2(NMHC)",
    "NOx(GT)",
    "PT08.S3(NOx)",
    "NO2(GT)",
    "PT08.S4(NO2)",
    "PT08.S5(O3)",
    "T",
    "RH",
]

# Prediction inputs: taken from the full dataset, i.e. the training rows
# followed by the unseen rows from subsequent time steps.
data_x_test = dataset.loc[:, feature_columns].astype("float64")

display(data_x_test)

Unnamed: 0,CO(GT),PT08.S1(CO),NMHC(GT),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH
0,2.6,1360.0,150.0,11.9,1046.0,166.0,1056.0,113.0,1692.0,1268.0,13.6,48.9
1,2.0,1292.0,112.0,9.4,955.0,103.0,1174.0,92.0,1559.0,972.0,13.3,47.7
2,2.2,1402.0,88.0,9.0,939.0,131.0,1140.0,114.0,1555.0,1074.0,11.9,54.0
3,2.2,1376.0,80.0,9.2,948.0,172.0,1092.0,122.0,1584.0,1203.0,11.0,60.0
4,1.6,1272.0,51.0,6.5,836.0,131.0,1205.0,116.0,1490.0,1110.0,11.2,59.6
...,...,...,...,...,...,...,...,...,...,...,...,...
9352,3.1,1314.0,-200.0,13.5,1101.0,472.0,539.0,190.0,1374.0,1729.0,21.9,29.3
9353,2.4,1163.0,-200.0,11.4,1027.0,353.0,604.0,179.0,1264.0,1269.0,24.3,23.7
9354,2.4,1142.0,-200.0,12.4,1063.0,293.0,603.0,175.0,1241.0,1092.0,26.9,18.3
9355,2.1,1003.0,-200.0,9.5,961.0,235.0,702.0,156.0,1041.0,770.0,28.3,13.5


In [11]:
# Target series for the training and validation splits, cast to float64.
target_column = "AH"
data_y = data_train[target_column].astype("float64")
data_y_val = validation_data[target_column].astype("float64")

# Print the shapes of the training inputs and the training target, one per line;
# the recorded run showed (6549, 12) and (6549,).
print(data_x.shape, data_y.shape, sep="\n")

(6549, 12)
(6549,)


In [12]:
# Forecasting window settings passed to the forecaster below.
predict_from = 1
predict_until = 10
lookback = 3
# Fixed seed so that the model search and training are as repeatable as
# possible from one run of the notebook to the next.
seed = 42
clf = ak.TimeseriesForecaster(
    lookback=lookback,
    predict_from=predict_from,
    predict_until=predict_until,
    max_trials=1,  # a single search trial keeps the tutorial quick
    objective="val_loss",
    seed=seed,
)
# Train the TimeseriesForecaster on the training split, using the
# validation split to score the search objective.
clf.fit(
    x=data_x,
    y=data_y,
    validation_data=(data_x_val, data_y_val),
    batch_size=32,
    epochs=10,
)
# Predict with the best model. The input is built from the full dataset, so it
# contains the original training rows followed by the later, unseen rows.
predictions = clf.predict(data_x_test)
print(predictions.shape)
# Evaluate the best model on the validation split (this notebook has no
# separate labelled test set, so this is not an independent test score).
print(clf.evaluate(data_x_val, data_y_val))

Trial 1 Complete [00h 00m 58s]
val_loss: 2633.755615234375

Best val_loss So Far: 2633.755615234375
Total elapsed time: 00h 00m 58s
INFO:tensorflow:Oracle triggered exit
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: ./time_series_forecaster/best_model/assets


INFO:tensorflow:Assets written to: ./time_series_forecaster/best_model/assets


(10, 1)
[2618.830078125, 2618.830078125]
