In [2]:
import os
import datetime

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from keras_tuner.tuners import RandomSearch
from keras_tuner import Objective
from keras_tuner.engine.hyperparameters import HyperParameters

In [3]:
df = pd.read_csv('extracted/high/AMS.csv')
df['dt'] = pd.to_datetime(df['dt'])

In [4]:
df = df[(df['dt'].dt.year>2008) & (df['dt'].dt.year<2021)]

In [5]:
df = df[['dt','Temperature']]
date_time = pd.to_datetime(df.pop('dt'))

In [6]:
timestamp_s = date_time.map(datetime.datetime.timestamp)

day = 24*60*60
year = (365.2425)*day

df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))

In [7]:
df = df.set_index(pd.DatetimeIndex(date_time))

In [10]:
historic_interval = pd.DataFrame()
for year in range(2010,2020):
    for month in range(1,13):
            for hour in range(24):  
                    points = df.loc[(df.index.hour==hour) & (df.index.month==month) & (df.index.year==year)]
                    mean = points['Temperature'].mean()
                    historic_interval = historic_interval.append({
                            'Mean':mean,
                            'Day sin':points['Day sin'].mean(),
                            'Day cos':points['Day cos'].mean(),
                            'Year sin':points['Year sin'].mean(),
                            'Year cos':points['Year cos'].mean()}, ignore_index=True)

In [11]:
Backward = 12*24
Forward = 12*24
interest = 'Mean'

In [13]:
train_df = historic_interval[:-24*12*4]
val_df = historic_interval[-24*12*4:-24*12]
test_df = historic_interval[-24*12:]

In [14]:
train_df

Unnamed: 0,Day cos,Day sin,Mean,Year cos,Year sin
0,9.659258e-01,-2.588190e-01,-0.084194,0.954356,0.256421
1,1.000000e+00,-5.187689e-12,-0.171935,0.954172,0.257105
2,9.659258e-01,2.588190e-01,-0.290968,0.953988,0.257789
3,8.660254e-01,5.000000e-01,-0.377097,0.953803,0.258473
4,7.071068e-01,7.071068e-01,-0.746452,0.953617,0.259156
...,...,...,...,...,...
1723,-5.584381e-12,-1.000000e+00,9.032903,0.953706,-0.258828
1724,2.588190e-01,-9.659258e-01,8.951935,0.953892,-0.258145
1725,5.000000e-01,-8.660254e-01,8.793871,0.954077,-0.257461
1726,7.071068e-01,-7.071068e-01,8.674194,0.954261,-0.256777


In [10]:
train_df = train_df[[interest,'Day sin','Day cos','Year sin','Year cos']]
val_df = val_df[[interest,'Day sin','Day cos','Year sin','Year cos']]

In [11]:
from sklearn.preprocessing import RobustScaler

temp_transformer = RobustScaler()
temp_transformer = temp_transformer.fit(train_df[[interest]])
train_df[interest] = temp_transformer.transform(train_df[[interest]])
val_df[interest] = temp_transformer.transform(val_df[[interest]])

In [12]:
train_df = np.array(train_df)
val_df = np.array(val_df)
test_df = np.array(test_df)

In [13]:
X_train, y_train = [],[]
for i in range(Backward, len(train_df)-Forward):
    X_train.append(train_df[i-Backward:i]) 
    y_train.append(train_df[i:i+Forward,0])

In [14]:
X_train, y_train = np.array(X_train),np.array(y_train)

In [15]:
X_train.shape, y_train.shape

((1152, 288, 5), (1152, 288))

In [16]:
X_val, y_val = [],[]
for i in range(Backward, len(val_df)-Forward):
    X_val.append(val_df[i-Backward:i]) 
    y_val.append(val_df[i:i+Forward,0])

In [17]:
X_val, y_val = np.array(X_val),np.array(y_val)

In [18]:
X_val.shape, y_val.shape

((288, 288, 5), (288, 288))

In [40]:
def build_model(hp):
    model = tf.keras.Sequential()
    n_layers=hp.Int('n layers', min_value=0, max_value=2, step=1)

    if(n_layers!=0):
        model.add(
            tf.keras.layers.LSTM(
                units=hp.Int(name="first LSTM", min_value=100, max_value=1600, step=100),
                input_shape=(X_train.shape[1], X_train.shape[2]),return_sequences=True
        )) 
    else:
        model.add(
            tf.keras.layers.LSTM(
                units=hp.Int(name="first LSTM", min_value=100, max_value=1600, step=100),
                input_shape=(X_train.shape[1], X_train.shape[2])
        )) 

    for i in range(n_layers):
        if(i==n_layers-1):
            model.add(
            tf.keras.layers.LSTM(
                units=hp.Int(name="LSTM l{} units".format(i), min_value=100, max_value=1600, step=100)
                )
            )
        else:
            model.add(
            tf.keras.layers.LSTM(
                units=hp.Int(name="LSTM l{} units".format(i), min_value=100, max_value=1600, step=100)
                ,return_sequence=True
            )
    )

    model.add(tf.keras.layers.Dropout(rate=hp.Float(name="dropout",min_value=0, max_value=0.9, step=0.1)))
    model.add(tf.keras.layers.Dense(units=Forward))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [41]:
model = build_model(HyperParameters())

In [21]:
tuner = RandomSearch(
    build_model,
    objective='loss',
    max_trials=3,
    directory='tuning',
)

tuner.search(x=X_train,
    y=y_train,
    epochs=1,
    validation_data=(X_val,y_val))

Trial 3 Complete [00h 03m 22s]
loss: 0.1451890766620636

Best loss So Far: 0.1451890766620636
Total elapsed time: 00h 06m 50s
INFO:tensorflow:Oracle triggered exit


In [None]:
X_val.shape

In [None]:
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_val, y_val),
    shuffle=False
)

In [None]:
plt.plot(history.history['loss'],label='train')
plt.plot(history.history['val_loss'],label='validation')
plt.legend()

In [None]:
X_test_final = np.array(val_df)[-12*24:]

In [None]:
y_pred = model.predict(X_test_final.reshape(1,288,5))

In [None]:
y_pred_inv = temp_transformer.inverse_transform(y_pred)

In [None]:
y_pred_inv = y_pred_inv.reshape(288)
test_df = test_df.reshape(288)

In [None]:
results = pd.DataFrame()
results['Actual'] = test_df
results['Prediction'] = y_pred_inv

In [None]:
historic_interval = pd.DataFrame()
for year in range(2010,2020):
    for month in range(1,13):
            for hour in range(24):  
                    points = df.loc[(df.index.hour==hour) & (df.index.month==month) & (df.index.year==year)]
                    mean = points['Temperature'].mean()
                    historic_interval = historic_interval.append({
                            '':, ignore_index=True)

In [None]:
print('MAE: {:.2f}'.format(abs(results['Actual'] - results['Prediction']).mean()))
print('RMSE: {:.2f}'.format(((results['Actual'] - results['Prediction']) ** 2).mean() ** .5))

In [1]:
for i in range(0):
    print(i)