# LSTM prediction

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

from keras.models import Sequential
from keras.layers import LSTM, Dense

from sklearn.metrics import mean_squared_error

## Importing the data

In [None]:
# Load the training split and separate the features from the "count" target.
# NOTE(review): read_pickle unpickles the file, so only load trusted data.
df_train = pd.read_pickle(r"../input/train.pkl")
y_train = df_train["count"]
# "date" is dropped together with the target, so it is not used as a feature.
X_train = df_train.drop(columns=["date", "count"])
df_train.head()

In [None]:
# Load the held-out split and separate the features from the 'count' target.
# NOTE(review): read_pickle unpickles the file, so only load trusted data.
df_test = pd.read_pickle(r"../input/test.pkl")
y_test = df_test['count']
# 'date' is dropped together with the target, so it is not used as a feature.
X_test = df_test.drop(columns=['date', 'count'])
df_test.head()

# Preparing the data

In [None]:
# Cast the boolean calendar-flag columns to integers in both feature tables.
_flag_columns = ['Friday', 'Weekend', 'School holiday', 'National holiday']
X_train[_flag_columns] = X_train[_flag_columns].astype(int)
X_test[_flag_columns] = X_test[_flag_columns].astype(int)

In [None]:
def create_dataset(X, y, time_steps=1):
    """Slice a feature table and a target series into sliding windows.

    Each sample consists of ``time_steps`` consecutive rows of ``X``; its
    label is the value of ``y`` at the row immediately after that window.

    Args:
        X: pandas object supporting ``.iloc`` slicing (e.g. a DataFrame of
            features).
        y: pandas object supporting ``.iloc`` integer lookup, aligned
            row-for-row with ``X``.
        time_steps: Number of consecutive rows in each window.

    Returns:
        A ``(windows, targets)`` pair of NumPy arrays. For a DataFrame
        ``X``, ``windows`` has shape
        ``(len(X) - time_steps, time_steps, n_features)`` and ``targets``
        has shape ``(len(X) - time_steps,)``. When ``X`` has no more than
        ``time_steps`` rows both arrays are empty, but ``windows`` still
        keeps its trailing dimensions.
    """
    n_windows = len(X) - time_steps
    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,), which breaks callers
        # that read shape[1] / shape[2]; keep the rank explicit instead.
        empty_windows = np.empty((0, time_steps) + tuple(X.shape[1:]))
        return empty_windows, np.array([])

    windows = [
        X.iloc[start:start + time_steps].values for start in range(n_windows)
    ]
    # The target for a window is the observation right after its last row.
    targets = [y.iloc[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [None]:
# Window length, set to the periodicity of the data (7 rows per cycle).
time_steps = 7

# Replace the flat feature/target tables with sliding windows and their
# next-step targets; the train and test splits are windowed independently.
X_train, y_train = create_dataset(X_train, y_train, time_steps)
X_test, y_test = create_dataset(X_test, y_test, time_steps)

# Sanity check of the resulting training array shapes.
print(f"{X_train.shape} {y_train.shape}")

## Modeling

In [None]:
# Single-layer LSTM regressor: one 128-unit LSTM feeding a one-unit Dense
# output, trained on mean squared error with Adam.
model = keras.Sequential([
    keras.layers.LSTM(
        units=128,
        activation='relu',
        # Input shape is taken from the windowed training array, whose
        # leading axis is the sample axis.
        input_shape=X_train.shape[1:],
    ),
    keras.layers.Dense(units=1),
])
model.compile(
    optimizer=keras.optimizers.Adam(0.001),  # positional arg: learning rate
    loss='mean_squared_error',
)

In [None]:
# Alternative architecture (disabled): two stacked LSTM layers instead of one
# model = Sequential()
# model.add(LSTM(128, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
# model.add(LSTM(128, activation='relu'))
# model.add(Dense(units=1))
# model.compile(optimizer=keras.optimizers.Adam(0.001), loss='mse')

### Training

In [None]:
# Fit the model on the windowed training data.
model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=120,
    # Keep the samples in their original order between epochs.
    shuffle=False,
    # Hold out 10% of the training samples for validation.
    validation_split=0.1,
    # Suppress per-epoch progress output.
    verbose=0,
)

In [None]:
# Predict on the windowed test set and report the root mean squared error
# (square root of the MSE) as the cell output.
y_pred = model.predict(X_test)
mean_squared_error(y_true=y_test, y_pred=y_pred) ** 0.5

In [None]:
# Overlay the predictions on the true test targets, indexed by sample
# position within the windowed test set.
sample_index = range(len(y_test))
plt.plot(sample_index, y_test, label='real value')
plt.plot(sample_index, y_pred, label='prediction')

plt.legend()
plt.show()