In [29]:
import random
import warnings
from datetime import datetime
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

warnings.filterwarnings("ignore")

In [30]:
# Read the three "Max" splits; each CSV carries a leftover "Unnamed: 0"
# column, which is dropped right after loading.
max_split_paths = (
    "./StationsDatasets/trainingMax.csv",
    "./StationsDatasets/testingMax.csv",
    "./StationsDatasets/validationMax.csv",
)
training_data, testing_data, validating_data = (
    pd.read_csv(csv_path).drop(columns=["Unnamed: 0"])
    for csv_path in max_split_paths
)

In [31]:
# Columns kept for modelling; "Count" is the target, the rest are predictors.
cols = [
    "StationEnd", "Count", "Count1", "Count1week", "Count2week", "Count3week",
    "Count4week", "Temp", "ATemp", "Humidity", "Hour", "WeatherSituation",
]

# Keep only the listed columns, in the order they already appear in each frame;
# listed columns that a frame does not have are silently skipped.
train_keep = [name for name in training_data.columns if name in cols]
test_keep = [name for name in testing_data.columns if name in cols]
val_keep = [name for name in validating_data.columns if name in cols]

training_data = training_data.loc[:, train_keep]
testing_data = testing_data.loc[:, test_keep]
validating_data = validating_data.loc[:, val_keep]

In [32]:
# Targets: the "Count" column of each split.
trainY = training_data["Count"]
testY = testing_data["Count"]
valY = validating_data["Count"]

# Predictors: every remaining column, with a length-1 axis inserted in the
# middle so each array is shaped (rows, 1, columns).
trainX = training_data.drop(columns=["Count"]).values[:, np.newaxis, :]
testX = testing_data.drop(columns=["Count"]).values[:, np.newaxis, :]
valX = validating_data.drop(columns=["Count"]).values[:, np.newaxis, :]

In [33]:
# Seed Python's `random`, NumPy and TensorFlow before the model is built so
# that weight initialisation and sample shuffling start from the same state on
# every "Restart Kernel -> Run All".
SEED = 42
set_random_seed(SEED)

# Training hyper-parameters, named so they can be tuned in one place.
EPOCHS = 40
# NOTE(review): a batch size of 1 means one weight update per sample; the
# recorded log shows roughly two minutes per epoch. Value kept unchanged so the
# training setup is the same as before.
BATCH_SIZE = 1

model = Sequential()
# trainX is (samples, 1, features); taking the input shape from the array that
# is actually fed to fit() keeps the two from drifting apart.
model.add(LSTM(4, input_shape=trainX.shape[1:]))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=2)

# NOTE(review): this scaler is fitted *after* training, on the concatenated
# train + test frames (which still contain the "Count" target). `dataset` is
# not consumed by any cell visible here; if it is ever used as model input,
# fit the scaler on the training split only to avoid leaking test statistics.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(pd.concat([training_data, testing_data]))

Epoch 1/40


2023-05-06 16:10:56.445158: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-06 16:10:56.446320: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-06 16:10:56.447089: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

150406/150406 - 123s - loss: 33.8270 - 123s/epoch - 815us/step
Epoch 2/40
150406/150406 - 122s - loss: 27.7012 - 122s/epoch - 811us/step
Epoch 3/40
150406/150406 - 121s - loss: 27.9817 - 121s/epoch - 806us/step
Epoch 4/40
150406/150406 - 122s - loss: 27.9000 - 122s/epoch - 810us/step
Epoch 5/40
150406/150406 - 122s - loss: 27.1267 - 122s/epoch - 812us/step
Epoch 6/40
150406/150406 - 122s - loss: 26.9775 - 122s/epoch - 814us/step
Epoch 7/40
150406/150406 - 122s - loss: 26.8503 - 122s/epoch - 809us/step
Epoch 8/40
150406/150406 - 122s - loss: 26.8149 - 122s/epoch - 811us/step
Epoch 9/40
150406/150406 - 122s - loss: 26.6727 - 122s/epoch - 810us/step
Epoch 10/40
150406/150406 - 119s - loss: 26.5110 - 119s/epoch - 793us/step
Epoch 11/40
150406/150406 - 122s - loss: 26.4803 - 122s/epoch - 810us/step
Epoch 12/40
150406/150406 - 122s - loss: 26.4563 - 122s/epoch - 811us/step
Epoch 13/40
150406/150406 - 121s - loss: 26.3909 - 121s/epoch - 806us/step
Epoch 14/40
150406/150406 - 122s - loss: 26.4

In [55]:
# Score the fitted model on the training split.
trainPredict = model.predict(trainX)

# Root-mean-squared error between the true counts and the predictions.
trainScore = np.sqrt(mean_squared_error(trainY, trainPredict))
print('Train Score: %.2f RMSE' % (trainScore))
# The model is compiled with loss='mean_squared_error' and no extra metrics,
# so evaluate() returns the MSE loss (the square of the RMSE above) -- it is
# not an accuracy, and is labelled accordingly.
results = model.evaluate(trainX, trainY, batch_size=128)
print("Train MSE:", results)

Train Score: 5.19 RMSE
Train Accuracy: 26.931499481201172


In [35]:
# Score the fitted model on the test split.
testPredict = model.predict(testX)

# Root-mean-squared error between the true counts and the predictions.
testScore = np.sqrt(mean_squared_error(testY, testPredict))
print('Test Score: %.2f RMSE' % (testScore))
# The model is compiled with loss='mean_squared_error' and no extra metrics,
# so evaluate() returns the MSE loss (the square of the RMSE above) -- it is
# not an accuracy, and is labelled accordingly.
results = model.evaluate(testX, testY, batch_size=128)
print("Test MSE:", results)

Test Score: 5.22 RMSE
Test Accuracy: 27.224096298217773


In [36]:
# Score the fitted model on the validation split.
valPredict = model.predict(valX)

# Root-mean-squared error between the true counts and the predictions.
valScore = np.sqrt(mean_squared_error(valY, valPredict))
print('Validation Score: %.2f RMSE' % (valScore))
# The model is compiled with loss='mean_squared_error' and no extra metrics,
# so evaluate() returns the MSE loss (the square of the RMSE above) -- it is
# not an accuracy, and is labelled accordingly.
results = model.evaluate(valX, valY, batch_size=128)
print("Validation MSE:", results)

Validation Score: 5.12 RMSE
Validation Accuracy: 26.19596290588379


### Evaluating the same trained model on a different cluster (Min datasets)

In [37]:
# Read the three "Min" splits; as with the other CSVs, the leftover
# "Unnamed: 0" column is dropped right after loading.
min_split_paths = (
    "./StationsDatasets/trainingMin.csv",
    "./StationsDatasets/testingMin.csv",
    "./StationsDatasets/validationMin.csv",
)
training_data_min, testing_data_min, validating_data_min = (
    pd.read_csv(csv_path).drop(columns=["Unnamed: 0"])
    for csv_path in min_split_paths
)

In [38]:
# Columns kept for evaluation; "Count" is the target, the rest are predictors.
cols = [
    "StationEnd", "Count", "Count1", "Count1week", "Count2week", "Count3week",
    "Count4week", "Temp", "ATemp", "Humidity", "Hour", "WeatherSituation",
]

# Keep only the listed columns, in the order they already appear in each frame;
# listed columns that a frame does not have are silently skipped.
train_min_keep = [name for name in training_data_min.columns if name in cols]
test_min_keep = [name for name in testing_data_min.columns if name in cols]
val_min_keep = [name for name in validating_data_min.columns if name in cols]

training_data_min = training_data_min.loc[:, train_min_keep]
testing_data_min = testing_data_min.loc[:, test_min_keep]
validating_data_min = validating_data_min.loc[:, val_min_keep]

In [39]:
# Evaluate the already-fitted model on the Min *training* split.
# NOTE(review): valX / valY / valPredict / valScore are reused here and
# overwrite the values computed from `validating_data`; re-running an earlier
# cell that reads them afterwards would silently score this split instead.
valX = training_data_min.drop(columns=["Count"])
valY = training_data_min["Count"]

# Insert a length-1 middle axis: (rows, 1, columns), as the model expects.
valX = np.reshape(valX.values, (valX.shape[0], 1, valX.shape[1]))

valPredict = model.predict(valX)

# Root-mean-squared error between the true counts and the predictions.
valScore = np.sqrt(mean_squared_error(valY, valPredict))
print('Train (Min) Score: %.2f RMSE' % (valScore))
# evaluate() returns the compiled loss (mean squared error), not an accuracy.
results = model.evaluate(valX, valY, batch_size=128)
print("Train (Min) MSE:", results)

Validation Score: 3.09 RMSE
Validation Accuracy: 9.533097267150879


In [40]:
# Evaluate the already-fitted model on the Min *test* split.
# NOTE(review): valX / valY / valPredict / valScore are reused here and
# overwrite whatever an earlier cell stored in them; re-running an earlier
# cell that reads them afterwards would silently score this split instead.
valX = testing_data_min.drop(columns=["Count"])
valY = testing_data_min["Count"]

# Insert a length-1 middle axis: (rows, 1, columns), as the model expects.
valX = np.reshape(valX.values, (valX.shape[0], 1, valX.shape[1]))

valPredict = model.predict(valX)

# Root-mean-squared error between the true counts and the predictions.
valScore = np.sqrt(mean_squared_error(valY, valPredict))
print('Test (Min) Score: %.2f RMSE' % (valScore))
# evaluate() returns the compiled loss (mean squared error), not an accuracy.
results = model.evaluate(valX, valY, batch_size=128)
print("Test (Min) MSE:", results)

Validation Score: 3.05 RMSE
Validation Accuracy: 9.307868957519531


In [41]:
# Evaluate the already-fitted model on the Min *validation* split.
# NOTE(review): valX / valY / valPredict / valScore are reused here and
# overwrite whatever an earlier cell stored in them; re-running an earlier
# cell that reads them afterwards would silently score this split instead.
valX = validating_data_min.drop(columns=["Count"])
valY = validating_data_min["Count"]

# Insert a length-1 middle axis: (rows, 1, columns), as the model expects.
valX = np.reshape(valX.values, (valX.shape[0], 1, valX.shape[1]))

valPredict = model.predict(valX)

# Root-mean-squared error between the true counts and the predictions.
valScore = np.sqrt(mean_squared_error(valY, valPredict))
print('Validation (Min) Score: %.2f RMSE' % (valScore))
# evaluate() returns the compiled loss (mean squared error), not an accuracy.
results = model.evaluate(valX, valY, batch_size=128)
print("Validation (Min) MSE:", results)

Validation Score: 3.05 RMSE
Validation Accuracy: 9.310832023620605
