## This notebook assesses the performance of our models on new/unseen data to make sure that good results aren't due to overfitting :)

# Preprocessing Validation Data

In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Read the validation set from disk
data = pd.read_csv('Validation_data')

# Pick the model inputs (column order matters: 'Close' sits at index 3) and the target column
feature_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close']
features = data[feature_columns]
target = data['Close']

# Min-max scale every feature column into [0, 1]
# NOTE(review): the scaler is fit on the validation file itself; if the model was trained
# with a scaler fit on the training data, these inputs are scaled differently — confirm.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit(features).transform(features)

# Create sequences of data for LSTM
def create_sequences(data, seq_length, target_col=3):
    """Slice a 2-D time series into overlapping windows with next-step targets.

    Window ``i`` holds rows ``i`` .. ``i + seq_length - 1`` of ``data``; its
    target is the value in column ``target_col`` of row ``i + seq_length``.

    Parameters
    ----------
    data : array-like
        2-D input, one row per time step and one column per feature.
    seq_length : int
        Number of consecutive rows in each window.
    target_col : int, default 3
        Column whose next-step value is the target. The default of 3 is the
        'Close' price in this notebook's feature order.

    Returns
    -------
    X : numpy.ndarray
        Windows, shape ``(len(data) - seq_length, seq_length, n_features)``.
    y : numpy.ndarray
        Targets, shape ``(len(data) - seq_length,)``.

    When ``data`` has no more than ``seq_length`` rows, both arrays are empty
    but keep the ranks described above.
    """
    data = np.asarray(data)
    n_windows = len(data) - seq_length

    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,); keep the 3-D / 1-D layout
        # so downstream shape-dependent code sees a consistent rank.
        empty_X = np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + seq_length] for start in range(n_windows)])
    # The target for window i is row i + seq_length, i.e. every row from seq_length on.
    # Copy so the result does not alias the caller's array.
    y = data[seq_length:, target_col].copy()
    return X, y

# Window size: each sample is 60 consecutive days, used to predict the following day's price
seq_length = 60
X, y = create_sequences(data=scaled_data, seq_length=seq_length)

# Chronological split (no shuffling). An integer test_size is an absolute sample count,
# so exactly one sequence — the last one — ends up in the test set.
# NOTE(review): every metric computed on X_test/y_test therefore rests on a single sample;
# confirm this is intended for a validation run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=1,
)

# 1. LSTM

In [6]:
# Restore the trained model from its pickle file
# NOTE(review): this section is titled "LSTM" and the variable is LSTM_model, but the file
# opened is 'gru.pkl' — confirm the intended model file.
# pickle.load can execute arbitrary code from the file; only load files you trust.
import pickle

with open('gru.pkl', mode='rb') as file:
    LSTM_model = pickle.load(file)

print("Model loaded successfully")

Model loaded successfully


In [7]:
# Predicting on test data
# y_test holds the scaled 'Close' value (feature column 3), and the model output is
# presumably on the same scale. Flatten to 1-D float64 so it lines up with y_test.
CLOSE_IDX = 3
predicted_scaled = np.asarray(LSTM_model.predict(X_test), dtype=np.float64).reshape(-1)
assert predicted_scaled.shape == y_test.shape, "expected exactly one prediction per test sequence"

# Inverse transform the predicted and actual values to get them in the original scale.
# MinMaxScaler maps each column as x_scaled = x * scale_ + min_, so the inverse for one
# column is (x_scaled - min_) / scale_. The 'Close' column's own parameters are used here:
# appending the values as the last column and calling inverse_transform would un-scale
# them with the 'Adj Close' range instead.
close_min, close_scale = scaler.min_[CLOSE_IDX], scaler.scale_[CLOSE_IDX]
predicted_prices = (predicted_scaled - close_min) / close_scale
actual_prices = (y_test - close_min) / close_scale

# Evaluate the model
mse = np.mean((predicted_prices - actual_prices) ** 2)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 2.727161200172379e-05


In [8]:
# Show the true price(s) on the first line and the model's prediction(s) on the second
print(actual_prices, predicted_prices, sep="\n")

[1.10864747]
[1.10342524]
