In [11]:
# Mount Google Drive so files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

import sys
# Add the project folder to the module search path; presumably this is where
# config.py lives -- confirm.
sys.path.append('/content/drive/MyDrive/Colab Notebooks/Stock_Market_Prediction')

# NOTE(review): the star import hides which names come from config. A later
# cell uses PROCESSED_DATA_PATH, which presumably is defined there -- confirm.
from config import *

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
import numpy as np
import pandas as pd
import os

def create_sequences(data, seq_length=14):
    """Build sliding-window samples for next-step prediction.

    Sample ``i`` pairs the ``seq_length`` consecutive ``Close`` values at
    positions ``i .. i + seq_length - 1`` with the ``Target`` value at
    position ``i + seq_length``. Rows are used in their existing order.

    Args:
        data: Table exposing ``Close`` and ``Target`` columns and ``len()``
            (a pandas DataFrame in this notebook).
        seq_length: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays where ``X`` has shape
        ``(n, seq_length)`` and ``y`` has shape ``(n,)``, with
        ``n = max(len(data) - seq_length, 0)``. When ``data`` is too short
        to form a single window, ``X`` is still 2-D (``(0, seq_length)``) so
        callers that reshape on ``X.shape[1]`` keep working.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    # Pull the columns out once instead of on every loop iteration.
    close = np.asarray(data['Close'])
    target = np.asarray(data['Target'])

    n_samples = max(len(data) - seq_length, 0)

    # Row i of the index matrix is [i, i + 1, ..., i + seq_length - 1]; fancy
    # indexing with it gathers every window in one step and yields a fresh
    # array (not a view of the input column).
    window_index = np.arange(n_samples)[:, None] + np.arange(seq_length)[None, :]
    X = close[window_index]

    # The label for window i sits immediately after the window's last row.
    # Copy so the result does not alias the source column.
    y = target[seq_length:seq_length + n_samples].copy()
    return X, y

def load_and_prepare_data(processed_path, seq_length=14):
    """Read the train/val/test CSVs and convert each into windowed arrays.

    Args:
        processed_path: Directory containing ``train/``, ``val/`` and
            ``test/`` sub-folders, each holding a ``scaled_data.csv`` file.
        seq_length: Window length forwarded to ``create_sequences``.

    Returns:
        ``((X_train, y_train), (X_val, y_val), (X_test, y_test))`` where each
        ``X`` has been reshaped to ``(samples, timesteps, 1)``.
    """
    csv_names = (
        'train/scaled_data.csv',
        'val/scaled_data.csv',
        'test/scaled_data.csv',
    )

    # Read every split up front (first column as a date-parsed index) before
    # any windowing takes place.
    frames = [
        pd.read_csv(os.path.join(processed_path, csv_name), index_col=0, parse_dates=True)
        for csv_name in csv_names
    ]

    # Turn each frame into (windows, targets).
    windowed = [create_sequences(frame, seq_length) for frame in frames]

    # Append a trailing feature axis of size 1: [samples, timesteps, features].
    prepared = []
    for windows, targets in windowed:
        n_samples, n_steps = windows.shape[0], windows.shape[1]
        prepared.append((windows.reshape((n_samples, n_steps, 1)), targets))

    return tuple(prepared)


In [10]:
# Build the windowed train/validation/test arrays from the processed data.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = load_and_prepare_data(PROCESSED_DATA_PATH)

# Report the shape of each split, one per line.
print(
    f'Training set shape: {X_train.shape}',
    f'Validation set shape: {X_val.shape}',
    f'Test set shape: {X_test.shape}',
    sep='\n',
)

# Show the final five windows of every split as a quick sanity check.
print("Training set last 5 rows:", X_train[-5:], sep='\n')
print("Validation set last 5 rows:", X_val[-5:], sep='\n')
print("Test set last 5 rows:", X_test[-5:], sep='\n')

Training set shape: (17039, 14, 1)
Validation set shape: (3640, 14, 1)
Test set shape: (3641, 14, 1)
Training set last 5 rows:
[[[0.95472408]
  [0.94983596]
  [0.9574199 ]
  [0.96290889]
  [0.96734226]
  [0.96211306]
  [0.9676183 ]
  [0.96463028]
  [0.9671798 ]
  [0.9693722 ]
  [0.97770311]
  [0.97963562]
  [0.97594921]
  [0.97856375]]

 [[0.94983596]
  [0.9574199 ]
  [0.96290889]
  [0.96734226]
  [0.96211306]
  [0.9676183 ]
  [0.96463028]
  [0.9671798 ]
  [0.9693722 ]
  [0.97770311]
  [0.97963562]
  [0.97594921]
  [0.97856375]
  [0.98944428]]

 [[0.9574199 ]
  [0.96290889]
  [0.96734226]
  [0.96211306]
  [0.9676183 ]
  [0.96463028]
  [0.9671798 ]
  [0.9693722 ]
  [0.97770311]
  [0.97963562]
  [0.97594921]
  [0.97856375]
  [0.98944428]
  [0.99594011]]

 [[0.96290889]
  [0.96734226]
  [0.96211306]
  [0.9676183 ]
  [0.96463028]
  [0.9671798 ]
  [0.9693722 ]
  [0.97770311]
  [0.97963562]
  [0.97594921]
  [0.97856375]
  [0.98944428]
  [0.99594011]
  [1.        ]]

 [[0.96734226]
  [0.96211