In [4]:
# Mount Google Drive so the project folder is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

import sys

# Make the project folder importable. The membership check keeps this cell
# idempotent: re-running it no longer appends duplicate entries to sys.path.
_PROJECT_DIR = '/content/drive/MyDrive/Colab Notebooks/Stock_Market_Prediction'
if _PROJECT_DIR not in sys.path:
    sys.path.append(_PROJECT_DIR)

# NOTE(review): the star import hides which names come from config. A later cell
# reads PROCESSED_DATA_PATH, which is presumably defined there -- confirm, and
# consider importing the required names explicitly.
from config import *

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd
import os

def create_sequences(data, seq_length=21, feature_col='Close', target_col='Target'):
    """
    Build sliding-window samples for time series prediction.

    Sample ``i`` is the ``seq_length`` consecutive values of ``feature_col``
    starting at row ``i``; its label is the ``target_col`` value of the row
    immediately after that window (row ``i + seq_length``).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both ``feature_col`` and ``target_col``.
    seq_length : int, default 21
        Number of consecutive rows in each input window.
    feature_col : str, default 'Close'
        Column the input windows are taken from.
    target_col : str, default 'Target'
        Column the labels are taken from.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(len(data) - seq_length, seq_length)``. When ``data`` has no
        complete window the shape is ``(0, seq_length)``, so callers can still
        rely on the second dimension.
    y : numpy.ndarray
        Shape ``(len(data) - seq_length,)``.

    Raises
    ------
    KeyError
        If ``feature_col`` or ``target_col`` is not a column of ``data``. The
        message lists the missing and the available columns.
    """
    data_length = len(data)
    print(f"Original data length: {data_length}")

    # Fail early with an actionable message instead of a bare KeyError: 'name'.
    missing = [col for col in (feature_col, target_col) if col not in data.columns]
    if missing:
        raise KeyError(
            f"Column(s) {missing} not found in data; "
            f"available columns: {list(data.columns)}"
        )

    # Extract the underlying arrays once rather than on every loop iteration.
    features = data[feature_col].values
    targets = data[target_col].values

    n_sequences = max(data_length - seq_length, 0)
    if n_sequences == 0:
        # Keep X two-dimensional so downstream code can read X.shape[1].
        X = np.empty((0, seq_length), dtype=features.dtype)
        y = np.empty((0,), dtype=targets.dtype)
    else:
        X = np.array([features[start:start + seq_length]
                      for start in range(n_sequences)])
        # The label of window i sits at row i + seq_length, i.e. the tail of targets.
        y = np.array(targets[seq_length:seq_length + n_sequences])

    print("Created sequences:")
    print(f"X shape: {X.shape}")
    print(f"y shape: {y.shape}")

    return X, y

def load_and_prepare_data(processed_path,
                         train_file,
                         val_file,
                         test_file,
                         seq_length=21,
                         feature_col='Close',
                         target_col='Target',
                         train_dir='train',
                         val_dir='val',
                         test_dir='test'):
    """
    Load the train/val/test CSV files and turn each into LSTM-ready sequences.

    Each file is read from ``processed_path/<split_dir>/<split_file>`` with the
    first column as the index and date parsing enabled, then windowed by
    ``create_sequences``.

    Parameters
    ----------
    processed_path : str
        Directory that contains the per-split sub-directories.
    train_file, val_file, test_file : str
        CSV file name for each split.
    seq_length : int, default 21
        Window length passed to ``create_sequences``.
    feature_col, target_col : str
        Column names passed to ``create_sequences``.
    train_dir, val_dir, test_dir : str
        Sub-directory of ``processed_path`` for each split.

    Returns
    -------
    tuple
        ``((X_train, y_train), (X_val, y_val), (X_test, y_test))`` where each
        ``X`` has shape ``(samples, seq_length, 1)``.
    """
    split_sources = (
        (train_dir, train_file),
        (val_dir, val_file),
        (test_dir, test_file),
    )

    # Load every split first so a missing file surfaces before any windowing.
    frames = [
        pd.read_csv(os.path.join(processed_path, split_dir, split_file),
                    index_col=0, parse_dates=True)
        for split_dir, split_file in split_sources
    ]

    prepared = []
    for frame in frames:
        X, y = create_sequences(frame, seq_length, feature_col, target_col)
        # Reshape for LSTM [samples, timesteps, features]. Using seq_length
        # rather than X.shape[1] keeps this working when a split yields no
        # complete window and X comes back without a second dimension.
        X = X.reshape((X.shape[0], seq_length, 1))
        prepared.append((X, y))

    return tuple(prepared)

In [5]:
# Build the three splits. The same file name is looked up in each split's own
# sub-directory (the train/val/test defaults of load_and_prepare_data).
# NOTE(review): the recorded output of this cell ends in KeyError: 'Scaled', so
# the loaded CSV apparently has no 'Scaled' column -- confirm the column name.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = load_and_prepare_data(
    PROCESSED_DATA_PATH,
    'scaled_data.csv',
    'scaled_data.csv',
    'scaled_data.csv',
    feature_col='Scaled',  # windows come from the Scaled column, not Close
    target_col='Target',
)

# Report the shape of every split.
print(
    f'Training set shape: {X_train.shape}',
    f'Validation set shape: {X_val.shape}',
    f'Test set shape: {X_test.shape}',
    sep='\n',
)

# Show the final five windows of every split as a quick sanity check.
print("Training set last 5 rows:", X_train[-5:], sep='\n')
print("Validation set last 5 rows:", X_val[-5:], sep='\n')
print("Test set last 5 rows:", X_test[-5:], sep='\n')

Original data length: 17053


KeyError: 'Scaled'