####  1. Select Stock Ticker

In [1]:
import warnings
warnings.filterwarnings("ignore")

# Set the ticker symbol
ticker = "INFY.NS"

#### 2. Select appropriate device

In [2]:
import torch

# Select appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


#### 3. Run the CUDA check

In [3]:
from Config.check_cuda_config import check_cuda_configuration

# Run the CUDA check
check_cuda_configuration()

CUDA is available with 1 device(s).
Current device: 0 - NVIDIA GeForce RTX 3050 Laptop GPU
CUDA version: 12.1
Total GPU memory     : 4.29444301 GB
Currently allocated  : 0.00000000 GB
Currently reserved   : 0.00000000 GB


True

#### 4. Fetch stock data

In [4]:
import os

from DataPipeline.data_fetcher import fetch_daily_data_ist

# Create the path if it doesn't exist
raw_data_dir = "Data/RawData"

# Fetch stock data
data = fetch_daily_data_ist(ticker)

# Construct filename and full path
filename = f"{ticker[:-3]}.csv"
filepath = os.path.join(raw_data_dir, filename)

# Save to CSV
data.to_csv(filepath)
print(f"Fetched raw data saved to: {filepath}")
data.head()

Fetched raw data saved to: Data/RawData\INFY.csv


Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits
2010-01-04,232.496428,234.27801,231.18697,232.728043,4069264,0.0,0.0
2010-01-05,233.387211,234.340353,231.614546,233.507477,6895528,0.0,0.0
2010-01-06,238.999244,238.999244,229.387617,230.10025,6817288,0.0,0.0
2010-01-07,230.411991,230.411991,224.318968,224.929169,10892600,0.0,0.0
2010-01-08,224.390205,224.773249,218.956381,219.508667,12649312,0.0,0.0


#### 5. Calculate Technical Indicators

In [5]:
from DataPipeline.technical_indicators import TechnicalIndicators

# Calculate technical indicators
indicators = TechnicalIndicators(data)
indicators_data = indicators.calculate_all()

#### 6. Data Splitting

In [6]:
print(f"\nProcessed data shape after indicators: {indicators_data.shape}")

# Split data into train and validation (85%) and test (15%)
total_size = len(indicators_data)
train_size = int(0.85 * total_size)

train_data = indicators_data[:train_size].copy()
test_data = indicators_data[train_size:].copy()

print(f"\nData split sizes:")
print(f"\nTrain: {len(train_data)}, Test: {len(test_data)}")


Processed data shape after indicators: (3802, 81)

Data split sizes:

Train: 3231, Test: 571


#### 7. Dropping NaN Columns and Rows

In [7]:
from DataPipeline.data_cleaning import drop_all_null_columns

# Drop all-null columns
train_data_ready, dropped_columns = drop_all_null_columns(train_data)
test_data_ready, _ = drop_all_null_columns(test_data)

print(f"\nDropped columns due to nulls: {dropped_columns}")
print(f"Final shapes - Train: {train_data_ready.shape}, Test: {test_data_ready.shape}")

No columns with all null values found.
No columns with all null values found.

Dropped columns due to nulls: []
Final shapes - Train: (3204, 81), Test: (570, 81)


#### 8. Data Preparation

In [8]:
from Optimization.prepare_data_for_hpo import prepare_data_for_hpo

# Prepare data for LSTM (only train loader, no val)
data, train_loader, val_loader, X_scaler, y_scaler, feature_names, num_features = prepare_data_for_hpo(
    Modelling_data=train_data_ready,
    model_type="LSTM",
    batch_size=64,
    seq_length=30
)

#### 9. Hyper Parameter Optimization

In [10]:
from Optimization.hyperparameter_optimization import run_hyperparameter_optimization
data_file = train_data_ready  # Replace with your actual data file
model_type = "lstm"
n_trials = 2
output_dir = "Results/HPO"

# Print welcome message
print(f"Starting Bayesian HPO for {model_type.upper()} model with {n_trials} trials")
print(f"Data file: {data_file}")
print(f"Output directory: {output_dir}")

# Run hyperparameter optimization
best_params, study = run_hyperparameter_optimization(
    data_file, 
    output_dir=output_dir, 
    n_trials=n_trials, 
    model_type=model_type
)

print("\nOptimization complete!")
print(f"Best validation RMSE: {study.best_value:.8f}")
print(f"Best parameters: {best_params}")



[I 2025-05-31 03:44:43,411] A new study created in memory with name: lstm_multifidelity_hpo_20250531_034443


Starting Bayesian HPO for LSTM model with 2 trials
Data file:                    Open         High          Low        Close    Volume  \
2010-02-04   219.579937   220.025332   214.903285   216.377548   6176408   
2010-02-05   212.934601   213.651692   207.465155   209.576324  13544872   
2010-02-08   211.553872   217.531086   208.845877   214.110458   9252480   
2010-02-09   214.146092   221.094259   213.277571   220.363815  10387392   
2010-02-10   221.896065   222.359272   218.065663   219.753708   7187984   
...                 ...          ...          ...          ...       ...   
2023-01-25  1461.103096  1469.603373  1453.311176  1457.325195   4158617   
2023-01-27  1460.158697  1464.833804  1423.701977  1434.799561   6209955   
2023-01-30  1443.913768  1458.269746  1435.602340  1453.358521   6964719   
2023-01-31  1462.897567  1462.897567  1428.990886  1448.588745   9448126   
2023-02-01  1456.569612  1468.706210  1443.205380  1464.975464   6194199   

            Dividends  St

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 0/20 - Train Loss: 0.73248730, Val Loss: 5.30984044
Epoch 10/20 - Train Loss: 0.67191773, Val Loss: 5.36524117
Early stopping at epoch 10

Model Evaluation Metrics:
MSE                 : 29.770412
RMSE                : 5.456227
MAE                 : 5.420729
R2                  : -75.384056
MAPE                : 100.600278
EXPLAINED_VARIANCE  : 0.009341
MAX_ERROR           : 6.894820
[I 2025-05-31 03:44:47,899] Trial 0 finished with value: 5.456226942225586 and parameters: {'hidden_size': 403, 'num_layers': 3, 'dropout': 0.36599697090570255, 'learning_rate': 0.0006251373574521745, 'weight_decay': 2.9380279387035354e-06, 'batch_norm': True, 'cell_dropout': 0.4330880728874676, 'optimizer': 'sgd', 'lr_scheduler': 'none', 'loss_function': 'mae'}. Best is trial 0 with value: 5.456226942225586.
Epoch 0/178 - Train Loss: 0.73487945, Val Loss: 5.35169423
Epoch 10/178 - Train Loss: 0.72684696, Val Loss: 5.38345969
Epoch 20/178 - Train Loss: 0.72453008, Val Loss: 5.39666748
Epoch 30/178 - 

#### 10. Test Set Predictions

In [None]:
# ====================================
# Imports
# ====================================
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset

# Custom modules
from Models.evaluate_model import evaluate_model
from Optimization.select_features_for_model import select_features_for_model
from PlotScripts.get_time_series_comparison_plot import time_series_comparison_plot
from PlotScripts.get_scatter_plot import scatter_plot


# ====================================
# Step 1: Data Preparation
# ====================================
df = test_data_ready.copy()  # Work on a copy to preserve original
valid_features = select_features_for_model(df, "lstm")  # Select features suitable for LSTM
seq_length = 30  # Length of input sequences


# ====================================
# Step 2: Feature & Target Scaling
# ====================================
X_all = X_scaler.transform(df[valid_features].values)   # Feature scaling
y_all = df["Close"].values.reshape(-1, 1)               # Target variable
y_all_scaled = y_scaler.transform(y_all)                # Target scaling


# ====================================
# Step 3: Create Sliding Windows (Sequence Generation)
# ====================================
X_windows = []  # Sequences of input features
y_targets = []  # Corresponding next-day targets

for end_ix in range(seq_length, len(X_all)):
    start_ix = end_ix - seq_length
    X_windows.append(X_all[start_ix:end_ix])
    y_targets.append(y_all_scaled[end_ix])

X_tensor = torch.tensor(X_windows, dtype=torch.float32)
y_tensor = torch.tensor(y_targets, dtype=torch.float32)


# ====================================
# Step 4: DataLoader Creation
# ====================================
dataset = TensorDataset(X_tensor, y_tensor)
loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False)


# ====================================
# Step 5: Model Evaluation
# ====================================
metrics = evaluate_model(Trained_Model, data_loader=loader)

# ====================================
# Step 6: Model Inference for Plotting
# ====================================
Trained_Model.eval()  # Set model to evaluation mode
X_tensor = X_tensor.to(device)

with torch.no_grad():
    y_pred_scaled = Trained_Model(X_tensor).view(-1, 1).cpu().numpy()

# Inverse scale to get actual price values
y_pred = y_scaler.inverse_transform(y_pred_scaled).flatten()
y_true = y_all[seq_length:].flatten()
dates = df.index[seq_length:]


# ====================================
# Step 7: Visualization
# ====================================
time_series_comparison_plot(
    targets_original=y_true,
    predictions_original=y_pred,
    model_type="LSTM",
    output_dir="Results",
    phase="Testing"
)
scatter_plot(
    targets_original=y_true,
    predictions_original=y_pred,
    model_type="LSTM",
    output_dir="Results",
    phase="Testing"
)


# ====================================
# Step 8: Store and Print Results
# ====================================
results = {
    f"{ticker}_predictions": y_pred,
    f"{ticker}_actuals": y_true,
    **{f"{ticker}_{k}": v for k, v in metrics.items()},
}


####  11. Generate Forecast 

In [66]:
# Import the forecasting function from the utility module
from Utils.get_forecast import generate_forecast

# Create a copy of the test data to avoid modifying the original dataset
forecast_data = test_data_ready.copy()

# Generate forecast using the trained LSTM model
generate_forecast(
    forecast_data=forecast_data,  # Input data prepared for forecasting
    Trained_Model=Trained_Model,  # The trained model used for prediction
    X_scaler=X_scaler,  # Scaler used to normalize input features
    y_scaler=y_scaler,  # Scaler used to denormalize output predictions
    seq_length=30,  # Number of time steps to look back for each prediction
    ticker=ticker,  # Ticker symbol for the stock (or identifier for the time series)
    model="lstm",  # Type of model to be used ("lstm" in this case)
)

Predicted Close Price of INFY.NS for 2025-06-02: 1570.0154
