In [1]:
# This cell lists the libraries the notebook needs for data analysis, plotting, LLM workflows, and notebook imports.
# Note: The install commands at the bottom of the cell are commented out to prevent accidental execution; uncomment them to install.
# --------------------------------------------------------------------------------

# Required Libraries:
# pandas: Data manipulation and analysis
# numpy: Numerical computations
# matplotlib: Data visualization
# yfinance: Downloading financial data from Yahoo Finance
# langchain: Building LLM-powered applications and chains
# import_ipynb: Importing Jupyter notebooks as Python modules
# scipy: Scientific computing (e.g., signal processing)
# statsmodels: Statistical modeling and time series analysis
# xgboost: Gradient boosting for machine learning
# selenium: Web scraping and browser automation
# webdriver_manager: Managing browser drivers for Selenium
# transformers: State-of-the-art NLP models
# peft: Parameter-efficient fine-tuning for transformers
# accelerate: Optimizing training and inference of models
# bitsandbytes: Efficient training of large models with 8-bit optimizers
# tensorflow: Deep learning framework
# torch: PyTorch deep learning framework
# tensorboard: Visualization tool for TensorFlow and PyTorch
# scikit-learn: Machine learning library for Python (version 1.6.1)

# Install all required libraries
#%pip install -U tensorflow pandas torch tensorboard numpy matplotlib yfinance langchain import_ipynb scipy statsmodels xgboost selenium webdriver_manager transformers peft accelerate bitsandbytes
#%pip install scikit-learn==1.6.1

In [19]:
%pip install gradio

Collecting gradioNote: you may need to restart the kernel to use updated packages.

  Downloading gradio-5.34.2-py3-none-any.whl (54.3 MB)
Collecting tomlkit<0.14.0,>=0.12.0
  Downloading tomlkit-0.13.3-py3-none-any.whl (38 kB)
Collecting safehttpx<0.2.0,>=0.1.6
  Downloading safehttpx-0.1.6-py3-none-any.whl (8.7 kB)
Collecting uvicorn>=0.14.0
  Downloading uvicorn-0.34.3-py3-none-any.whl (62 kB)
Collecting aiofiles<25.0,>=22.0
  Downloading aiofiles-24.1.0-py3-none-any.whl (15 kB)
Collecting semantic-version~=2.0
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)
Collecting typer<1.0,>=0.12
  Downloading typer-0.16.0-py3-none-any.whl (46 kB)
Collecting ffmpy
  Downloading ffmpy-0.6.0-py3-none-any.whl (5.5 kB)
Collecting fastapi<1.0,>=0.115.2
  Downloading fastapi-0.115.13-py3-none-any.whl (95 kB)
Collecting starlette<1.0,>=0.40.0
  Downloading starlette-0.47.1-py3-none-any.whl (72 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Collecting g

You should consider upgrading via the 'c:\Users\Tarun Vinjamuru\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [2]:
import os
# -------------------------------------------------------------------------
#  LangChain Imports
# -------------------------------------------------------------------------
from langchain.chains import SequentialChain, LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import SequentialChain, TransformChain
# -------------------------------------------------------------------------
# Other Imports
# -------------------------------------------------------------------------
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from datetime import datetime, timedelta
import statsmodels.api as sm
import torch
# -------------------------------------------------------------------------
#  Custom Imports
from modules.modules import SetTransformer, LSTMModel, VariableSetDataset
from modules.functions import *
from modules.model_run_functions import *




  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# =========================================================================
# Project root. Set the WORKAREA environment variable to override the
# machine-specific default below.
# The default is a raw string: it contains Windows backslashes (\S, \D, \P, \F)
# that a normal string literal treats as invalid escape sequences
# (DeprecationWarning, SyntaxWarning since Python 3.12). The resulting value is
# exactly the same as before.
# =========================================================================
WORKAREA = os.getenv("WORKAREA", r"D:/CAREER/IISC_B/Academics/Courses\SEM_3\DA_225o\Project\DL-7-25\Final")
# =========================================================================
# Get today's date and the next day in YYYY-MM-DD format
# =========================================================================
# Sample the clock once so `today` and `next_day` are always exactly one day
# apart, even when the cell runs right at midnight.
_now = datetime.now()
today = _now.strftime("%Y-%m-%d")
next_day = (_now + timedelta(days=1)).strftime("%Y-%m-%d")
print(f"Today's date: {today}")
print(f"Next day's date: {next_day}")

Today's date: 2025-06-23
Next day's date: 2025-06-24


In [4]:
# --------------------------------------------------------------------
# Load the gold price history
# --------------------------------------------------------------------
# Date range requested when the price history has to be downloaded.
start = datetime(2010, 1, 1)
end = datetime(2026, 1, 1)

# Use the CSV with prices, technical indicators and sentiment when it exists;
# otherwise download the prices and derive the indicators/sentiment columns.
gold_prices_csv = os.path.join(WORKAREA, "Tarun/data/GOLDBEES_ETF_price_data_technical_indicators_sentiment.csv")
if os.path.exists(gold_prices_csv):
    gold = pd.read_csv(gold_prices_csv, parse_dates=['Date'], index_col='Date')
else:
    gold = generate_sentiment_from_trend_with_labels(add_technical_indicators(download_gold_prices(start, end)))

# Last available closing price; the news-model prediction is expressed relative to it.
current_price = gold['Close'].iloc[-1]
print(f"Current Gold Price: {current_price}")

# --------------------------------------------------------------------
# Device and model locations
# --------------------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# NOTE: despite their names, the four variables below hold model *directories*,
# not loaded models. They are handed to the predict_next_day_gold_price_* helpers
# as-is; presumably those helpers locate the dated model file inside each
# directory -- confirm in modules.model_run_functions.
lstw_model           = f'{WORKAREA}/Tarun/Model/LSTM/'
arimax_model         = f'{WORKAREA}/Tarun/Model/Arimax/'
random_forest_model  = f'{WORKAREA}/Tarun/Model/RandomForest/'
xgboost_model        = f'{WORKAREA}/Tarun/Model/XGBoost/'
news_model_path      = f'{WORKAREA}/Tarun/Model/final_model.pth'

# The ensemble model file is date-stamped, so it only exists once today's
# ensemble has been saved. Fail with an actionable message instead of a bare
# open() error when it is missing.
ensemble_model_path  = f'{WORKAREA}/Tarun/Model/Final_Ensemble/ensemble_model_{today}.pkl'
if not os.path.exists(ensemble_model_path):
    raise FileNotFoundError(
        f"Ensemble model for {today} not found at {ensemble_model_path}; "
        "train/save today's ensemble model before running this notebook."
    )
# SECURITY: load_pickle unpickles the file, which can execute arbitrary code.
# Only load model files produced by this project.
ensemble_model       = sm.load_pickle(ensemble_model_path)

Current Gold Price: 82.2300033569336
Using device: cpu


In [5]:
## TODO [Yaswanth] : Replace this with today's news articles scraping.

## TODO [Tejashwini] : cleaning script for scraped news data.


In [6]:
## TODO [Mohan]: Integrate sentiment extraction model here
# UI Based inputs + 



In [12]:
## TODO [Adithya] : Insert Topic extraction model here.


# 2. Define a function to generate dummy input (replace with real input later)
def generate_dummy_news_input(device, set_size=10, dim_input=512, seed=None):
    """Create placeholder inputs for the news model until real encodings are available.

    Parameters
    ----------
    device : torch.device or str
        Device on which the returned tensors are placed.
    set_size : int, default 10
        Number of elements in the (single) input set.
        Presumably one element per news article -- TODO confirm.
    dim_input : int, default 512
        Size of each element's encoding vector (the model in this notebook is
        built with ``dim_input=512``).
    seed : int or None, default None
        When given, the random encodings are drawn from a dedicated generator
        seeded with this value, so repeated calls return identical tensors.
        When None, NumPy's global random state is used (original behaviour).

    Returns
    -------
    (encodings, mask) : tuple of torch.Tensor
        ``encodings`` is float32 with shape ``(1, set_size, dim_input)`` and
        values drawn uniformly from [0, 1); ``mask`` is a float32 tensor of
        ones with shape ``(1, set_size)``.
    """
    shape = (1, set_size, dim_input)
    if seed is None:
        values = np.random.rand(*shape).astype(np.float32)
    else:
        values = np.random.default_rng(seed).random(shape, dtype=np.float32)
    encodings = torch.from_numpy(values).to(device)
    mask = torch.ones((1, set_size), dtype=torch.float32, device=device)
    return encodings, mask

In [15]:
# 1. Define a function to load the SetTransformer model and weights
def load_news_llm_model(device, model_path):
    """Build the SetTransformer news model and restore its weights when available.

    Parameters
    ----------
    device : torch.device or str
        Device the model is moved to (and passed on to ``load_checkpoint``).
    model_path : str
        Path of the saved checkpoint.

    Returns
    -------
    The SetTransformer instance. If ``model_path`` exists, this is the model
    returned by ``load_checkpoint``; otherwise it is the freshly constructed
    model with its initial (untrained) weights, and a RuntimeWarning is issued
    so the caller knows the predictions are not meaningful.
    """
    import warnings  # local import: only needed for the missing-checkpoint notice

    news_model = SetTransformer(
        dim_input=512,
        num_outputs=1,
        dim_output=1,
        num_inds=32,
        dim_hidden=128,
        num_heads=4,
        ln=True
    ).to(device)
    if os.path.exists(model_path):
        # load_checkpoint returns a pair; the second element is not needed here.
        news_model, _ = load_checkpoint(model_path, news_model, device)
    else:
        # Keep the best-effort fallback, but never silently.
        warnings.warn(
            f"News model checkpoint not found at {model_path!r}; "
            "using an untrained SetTransformer, so its prediction is not meaningful.",
            RuntimeWarning,
            stacklevel=2,
        )
    return news_model

# 3. Define the transform function for the TransformChain
def news_llm_transform(inputs):
    """Predict the next price from the news model and the current price.

    Parameters
    ----------
    inputs : dict
        Must contain ``"device"``, ``"model_path"`` and ``"current_price"``.

    Returns
    -------
    The predicted price, computed as ``current_price * (1 + pred)``; the model
    output is therefore treated as a relative change.

    NOTE: this returns a bare number, not a dict. LangChain's TransformChain
    expects its transform callable to return a dict keyed by output variable,
    so the result must be wrapped before it is used as a chain transform.
    NOTE: the model input currently comes from ``generate_dummy_news_input``
    (random placeholder data), and the model is rebuilt on every call.
    """
    news_model = load_news_llm_model(inputs["device"], inputs["model_path"])
    # torch.no_grad() only disables gradient tracking; eval() is what switches
    # layers such as dropout to inference behaviour.
    news_model.eval()
    encodings, mask = generate_dummy_news_input(inputs["device"])
    with torch.no_grad():
        pred = news_model(encodings, mask=mask)
    # Convert a single-element tensor to a plain Python number.
    if hasattr(pred, "item"):
        pred = pred.item()
    predicted_price = inputs["current_price"] * (1 + pred)
    return predicted_price

In [18]:
# --------------------------------------------------------------------
# Wrap each model's prediction function in a TransformChain so the
# individual predictions can be orchestrated by a SequentialChain.
# A TransformChain's transform callable receives the dict of known values and
# must return a dict keyed by the chain's output variables.
# --------------------------------------------------------------------
def _make_model_chain(model_key, output_key, predict_fn):
    """Build a TransformChain around one ``predict_fn(df, model)`` prediction function.

    model_key  -- name of the chain input whose value is passed as the second
                  argument of ``predict_fn``.
    output_key -- name under which the prediction is published to later chains.
    predict_fn -- callable invoked as ``predict_fn(inputs["df"], inputs[model_key])``.
    """
    return TransformChain(
        input_variables=["df", model_key],
        output_variables=[output_key],
        transform=lambda inputs: {output_key: predict_fn(inputs["df"], inputs[model_key])},
    )


arimax_chain = _make_model_chain("arimax_model", "predicted_price_arimax", predict_next_day_gold_price_arimax)
rf_chain = _make_model_chain("random_forest_model", "predicted_price_rf", predict_next_day_gold_price_rf)
xgb_chain = _make_model_chain("xgboost_model", "predicted_price_xgb", predict_next_day_gold_price_xgboost)
lstm_chain = _make_model_chain("lstw_model", "predicted_price_lstw", predict_next_day_gold_price_lstm)

news_llm_chain = TransformChain(
    input_variables=["current_price", "device", "model_path"],
    output_variables=["predicted_price_news_llm"],
    # news_llm_transform returns a bare number, so wrap it in the dict that
    # TransformChain requires (same shape as the other chains' outputs).
    transform=lambda inputs: {"predicted_price_news_llm": news_llm_transform(inputs)},
)

# Ensemble step: consumes the five individual predictions produced above.
ensemble_chain = TransformChain(
    input_variables=[
        "ensemble_model",
        "predicted_price_arimax",
        "predicted_price_xgb",
        "predicted_price_rf",
        "predicted_price_lstw",
        "predicted_price_news_llm"
    ],
    output_variables=["ensemble_results"],
    transform=lambda inputs: {
        "ensemble_results": predict_next_day_gold_price_ensemble(
            inputs["ensemble_model"],
            inputs["predicted_price_arimax"],
            inputs["predicted_price_xgb"],
            inputs["predicted_price_rf"],
            inputs["predicted_price_lstw"],
            inputs["predicted_price_news_llm"]
        )
    }
)

# --------------------------------------------------------------------
# Orchestrate the workflow with SequentialChain
# --------------------------------------------------------------------

# Inputs shared by several steps
general_inputs = {
    "df": gold,
    "current_price": current_price,
    "device": device
}

# Inputs for the time series models (one entry per model chain)
ts_inputs = {
    "arimax_model": arimax_model,
    "random_forest_model": random_forest_model,
    "xgboost_model": xgboost_model,
    "lstw_model": lstw_model
}

# Input for the news LLM chain
news_llm_inputs = {
    "model_path": news_model_path
}

# Input for the ensemble chain
ensemble_inputs = {
    "ensemble_model": ensemble_model,
}
# Backward-compatible alias for the original (misspelled) variable name.
emsemble_inputs = ensemble_inputs

full_inputs = {
    **general_inputs,
    **ts_inputs,
    **news_llm_inputs,
    **ensemble_inputs
}

# Compose the full sequence. The chains run in list order, so the ensemble
# chain comes last: it consumes the outputs of the five chains before it.
full_seq_chain = SequentialChain(
    chains=[arimax_chain, rf_chain, xgb_chain, lstm_chain, news_llm_chain, ensemble_chain],
    # Derived from full_inputs so the declared inputs can never drift from the
    # values actually supplied.
    input_variables=list(full_inputs),
    output_variables=[
        "predicted_price_arimax", "predicted_price_rf", "predicted_price_xgb", "predicted_price_lstw",
        "predicted_price_news_llm",
        "ensemble_results"
    ]
)

# Execute every chain in order; `results` maps each declared output variable
# to its value.
# NOTE(review): recent LangChain releases deprecate calling a chain directly in
# favour of `.invoke(...)` -- confirm the installed version before switching.
results = full_seq_chain(full_inputs)
ensemble_results = results["ensemble_results"]

# Horizontal rule reused between the sections of the report.
_SEPARATOR = "---------------------------------------------------"

# Header: current price and the date being predicted
print(_SEPARATOR)
print(f"Current Gold Price: {current_price}")
print(_SEPARATOR)
print(f"Predictions for next day: {next_day}")
print(_SEPARATOR)

# One line per individual model
print(f"ARIMAX: Predicted gold price: {results['predicted_price_arimax']}")
print(f"Random Forest: Predicted gold price: {results['predicted_price_rf']}")
print(f"XGBoost: Predicted gold price: {results['predicted_price_xgb']}")
print(f"LSTM: Predicted gold price: {results['predicted_price_lstw']}")
print(f"News LLM: Predicted gold price: {results['predicted_price_news_llm']}")

# Ensemble summary: the 'meta_ensemble' entries of the ensemble result
print(_SEPARATOR)
print("Ensemble Model Results:")
print(_SEPARATOR)
print(f"Predicted Price: {ensemble_results['predictions']['meta_ensemble']}")
print(f"Percentage Change: {ensemble_results['percentage_changes']['meta_ensemble']:.2f}%")

Loading existing ARIMAX model for today: D:/CAREER/IISC_B/Academics/Courses\SEM_3\DA_225o\Project\DL-7-25\Final/Tarun/Model/Arimax/arimax_2025-06-23.pkl
Loading existing Random Forest model for today: D:/CAREER/IISC_B/Academics/Courses\SEM_3\DA_225o\Project\DL-7-25\Final/Tarun/Model/RandomForest/random_forest_2025-06-23.pkl
Loading XGBoost model from D:/CAREER/IISC_B/Academics/Courses\SEM_3\DA_225o\Project\DL-7-25\Final/Tarun/Model/XGBoost/xgboost_2025-06-23.pkl
Loading LSTM model from D:/CAREER/IISC_B/Academics/Courses\SEM_3\DA_225o\Project\DL-7-25\Final/Tarun/Model/LSTM/lstm_2025-06-23.pt


TypeError: 'int' object is not callable