In [None]:
# This script installs all required libraries for data analysis, plotting, LLM workflows, and notebook imports.
# Note: The installation command is commented out to prevent accidental execution.
# --------------------------------------------------------------------------------

# Required Libraries:
# pandas: Data manipulation and analysis
# numpy: Numerical computations
# matplotlib: Data visualization
# yfinance: Downloading financial data from Yahoo Finance
# langchain: Building LLM-powered applications and chains
# import_ipynb: Importing Jupyter notebooks as Python modules
# scipy: Scientific computing (e.g., signal processing)
# statsmodels: Statistical modeling and time series analysis
# xgboost: Gradient boosting for machine learning
# selenium: Web scraping and browser automation
# webdriver_manager: Managing browser drivers for Selenium
# transformers: State-of-the-art NLP models
# peft: Parameter-efficient fine-tuning for transformers
# accelerate: Optimizing training and inference of models
# bitsandbytes: Efficient training of large models with 8-bit optimizers
# tensorflow: Deep learning framework
# torch: PyTorch deep learning framework
# tensorboard: Visualization tool for TensorFlow and PyTorch
# scikit-learn: Machine learning library for Python (version 1.6.1)

# Install all required libraries
#%pip install -U tensorflow pandas torch tensorboard numpy matplotlib yfinance langchain import_ipynb scipy statsmodels xgboost selenium webdriver_manager transformers peft accelerate bitsandbytes
#%pip install scikit-learn==1.6.1

In [None]:
import os
# -------------------------------------------------------------------------
#  LangChain Imports
# -------------------------------------------------------------------------
import datetime
#from langchain.chains import SequentialChain, LLMChain
#from langchain.prompts import PromptTemplate
#from langchain.llms import OpenAI  # Replace with any LLM provider
#from langchain.output_parsers import RegexParser
# -------------------------------------------------------------------------
# Other Imports
# -------------------------------------------------------------------------
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from datetime import datetime, timedelta
from torch.utils.data import Dataset
import statsmodels.api as sm
import torch.nn as nn
import torch.optim as optim
import torch
# -------------------------------------------------------------------------
#  Custom Imports
from modules.modules import SetTransformer, VariableSetDataset
from modules.functions import *
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Import predict_sentiment from your finbert_finetune_refactored.py
#from finbert_finetune_refactored import predict_sentiment
# -------------------------------------------------------------------------
#  Web Scraping Imports
# -------------------------------------------------------------------------
from bs4 import BeautifulSoup
import requests
import pandas as pd
import time
from datetime import timedelta
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
# -------------------------------------------------------------------------
import tensorflow_hub as hub

In [2]:
def predict_sentiment(text, model, tokenizer, device):
    """
    Predict sentiment for a given text using the trained model
    Returns: Dictionary containing prediction results including logits
    """
    try:
        # Prepare model
        model.eval()
        model = model.to(device)

        # Tokenize input
        inputs = tokenizer(
            text,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        ).to(device)

        # Make prediction
        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            probs = torch.nn.functional.softmax(logits, dim=-1)
            predicted_class = torch.argmax(probs, dim=1)[0].item()

        # Map prediction to sentiment
        sentiment_map = {0: "positive", 1: "neutral", 2: "negative"}
        confidence = probs[0][predicted_class].item()

        return {
            "text": text,
            "sentiment": sentiment_map[predicted_class],
            "confidence": f"{confidence:.4f}",
            "logits": logits[0].cpu().numpy().tolist(),
            "probabilities": {
                "positive": f"{probs[0][0].item():.4f}",
                "neutral": f"{probs[0][1].item():.4f}",
                "negative": f"{probs[0][2].item():.4f}"
            }
        }

    except Exception as e:
        print(f"Error in prediction: {e}")
        return None


In [3]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer


def test_model(model_path="./finbert_best_model_merged"):
    """
    Test the trained model on sample texts and print sentiment, logits, and probabilities.
    """
    try:
        # Load model and tokenizer
        device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
        model = AutoModelForSequenceClassification.from_pretrained(model_path)
        tokenizer = AutoTokenizer.from_pretrained(model_path)

        # Sample texts for testing
        test_texts = [
            "Dec. gold climbs $9.40, or 0.7%, to settle at $1,356.90/oz",
            "gold prices rebound rs 350 on global cues, weak rupee",
            "Gold futures down at Rs 30,244 ",
            "gold, oil trade lower as jobs data weigh"
        ]

        # Make predictions
        results = []
        for text in test_texts:
            prediction = predict_sentiment(text, model, tokenizer, device)
            if prediction:
                results.append(prediction)
                print("\nText:", text)
                print("Sentiment:", prediction["sentiment"])
                print("Confidence:", prediction["confidence"])
                print("Logits:", prediction["logits"])
                print("Class Probabilities:", prediction["probabilities"])

        return results

    except Exception as e:
        print(f"Error in testing: {e}")
        return None

# Example usage:
if __name__ == "__main__":
    test_model()

Error in testing: Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: './finbert_best_model_merged'.
