In [None]:
from dotenv import load_dotenv
import os
import wandb

load_dotenv()

wandb_api_key = os.getenv("wand_api_key")

!wandb login $wandb_api_key
wandb.login()

run = wandb.init(project='huggingface')
artifact = run.use_artifact('finbert-augmented:latest', type="model")
artifact_dir = artifact.download()

In [6]:
import torch
from transformers import AutoModelForSequenceClassification

model_name = "ProsusAI/finbert"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

artifact_dir = os.getenv("artifact_dir")

model_path = f"{artifact_dir}/finbert.pth"
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

model.eval()


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [7]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained(model_name)

text = "The stock market is performing exceptionally well today."

inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

with torch.no_grad():
    outputs = model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

print(predictions)


tensor([[4.8687e-04, 8.2450e-05, 9.9943e-01]])


In [8]:
import requests
import json
from datetime import datetime, timedelta


class NewsRetrievalAgent:
    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "https://newsapi.org/v2/everything?q=NSE"

    def fetch_news(self, query="IPO OR initial public offering", days=7, sources=None):
        date_from = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')
        params = {
            "q": query,
            "from": date_from,
            "language": "en",
            "sortBy": "publishedAt",
            "apiKey": self.api_key
        }
        if sources:
            params["sources"] = sources
        
        response = requests.get(self.base_url, params=params)
        if response.status_code == 200:
            data = response.json()
            return data.get("articles", [])
        else:
            print(f"Error fetching news: {response.status_code}")
            return []

    def summarize_news(self, articles, top_n=5):
        summaries = []
        for article in articles[:top_n]:
            summaries.append({
                "title": article["title"],
                "source": article["source"]["name"],
                "published_at": article["publishedAt"],
                "url": article["url"]
            })
        return summaries

news_api_key = os.getenv("news_api_key")
news_agent = NewsRetrievalAgent(news_api_key)
news_articles = news_agent.fetch_news()
summaries = news_agent.summarize_news(news_articles)

for news in summaries:
    print(news)

{'title': 'Gensol Engineering shares hit another lower circuit level; drop for 11th day after SEBI curbs', 'source': 'BusinessLine', 'published_at': '2025-04-24T06:11:47Z', 'url': 'https://www.thehindubusinessline.com/markets/stock-markets/gensol-engineering-shares-hit-another-lower-circuit-level-drop-for-11th-day-after-sebi-curbs/article69485650.ece'}
{'title': 'Sensex falls! But these stocks are up over 15% on BSE', 'source': 'The Times of India', 'published_at': '2025-04-24T05:56:37Z', 'url': 'https://economictimes.indiatimes.com/markets/stocks/stock-watch/sensex-falls-but-these-stocks-are-up-over-15-on-bse/articleshow/120573654.cms'}
{'title': 'Stock market update: Power stocks up as market falls', 'source': 'The Times of India', 'published_at': '2025-04-24T05:51:28Z', 'url': 'https://economictimes.indiatimes.com/markets/stocks/stock-watch/stock-market-update-power-stocks-up-as-market-falls/articleshow/120573535.cms'}
{'title': 'Gensol Engineering rejects reports linking promoters 

In [13]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

model_name = "ProsusAI/finbert"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

model_path = f"{artifact_dir}/finbert.pth"
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

def analyze_sentiment(news_articles):
    results = []
    for article in news_articles:
        text = article["title"] + " " + article.get("description", "")
        inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
        
        with torch.no_grad():
            outputs = model(**inputs)
        
        logits = outputs.logits
        sentiment = torch.softmax(logits, dim=1).tolist()[0]
        article["sentiment"] = {
            "negative": sentiment[0],
            "neutral": sentiment[1],
            "positive": sentiment[2]
        }
        results.append(article)

    return results

#news_articles = news_agent.summarize_news(news_articles)
news_with_sentiment = analyze_sentiment(summaries)

for news in news_with_sentiment:
    print(news["title"], news["sentiment"])


Gensol Engineering shares hit another lower circuit level; drop for 11th day after SEBI curbs {'negative': 0.9990509152412415, 'neutral': 0.0007546000415459275, 'positive': 0.00019442831398919225}
Sensex falls! But these stocks are up over 15% on BSE {'negative': 0.00047348366933874786, 'neutral': 0.02258582040667534, 'positive': 0.9769407510757446}
Stock market update: Power stocks up as market falls {'negative': 0.0008851760649122298, 'neutral': 0.9972459077835083, 'positive': 0.0018689731368795037}
Gensol Engineering rejects reports linking promoters to ED probe in Mahadev betting app Case {'negative': 0.9792022109031677, 'neutral': 0.020305577665567398, 'positive': 0.0004921953659504652}
Stock market update: Stocks that hit 52-week highs on NSE {'negative': 0.0017717990558594465, 'neutral': 0.4831587076187134, 'positive': 0.5150695443153381}


In [11]:
import requests
import csv
import os
from datetime import datetime
from transformers import BertTokenizer, AutoModelForSequenceClassification
import torch

model_name = "ProsusAI/finbert"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

artifact_dir = os.getenv("artifact_dir")
news_api_key = os.getenv("news_api_key")
model_path = f"{artifact_dir}/finbert.pth"
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

NEWS_URL = "https://newsapi.org/v2/everything"
QUERY = 'NSE OR "National Stock Exchange of India"'
LANGUAGE = "en"

def fetch_nse_news():
    params = {
        "q": QUERY,
        "language": LANGUAGE,
        "apiKey": news_api_key
    }
    response = requests.get(NEWS_URL, params=params)
    if response.status_code == 200:
        return response.json().get("articles", [])
    else:
        print(f"Error fetching news: {response.status_code}")
        return []

def analyze_sentiment(news_articles):
    results = []
    for article in news_articles:
        text = article["title"] + " " + article.get("description", "")
        inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
        
        with torch.no_grad():
            outputs = model(**inputs)

        logits = outputs.logits
        sentiment = torch.softmax(logits, dim=1)[0].tolist()

        total=sum(sentiment)
        if total>0:
            sentiment=[x/total for x in sentiment]

        sentiment = [round(x,4) for x in sentiment]
        
        article["sentiment"] = {
            "negative": sentiment[0],
            "neutral": sentiment[1],
            "positive": sentiment[2]
        }
        results.append(article)

    return results

def save_to_csv(news_with_sentiment, filename="nse_news_sentiment.csv"):
    file_exists = os.path.exists(filename)
    with open(filename, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        
        if not file_exists:
            writer.writerow(["Date", "Title", "Source", "URL", "Positive", "Neutral", "Negative"])

        for article in news_with_sentiment:
            writer.writerow([
                datetime.now().strftime("%Y-%m-%d"),
                article["title"],
                article["source"]["name"],
                article["url"],
                article["sentiment"]["positive"],
                article["sentiment"]["neutral"],
                article["sentiment"]["negative"]
            ])
    print(f"Saved {len(news_with_sentiment)} articles to {filename}")

if __name__ == "__main__":
    news_articles = fetch_nse_news()
    if news_articles:
        news_with_sentiment = analyze_sentiment(news_articles)
        save_to_csv(news_with_sentiment)
    else:
        print("No news articles retrieved.")


Saved 100 articles to nse_news_sentiment.csv


In [12]:
import requests
import csv
import os
from datetime import datetime
from transformers import BertTokenizer, BertForSequenceClassification
import torch

model_name = "ProsusAI/finbert"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

model_path = f"{artifact_dir}/finbert.pth"
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

NEWS_URL = "https://newsapi.org/v2/everything"
QUERY = 'BSE OR "Bombay Stock Exchange of India"'
LANGUAGE = "en"

def fetch_nse_news():
    params = {
        "q": QUERY,
        "language": LANGUAGE,
        "apiKey": news_api_key
    }
    response = requests.get(NEWS_URL, params=params)
    if response.status_code == 200:
        return response.json().get("articles", [])
    else:
        print(f"Error fetching news: {response.status_code}")
        return []

def analyze_sentiment(news_articles):
    results = []
    for article in news_articles:
        text = article["title"] + " " + article.get("description", "")
        inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
        
        with torch.no_grad():
            outputs = model(**inputs)

        logits = outputs.logits
        sentiment = torch.softmax(logits, dim=1)[0].tolist()

        total=sum(sentiment)
        if total>0:
            sentiment=[x/total for x in sentiment]

        sentiment = [round(x,4) for x in sentiment]
        
        article["sentiment"] = {
            "negative": sentiment[0],
            "neutral": sentiment[1],
            "positive": sentiment[2]
        }
        results.append(article)

    return results

def save_to_csv(news_with_sentiment, filename="bse_news_sentiment.csv"):
    file_exists = os.path.exists(filename)
    with open(filename, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        
        if not file_exists:
            writer.writerow(["Date", "Title", "Source", "URL", "Positive", "Neutral", "Negative"])

        for article in news_with_sentiment:
            writer.writerow([
                datetime.now().strftime("%Y-%m-%d"),
                article["title"],
                article["source"]["name"],
                article["url"],
                article["sentiment"]["positive"],
                article["sentiment"]["neutral"],
                article["sentiment"]["negative"]
            ])
    print(f"Saved {len(news_with_sentiment)} articles to {filename}")

if __name__ == "__main__":
    news_articles = fetch_nse_news()
    if news_articles:
        news_with_sentiment = analyze_sentiment(news_articles)
        save_to_csv(news_with_sentiment)
    else:
        print("No news articles retrieved.")


Saved 100 articles to bse_news_sentiment.csv
