In [15]:
from datetime import datetime

In [17]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
from tqdm import tqdm  # For progress bar

In [19]:
df=pd.read_csv("Reliance_GoogleNews_Monthly_2020_2024.csv") # Loaded the saved dataset

In [21]:
# Converting 'Published Date' to YYYY/MM/DD format
df["Published Date"] = df["Published Date"].apply(lambda x: datetime.strptime(x, "%a, %d %b %Y %H:%M:%S GMT").strftime("%Y/%m/%d"))

In [23]:
df.head()

Unnamed: 0,Headline,Published Date
0,Reliance Industries may get a non-Ambani MD fo...,2020/01/13
1,Reliance Industries posts record Q3 profit at ...,2020/01/17
2,Reliance refers to start-up playbook to grow J...,2020/01/07
3,RIL lays out road with plastic waste - The Hindu,2020/01/30
4,Reliance to enter restaurant business with Arm...,2020/01/31


In [25]:
# Droping multiple articles on a single day randomly
df = df.sample(frac=1, random_state=42)  # Shuffle for randomness; set seed for reproducibility
df.drop_duplicates(subset=["Published_Date"], inplace=True)
df.reset_index(drop=True, inplace=True)

print(f"✅ Randomly selected one article per day. Remaining: {len(df)}")
df.head()

KeyError: Index(['Published_Date'], dtype='object')

In [22]:
MODEL_NAME = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

In [24]:
def get_sentiment_score(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)

    scores = softmax(outputs.logits.numpy()[0])  # Convert logits to probabilities
    labels = ["negative", "neutral", "positive"]
    
    sentiment = labels[scores.argmax()]  # Get the sentiment label
    sentiment_score = scores.max()  # Get the highest confidence score
    
    return sentiment, sentiment_score

In [26]:
tqdm.pandas()  # Enable progress bar for DataFrame
df[["Sentiment", "Sentiment Score"]] = df["Headline"].progress_apply(lambda x: pd.Series(get_sentiment_score(x)))

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:26<00:00,  3.83it/s]


In [28]:
df.head()

Unnamed: 0,Headline,Published Date,Sentiment,Sentiment Score
0,Reliance Retail -- next big thing for Reliance...,2018/10/23,positive,0.870251
1,RIL aims to cut debt in 2018 - The Hindu,2018/01/26,negative,0.729574
2,Reliance Industries pips TCS by market capital...,2018/07/31,positive,0.843151
3,Mukesh Ambani gets additional 5 years as Relia...,2018/07/08,positive,0.802535
4,Mukesh Ambani's Reliance Industries to ride on...,2018/07/06,negative,0.638397


In [32]:
df[df["Sentiment"]=="positive"]["Sentiment Score"].mean()

0.8520471

In [34]:
df[df["Sentiment"]=="negative"]["Sentiment Score"].mean()

0.7557526