In [1]:
# --- STEP 1: Install dependencies ---
!pip install transformers torch tqdm -q

In [2]:
# --- STEP 2: Imports ---
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F
from tqdm import tqdm
from google.colab import files

In [3]:
# --- STEP 3: Load FinBERT model ---
MODEL_NAME = "yiyanghkust/finbert-tone"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()

print("âœ… FinBERT model loaded successfully!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

âœ… FinBERT model loaded successfully!


In [5]:
# --- STEP 4: Load financial news dataset ---
news_path = "/content/financial_news_cleaned.csv"  # uploaded file
df = pd.read_csv(news_path)

print("ðŸ“„ Dataset loaded! Columns:", df.columns.tolist())
print(df.head())

# Ensure published_date is datetime
df["published_date"] = pd.to_datetime(df["published_date"])

ðŸ“„ Dataset loaded! Columns: ['URL', 'Content', 'Summary', 'Sentiment', 'published_date']
                                                 URL  \
0  http://www.financialexpress.com/economy/gst-2-...   
1  http://www.financialexpress.com/market/vas-inf...   
2  http://www.livemint.com/Money/CM8uMHgC9QPZIZT5...   
3  http://www.livemint.com/Companies/zeQXv4iWqz0j...   
4  http://www.financialexpress.com/money/personal...   

                                             Content  \
0  The year 2017 will be forever etched in Indian...   
1  Markets regulator Sebi has imposed a total pen...   
2  US and Asian equities end the year 2017 on a m...   
3  New Delhi/Bengaluru: Salil Parekh took charge ...   
4  2017 was an eventful year and full of surprise...   

                                             Summary Sentiment  \
0  the year 2017 will be forever etched in india'...   Neutral   
1  11 present and former promoters of Vas Infrast...  Negative   
2  major markets in Asia closed narro

In [6]:
# --- STEP 5: Define helper function to compute sentiment impact (-1 to 1) ---
def get_sentiment_score(text):
    if not isinstance(text, str) or text.strip() == "":
        return 0.0
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
        probs = F.softmax(outputs.logits, dim=-1)
        labels = ["negative", "neutral", "positive"]
        score = probs[0, 2].item() - probs[0, 0].item()  # (pos - neg)
    return round(score, 2)

In [7]:
# --- STEP 6: Compute sentiment score for each news content ---
tqdm.pandas(desc="Analyzing News Sentiment")
df["Impact"] = df["Content"].progress_apply(get_sentiment_score)

print("\nâœ… Sample sentiment scores:")
print(df[["Content", "Impact"]].head())

Analyzing News Sentiment: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 9434/9434 [1:01:48<00:00,  2.54it/s]


âœ… Sample sentiment scores:
                                             Content  Impact
0  The year 2017 will be forever etched in Indian...   -0.01
1  Markets regulator Sebi has imposed a total pen...    0.82
2  US and Asian equities end the year 2017 on a m...   -0.00
3  New Delhi/Bengaluru: Salil Parekh took charge ...   -0.94
4  2017 was an eventful year and full of surprise...    0.20





In [8]:
# --- STEP 7: Aggregate average impact per date ---
impact_df = df.groupby(df["published_date"].dt.date)["Impact"].mean().reset_index()
impact_df.rename(columns={"published_date": "Date"}, inplace=True)
impact_df["Impact"] = impact_df["Impact"].round(2)

print("\nâœ… Final aggregated dataset preview:")
print(impact_df.head())


âœ… Final aggregated dataset preview:
         Date  Impact
0  2018-01-01    0.27
1  2018-01-02   -0.22
2  2018-01-03   -0.50
3  2018-01-04    0.32
4  2018-01-05    0.20


In [9]:
# --- STEP 8: Save final impact dataset ---
output_path = "/content/news_daily_impact.csv"
impact_df.to_csv(output_path, index=False)
print(f"\nðŸ’¾ Saved daily impact dataset to: {output_path}")


ðŸ’¾ Saved daily impact dataset to: /content/news_daily_impact.csv


In [10]:
# --- STEP 9: Download the CSV to your system (optional) ---
files.download(output_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>