In [1]:
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
import pandas as pd
from transformers import pipeline
from datetime import datetime, timedelta
import re 
import torch

In [2]:
torch.__version__

'2.6.0+cu126'

In [3]:
# Base URL of a Tickertape stock page; a ticker symbol is appended to it.
tickertape_url = "https://www.tickertape.in/stocks/"

# Symbols whose pages are fetched.
tickers = [
    'PNBK', 'MRF', 'RAIV',
    'NTPC', 'INFY', 'RELI',
    'TCS', 'HDBK', 'BRTI',
]

# Collects one [ticker, headline, timestamp] row per scraped headline.
news_list = list()

# FinBERT text-classification pipeline used to label headlines.
pipe = pipeline(task="text-classification", model="ProsusAI/finbert")

Device set to use cuda:0


In [4]:
def convert_relative_date(relative_date, now=None):
    """Turn a relative timestamp such as ``"3 days ago"`` into a ``datetime``.

    Args:
        relative_date: Text containing ``"<N> <unit>(s) ago"``, where the unit
            is minute, hour, day, week, month or year. Matching is
            case-insensitive, the phrase may be preceded by other text, and
            ``"a"`` / ``"an"`` is read as 1 (``"an hour ago"``).
        now: Moment to count back from. Defaults to ``datetime.today()``;
            pass a fixed value to get reproducible results.

    Returns:
        ``now`` minus the parsed offset. Months and years are approximated as
        30 and 365 days. When no relative phrase is found, ``now`` itself is
        returned.
    """
    reference = datetime.today() if now is None else now

    match = re.search(
        r'\b(\d+|an?)\s+(minute|hour|day|week|month|year)s?\s+ago',
        relative_date.lower())
    if match is None:
        # Unrecognised text (e.g. "just now") is treated as the reference time.
        return reference

    quantity, unit = match.groups()
    num = int(quantity) if quantity.isdigit() else 1

    # timedelta has no month/year fields, so those are expressed in days.
    # Only the matched unit is turned into a timedelta, so a large count in a
    # small unit cannot overflow through an unrelated larger unit.
    unit_table = {
        "minute": ("minutes", 1),
        "hour": ("hours", 1),
        "day": ("days", 1),
        "week": ("weeks", 1),
        "month": ("days", 30),
        "year": ("days", 365),
    }
    field, factor = unit_table[unit]
    return reference - timedelta(**{field: num * factor})

In [None]:
# Start from an empty list so that re-running this cell does not append
# duplicate rows to whatever an earlier run collected.
news_list = []

for ticker in tickers:
    url = tickertape_url + ticker
    req = Request(url=url, headers={'user-agent': 'my-app'})
    # The context manager closes the HTTP connection once the page is parsed;
    # the timeout stops one unresponsive request from hanging the notebook.
    with urlopen(req, timeout=30) as response:
        html = BeautifulSoup(response, 'html.parser')

    news_tab = html.find(id='news-tab')
    if not news_tab:
        # No news section on this page: nothing to record for the ticker.
        continue

    # NOTE(review): the "jsx-1669302504" class names look build-generated;
    # confirm they are still current if this starts returning no rows.
    headlines = news_tab.find_all(
        'span', class_='jsx-1669302504 text-primary headline mb8 heading')
    dates = news_tab.find_all(
        'span', class_='jsx-1669302504 text-grey caption-style all-uppercase')

    # zip pairs headlines with dates by position and stops at the shorter list.
    for headline, date in zip(headlines, dates):
        news_list.append([
            ticker,
            headline.text.strip(),
            convert_relative_date(date.text.strip()).strftime(
                '%Y-%m-%d %H:%M:%S')
        ])

In [None]:
# One row per scraped headline; Date_RAW holds the formatted timestamp string.
df = pd.DataFrame(
    data=news_list,
    columns=['Ticker', 'Title', 'Date_RAW'],
)

In [None]:
# Reduce the timestamp string to a calendar date and drop the raw column.
# The guard makes a second run of this cell a no-op instead of a KeyError on
# the already-removed Date_RAW column.
if 'Date_RAW' in df.columns:
    df['Date'] = pd.to_datetime(df["Date_RAW"]).dt.date
    df = df.drop(columns='Date_RAW')

In [None]:
df.head()

Unnamed: 0,Ticker,Title,Date
0,PNBK,Punjab National Bank consolidated net profit r...,2025-01-31
1,PNBK,Punjab National Bank rallies as Q3 PAT doubles...,2025-01-31
2,PNBK,Punjab National Bank to convene board meeting,2025-01-31
3,PNBK,PNB rises as domestic deposits climb 14% YoY i...,2025-01-15
4,PNBK,Punjab National Bank announces cessation of MD...,2025-01-15


In [None]:
df.tail()

Unnamed: 0,Ticker,Title,Date
85,BRTI,Bharti Airtel announces board meeting date,2025-01-24
86,BRTI,Bharti Airtel to divest its entire 50% stake i...,2025-01-15
87,BRTI,Airtel enhances network coverage in Prayagraj ...,2025-01-15
88,BRTI,Bharti Airtel allots 4.61 lakh equity shares o...,2025-01-15
89,BRTI,Bharti Airtel to acquire 26% stake in AMP Ener...,2025-01-15


In [None]:
# Try the classifier on a single hand-written headline.
text = (
    "The Punjab National bank recorded loss of 2400Cr "
    "in the last fiscal year"
)
pipe(text)

[{'label': 'negative', 'score': 0.9625182151794434}]

In [None]:
pipe(text)[0]['score']

0.9625182151794434

In [None]:
def f(Title):
    """Classify one headline and return its label and score.

    Args:
        Title: Headline text passed to the ``pipe`` classification pipeline.

    Returns:
        A two-element ``pd.Series``: the ``'label'`` of the first result
        followed by its ``'score'``.
    """
    # Run the model once and reuse the result for both fields; calling
    # pipe() separately for label and score doubles the inference work.
    result = pipe(Title)[0]
    return pd.Series([result['label'], result['score']])


df[['label', 'score']] = df['Title'].apply(f)

In [None]:
df

Unnamed: 0,Ticker,Title,Date,label,score
0,PNBK,Punjab National Bank consolidated net profit r...,2025-01-31,positive,0.932558
1,PNBK,Punjab National Bank rallies as Q3 PAT doubles...,2025-01-31,positive,0.808500
2,PNBK,Punjab National Bank to convene board meeting,2025-01-31,neutral,0.936114
3,PNBK,PNB rises as domestic deposits climb 14% YoY i...,2025-01-15,positive,0.931605
4,PNBK,Punjab National Bank announces cessation of MD...,2025-01-15,neutral,0.711983
...,...,...,...,...,...
85,BRTI,Bharti Airtel announces board meeting date,2025-01-24,neutral,0.906702
86,BRTI,Bharti Airtel to divest its entire 50% stake i...,2025-01-15,neutral,0.893169
87,BRTI,Airtel enhances network coverage in Prayagraj ...,2025-01-15,positive,0.949488
88,BRTI,Bharti Airtel allots 4.61 lakh equity shares o...,2025-01-15,neutral,0.934414


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Ticker  90 non-null     object 
 1   Title   90 non-null     object 
 2   Date    90 non-null     object 
 3   label   90 non-null     object 
 4   score   90 non-null     float64
dtypes: float64(1), object(4)
memory usage: 3.6+ KB


In [None]:
# Parse the Date column (YYYY-MM-DD) into pandas datetime values.
parsed_dates = pd.to_datetime(df['Date'], format="%Y-%m-%d")
df['Date'] = parsed_dates

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Ticker  90 non-null     object        
 1   Title   90 non-null     object        
 2   Date    90 non-null     datetime64[ns]
 3   label   90 non-null     object        
 4   score   90 non-null     float64       
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 3.6+ KB


In [None]:
# Turn the classifier score into a signed sentiment value:
# positive -> +score, negative -> -score, any other label -> 0.
# The magnitude is taken first so that re-running this cell gives the same
# result; negating in place would flip negative rows back to positive on a
# second run.
label_sign = df["label"].map({"positive": 1.0, "negative": -1.0}).fillna(0.0)
df["score"] = (df["score"].abs() * label_sign).where(label_sign != 0, 0.0)

In [None]:
df.head(2)

Unnamed: 0,Ticker,Title,Date,label,score
0,PNBK,Punjab National Bank consolidated net profit r...,2025-01-31,positive,0.932558
1,PNBK,Punjab National Bank rallies as Q3 PAT doubles...,2025-01-31,positive,0.8085


In [None]:
df.tail(10)

Unnamed: 0,Ticker,Title,Date,label,score
80,BRTI,Airtel collaborates with Nokia and Qualcomm to...,2025-02-12,positive,0.920419
81,BRTI,Bharti Airtel spurts after strong Q3 numbers; ...,2025-02-07,positive,0.939367
82,BRTI,"Bharti Airtel Ltd Surges 2.65%, BSE Telecommun...",2025-02-07,positive,0.920457
83,BRTI,Bharti Airtel consolidated net profit rises 50...,2025-02-07,positive,0.930748
84,BRTI,Bharti Airtel records multi-fold jump in Q3 PA...,2025-02-07,positive,0.922609
85,BRTI,Bharti Airtel announces board meeting date,2025-01-24,neutral,0.0
86,BRTI,Bharti Airtel to divest its entire 50% stake i...,2025-01-15,neutral,0.0
87,BRTI,Airtel enhances network coverage in Prayagraj ...,2025-01-15,positive,0.949488
88,BRTI,Bharti Airtel allots 4.61 lakh equity shares o...,2025-01-15,neutral,0.0
89,BRTI,Bharti Airtel to acquire 26% stake in AMP Ener...,2025-01-15,neutral,0.0


In [None]:
# Net sentiment per ticker: the sum of its signed scores, one row per ticker.
score_diff_df = df.groupby("Ticker", as_index=False)["score"].sum()
score_diff_df

Unnamed: 0,Ticker,score
0,BRTI,5.583088
1,HDBK,3.182211
2,INFY,1.636936
3,MRF,-5.571212
4,NTPC,0.81288
5,PNBK,4.355379
6,RAIV,0.324654
7,RELI,0.80294
8,TCS,2.712619


In [None]:
# Sizes of the "most recent N headlines" windows reported for each ticker.
windows = (2, 5, 10)

summary_data = []
for ticker in df["Ticker"].unique():
    # Order newest-first explicitly instead of relying on the order in which
    # rows were scraped; the stable sort keeps the scraped order among
    # headlines that share a date.
    ticker_news = (
        df.loc[df["Ticker"] == ticker, ["Date", "score"]]
        .sort_values("Date", ascending=False, kind="stable")
    )

    # Row layout: ticker, then (score, date) per window, then the total.
    row = [ticker]
    for size in windows:
        recent = ticker_news.head(size)
        # Summed score of the window, then the oldest date it reaches back to.
        row.append(recent["score"].sum())
        row.append(recent["Date"].min() if not recent.empty else None)
    row.append(ticker_news["score"].sum())

    summary_data.append(row)

In [None]:
# Column names, in the same order as the values in each summary_data row.
summary_columns = [
    "Ticker",
    "Last 2 Score", "Last 2 Date",
    "Last 5 Score", "Last 5 Date",
    "Last 10 Score", "Last 10 Date",
    "Total Score",
]
summary_df = pd.DataFrame(data=summary_data, columns=summary_columns)

In [None]:
summary_df

Unnamed: 0,Ticker,Last 2 Score,Last 2 Date,Last 5 Score,Last 5 Date,Last 10 Score,Last 10 Date,Total Score
0,PNBK,1.741058,2025-01-31,2.672663,2025-01-15,4.355379,2024-10-17,4.355379
1,MRF,0.0,2025-02-07,-1.940346,2025-01-31,-5.571212,2025-01-15,-5.571212
2,RAIV,0.0,2025-02-07,-1.347534,2025-01-17,0.324654,2025-01-15,0.324654
3,NTPC,-0.053352,2025-01-31,-1.028109,2025-01-31,0.81288,2025-01-15,0.81288
4,INFY,-1.551276,2025-02-11,1.037885,2025-01-17,1.636936,2024-12-16,1.636936
5,RELI,0.0,2025-02-12,-0.132344,2025-01-31,0.80294,2025-01-17,0.80294
6,TCS,0.0,2025-01-24,-0.077984,2025-01-15,2.712619,2025-01-15,2.712619
7,HDBK,-0.521666,2025-01-31,1.3599,2025-01-24,3.182211,2025-01-15,3.182211
8,BRTI,1.859785,2025-02-07,4.633599,2025-02-07,5.583088,2025-01-15,5.583088


In [None]:
# Write the per-ticker summary to disk; index=False omits the row index.
summary_df.to_csv(path_or_buf="sentiment_summary.csv", index=False)