In [69]:
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
import pandas as pd
from transformers import pipeline
from datetime import datetime, timedelta
import re 
import torch

In [70]:
torch.__version__

'2.6.0+cu126'

In [71]:
# Base URL of a Tickertape stock page; the ticker slug is appended to it.
tickertape_url = 'https://www.tickertape.in/stocks/'

# Ticker slugs whose news headlines will be scraped and scored.
tickers = [
    "PNBK",
    "MRF",
    "RAIV",
    "NTPC",
]

# Accumulates one [ticker, headline, date string] row per scraped headline.
news_list = list()

# Text-classification pipeline backed by the ProsusAI/finbert checkpoint.
pipe = pipeline(task="text-classification", model="ProsusAI/finbert")

Device set to use cuda:0


In [72]:
# Length of one unit of each supported relative-time word.
# Months and years are approximations (30 and 365 days respectively).
_RELATIVE_UNIT_DELTAS = {
    "minute": timedelta(minutes=1),
    "hour": timedelta(hours=1),
    "day": timedelta(days=1),
    "week": timedelta(weeks=1),
    "month": timedelta(days=30),
    "year": timedelta(days=365),
}

# "<count> <unit>[s] ago", where <count> is digits or the article "a"/"an".
_RELATIVE_DATE_RE = re.compile(
    r'\b(\d+|an?)\s+(minute|hour|day|week|month|year)s?\s+ago')


def convert_relative_date(relative_date, now=None):
    """Convert a relative timestamp such as ``"3 days ago"`` to a datetime.

    Parameters
    ----------
    relative_date : str
        Text containing "<N> <unit>(s) ago" (case-insensitive). ``N`` may be
        digits or the article "a"/"an"; the phrase may be preceded by other
        words (e.g. "Updated 2 hours ago").
    now : datetime, optional
        Reference instant to subtract from. Defaults to ``datetime.today()``;
        pass an explicit value for reproducible results.

    Returns
    -------
    datetime
        The reference instant minus the parsed offset. If no relative-time
        phrase is found, the reference instant itself is returned.
    """
    reference = datetime.today() if now is None else now

    match = _RELATIVE_DATE_RE.search(relative_date.lower())
    if match is None:
        # Unrecognised text: fall back to the reference instant.
        return reference

    quantity, unit = match.groups()
    count = 1 if quantity in ("a", "an") else int(quantity)
    return reference - count * _RELATIVE_UNIT_DELTAS[unit]

In [73]:
# Scrape the news tab of every ticker page into `news_list`.
# Empty the list first so re-running this cell does not append duplicate rows.
news_list.clear()

for ticker in tickers:
    url = tickertape_url + ticker
    req = Request(url=url, headers={'user-agent': 'my-app'})

    # The context manager closes the HTTP response once the page is parsed;
    # the timeout keeps a stalled request from hanging the notebook.
    with urlopen(req, timeout=30) as response:
        html = BeautifulSoup(response, 'html.parser')

    news_tab = html.find(id='news-tab')
    if news_tab is None:
        # Page has no news section; nothing to record for this ticker.
        continue

    # NOTE(review): these selectors include a build-specific "jsx-..." class
    # and will stop matching if the site changes it — confirm when results
    # come back empty.
    headlines = news_tab.find_all(
        'span', class_='jsx-1669302504 text-primary headline mb8 heading')
    dates = news_tab.find_all(
        'span', class_='jsx-1669302504 text-grey caption-style all-uppercase')

    # zip() pairs headline i with date i and stops at the shorter list.
    for headline, date in zip(headlines, dates):
        published = convert_relative_date(date.text.strip())
        news_list.append([
            ticker,
            headline.text.strip(),
            published.strftime('%Y-%m-%d %H:%M:%S'),
        ])

In [74]:
# One row per scraped headline; Date_RAW is the formatted timestamp string.
news_columns = ["Ticker", "Title", "Date_RAW"]
df = pd.DataFrame(data=news_list, columns=news_columns)

In [75]:
# Reduce the raw timestamp string to a calendar date and drop the raw column.
parsed_dates = pd.to_datetime(df["Date_RAW"])
df = df.assign(Date=parsed_dates.dt.date).drop(columns="Date_RAW")

In [76]:
df.head()

Unnamed: 0,Ticker,Title,Date
0,PNBK,Punjab National Bank consolidated net profit r...,2025-01-31
1,PNBK,Punjab National Bank rallies as Q3 PAT doubles...,2025-01-31
2,PNBK,Punjab National Bank to convene board meeting,2025-01-31
3,PNBK,PNB rises as domestic deposits climb 14% YoY i...,2025-01-15
4,PNBK,Punjab National Bank announces cessation of MD...,2025-01-15


In [77]:
df.tail()

Unnamed: 0,Ticker,Title,Date
35,NTPC,NTPC to hold board meeting,2025-01-24
36,NTPC,"NTPC Ltd soars 2.96%, rises for fifth straight...",2025-01-24
37,NTPC,NTPC commissions further 25 MW of Gujarat Sola...,2025-01-24
38,NTPC,"NTPC Ltd soars 1.48%, Gains for third straight...",2025-01-17
39,NTPC,NTPC commissions two solar projects aggregatin...,2025-01-15


In [78]:
# Smoke test: run the classifier on a hand-written sentence before scoring
# the scraped headlines. The pipeline returns a list with one
# {'label', 'score'} dict per input.
text = "The Punjab National bank recorded loss of 2400Cr in the last fiscal year"
pipe(text)

[{'label': 'negative', 'score': 0.9625182151794434}]

In [79]:
pipe(text)[0]['score']

0.9625182151794434

In [80]:
def f(Title):
    """Classify one headline and return its label and confidence score.

    Parameters
    ----------
    Title : str
        Headline text passed to the `pipe` classifier.

    Returns
    -------
    pandas.Series
        Two positional entries: the predicted label, then its score.
    """
    # Run the model once and reuse the result; calling `pipe` separately for
    # the label and the score doubles the inference cost.
    prediction = pipe(Title)[0]
    return pd.Series([prediction['label'], prediction['score']])


df[['label', 'score']] = df['Title'].apply(f)

In [81]:
df

Unnamed: 0,Ticker,Title,Date,label,score
0,PNBK,Punjab National Bank consolidated net profit r...,2025-01-31,positive,0.932558
1,PNBK,Punjab National Bank rallies as Q3 PAT doubles...,2025-01-31,positive,0.8085
2,PNBK,Punjab National Bank to convene board meeting,2025-01-31,neutral,0.936114
3,PNBK,PNB rises as domestic deposits climb 14% YoY i...,2025-01-15,positive,0.931605
4,PNBK,Punjab National Bank announces cessation of MD...,2025-01-15,neutral,0.711983
5,PNBK,Punjab National Bank receives affirmation in c...,2024-12-16,neutral,0.53367
6,PNBK,Punjab National Bank up for third straight ses...,2024-12-16,positive,0.753224
7,PNBK,Punjab National Bank consolidated net profit r...,2024-11-16,positive,0.929492
8,PNBK,Punjab National Bank announces board meeting date,2024-11-16,neutral,0.844728
9,PNBK,Punjab National Bank launches QIP; floor price...,2024-10-17,neutral,0.894893


In [82]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Ticker  40 non-null     object 
 1   Title   40 non-null     object 
 2   Date    40 non-null     object 
 3   label   40 non-null     object 
 4   score   40 non-null     float64
dtypes: float64(1), object(4)
memory usage: 1.7+ KB


In [84]:
# Parse the Date column with an explicit YYYY-MM-DD format so it holds pandas
# datetime values rather than generic objects.
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")

In [85]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Ticker  40 non-null     object        
 1   Title   40 non-null     object        
 2   Date    40 non-null     datetime64[ns]
 3   label   40 non-null     object        
 4   score   40 non-null     float64       
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 1.7+ KB


In [86]:
# Turn the classifier confidence into a signed sentiment score:
# positive -> +score, negative -> -score, anything else (neutral) -> 0.
# Taking abs() first keeps the cell idempotent: re-running it cannot flip
# scores that were already negated by an earlier run.
label_sign = df["label"].map({"positive": 1.0, "negative": -1.0}).fillna(0.0)
df["score"] = df["score"].abs() * label_sign

In [113]:
df.head(2)

Unnamed: 0,Ticker,Title,Date,label,score
0,PNBK,Punjab National Bank consolidated net profit r...,2025-01-31,positive,0.932558
1,PNBK,Punjab National Bank rallies as Q3 PAT doubles...,2025-01-31,positive,0.8085


In [87]:
df.tail(10)

Unnamed: 0,Ticker,Title,Date,label,score
30,NTPC,NTPC receives favourable judgement from Delhi ...,2025-02-07,positive,0.918732
31,NTPC,"NTPC slides as Q3 PAT decline 2% YoY to Rs 5,0...",2025-01-31,negative,-0.972084
32,NTPC,NTPC consolidated net profit declines 1.80% in...,2025-01-31,negative,-0.974757
33,NTPC,Board of NTPC recommends second interim dividend,2025-01-31,neutral,0.0
34,NTPC,NTPC commissions Unit 1 (660 MW) of Khurja STPP,2025-01-31,neutral,0.0
35,NTPC,NTPC to hold board meeting,2025-01-24,neutral,0.0
36,NTPC,"NTPC Ltd soars 2.96%, rises for fifth straight...",2025-01-24,positive,0.907309
37,NTPC,NTPC commissions further 25 MW of Gujarat Sola...,2025-01-24,neutral,0.0
38,NTPC,"NTPC Ltd soars 1.48%, Gains for third straight...",2025-01-17,positive,0.93368
39,NTPC,NTPC commissions two solar projects aggregatin...,2025-01-15,neutral,0.0


In [90]:
# Net sentiment per ticker: the sum of the signed scores of all its headlines.
score_diff_df = df.groupby("Ticker", as_index=False)["score"].sum()
score_diff_df

Unnamed: 0,Ticker,score
0,MRF,-5.571212
1,NTPC,0.81288
2,PNBK,4.355379
3,RAIV,0.324654


In [125]:
# Sizes of the "most recent N headlines" windows summarised per ticker.
# Each row built below is laid out as
#   [ticker, (score, date) for each window in order..., total score],
# so any column labels applied to `summary_data` must follow the same order.
NEWS_WINDOWS = (2, 5, 10)

summary_data = []
for ticker in df["Ticker"].unique():
    # Order newest-first explicitly so "last N" does not depend on the row
    # order of `df`; the stable sort keeps the existing order for equal dates.
    ticker_news = (
        df.loc[df["Ticker"] == ticker, ["Date", "score"]]
        .sort_values("Date", ascending=False, kind="stable")
    )

    row = [ticker]
    for window in NEWS_WINDOWS:
        recent_news = ticker_news.head(window)
        # Window score, then the oldest date the window reaches back to.
        # `ticker` comes from `df`, so the window always has at least one row.
        row.extend([recent_news["score"].sum(), recent_news["Date"].min()])

    row.append(ticker_news["score"].sum())
    summary_data.append(row)

In [126]:
# Column labels: ticker, a (score, date) pair per window, then the total.
summary_columns = ["Ticker"]
for window in (2, 5, 10):
    summary_columns += [f"Last {window} Score", f"Last {window} Date"]
summary_columns.append("Total Score")

summary_df = pd.DataFrame(summary_data, columns=summary_columns)

In [127]:
summary_df

Unnamed: 0,Ticker,Last 2 Score,Last 2 Date,Last 5 Score,Last 5 Date,Last 10 Score,Last 10 Date,Total Score
0,PNBK,1.741058,2025-01-31,2.672663,2025-01-15,4.355379,2024-10-17,4.355379
1,MRF,0.0,2025-02-07,-1.940346,2025-01-31,-5.571212,2025-01-15,-5.571212
2,RAIV,0.0,2025-02-07,-1.347534,2025-01-17,0.324654,2025-01-15,0.324654
3,NTPC,-0.053352,2025-01-31,-1.028109,2025-01-31,0.81288,2025-01-15,0.81288


In [None]:
# Save the per-ticker summary to sentiment_summary.csv (relative path),
# leaving out the DataFrame index.
summary_df.to_csv("sentiment_summary.csv", index=False)