In [51]:
import concurrent.futures
import multiprocessing as mp
import time

import nsepython as nse
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

In [52]:
# from transformers import pipeline
from huggingface_hub.inference_api import InferenceApi
import os
from dotenv import load_dotenv

In [53]:
# Run configuration: request headers passed to requests.get, plus the index
# universe to scan and the base URL the per-ticker quote URL is built from.
header = {}
header["User-Agent"] = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0"
)
universe, news_url = "nifty_50", "https://www.google.com/finance/quote"

In [54]:
# Module-level accumulators filled by the fetch helpers:
#   article_data        - one row per scraped article
#   ticker_meta         - one metadata row per ticker
#   unavailable_tickers - tickers for which no news was found
article_data, ticker_meta, unavailable_tickers = [], [], []

In [55]:
def fetch_tickers(universe):
    """Download the constituent list of an NSE index and return its symbols.

    The complete CSV is also written to ``./datasets/{universe}.csv``.

    Args:
        universe: One of ``"nifty_500"``, ``"nifty_200"``, ``"nifty_100"``
            or ``"nifty_50"``.

    Returns:
        A DataFrame holding the ``"Symbol"`` and ``"Company Name"`` columns
        of the downloaded list.

    Raises:
        KeyError: If ``universe`` is not one of the supported names; the
            message lists the valid choices.
    """
    tickers_url_dict = {
        "nifty_500": "https://archives.nseindia.com/content/indices/ind_nifty500list.csv",
        "nifty_200": "https://archives.nseindia.com/content/indices/ind_nifty200list.csv",
        "nifty_100": "https://archives.nseindia.com/content/indices/ind_nifty100list.csv",
        "nifty_50": "https://archives.nseindia.com/content/indices/ind_nifty50list.csv",
    }
    if universe not in tickers_url_dict:
        # Fail before printing or touching the network, and name the valid keys.
        raise KeyError(
            f"Unknown universe {universe!r}; expected one of {sorted(tickers_url_dict)}"
        )
    print(f"Downloading {universe} Tickers")
    universe_tickers = pd.read_csv(tickers_url_dict[universe])
    # DataFrame.to_csv does not create missing parent directories.
    os.makedirs("./datasets", exist_ok=True)
    universe_tickers.to_csv(f"./datasets/{universe}.csv")
    return universe_tickers[["Symbol", "Company Name"]]

In [56]:
# Download the constituent list for the configured universe and preview the first rows.
tickers_df = fetch_tickers(universe)
tickers_df.head()

Downloading nifty_50 Tickers


Unnamed: 0,Symbol,Company Name
0,ADANIENT,Adani Enterprises Ltd.
1,ADANIPORTS,Adani Ports and Special Economic Zone Ltd.
2,APOLLOHOSP,Apollo Hospitals Enterprise Ltd.
3,ASIANPAINT,Asian Paints Ltd.
4,AXISBANK,Axis Bank Ltd.


In [57]:
# Plain Python list of the ticker symbols to process.
tickers_list = tickers_df["Symbol"].tolist()

In [58]:
def get_url_content(ticker, timeout=10):
    """Fetch the quote page and the NSE equity payload for one ticker.

    The page is requested from ``{news_url}/{ticker}:NSE`` using the
    module-level ``header`` and parsed with the ``lxml`` parser; the payload
    comes from ``nse.nse_eq(ticker)``.

    Args:
        ticker: Symbol without an exchange suffix (``":NSE"`` is appended
            for the page URL only).
        timeout: Seconds to wait on the page request before ``requests``
            raises; applies to ``requests.get`` only, not to ``nse.nse_eq``.

    Returns:
        Tuple ``(ticker, soup, meta)``.
    """
    _ticker = ticker + ":NSE"
    url = f"{news_url}/{_ticker}"
    print(f"Fetching data for {ticker} from {url}")
    # Without a timeout a stalled connection would block the whole run.
    response = requests.get(url, headers=header, timeout=timeout)
    soup = BeautifulSoup(response.content, "lxml")
    meta = nse.nse_eq(ticker)
    return ticker, soup, meta

In [59]:
def ticker_article_fetch(ticker, soup):
    """Collect the news cards found in ``soup`` into ``article_data``.

    Each complete card appends
    ``[ticker, title, date_posted, source, article_link]`` to the module-level
    ``article_data`` list. Cards missing any of the four expected elements
    are reported and skipped instead of raising ``AttributeError``.

    Args:
        ticker: Symbol stored alongside every article row.
        soup: Parsed page exposing ``select``.

    Returns:
        ``True`` when no article was stored for the ticker (no cards, or
        every card was incomplete), ``False`` otherwise.
    """
    news_articles = soup.select("div.z4rs2b")
    if not news_articles:
        print(f"No news found for {ticker}")
        return True
    ticker_articles_counter = 0
    for link in news_articles:
        title_el = link.select_one("div.Yfwt5")
        date_el = link.select_one("div.Adak")
        source_el = link.select_one("div.sfyJob")
        anchor_el = link.select_one("a")
        # select_one yields None when a selector does not match the card.
        if any(el is None for el in (title_el, date_el, source_el, anchor_el)):
            print(f"Skipping incomplete article card for {ticker}")
            continue
        article_data.append(
            [
                ticker,
                title_el.text.strip(),
                date_el.text,
                source_el.text,
                anchor_el.get("href"),
            ]
        )
        ticker_articles_counter += 1
    print(f"No of articles: {ticker_articles_counter} for {ticker}")
    return ticker_articles_counter == 0

In [60]:
def ticker_meta_fetch(ticker, meta):
    """Extract sector, industry, market cap and company name from ``meta``.

    Appends ``[ticker, sector, industry, mCap, companyName]`` to the
    module-level ``ticker_meta`` list. The market cap is
    ``previousClose * issuedSize / 1e9`` rounded to two decimals. If any
    field cannot be read, all four values are stored as NaN.

    Args:
        ticker: Symbol stored in the first column of the row.
        meta: Nested mapping with ``industryInfo``, ``priceInfo``,
            ``securityInfo`` and ``info`` entries.
    """
    try:
        sector = meta["industryInfo"]["macro"]
        industry = meta["industryInfo"]["industry"]
        mCap = round(
            (meta["priceInfo"]["previousClose"] * meta["securityInfo"]["issuedSize"])
            / 1e9,
            2,
        )
        companyName = meta["info"]["companyName"]
    except (KeyError, TypeError) as e:
        # TypeError covers a non-subscriptable payload (e.g. None) and null
        # price/size values that cannot be multiplied.
        print(f"Error fetching metadata for {ticker}: {e}")
        sector = industry = mCap = companyName = np.nan
    ticker_meta.append([ticker, sector, industry, mCap, companyName])

In [61]:
def process_ticker(ticker):
    """Fetch one ticker, store its articles, then store its metadata.

    When the article step reports that nothing was found, the ticker is
    added to ``unavailable_tickers`` and the metadata step is skipped.
    """
    ticker, soup, meta = get_url_content(ticker)
    nothing_found = ticker_article_fetch(ticker, soup)
    if not nothing_found:
        ticker_meta_fetch(ticker, meta)
        return
    unavailable_tickers.append(ticker)
    print(f"Skipping meta check for {ticker}")

In [62]:
# with mp.Pool(processes=mp.cpu_count()) as pool:
#     list(
#         tqdm(
#             pool.imap(process_ticker, tickers_list),
#             total=len(tickers_list),
#         )
#     )

In [63]:
# Load previously saved article and ticker-metadata tables from disk; the first
# CSV column becomes the index.
# NOTE(review): the articles file is named NIFTY_500 while `universe` above is
# "nifty_50" - confirm this is the intended dataset.
articles_df = pd.read_csv("./datasets/NIFTY_500_Articles.csv", index_col=0)
ticker_meta_df = pd.read_csv("./datasets/ticker_metadata.csv", index_col=0)

In [64]:
# Load variables from a local .env file (if one exists) into the environment
# first; otherwise a key kept only in .env is never seen and token is None.
load_dotenv()
token = os.getenv("hf_api_key")

In [65]:
# Inference client for the ProsusAI/finbert model, authenticated with `token`;
# it is called further down with the list of headlines.
sentiment_model = InferenceApi("ProsusAI/finbert", token=token)



In [66]:
# Make sure every headline is a string before it is sent to the model.
articles_df["Headline"] = articles_df["Headline"].astype(str)

In [68]:
# Spot-check a single headline.
articles_df.Headline[1]

'Q1 Results 2024 Live Updates: Adani Total Gas, Hindustan Petroleum, ACC, Adani Wilmar, Pfizer, among others to report Q1 earnings today'

['Adani Group to commission first phase of $4 bn petchem project by Dec 2026',
 'Q1 Results 2024 Live Updates: Adani Total Gas, Hindustan Petroleum, ACC, \nAdani Wilmar, Pfizer, among others to report Q1 earnings today',
 'Adani Wilmar Jumps Over 5% As Company Swings To Black, Posts Rs 323 Crore \nProfit; Details',
 'Adani Total Gas Q1 Results: PAT jumps 15% YoY to Rs 172 crore, revenue \nrises 9%',
 'Stocks to buy: Adani Ports, Cipla, Ashok Leyland among 9 stocks that may \nrise 4-16% in next 3-4 weeks, say analysts | Stock Market News',
 'Top stock picks | Ashok Leyland, Adani Ports, Balkrishna Industries and \nBharat Forge on the radar',
 'Adani Wilmar, ACC, Adani Total Gas shares gain ahead Q1 results today; \nearnings previews',
 'Adani Ports & Special Economic Zone Share Price Updates: Adani Ports & \nSpecial Economic Zone Sees Margin...',
 'Q1 Results 2024 Live Updates: Adani Total Gas, Hindustan Petroleum, ACC, \nAdani Wilmar, Pfizer, among others to report Q1 earnings today',


In [34]:
# Score all headlines in one call to the sentiment model.
results = sentiment_model(articles_df["Headline"].tolist())

In [37]:
# Inspect the raw model output.
results

[[{'label': 'neutral', 'score': 0.8948242664337158},
  {'label': 'positive', 'score': 0.09680631011724472},
  {'label': 'negative', 'score': 0.00836937129497528}],
 [{'label': 'neutral', 'score': 0.9389209747314453},
  {'label': 'positive', 'score': 0.03397244215011597},
  {'label': 'negative', 'score': 0.02710658125579357}],
 [{'label': 'positive', 'score': 0.8837496042251587},
  {'label': 'negative', 'score': 0.06055238097906113},
  {'label': 'neutral', 'score': 0.05569801479578018}],
 [{'label': 'positive', 'score': 0.9486324787139893},
  {'label': 'neutral', 'score': 0.026764869689941406},
  {'label': 'negative', 'score': 0.024602612480521202}],
 [{'label': 'positive', 'score': 0.9162551164627075},
  {'label': 'neutral', 'score': 0.06868037581443787},
  {'label': 'negative', 'score': 0.015064483508467674}],
 [{'label': 'neutral', 'score': 0.9096757769584656},
  {'label': 'positive', 'score': 0.07339461892843246},
  {'label': 'negative', 'score': 0.01692965254187584}],
 [{'label': '

In [42]:
# Collapse each headline's list of {"label", "score"} dicts into a single
# {label: score} mapping, one mapping per headline.
flattened_data = [
    {prediction["label"]: prediction["score"] for prediction in entry}
    for entry in results
]

# One row per headline, one column per label.
df = pd.DataFrame(flattened_data)

In [49]:
# Net sentiment per headline: positive score minus negative score.
df["sentiment_score"] = df["positive"] - df["negative"]

In [50]:
# Join the scores onto the articles by index and look at one merged row.
articles_df.merge(df, left_index=True, right_index=True).iloc[2]

Ticker                                               ADANIENT
Headline    Adani Wilmar Jumps Over 5% As Company Swings T...
Date                                              2 hours ago
Source                                                 News18
Link        https://www.news18.com/business/markets/adani-...
neutral                                              0.055698
positive                                              0.88375
negative                                             0.060552
compound                                             0.823197
Name: 2, dtype: object

In [4]:
def _download_index_csv(source_url, saved_path):
    """Read a constituent CSV from ``source_url`` and save a copy to ``saved_path``."""
    constituents = pd.read_csv(source_url)
    constituents.to_csv(saved_path)
    return constituents


print("Downloading NIFTY 500, 200, 100, 50 Tickers")

# Each index is downloaded and saved in turn, largest universe first.
nifty_500_ticker_url = "https://archives.nseindia.com/content/indices/ind_nifty500list.csv"
nifty_500 = _download_index_csv(nifty_500_ticker_url, "./datasets/NIFTY_500.csv")

nifty_200_ticker_url = "https://archives.nseindia.com/content/indices/ind_nifty200list.csv"
nifty_200 = _download_index_csv(nifty_200_ticker_url, "./datasets/NIFTY_200.csv")

nifty_100_ticker_url = "https://archives.nseindia.com/content/indices/ind_nifty100list.csv"
nifty_100 = _download_index_csv(nifty_100_ticker_url, "./datasets/NIFTY_100.csv")

nifty_50_ticker_url = "https://archives.nseindia.com/content/indices/ind_nifty50list.csv"
nifty_50 = _download_index_csv(nifty_50_ticker_url, "./datasets/NIFTY_50.csv")

Downloading NIFTY 500, 200, 100, 50 Tickers


In [5]:
# Choose the index universe to process: the NIFTY 50 constituents frame loaded above.
universe = nifty_50

In [6]:
# Keep only the identifying columns of the chosen universe.
tickers_df = universe[["Symbol", "Company Name"]]
# Symbol column of tickers_df (a pandas Series, not a list); a later cell
# appends ":NSE" to it with `+=`.
tickers_list = tickers_df["Symbol"]

In [7]:
# Base URLs of the candidate quote/news sites, keyed by a short site name.
news_urls = dict(
    ticker_finology="https://ticker.finology.in/company",
    google_finance="https://www.google.com/finance/quote",
    yahoo_finance="https://finance.yahoo.com/quote",
)

# Site actually used when building per-ticker URLs.
news_url = news_urls["google_finance"]

In [8]:
# Symbols that get a replacement identifier when the quote URL is built
# (see get_url_content).
special_symbols = dict(
    [
        ("L&TFH", "SCRIP-220350"),
        ("M&M", "SCRIP-100520"),
        ("M&MFIN", "SCRIP-132720"),
    ]
)

In [9]:
# Request headers sent with every page fetch (a browser-like User-Agent).
header = {}
header["User-Agent"] = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0"
)

In [10]:
# Module-level accumulators filled while tickers are processed:
#   article_data        - scraped article rows
#   ticker_meta         - per-ticker metadata rows
#   unavailable_tickers - tickers for which data is unavailable
article_data, ticker_meta, unavailable_tickers = [], [], []

In [11]:
# Fetches the quote page and the NSE payload for one ticker; safe to submit
# to a thread pool because it only returns values and touches no shared lists.
def get_url_content(ticker, timeout=10):
    """Fetch the quote page and the NSE equity payload for one ticker.

    The page URL is ``{news_url}/{symbol}``, where ``symbol`` is the entry in
    ``special_symbols`` for this ticker if there is one, else the ticker
    itself. The page is requested with the module-level ``header`` and parsed
    with the ``lxml`` parser; the payload comes from ``nse.nse_eq(ticker)``.

    Args:
        ticker: Symbol exactly as it should appear in the URL.
        timeout: Seconds to wait on the page request before ``requests``
            raises; applies to ``requests.get`` only, not to ``nse.nse_eq``.

    Returns:
        Tuple ``(ticker, soup, meta)``.
    """
    _ticker = special_symbols.get(ticker, ticker)
    url = f"{news_url}/{_ticker}"
    print(f"Fetching data for {ticker} from {url}")
    # Without a timeout a stalled connection would block its worker forever.
    response = requests.get(url, headers=header, timeout=timeout)
    soup = BeautifulSoup(response.content, "lxml")
    meta = nse.nse_eq(ticker)
    return ticker, soup, meta

In [12]:
def ticker_article_fetch(ticker, soup):
    """Collect the news cards found in ``soup`` into ``article_data``.

    Each complete card appends
    ``[ticker, title, date_posted, source, article_link]`` to the module-level
    ``article_data`` list, with newlines removed from the title. Cards missing
    any of the four expected elements are reported and skipped instead of
    raising ``AttributeError``.

    Args:
        ticker: Symbol stored alongside every article row.
        soup: Parsed page exposing ``select``.

    Returns:
        ``True`` when no article was stored for the ticker (no cards, or
        every card was incomplete), ``False`` otherwise.
    """
    print("Fetching Article")
    news_articles = soup.select("div.z4rs2b")
    if len(news_articles) == 0:
        print("No news found for {}".format(ticker))
        return True
    ticker_articles_counter = 0
    for link in news_articles:
        title_el = link.select_one("div.Yfwt5")
        date_el = link.select_one("div.Adak")
        source_el = link.select_one("div.sfyJob")
        anchor_el = link.select_one("a")
        # select_one yields None when a selector does not match the card.
        if any(el is None for el in (title_el, date_el, source_el, anchor_el)):
            print("Skipping incomplete article card for {}".format(ticker))
            continue
        art_title = title_el.text.replace("\n", "")
        date_posted = date_el.text
        source = source_el.text
        article_link = anchor_el.get("href")
        article_data.append([ticker, art_title, date_posted, source, article_link])
        ticker_articles_counter += 1
    print("No of articles: {}".format(ticker_articles_counter))
    return ticker_articles_counter == 0

In [13]:
def ticker_meta_fetch(ticker, meta):
    """Extract sector, industry, market cap and company name from ``meta``.

    Fields are read in that order. The first field that cannot be read is
    reported, and it and every later field are stored as NaN; fields read
    before the failure are kept. Exactly one row
    ``[ticker, sector, industry, mCap, companyName]`` is appended to the
    module-level ``ticker_meta`` list per call.

    Args:
        ticker: Symbol stored in the first column of the row.
        meta: Nested mapping with ``industryInfo``, ``priceInfo``,
            ``securityInfo`` and ``info`` entries.

    Returns:
        ``True`` if any field was unavailable, ``False`` if all were read.
    """
    print("Fetching meta")
    # (label used in the message, extractor) in the order the fields are stored.
    extractors = (
        ("sector info", lambda m: m["industryInfo"]["macro"]),
        ("industry info", lambda m: m["industryInfo"]["industry"]),
        (
            "mCap data",
            # Market cap rounded off to billions.
            lambda m: round(
                (m["priceInfo"]["previousClose"] * m["securityInfo"]["issuedSize"])
                / 1000000000,
                2,
            ),
        ),
        ("company Name", lambda m: m["info"]["companyName"]),
    )
    values = [np.nan] * len(extractors)
    unavailable = False
    for position, (label, extract) in enumerate(extractors):
        try:
            values[position] = extract(meta)
        except (KeyError, TypeError):
            # TypeError covers a non-subscriptable payload (e.g. None) and
            # null price/size values that cannot be multiplied.
            print("{} {} is not available".format(ticker, label))
            unavailable = True
            break
    ticker_meta.append([ticker] + values)
    return unavailable

In [15]:
def process_tickers(ticker):
    """Run the fetch, article and metadata steps for a single ticker.

    The ticker is added to ``unavailable_tickers`` when the article step
    reports that nothing was found (the metadata step is then skipped), or
    when the metadata step reports a problem.
    """
    ticker, soup, meta = get_url_content(ticker)
    if not ticker_article_fetch(ticker, soup):
        if ticker_meta_fetch(ticker, meta):
            unavailable_tickers.append(ticker)
        return
    unavailable_tickers.append(ticker)
    print("skipping meta check for {}".format(ticker))

In [16]:
# Try the full pipeline on a single ticker before running the whole list.
process_tickers("SBIN:NSE")

Fetching data for SBIN:NSE from https://www.google.com/finance/quote/SBIN:NSE
Fetching Article
No of articles: 6
Fetching meta


In [17]:
# Append the ":NSE" exchange suffix to every symbol. The list is rebuilt from
# tickers_df rather than modified with `+=`, and symbols that already carry
# the suffix are left alone, so re-running this cell cannot produce
# "XYZ:NSE:NSE".
_bare_symbols = tickers_df["Symbol"].astype(str)
tickers_list = _bare_symbols.where(
    _bare_symbols.str.endswith(":NSE"), _bare_symbols + ":NSE"
)

In [36]:
# Fetch page + metadata for every ticker on a small thread pool, then parse
# each result in the main thread as soon as its fetch completes.
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    results = [executor.submit(get_url_content, ticker) for ticker in tickers_list]
    # as_completed yields futures in completion order, so keep track of which
    # ticker each future was submitted for.
    ticker_by_future = dict(zip(results, tickers_list))
    for i, future in enumerate(concurrent.futures.as_completed(results)):
        try:
            ticker, soup, meta = future.result()
        except Exception as exc:
            # future.result() re-raises whatever the worker raised; report it
            # and mark the ticker unavailable instead of aborting the run.
            failed_ticker = ticker_by_future[future]
            print("Fetch failed for {}: {}".format(failed_ticker, exc))
            unavailable_tickers.append(failed_ticker)
            continue
        print(i, ticker)
        ticker_article_response = ticker_article_fetch(ticker, soup)
        if ticker_article_response:
            unavailable_tickers.append(ticker)
            print("skipping meta check for {}".format(ticker))
            continue
        ticker_meta_response = ticker_meta_fetch(ticker, meta)
        if ticker_meta_response:
            unavailable_tickers.append(ticker)
# NOTE(review): no matching start timestamp is visible in this notebook, so
# end_time alone cannot be turned into an elapsed duration.
end_time = time.time()

100%|██████████| 50/50 [00:00<00:00, 14706.54it/s]


Fetching data for ADANIENT:NSE from https://www.google.com/finance/quote/ADANIENT:NSEFetching data for ADANIPORTS:NSE from https://www.google.com/finance/quote/ADANIPORTS:NSE
Fetching data for APOLLOHOSP:NSE from https://www.google.com/finance/quote/APOLLOHOSP:NSE

Fetching data for ASIANPAINT:NSE from https://www.google.com/finance/quote/ASIANPAINT:NSE
Fetching data for AXISBANK:NSE from https://www.google.com/finance/quote/AXISBANK:NSE0 ADANIPORTS:NSE
Fetching Article

No of articles: 6
Fetching meta
Fetching data for BAJAJ-AUTO:NSE from https://www.google.com/finance/quote/BAJAJ-AUTO:NSE
1 APOLLOHOSP:NSE
Fetching Article
No of articles: 1
Fetching meta
Fetching data for BAJFINANCE:NSE from https://www.google.com/finance/quote/BAJFINANCE:NSE2 ADANIENT:NSE
Fetching Article

No of articles: 6
Fetching meta
Fetching data for BAJAJFINSV:NSE from https://www.google.com/finance/quote/BAJAJFINSV:NSE
3 ASIANPAINT:NSE
Fetching Article
No of articles: 3
Fetching meta
Fetching data for BPCL:NSE

In [23]:
# Sequential run: process every ticker in tickers_list one at a time.
for ticker in tickers_list:
    process_tickers(ticker)

Fetching data for ADANIENT:NSE from https://www.google.com/finance/quote/ADANIENT:NSE
Fetching Article
No of articles: 6
Fetching meta
Fetching data for ADANIPORTS:NSE from https://www.google.com/finance/quote/ADANIPORTS:NSE
Fetching Article
No of articles: 6
Fetching meta
Fetching data for APOLLOHOSP:NSE from https://www.google.com/finance/quote/APOLLOHOSP:NSE
Fetching Article
No of articles: 2
Fetching meta
Fetching data for ASIANPAINT:NSE from https://www.google.com/finance/quote/ASIANPAINT:NSE
Fetching Article
No of articles: 3
Fetching meta
Fetching data for AXISBANK:NSE from https://www.google.com/finance/quote/AXISBANK:NSE
Fetching Article
No of articles: 6
Fetching meta
Fetching data for BAJAJ-AUTO:NSE from https://www.google.com/finance/quote/BAJAJ-AUTO:NSE
Fetching Article
No of articles: 4
Fetching meta
Fetching data for BAJFINANCE:NSE from https://www.google.com/finance/quote/BAJFINANCE:NSE
Fetching Article
No of articles: 2
Fetching meta
Fetching data for BAJAJFINSV:NSE fr

In [24]:
# Tickers recorded as unavailable during processing.
unavailable_tickers

[]

In [26]:
# Assemble the scraped article rows into a DataFrame and display it.
article_columns = ["Ticker", "Headline", "Date", "Source", "Link"]
articles_df = pd.DataFrame(data=article_data, columns=article_columns)
articles_df

Unnamed: 0,Ticker,Headline,Date,Source,Link
0,SBIN:NSE,SBI Life Q1 Results: Net profit rises 36% YoY ...,17 hours ago,The Economic Times,https://m.economictimes.com/markets/stocks/ear...
1,SBIN:NSE,Sbi Share Price Live blog for 25 Jul 2024,1 hour ago,Livemint,https://www.livemint.com/market/live-blog/sbi-...
2,SBIN:NSE,"SBI, FCDO UK Ink MoU For ₹8,100 Cr Investment",16 hours ago,Benzinga,https://in.benzinga.com/content/39927673/sbi-f...
3,SBIN:NSE,SBI Recruitment 2024: New Notification Out for...,22 hours ago,Studycafe,https://studycafe.in/sbi-recruitment-2024-new-...
4,SBIN:NSE,Budget 2024: Finance minister’s focus on rural...,1 day ago,CNBC TV18,https://www.cnbctv18.com/economy/budget-2024-f...
...,...,...,...,...,...
208,ULTRACEMCO:NSE,"Accumulate UltraTech Cement; target of Rs 12,1...",18 hours ago,www.tradingview.com,https://www.tradingview.com/news/moneycontrol:...
209,WIPRO:NSE,"Wipro goes for a tumble, may stay weak in near...",2 days ago,The Economic Times,https://m.economictimes.com/markets/stocks/new...
210,WIPRO:NSE,Wipro fails to show revival signs; may underpe...,2 days ago,Business Standard,https://www.business-standard.com/markets/news...
211,WIPRO:NSE,"Reliance, Wipro weigh on Indian shares amid vo...",2 days ago,The Hindu,https://www.thehindu.com/business/markets/reli...


In [27]:
# Assemble the per-ticker metadata rows into a DataFrame and display it.
meta_columns = ["Ticker", "Sector", "Industry", "Market Cap", "Company Name"]
ticker_meta_df = pd.DataFrame(data=ticker_meta, columns=meta_columns)
ticker_meta_df

Unnamed: 0,Ticker,Sector,Industry,Market Cap,Company Name
0,SBIN:NSE,Financial Services,Banks,7603.77,State Bank of India
1,ADANIENT:NSE,Commodities,Metals & Minerals Trading,3386.6,Adani Enterprises Limited
2,ADANIPORTS:NSE,Services,Transport Infrastructure,3207.37,Adani Ports and Special Economic Zone Limited
3,APOLLOHOSP:NSE,Healthcare,Healthcare Services,923.47,Apollo Hospitals Enterprise Limited
4,ASIANPAINT:NSE,Consumer Discretionary,Consumer Durables,2793.95,Asian Paints Limited
5,AXISBANK:NSE,Financial Services,Banks,3830.48,Axis Bank Limited
6,BAJAJ-AUTO:NSE,Consumer Discretionary,Automobiles,2585.26,Bajaj Auto Limited
7,BAJFINANCE:NSE,Financial Services,Finance,4089.8,Bajaj Finance Limited
8,BAJAJFINSV:NSE,Financial Services,Finance,2521.53,Bajaj Finserv Limited
9,BPCL:NSE,Energy,Petroleum Products,1366.41,Bharat Petroleum Corporation Limited


In [28]:
# Sentiment analysis of the scraped headlines.
# NOTE(review): no import of SentimentIntensityAnalyzer is visible in this
# notebook - confirm where it comes from.
print("Performing Sentiment Analysis")
vader = SentimentIntensityAnalyzer()

# Load the custom word -> score table and install it as the analyzer's
# lexicon (this replaces the lexicon attribute rather than extending it).
lex_fin = pd.read_csv("./datasets/lexicon_dictionary.csv")
lex_dict = dict(zip(lex_fin["word"], lex_fin["sentiment_score"]))
vader.lexicon = lex_dict

# Score every headline in articles_df. Each call returns a dict of scores, so
# the list of dicts becomes one row per headline.
headline_scores = [
    vader.polarity_scores(headline) for headline in articles_df["Headline"]
]
art_scores_df = pd.DataFrame(headline_scores)

Performing Sentiment Analysis


In [29]:
# Per-headline sentiment scores, one row per headline.
art_scores_df

Unnamed: 0,neg,neu,pos,compound
0,0.157,0.515,0.327,0.3168
1,0.256,0.298,0.447,-0.0191
2,0.094,0.448,0.458,0.4703
3,0.238,0.394,0.368,0.3217
4,0.224,0.469,0.307,0.1503
...,...,...,...,...
208,0.219,0.601,0.181,0.1588
209,0.463,0.162,0.376,-0.2676
210,0.471,0.367,0.162,-0.4967
211,0.555,0.135,0.311,0.0974


In [30]:
# Attach the sentiment scores to their articles. The join is on the row index,
# so both frames must share the same index.
# NOTE(review): art_scores_df is overwritten with the merged frame, so
# re-running this cell merges the already-merged frame again.
art_scores_df = articles_df.merge(art_scores_df, left_index=True, right_index=True)

In [31]:
# Articles with their sentiment score columns attached.
art_scores_df

Unnamed: 0,Ticker,Headline,Date,Source,Link,neg,neu,pos,compound
0,SBIN:NSE,SBI Life Q1 Results: Net profit rises 36% YoY ...,17 hours ago,The Economic Times,https://m.economictimes.com/markets/stocks/ear...,0.157,0.515,0.327,0.3168
1,SBIN:NSE,Sbi Share Price Live blog for 25 Jul 2024,1 hour ago,Livemint,https://www.livemint.com/market/live-blog/sbi-...,0.256,0.298,0.447,-0.0191
2,SBIN:NSE,"SBI, FCDO UK Ink MoU For ₹8,100 Cr Investment",16 hours ago,Benzinga,https://in.benzinga.com/content/39927673/sbi-f...,0.094,0.448,0.458,0.4703
3,SBIN:NSE,SBI Recruitment 2024: New Notification Out for...,22 hours ago,Studycafe,https://studycafe.in/sbi-recruitment-2024-new-...,0.238,0.394,0.368,0.3217
4,SBIN:NSE,Budget 2024: Finance minister’s focus on rural...,1 day ago,CNBC TV18,https://www.cnbctv18.com/economy/budget-2024-f...,0.224,0.469,0.307,0.1503
...,...,...,...,...,...,...,...,...,...
208,ULTRACEMCO:NSE,"Accumulate UltraTech Cement; target of Rs 12,1...",18 hours ago,www.tradingview.com,https://www.tradingview.com/news/moneycontrol:...,0.219,0.601,0.181,0.1588
209,WIPRO:NSE,"Wipro goes for a tumble, may stay weak in near...",2 days ago,The Economic Times,https://m.economictimes.com/markets/stocks/new...,0.463,0.162,0.376,-0.2676
210,WIPRO:NSE,Wipro fails to show revival signs; may underpe...,2 days ago,Business Standard,https://www.business-standard.com/markets/news...,0.471,0.367,0.162,-0.4967
211,WIPRO:NSE,"Reliance, Wipro weigh on Indian shares amid vo...",2 days ago,The Hindu,https://www.thehindu.com/business/markets/reli...,0.555,0.135,0.311,0.0974
