In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from pprint import pprint

In [2]:
# NIFTY URLS: constituents CSV of each NIFTY index, largest universe first
(
    nifty_500_ticker_url,
    nifty_200_ticker_url,
    nifty_100_ticker_url,
    nifty_50_ticker_url,
) = (
    'https://www1.nseindia.com/content/indices/ind_nifty500list.csv',
    'https://www1.nseindia.com/content/indices/ind_nifty200list.csv',
    'https://www1.nseindia.com/content/indices/ind_nifty100list.csv',
    'https://www1.nseindia.com/content/indices/ind_nifty50list.csv',
)

In [3]:
# Set universe: pick which of the NIFTY constituents CSV URLs defined above is analysed
universe = nifty_50_ticker_url

In [4]:
# Download the constituents CSV of the chosen universe and keep only the
# columns the rest of the notebook needs.
tickers_file = pd.read_csv(universe)
wanted_columns = ['Symbol', 'Company Name']
tickers_df = tickers_file[wanted_columns]
# Series of ticker symbols that the scraping loop iterates over
tickers_list = tickers_df.loc[:, 'Symbol']

In [5]:
# News URL: base address of the Finology Ticker company pages. The scraping
# loop appends a ticker symbol to it to build each company's page URL.
news_url = 'https://ticker.finology.in/company/'

In [6]:
# Fetch all available news articles for every ticker in the universe

In [7]:
# Scrape the news headlines listed on each ticker's Finology company page.
# Results accumulate in two module-level lists that later cells read:
#   article_data        -> rows of [ticker, headline, 'YYYY/MM/DD', 'HH:MM']
#   unavailable_tickers -> tickers for which no article could be collected

# list to store article data
article_data = []
# list to store tickers for which data is unavailable
unavailable_tickers = []
# length of companies
companies_len = len(tickers_list)
tickers_length = companies_len
# The request header is loop-invariant, so build it once.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
# news_url already ends with '/'; strip it so the join below does not produce 'company//TICKER'.
company_base_url = news_url.rstrip('/')
# Recency filter is currently disabled; to restore it, compare date_time_obj against
# datetime.now() - timedelta(days=30) (needs `from datetime import timedelta`).
print('Fetching Article data..')
for i, ticker in enumerate(tickers_list):
    print(i, ticker)
    url = '{}/{}'.format(company_base_url, ticker)
    try:
        # Without a timeout a single stalled connection would hang the whole run.
        response = requests.get(url, headers=header, timeout=30)
    except requests.RequestException as err:
        # Best effort: record the ticker and carry on with the remaining ones.
        print('Request failed for {}: {}'.format(ticker, err))
        unavailable_tickers.append(ticker)
        continue
    html = BeautifulSoup(response.content, 'lxml')
    news_links = html.select('#newsarticles > a')
    if len(news_links) == 0:
        print('No news found for {}'.format(ticker))
        unavailable_tickers.append(ticker)
        continue
    # for tickers which are not recognized by finology website, it returns home-page.
    # There's also a news section on homepage, so for unrecognized tickers this loop
    # will scrape general financial news instead of ticker specific news.
    # var to store article count
    ticker_articles_counter = 0
    for link in news_links:
        title_tag = link.find('span', class_='h6')
        stamp_tag = link.find('small')
        if title_tag is None or stamp_tag is None:
            # Entry does not have the expected layout; skip it instead of failing on None.text
            continue
        art_title = title_tag.text
        try:
            # separate date and time from datetime object
            date_time_obj = datetime.strptime(stamp_tag.text, '%d %b %Y, %I:%M%p')
        except ValueError:
            print('Skipping article with unparseable timestamp for {}: {!r}'.format(ticker, stamp_tag.text))
            continue
        art_date = date_time_obj.date().strftime('%Y/%m/%d')
        art_time = date_time_obj.time().strftime('%H:%M')
        article_data.append([ticker, art_title, art_date, art_time])
        ticker_articles_counter += 1
    # Links were present but none produced a usable article row.
    if ticker_articles_counter == 0:
        unavailable_tickers.append(ticker)

Fetching Article data..
0 ADANIENT
1 ADANIPORTS
2 APOLLOHOSP
3 ASIANPAINT
4 AXISBANK
5 BAJAJ-AUTO
6 BAJFINANCE
7 BAJAJFINSV
8 BPCL
9 BHARTIARTL
10 BRITANNIA
11 CIPLA
12 COALINDIA
13 DIVISLAB
14 DRREDDY
15 EICHERMOT
16 GRASIM
17 HCLTECH
18 HDFCBANK
19 HDFCLIFE
20 HEROMOTOCO
21 HINDALCO
22 HINDUNILVR
23 HDFC
24 ICICIBANK
25 ITC
26 INDUSINDBK
27 INFY
28 JSWSTEEL
29 KOTAKBANK
30 LT
31 M&M
No news found for M&M
32 MARUTI
33 NTPC
34 NESTLEIND
35 ONGC
36 POWERGRID
37 RELIANCE
38 SBILIFE
39 SBIN
40 SUNPHARMA
41 TCS
42 TATACONSUM
43 TATAMOTORS
44 TATASTEEL
45 TECHM
46 TITAN
47 UPL
48 ULTRACEMCO
49 WIPRO


In [8]:
# Tickers for which the scraping loop collected no articles
unavailable_tickers

['M&M']

In [9]:
# Tabulate the scraped rows; the column order mirrors the row layout
# [ticker, headline, date, time] appended by the scraping loop.
news_columns = ['Ticker', 'Headline', 'Date', 'Time']
all_news_df = pd.DataFrame(data=article_data, columns=news_columns)
all_news_df

Unnamed: 0,Ticker,Headline,Date,Time
0,ADANIENT,Adani group planning to spin off businesses of...,2023/01/23,11:11
1,ADANIENT,Adani Enterprises’ arm incorporates JV Company,2023/01/21,17:16
2,ADANIENT,Adani Enterprises planning to enter water segment,2023/01/20,11:06
3,ADANIENT,"Adani Enterprises files for Rs 20,000 crore FPO",2023/01/19,11:27
4,ADANIENT,Adani Enterprises inks pact with Ashok Leyland...,2023/01/18,10:51
...,...,...,...,...
2428,WIPRO,Wipro enters into partnership with Finastra,2022/09/20,17:58
2429,WIPRO,Wipro informs about press release,2022/09/20,09:37
2430,WIPRO,Wipro launches @nowStudio in Brazil,2022/09/07,16:30
2431,WIPRO,Wipro expands collaboration with Palo Alto Net...,2022/09/06,18:04


In [10]:
# Rebind tickers_list (until now the full 'Symbol' column) to only the tickers
# that appear in the scraped articles table.
tickers_list = all_news_df['Ticker'].unique()
tickers_list

array(['ADANIENT', 'ADANIPORTS', 'APOLLOHOSP', 'ASIANPAINT', 'AXISBANK',
       'BAJAJ-AUTO', 'BAJFINANCE', 'BAJAJFINSV', 'BPCL', 'BHARTIARTL',
       'BRITANNIA', 'CIPLA', 'COALINDIA', 'DIVISLAB', 'DRREDDY',
       'EICHERMOT', 'GRASIM', 'HCLTECH', 'HDFCBANK', 'HDFCLIFE',
       'HEROMOTOCO', 'HINDALCO', 'HINDUNILVR', 'HDFC', 'ICICIBANK', 'ITC',
       'INDUSINDBK', 'INFY', 'JSWSTEEL', 'KOTAKBANK', 'LT', 'MARUTI',
       'NTPC', 'NESTLEIND', 'ONGC', 'POWERGRID', 'RELIANCE', 'SBILIFE',
       'SBIN', 'SUNPHARMA', 'TCS', 'TATACONSUM', 'TATAMOTORS',
       'TATASTEEL', 'TECHM', 'TITAN', 'UPL', 'ULTRACEMCO', 'WIPRO'],
      dtype=object)

In [15]:
# Keep only the companies whose symbol is present in tickers_list, and renumber the rows from 0.
has_news = tickers_df['Symbol'].isin(tickers_list)
tickers_df = tickers_df.loc[has_news].reset_index(drop=True)

Unnamed: 0,Symbol,Company Name
0,ADANIENT,Adani Enterprises Ltd.
1,ADANIPORTS,Adani Ports and Special Economic Zone Ltd.
2,APOLLOHOSP,Apollo Hospitals Enterprise Ltd.
3,ASIANPAINT,Asian Paints Ltd.
4,AXISBANK,Axis Bank Ltd.
5,BAJAJ-AUTO,Bajaj Auto Ltd.
6,BAJFINANCE,Bajaj Finance Ltd.
7,BAJAJFINSV,Bajaj Finserv Ltd.
8,BPCL,Bharat Petroleum Corporation Ltd.
9,BHARTIARTL,Bharti Airtel Ltd.


In [24]:
# Articles table used as input for the sentiment scoring: one row per scraped headline.
article_columns = ['Ticker', 'Headline', 'Date', 'Time']
articles_df = pd.DataFrame(data=article_data, columns=article_columns)

In [25]:
# Preview the scraped articles table
articles_df

Unnamed: 0,Ticker,Headline,Date,Time
0,ADANIENT,Adani group planning to spin off businesses of...,2023/01/23,11:11
1,ADANIENT,Adani Enterprises’ arm incorporates JV Company,2023/01/21,17:16
2,ADANIENT,Adani Enterprises planning to enter water segment,2023/01/20,11:06
3,ADANIENT,"Adani Enterprises files for Rs 20,000 crore FPO",2023/01/19,11:27
4,ADANIENT,Adani Enterprises inks pact with Ashok Leyland...,2023/01/18,10:51
...,...,...,...,...
2428,WIPRO,Wipro enters into partnership with Finastra,2022/09/20,17:58
2429,WIPRO,Wipro informs about press release,2022/09/20,09:37
2430,WIPRO,Wipro launches @nowStudio in Brazil,2022/09/07,16:30
2431,WIPRO,Wipro expands collaboration with Palo Alto Net...,2022/09/06,18:04


In [18]:
# sentiment analysis libraries
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Fetch the VADER lexicon data package through the NLTK downloader
nltk.downloader.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\singh\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [19]:
# VADER analyzer instance; its polarity_scores method is applied to every headline below
vader = SentimentIntensityAnalyzer()

In [26]:
# Score every headline with VADER, then tabulate the per-headline score dicts
# (one column per score key, one row per headline).
headline_scores = [vader.polarity_scores(headline) for headline in articles_df['Headline']]
art_scores_df = pd.DataFrame(headline_scores)

In [29]:
# Preview the per-headline VADER scores
art_scores_df

Unnamed: 0,neg,neu,pos,compound
0,0.0,1.000,0.000,0.0000
1,0.0,1.000,0.000,0.0000
2,0.0,1.000,0.000,0.0000
3,0.0,1.000,0.000,0.0000
4,0.0,1.000,0.000,0.0000
...,...,...,...,...
2428,0.0,1.000,0.000,0.0000
2429,0.0,1.000,0.000,0.0000
2430,0.0,1.000,0.000,0.0000
2431,0.0,0.811,0.189,0.1027


In [30]:
# Show the articles table that the scores are merged into in the next cell
articles_df

Unnamed: 0,Ticker,Headline,Date,Time
0,ADANIENT,Adani group planning to spin off businesses of...,2023/01/23,11:11
1,ADANIENT,Adani Enterprises’ arm incorporates JV Company,2023/01/21,17:16
2,ADANIENT,Adani Enterprises planning to enter water segment,2023/01/20,11:06
3,ADANIENT,"Adani Enterprises files for Rs 20,000 crore FPO",2023/01/19,11:27
4,ADANIENT,Adani Enterprises inks pact with Ashok Leyland...,2023/01/18,10:51
...,...,...,...,...
2428,WIPRO,Wipro enters into partnership with Finastra,2022/09/20,17:58
2429,WIPRO,Wipro informs about press release,2022/09/20,09:37
2430,WIPRO,Wipro launches @nowStudio in Brazil,2022/09/07,16:30
2431,WIPRO,Wipro expands collaboration with Palo Alto Net...,2022/09/06,18:04


In [33]:
# Attach the VADER scores to their articles by matching row index.
# Only the four score columns are taken from art_scores_df: this cell rebinds
# art_scores_df to the merged frame, so without the selection a second run would
# merge the already-merged frame again and yield suffixed duplicates
# (Ticker_x / Ticker_y, neg_x / neg_y, ...). With it, re-running is idempotent.
score_columns = ['neg', 'neu', 'pos', 'compound']
art_scores_df = pd.merge(articles_df, art_scores_df[score_columns], left_index=True, right_index=True)

Unnamed: 0,Ticker,Headline,Date,Time,neg,neu,pos,compound
0,ADANIENT,Adani group planning to spin off businesses of...,2023/01/23,11:11,0.0,1.000,0.000,0.0000
1,ADANIENT,Adani Enterprises’ arm incorporates JV Company,2023/01/21,17:16,0.0,1.000,0.000,0.0000
2,ADANIENT,Adani Enterprises planning to enter water segment,2023/01/20,11:06,0.0,1.000,0.000,0.0000
3,ADANIENT,"Adani Enterprises files for Rs 20,000 crore FPO",2023/01/19,11:27,0.0,1.000,0.000,0.0000
4,ADANIENT,Adani Enterprises inks pact with Ashok Leyland...,2023/01/18,10:51,0.0,1.000,0.000,0.0000
...,...,...,...,...,...,...,...,...
2428,WIPRO,Wipro enters into partnership with Finastra,2022/09/20,17:58,0.0,1.000,0.000,0.0000
2429,WIPRO,Wipro informs about press release,2022/09/20,09:37,0.0,1.000,0.000,0.0000
2430,WIPRO,Wipro launches @nowStudio in Brazil,2022/09/07,16:30,0.0,1.000,0.000,0.0000
2431,WIPRO,Wipro expands collaboration with Palo Alto Net...,2022/09/06,18:04,0.0,0.811,0.189,0.1027


--------------------------

In [None]:
# Ad-hoc list of ticker symbols used to exercise the scraping loop in the cells below
test_tickers = ['3MINDIA', 'APLAPOLLO', 'ABBOTINDIA', 'ABSLAMC', 'ALKYLAMINE', 'AMARAJABAT', 'AMBER', 'APOLLOTYRE', 'APTUS', 'ASTRAZEN', 'BEML', 'BAJAJELEC', 'BALKRISIND', 'BATAINDIA', 'BAYERCROP', 'BERGEPAINT', 'BHARATRAS', 'BLUEDART', 'BRITANNIA', 'MAPMYINDIA', 'CESC', 'CAMPUS', 'CEATLTD', 'CDSL', 'CENTURYPLY', 'CHOLAHLDNG', 'COLPAL', 'CUMMINSIND', 'DEVYANI', 'DIVISLAB', 'DIXON', 'EIHOTEL', 'EPL', 'EMAMILTD', 'NYKAA', 'FACT', 'FINEORG', 'GAIL', 'GICRE', 'GODFRYPHLP', 'GUJALKALI', 'GUJGASLTD', 'GSPL', 'HLEGLAS', 'HIKAL', 'HGS', 'HAL', 'HONAUT', 'HUDCO', 'IFBIND', 'INDIACEM', 'IBULHSGFIN', 'IRFC', 'IGL', 'IPCALAB', 'JBCHEPHARM', 'KNRCON', 'KPITTECH', 'KANSAINER', 'L&TFH', 'LAXMIMACH', 'LUXIND', 'MRF', 'MGL', 'M&MFIN', 'M&M', 'MAHLOG', 'MAZDOCK', 'MEDPLUS', 'METROPOLIS', 'MSUMI', 'MUTHOOTFIN', 'NLCINDIA', 'NATIONALUM', 'NAVINFLUOR', 'NESTLEIND', 'NAM-INDIA', 'NUVOCO', 'OBEROIRLTY', 'OIL', 'OLECTRA', 'PCBL', 'PATANJALI', 'PFIZER', 'POLYPLEX', 'PFC', 'PRESTIGE', 'PRINCEPIPE', 'PGHL', 'RAIN', 'RAINBOW', 'RATNAMANI', 'REDINGTON', 'RBA', 'SIS', 'SRF', 'SHILPAMED', 'SAIL', 'SUMICHEM', 'SUNTV', 'SUNDARMFIN', 'SUPREMEIND', 'SYMPHONY', 'TCNSBRANDS', 'TTML', 'TEJASNET', 'THERMAX', 'THYROCARE', 'TORNTPHARM', 'TIINDIA', 'UPL', 'UTIAMC', 'VIPIND', 'VAIBHAVGBL', 'VBL', 'MANYAVAR', 'VIJAYA', 'VOLTAS', 'WHIRLPOOL', 'WOCKPHARMA', 'ZFCVINDIA', 'ZEEL', 'ZENSARTECH', 'ZOMATO', 'ECLERX']

In [None]:
# Number of tickers in the test list
len(test_tickers)

In [None]:
# Scraping run over the ad-hoc test_tickers list.
# NOTE: this cell rebinds article_data and unavailable_tickers, replacing the
# results collected for the main universe earlier in the notebook.
news_url = 'https://ticker.finology.in/company/'
# list to store article data
article_data = []
# list to store tickers for which data is unavailable
unavailable_tickers = []
# length of companies
companies_len = len(test_tickers)
tickers_length = companies_len
# The request header is loop-invariant, so build it once.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
# news_url already ends with '/'; strip it so the join below does not produce 'company//TICKER'.
company_base_url = news_url.rstrip('/')
# Recency filter is currently disabled; to restore it, compare date_time_obj against
# datetime.now() - timedelta(days=30) (needs `from datetime import timedelta`).
print('Fetching Article data..')
for i, ticker in enumerate(test_tickers):
    print(i, ticker)
    url = '{}/{}'.format(company_base_url, ticker)
    try:
        # Without a timeout a single stalled connection would hang the whole run.
        response = requests.get(url, headers=header, timeout=30)
    except requests.RequestException as err:
        # Best effort: record the ticker and carry on with the remaining ones.
        print('Request failed for {}: {}'.format(ticker, err))
        unavailable_tickers.append(ticker)
        continue
    html = BeautifulSoup(response.content, 'lxml')
    news_links = html.select('#newsarticles > a')
    if len(news_links) == 0:
        print('No news found for {}'.format(ticker))
        unavailable_tickers.append(ticker)
        continue
    # for tickers which are not recognized by finology website, it returns home-page.
    # There's also a news section on homepage, so for unrecognized tickers this loop
    # will scrape general financial news instead of ticker specific news.
    # var to store article count
    ticker_articles_counter = 0
    for link in news_links:
        title_tag = link.find('span', class_='h6')
        stamp_tag = link.find('small')
        if title_tag is None or stamp_tag is None:
            # Entry does not have the expected layout; skip it instead of failing on None.text
            continue
        art_title = title_tag.text
        try:
            # `datetime` here is the class imported via `from datetime import datetime`,
            # so strptime is called on it directly (datetime.datetime would raise AttributeError).
            date_time_obj = datetime.strptime(stamp_tag.text, '%d %b %Y, %I:%M%p')
        except ValueError:
            print('Skipping article with unparseable timestamp for {}: {!r}'.format(ticker, stamp_tag.text))
            continue
        art_date = date_time_obj.date().strftime('%Y/%m/%d')
        art_time = date_time_obj.time().strftime('%H:%M')
        article_data.append([ticker, art_title, art_date, art_time])
        ticker_articles_counter += 1
    # Links were present but none produced a usable article row.
    if ticker_articles_counter == 0:
        print('Article length = 0 for {}'.format(ticker))
        # Record the ticker from the list being iterated (test_tickers), not tickers_list.
        unavailable_tickers.append(ticker)

In [None]:
# Pretty-print every scraped [ticker, headline, date, time] row
pprint(article_data)

--------------------------

In [None]:
# Fetch and parse a single company page (EMAMILTD) to inspect its markup by hand.
url = 'https://ticker.finology.in/company/EMAMILTD'
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
# A timeout keeps the cell from hanging indefinitely if the server never answers.
response = requests.get(url, headers=header, timeout=30)
html = BeautifulSoup(response.content, 'lxml')

In [None]:
# Select the <a> elements that are direct children of the element with id 'newsarticles'
news_links = html.select('#newsarticles > a')

In [None]:
# Inspect the matched anchor elements
news_links