Imports

In [1]:
import pandas as pd
import numpy as np
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from pprint import pprint
from nsepython import *
import datetime
import time

Get Company Tickers from [NSE Website](https://www1.nseindia.com/content/indices/ind_nifty500list.csv)

In [2]:
nifty_500_ticker_url = 'https://www1.nseindia.com/content/indices/ind_nifty500list.csv'
nifty_50_ticker_url = 'https://www1.nseindia.com/content/indices/ind_nifty50list.csv'
tickers_file = pd.read_csv(nifty_50_ticker_url)
tickers_df = tickers_file[['Symbol', 'Company Name']]
tickers = tickers_df['Symbol']
tickers.head(n=10)

0      ADANIENT
1    ADANIPORTS
2    APOLLOHOSP
3    ASIANPAINT
4      AXISBANK
5    BAJAJ-AUTO
6    BAJFINANCE
7    BAJAJFINSV
8          BPCL
9    BHARTIARTL
Name: Symbol, dtype: object

Scrape Article Headlines and Dates

In [3]:
news_url = 'https://ticker.finology.in/company/'

In [4]:
# list to store article data
data = []
companies_len = len(tickers)
days_limit = datetime.datetime.now() - datetime.timedelta(days=30)
for i in range(10):
    print(i)
    req = Request(url= '{}/{}'.format(news_url, tickers[i]),headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    response = urlopen(req)
    html = BeautifulSoup(response) 
    news_links = html.select('.newslink')  
    for link in news_links:
        title = link.find('span', class_='h6').text
        #separate date and time from datetime object
        date_time_obj = datetime.datetime.strptime(link.find('small').text, '%d %b %Y, %I:%M%p')
        if (date_time_obj <= days_limit):
             continue
        art_date = date_time_obj.date().strftime('%Y/%m/%d')
        art_time = date_time_obj.time().strftime('%H:%M')
        data.append([tickers[i], title, art_date, art_time]) 
    '''if (i != 0 and i%200 == 0):
        print('sleeping')
        time.sleep(30)'''
df = pd.DataFrame(data, columns=['Ticker', 'Headline', 'Date', 'Time'])

0
1
2
3
4
5
6
7
8
9


In [5]:
xc_indices = pd.read_csv('XC-tickers.csv', header=0)

In [6]:
xc_indices.head()

Unnamed: 0,Ticker,Sector,Industry
0,AMARAJABAT,XC - CODI,XC - ANC
1,AMTEKAUTO,XC - CODI,XC - ANC
2,AMTEKINDIA,XC - CODI,XC - ANC
3,APOLLOTYRE,XC - CODI,XC - ANC
4,ASAHIINDIA,XC - CODI,XC - ANC


In [7]:
pprint(df)

        Ticker                                           Headline        Date  \
0     ADANIENT    Adani Enterprises files for Rs 20,000 crore FPO  2023/01/19   
1     ADANIENT  Adani Enterprises inks pact with Ashok Leyland...  2023/01/18   
2     ADANIENT  Adani Group planning to invest Rs 60,000 crore...  2023/01/12   
3     ADANIENT  Adani to pay additional amount for NDTV shares...  2023/01/04   
4     ADANIENT  Adani Enterprises informs about update on open...  2023/01/03   
..         ...                                                ...         ...   
59  BHARTIARTL          Bharti Airtel informs about press release  2022/12/22   
60  BHARTIARTL  Bharti Airtel launches cutting edge 5G service...  2022/12/22   
61  BHARTIARTL  Bharti Airtel launches cutting edge 5G service...  2022/12/21   
62  BHARTIARTL  Bharti Airtel launches cutting edge 5G service...  2022/12/21   
63  BHARTIARTL          Bharti Airtel informs about press release  2022/12/21   

     Time  
0   11:27  
1  

Sentiment Analysis

In [8]:
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\singh\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [9]:
vader = SentimentIntensityAnalyzer()
scores = df['Headline'].apply(vader.polarity_scores).tolist()
scores_df = pd.DataFrame(scores)

In [10]:
new_df = pd.merge(left=df, right=scores_df, on=df.index.values).drop(['key_0'], axis=1)

In [11]:
new_df

Unnamed: 0,Ticker,Headline,Date,Time,neg,neu,pos,compound
0,ADANIENT,"Adani Enterprises files for Rs 20,000 crore FPO",2023/01/19,11:27,0.000,1.000,0.000,0.0000
1,ADANIENT,Adani Enterprises inks pact with Ashok Leyland...,2023/01/18,10:51,0.000,1.000,0.000,0.0000
2,ADANIENT,"Adani Group planning to invest Rs 60,000 crore...",2023/01/12,11:30,0.000,1.000,0.000,0.0000
3,ADANIENT,Adani to pay additional amount for NDTV shares...,2023/01/04,16:31,0.096,0.753,0.151,0.2023
4,ADANIENT,Adani Enterprises informs about update on open...,2023/01/03,14:41,0.000,1.000,0.000,0.0000
...,...,...,...,...,...,...,...,...
59,BHARTIARTL,Bharti Airtel informs about press release,2022/12/22,12:02,0.000,1.000,0.000,0.0000
60,BHARTIARTL,Bharti Airtel launches cutting edge 5G service...,2022/12/22,11:59,0.158,0.842,0.000,-0.1280
61,BHARTIARTL,Bharti Airtel launches cutting edge 5G service...,2022/12/21,14:13,0.143,0.857,0.000,-0.1280
62,BHARTIARTL,Bharti Airtel launches cutting edge 5G service...,2022/12/21,12:17,0.158,0.842,0.000,-0.1280


In [12]:
final_df = new_df.groupby('Ticker').mean()

  final_df = new_df.groupby('Ticker').mean()


In [13]:
final_df

Unnamed: 0_level_0,neg,neu,pos,compound
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ADANIENT,0.0096,0.9753,0.0151,0.02023
ADANIPORTS,0.0,0.851,0.149,0.25954
APOLLOHOSP,0.133333,0.739333,0.127333,-0.0172
ASIANPAINT,0.0,0.9,0.1,0.18342
AXISBANK,0.0,0.926333,0.073667,0.167417
BAJAJ-AUTO,0.0,1.0,0.0,0.0
BAJAJFINSV,0.0965,0.7915,0.112,-0.04745
BAJFINANCE,0.0,0.776,0.224,0.3818
BHARTIARTL,0.054923,0.945077,0.0,-0.049231
BPCL,0.05,0.851,0.099,0.112725


Get Company Sector, industry data

In [14]:
sector = []
industry = []
mCap = []
companyName = []
for i in range(10):
    print(tickers[i])
    meta = nse_eq(tickers[i])
    print(tickers[i])
    sector.append(meta['industryInfo']['macro'])
    pprint('Sector: {}'.format(meta['industryInfo']['macro']))
    industry.append(meta['industryInfo']['sector'])
    pprint('Industry: {}'.format(meta['industryInfo']['sector']))
    ticker_mcap = round((meta['priceInfo']['previousClose'] * meta['securityInfo']['issuedSize'])/1000000000, 2)
    mCap.append(ticker_mcap)
    companyName.append(meta['info']['companyName'])
    print('market cap is Rs {}'.format(ticker_mcap))
    print('\n')

#final_df['sector'] = sector
#final_df['industry'] = industry
final_df['mCap (Billion)'] = mCap
final_df['Company Name'] = companyName

ADANIENT
ADANIENT
'Sector: Commodities'
'Industry: Metals & Mining'
market cap is Rs 4100.24


ADANIPORTS
ADANIPORTS
'Sector: Services'
'Industry: Services'
market cap is Rs 1698.95


APOLLOHOSP
APOLLOHOSP
'Sector: Healthcare'
'Industry: Healthcare'
market cap is Rs 624.0


ASIANPAINT
ASIANPAINT
'Sector: Consumer Discretionary'
'Industry: Consumer Durables'
market cap is Rs 2825.08


AXISBANK
AXISBANK
'Sector: Financial Services'
'Industry: Financial Services'
market cap is Rs 2841.69


BAJAJ-AUTO
BAJAJ-AUTO
'Sector: Consumer Discretionary'
'Industry: Automobile and Auto Components'
market cap is Rs 1021.45


BAJFINANCE
BAJFINANCE
'Sector: Financial Services'
'Industry: Financial Services'
market cap is Rs 3631.76


BAJAJFINSV
BAJAJFINSV
'Sector: Financial Services'
'Industry: Financial Services'
market cap is Rs 1306.71


BPCL
BPCL
'Sector: Energy'
'Industry: Oil Gas & Consumable Fuels'
market cap is Rs 750.67


BHARTIARTL
BHARTIARTL
'Sector: Telecommunication'
'Industry: Telecommunic

In [15]:
final_df = final_df.reset_index()

In [16]:
final_df

Unnamed: 0,Ticker,neg,neu,pos,compound,mCap (Billion),Company Name
0,ADANIENT,0.0096,0.9753,0.0151,0.02023,4100.24,Adani Enterprises Limited
1,ADANIPORTS,0.0,0.851,0.149,0.25954,1698.95,Adani Ports and Special Economic Zone Limited
2,APOLLOHOSP,0.133333,0.739333,0.127333,-0.0172,624.0,Apollo Hospitals Enterprise Limited
3,ASIANPAINT,0.0,0.9,0.1,0.18342,2825.08,Asian Paints Limited
4,AXISBANK,0.0,0.926333,0.073667,0.167417,2841.69,Axis Bank Limited
5,BAJAJ-AUTO,0.0,1.0,0.0,0.0,1021.45,Bajaj Auto Limited
6,BAJAJFINSV,0.0965,0.7915,0.112,-0.04745,3631.76,Bajaj Finance Limited
7,BAJFINANCE,0.0,0.776,0.224,0.3818,1306.71,Bajaj Finserv Limited
8,BHARTIARTL,0.054923,0.945077,0.0,-0.049231,750.67,Bharat Petroleum Corporation Limited
9,BPCL,0.05,0.851,0.099,0.112725,4327.54,Bharti Airtel Limited


In [17]:
final_df = pd.merge(final_df, xc_indices, left_on='Ticker', right_on='Ticker', how='inner')

In [18]:
final_df

Unnamed: 0,Ticker,neg,neu,pos,compound,mCap (Billion),Company Name,Sector,Industry
0,ADANIENT,0.0096,0.9753,0.0151,0.02023,4100.24,Adani Enterprises Limited,XC - INDS,XC - CAG
1,ADANIPORTS,0.0,0.851,0.149,0.25954,1698.95,Adani Ports and Special Economic Zone Limited,XC - INDS,XC - SHP
2,APOLLOHOSP,0.133333,0.739333,0.127333,-0.0172,624.0,Apollo Hospitals Enterprise Limited,XC - HECA,XC - HCS
3,ASIANPAINT,0.0,0.9,0.1,0.18342,2825.08,Asian Paints Limited,XC - COST,XC - HPP
4,AXISBANK,0.0,0.926333,0.073667,0.167417,2841.69,Axis Bank Limited,XC - FNCL,XC - PVT
5,BAJAJ-AUTO,0.0,1.0,0.0,0.0,1021.45,Bajaj Auto Limited,XC - CODI,XC - ATO
6,BAJAJFINSV,0.0965,0.7915,0.112,-0.04745,3631.76,Bajaj Finance Limited,XC - FNCL,XC - NFC
7,BAJFINANCE,0.0,0.776,0.224,0.3818,1306.71,Bajaj Finserv Limited,XC - FNCL,XC - NFC
8,BHARTIARTL,0.054923,0.945077,0.0,-0.049231,750.67,Bharat Petroleum Corporation Limited,XC - COSE,XC - TEL
9,BPCL,0.05,0.851,0.099,0.112725,4327.54,Bharti Airtel Limited,XC - ENRG,XC - REF


In [19]:
final_df.columns = ['Symbol', 'Negative', 'Neutral', 'Positive', 'Sentiment Score','MCap (Billion)', 'Company Name', 'Sector', 'Industry']

In [20]:
final_df

Unnamed: 0,Symbol,Negative,Neutral,Positive,Sentiment Score,MCap (Billion),Company Name,Sector,Industry
0,ADANIENT,0.0096,0.9753,0.0151,0.02023,4100.24,Adani Enterprises Limited,XC - INDS,XC - CAG
1,ADANIPORTS,0.0,0.851,0.149,0.25954,1698.95,Adani Ports and Special Economic Zone Limited,XC - INDS,XC - SHP
2,APOLLOHOSP,0.133333,0.739333,0.127333,-0.0172,624.0,Apollo Hospitals Enterprise Limited,XC - HECA,XC - HCS
3,ASIANPAINT,0.0,0.9,0.1,0.18342,2825.08,Asian Paints Limited,XC - COST,XC - HPP
4,AXISBANK,0.0,0.926333,0.073667,0.167417,2841.69,Axis Bank Limited,XC - FNCL,XC - PVT
5,BAJAJ-AUTO,0.0,1.0,0.0,0.0,1021.45,Bajaj Auto Limited,XC - CODI,XC - ATO
6,BAJAJFINSV,0.0965,0.7915,0.112,-0.04745,3631.76,Bajaj Finance Limited,XC - FNCL,XC - NFC
7,BAJFINANCE,0.0,0.776,0.224,0.3818,1306.71,Bajaj Finserv Limited,XC - FNCL,XC - NFC
8,BHARTIARTL,0.054923,0.945077,0.0,-0.049231,750.67,Bharat Petroleum Corporation Limited,XC - COSE,XC - TEL
9,BPCL,0.05,0.851,0.099,0.112725,4327.54,Bharti Airtel Limited,XC - ENRG,XC - REF


In [22]:
final_df.round(3)

Unnamed: 0,Symbol,Negative,Neutral,Positive,Sentiment Score,MCap (Billion),Company Name,Sector,Industry
0,ADANIENT,0.01,0.975,0.015,0.02,4100.24,Adani Enterprises Limited,XC - INDS,XC - CAG
1,ADANIPORTS,0.0,0.851,0.149,0.26,1698.95,Adani Ports and Special Economic Zone Limited,XC - INDS,XC - SHP
2,APOLLOHOSP,0.133,0.739,0.127,-0.017,624.0,Apollo Hospitals Enterprise Limited,XC - HECA,XC - HCS
3,ASIANPAINT,0.0,0.9,0.1,0.183,2825.08,Asian Paints Limited,XC - COST,XC - HPP
4,AXISBANK,0.0,0.926,0.074,0.167,2841.69,Axis Bank Limited,XC - FNCL,XC - PVT
5,BAJAJ-AUTO,0.0,1.0,0.0,0.0,1021.45,Bajaj Auto Limited,XC - CODI,XC - ATO
6,BAJAJFINSV,0.096,0.792,0.112,-0.047,3631.76,Bajaj Finance Limited,XC - FNCL,XC - NFC
7,BAJFINANCE,0.0,0.776,0.224,0.382,1306.71,Bajaj Finserv Limited,XC - FNCL,XC - NFC
8,BHARTIARTL,0.055,0.945,0.0,-0.049,750.67,Bharat Petroleum Corporation Limited,XC - COSE,XC - TEL
9,BPCL,0.05,0.851,0.099,0.113,4327.54,Bharti Airtel Limited,XC - ENRG,XC - REF


Plotting

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import plotly
import plotly.express as px

In [None]:
fig = px.treemap(
    final_df, path=[px.Constant('Nifty 500'), 'Sector', 'Industry', 'Symbol'], values='MCap (Billion)', color='Sentiment Score',
    hover_data=['Company Name', 'Negative', 'Neutral', 'Positive', 'Sentiment Score'], color_continuous_scale=['#FF0000', "#000000", '#00FF00'], color_continuous_midpoint=0
    )
fig.data[0].customdata = final_df[['Company Name', 'Negative', 'Neutral', 'Positive', 'Sentiment Score']]
fig.data[0].texttemplate = "%{label}<br>%{customdata[4]}"
fig.update_traces(textposition="middle center")
fig.update_layout(margin = dict(t=30, l=10, r=10, b=10), font_size=20)
fig.show()

In [None]:
'''
req = Request(url= '{}/{}'.format(news_url, tickers[0]),headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
response = urlopen(req)
html = BeautifulSoup(response)
data = [] 
news_links = html.select('.newslink')
for link in news_links:
    title = link.find('span', class_='h6').text
    date = link.find('small').text
    data.append([title, date])

df = pd.DataFrame(data, columns=['Title', 'Date'])

print(df)
'''

References

https://blog.devgenius.io/best-way-to-speed-up-a-bulk-of-http-requests-in-python-4ec75badabed