Imports

In [1]:
import pandas as pd
import numpy as np
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from pprint import pprint
from nsepython import *
from datetime import datetime

Get Company Tickers from [NSE Website](https://www1.nseindia.com/content/indices/ind_nifty500list.csv)

In [2]:
ticker_url = 'https://www1.nseindia.com/content/indices/ind_nifty500list.csv'
tickers_file = pd.read_csv(ticker_url)
tickers_df = tickers_file[['Symbol', 'Company Name']]
tickers = tickers_df['Symbol']
tickers.head()

0      3MINDIA
1          ABB
2          ACC
3       AIAENG
4    APLAPOLLO
Name: Symbol, dtype: object

Scrape Article Headlines and Dates

In [3]:
news_url = 'https://ticker.finology.in/company/'

In [4]:
# list to store article data
data = []
companies_len = len(tickers)
for i in range(50):
    req = Request(url= '{}/{}'.format(news_url, tickers[i]),headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    response = urlopen(req)
    html = BeautifulSoup(response) 
    news_links = html.select('.newslink')  
    for link in news_links:
        title = link.find('span', class_='h6').text
        #separate date and time from datetime object
        date_time_obj = datetime.strptime(link.find('small').text, '%d %b %Y, %I:%M%p')
        date = date_time_obj.date().strftime('%Y/%m/%d')
        time = date_time_obj.time().strftime('%H:%M')
        data.append([tickers[i], title, date, time])      

df = pd.DataFrame(data, columns=['Ticker', 'Headline', 'Date', 'Time'])

In [5]:
pprint(df)

         Ticker                                           Headline  \
0       3MINDIA        3m India informs about postal ballot notice   
1       3MINDIA      3M India informs about trading window closure   
2       3MINDIA  3M India’s promoter to exit PFAS manufacturing...   
3       3MINDIA   3M India informs about loss of share certificate   
4       3MINDIA  3M India informs about issuance of duplicate s...   
...         ...                                                ...   
2305  BAJAJELEC  Bajaj Electricals informs about compliance cer...   
2306  BAJAJELEC    Bajaj Electricals informs about conference meet   
2307  BAJAJELEC  Bajaj Electricals informs about issuance of du...   
2308  BAJAJELEC     Bajaj Electricals informs about investor calls   
2309  BAJAJELEC      Bajaj Electricals informs about investor call   

            Date   Time  
0     2022/12/24  12:59  
1     2022/12/23  15:38  
2     2022/12/22  12:27  
3     2022/12/19  17:00  
4     2022/11/21  16:09  
...

Sentiment Analysis

In [6]:
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\singh\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [7]:
vader = SentimentIntensityAnalyzer()
scores = df['Headline'].apply(vader.polarity_scores).tolist()
scores_df = pd.DataFrame(scores)

In [8]:
new_df = pd.merge(left=df, right=scores_df, on=df.index.values).drop(['key_0'], axis=1)

In [9]:
new_df

Unnamed: 0,Ticker,Headline,Date,Time,neg,neu,pos,compound
0,3MINDIA,3m India informs about postal ballot notice,2022/12/24,12:59,0.000,1.000,0.000,0.0000
1,3MINDIA,3M India informs about trading window closure,2022/12/23,15:38,0.000,1.000,0.000,0.0000
2,3MINDIA,3M India’s promoter to exit PFAS manufacturing...,2022/12/22,12:27,0.000,1.000,0.000,0.0000
3,3MINDIA,3M India informs about loss of share certificate,2022/12/19,17:00,0.219,0.571,0.210,-0.0258
4,3MINDIA,3M India informs about issuance of duplicate s...,2022/11/21,16:09,0.000,0.784,0.216,0.2960
...,...,...,...,...,...,...,...,...
2305,BAJAJELEC,Bajaj Electricals informs about compliance cer...,2021/10/12,16:45,0.000,1.000,0.000,0.0000
2306,BAJAJELEC,Bajaj Electricals informs about conference meet,2021/09/02,10:07,0.000,1.000,0.000,0.0000
2307,BAJAJELEC,Bajaj Electricals informs about issuance of du...,2021/09/02,10:06,0.000,0.784,0.216,0.2960
2308,BAJAJELEC,Bajaj Electricals informs about investor calls,2021/08/25,13:53,0.000,1.000,0.000,0.0000


In [10]:
final_df = new_df.groupby('Ticker').mean()

  final_df = new_df.groupby('Ticker').mean()


In [11]:
final_df

Unnamed: 0_level_0,neg,neu,pos,compound
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3MINDIA,0.00876,0.93256,0.05868,0.071982
AARTIDRUGS,0.023298,0.926255,0.050447,0.04013
AAVAS,0.0,0.9734,0.0266,0.040926
ABB,0.01882,0.90626,0.07492,0.0884
ABBOTINDIA,0.008383,0.926872,0.064723,0.080911
ABCAPITAL,0.01876,0.92874,0.0525,0.060862
ABFRL,0.03424,0.90206,0.0637,0.054644
ABSLAMC,0.0,0.989737,0.010263,0.023179
ACC,0.01504,0.92032,0.06464,0.080172
ADANIENT,0.006,0.95478,0.03922,0.051824


Get Company Sector, industry data

In [12]:
sector = []
industry = []
mCap = []
for i in range(50):
    meta = nse_eq(tickers[i])
    print(tickers[i])
    sector.append(meta['industryInfo']['macro'])
    pprint('Sector: {}'.format(meta['industryInfo']['macro']))
    industry.append(meta['industryInfo']['sector'])
    pprint('Industry: {}'.format(meta['industryInfo']['sector']))
    ticker_mcap = round((meta['priceInfo']['previousClose'] * meta['securityInfo']['issuedSize'])/1000000000, 2)
    mCap.append(ticker_mcap)
    print('market cap is Rs {}'.format(ticker_mcap))
    print('\n')

final_df['sector'] = sector
final_df['industry'] = industry
final_df['mCap (Billion)'] = mCap

3MINDIA
'Sector: Diversified'
'Industry: Diversified'
market cap is Rs 252.3


ABB
'Sector: Industrials'
'Industry: Capital Goods'
market cap is Rs 614.52


ACC
'Sector: Commodities'
'Industry: Construction Materials'
market cap is Rs 444.53


AIAENG
'Sector: Industrials'
'Industry: Capital Goods'
market cap is Rs 238.16


APLAPOLLO
'Sector: Industrials'
'Industry: Capital Goods'
market cap is Rs 314.77


AUBANK
'Sector: Financial Services'
'Industry: Financial Services'
market cap is Rs 413.07


AARTIDRUGS
'Sector: Healthcare'
'Industry: Healthcare'
market cap is Rs 41.42


AAVAS
'Sector: Financial Services'
'Industry: Financial Services'
market cap is Rs 143.67


ABBOTINDIA
'Sector: Healthcare'
'Industry: Healthcare'
market cap is Rs 304.71


ADANIENT
'Sector: Commodities'
'Industry: Metals & Mining'
market cap is Rs 4242.12


ADANIGREEN
'Sector: Utilities'
'Industry: Power'
market cap is Rs 3034.45


ADANIPORTS
'Sector: Services'
'Industry: Services'
market cap is Rs 1716.55


ATGL


In [13]:
final_df = final_df.reset_index()

In [14]:
final_df

Unnamed: 0,Ticker,neg,neu,pos,compound,sector,industry,mCap (Billion)
0,3MINDIA,0.00876,0.93256,0.05868,0.071982,Diversified,Diversified,252.3
1,AARTIDRUGS,0.023298,0.926255,0.050447,0.04013,Industrials,Capital Goods,614.52
2,AAVAS,0.0,0.9734,0.0266,0.040926,Commodities,Construction Materials,444.53
3,ABB,0.01882,0.90626,0.07492,0.0884,Industrials,Capital Goods,238.16
4,ABBOTINDIA,0.008383,0.926872,0.064723,0.080911,Industrials,Capital Goods,314.77
5,ABCAPITAL,0.01876,0.92874,0.0525,0.060862,Financial Services,Financial Services,413.07
6,ABFRL,0.03424,0.90206,0.0637,0.054644,Healthcare,Healthcare,41.42
7,ABSLAMC,0.0,0.989737,0.010263,0.023179,Financial Services,Financial Services,143.67
8,ACC,0.01504,0.92032,0.06464,0.080172,Healthcare,Healthcare,304.71
9,ADANIENT,0.006,0.95478,0.03922,0.051824,Commodities,Metals & Mining,4242.12


In [15]:
final_df = pd.merge(final_df, tickers_df, left_on='Ticker', right_on='Symbol').drop('Symbol', axis=1)

In [16]:
final_df.columns = ['Symbol', 'Negative', 'Neutral', 'Positive', 'Sentiment Score', 'Sector', 'Industry', 'MCap (Billion)', 'Company Name']

In [17]:
final_df

Unnamed: 0,Symbol,Negative,Neutral,Positive,Sentiment Score,Sector,Industry,MCap (Billion),Company Name
0,3MINDIA,0.00876,0.93256,0.05868,0.071982,Diversified,Diversified,252.3,3M India Ltd.
1,AARTIDRUGS,0.023298,0.926255,0.050447,0.04013,Industrials,Capital Goods,614.52,Aarti Drugs Ltd.
2,AAVAS,0.0,0.9734,0.0266,0.040926,Commodities,Construction Materials,444.53,Aavas Financiers Ltd.
3,ABB,0.01882,0.90626,0.07492,0.0884,Industrials,Capital Goods,238.16,ABB India Ltd.
4,ABBOTINDIA,0.008383,0.926872,0.064723,0.080911,Industrials,Capital Goods,314.77,Abbott India Ltd.
5,ABCAPITAL,0.01876,0.92874,0.0525,0.060862,Financial Services,Financial Services,413.07,Aditya Birla Capital Ltd.
6,ABFRL,0.03424,0.90206,0.0637,0.054644,Healthcare,Healthcare,41.42,Aditya Birla Fashion and Retail Ltd.
7,ABSLAMC,0.0,0.989737,0.010263,0.023179,Financial Services,Financial Services,143.67,Aditya Birla Sun Life AMC Ltd.
8,ACC,0.01504,0.92032,0.06464,0.080172,Healthcare,Healthcare,304.71,ACC Ltd.
9,ADANIENT,0.006,0.95478,0.03922,0.051824,Commodities,Metals & Mining,4242.12,Adani Enterprises Ltd.


Plotting

In [18]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import plotly
import plotly.express as px

In [21]:
fig = px.treemap(
    final_df, path=[px.Constant('Nifty 500'), 'Sector', 'Industry', 'Symbol'], values='MCap (Billion)', color='Sentiment Score',
    hover_data=['Company Name', 'Negative', 'Neutral', 'Positive', 'Sentiment Score'], color_continuous_scale=['#FF0000', "#000000", '#00FF00'], color_continuous_midpoint=0
    )
fig.data[0].customdata = final_df[['Company Name', 'Negative', 'Neutral', 'Positive', 'Sentiment Score']]
fig.data[0].texttemplate = "%{label}<br>%{customdata[4]}"
fig.update_traces(textposition="middle center")
fig.update_layout(margin = dict(t=30, l=10, r=10, b=10), font_size=20)
fig.show()

In [None]:
'''
req = Request(url= '{}/{}'.format(news_url, tickers[0]),headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
response = urlopen(req)
html = BeautifulSoup(response)
data = [] 
news_links = html.select('.newslink')
for link in news_links:
    title = link.find('span', class_='h6').text
    date = link.find('small').text
    data.append([title, date])

df = pd.DataFrame(data, columns=['Title', 'Date'])

print(df)
'''