ChatGPT

In [1]:
import requests
from bs4 import BeautifulSoup
import concurrent.futures
import time
import pandas as pd
import numpy as np
import datetime
from nsepython import *

In [20]:
# Fetch the 'vader_lexicon' NLTK package before importing the VADER analyzer
# that is instantiated later in the notebook.
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\singh\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# HTTP headers passed to requests.get() for every page download; only a
# browser-style User-Agent is set.
header={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}

In [3]:
# Load the ticker universe from CSV and keep only the identifying columns.
universe = pd.read_csv('./datasets/NIFTY_50.csv')
tickers_df = universe.loc[:, ['Symbol', 'Company Name']]
# Series of symbols that the scraping loop iterates over.
tickers_list = tickers_df.loc[:, 'Symbol']

In [4]:
# Preview the Symbol / Company Name table loaded above.
tickers_df

Unnamed: 0,Symbol,Company Name
0,ADANIENT,Adani Enterprises Ltd.
1,ADANIPORTS,Adani Ports and Special Economic Zone Ltd.
2,APOLLOHOSP,Apollo Hospitals Enterprise Ltd.
3,ASIANPAINT,Asian Paints Ltd.
4,AXISBANK,Axis Bank Ltd.
5,BAJAJ-AUTO,Bajaj Auto Ltd.
6,BAJFINANCE,Bajaj Finance Ltd.
7,BAJAJFINSV,Bajaj Finserv Ltd.
8,BPCL,Bharat Petroleum Corporation Ltd.
9,BHARTIARTL,Bharti Airtel Ltd.


In [5]:
# Base URL of the per-company page; the ticker symbol is appended to it in
# get_url_content(). Note that it already ends with a '/'.
news_url = 'https://ticker.finology.in/company/'

In [6]:
# Module-level accumulators filled by the fetch helpers and the scraping loop.

# One row per scraped article: [ticker, title, date, time]
article_data = []

# One row per ticker: [ticker, sector, industry, mCap, companyName]
ticker_meta = []

# Tickers for which news or some metadata field could not be obtained
unavailable_tickers = []

In [7]:
# length of companies
companies_len = len(tickers_list)
tickers_length = companies_len

In [8]:
def get_url_content(ticker, timeout=30):
    """Download the company page and the NSE quote metadata for one ticker.

    Parameters
    ----------
    ticker : str
        Symbol appended to ``news_url`` and passed to ``nse_eq``.
    timeout : float, optional
        Seconds to wait for the HTTP request before ``requests`` raises.

    Returns
    -------
    tuple
        ``(ticker, soup, meta)``: the input ticker, the page parsed with
        BeautifulSoup (lxml parser) and the value returned by ``nse_eq``.

    Raises
    ------
    requests.exceptions.RequestException
        If the page cannot be downloaded (including on timeout).
    """
    # news_url already ends with '/', so joining with another '/' used to
    # request '.../company//TICKER'. Normalise to exactly one separator.
    url = '{}/{}'.format(news_url.rstrip('/'), ticker)
    # Without a timeout a stalled connection would block a worker thread forever.
    response = requests.get(url, headers=header, timeout=timeout)
    # scrape page contents using bs4 library
    soup = BeautifulSoup(response.content, 'lxml')
    meta = nse_eq(ticker)
    return ticker, soup, meta

In [9]:
def ticker_article_fetch(i, ticker, soup):
    """Extract the news entries of one ticker page into ``article_data``.

    Every ``#newsarticles > a`` element contributes one
    ``[ticker, title, 'YYYY/MM/DD', 'HH:MM']`` row. Entries that lack the
    title/timestamp elements, or whose timestamp does not match the expected
    format, are reported and skipped instead of aborting the whole run.

    Parameters
    ----------
    i : int
        Position of the ticker in the run; accepted for call compatibility
        and not used.
    ticker : str
        Symbol stored alongside every article row.
    soup : bs4.BeautifulSoup
        Parsed company page.

    Returns
    -------
    bool
        ``True`` when the page has no news links at all (the caller treats
        the ticker as unavailable), ``False`` otherwise.
    """
    print('Fetching Article')
    news_links = soup.select('#newsarticles > a')
    if len(news_links) == 0:
        print('No news found for {}'.format(ticker))
        return True
    ticker_articles_counter = 0
    for link in news_links:
        title_tag = link.find('span', class_='h6')
        stamp_tag = link.find('small')
        if title_tag is None or stamp_tag is None:
            # A malformed anchor must not discard the remaining articles.
            print('Skipping malformed article entry for {}'.format(ticker))
            continue
        try:
            # separate date and time from datetime object
            date_time_obj = datetime.datetime.strptime(stamp_tag.text.strip(), '%d %b %Y, %I:%M%p')
        except ValueError:
            print('Skipping article with unparseable date for {}: {!r}'.format(ticker, stamp_tag.text))
            continue
        art_date = date_time_obj.date().strftime('%Y/%m/%d')
        art_time = date_time_obj.time().strftime('%H:%M')
        article_data.append([ticker, title_tag.text, art_date, art_time])
        ticker_articles_counter += 1
    print('No of articles: {}'.format(ticker_articles_counter))
    # Explicit falsy result: the caller only branches on truthiness.
    return False

In [10]:
def ticker_meta_fetch(i, ticker, meta):
    """Append one ``[ticker, sector, industry, mCap, companyName]`` row to ``ticker_meta``.

    Fields are read from ``meta`` in the order sector, industry, market cap,
    company name. Extraction stops at the first field that cannot be read:
    that field and every later one are stored as ``np.nan`` and a message
    naming the missing field is printed.

    Parameters
    ----------
    i : int
        Position of the ticker in the run; accepted for call compatibility
        and not used.
    ticker : str
        Symbol stored in the first column of the row.
    meta : dict
        Nested mapping read via the keys ``industryInfo``, ``priceInfo``,
        ``securityInfo`` and ``info``.

    Returns
    -------
    bool
        ``True`` if any field was unavailable, ``False`` if the row is complete.
    """
    print('Fetching meta')
    # (label used in the message, extractor) in the order the fields are stored.
    field_readers = [
        ('sector info', lambda m: m['industryInfo']['macro']),
        ('industry info', lambda m: m['industryInfo']['industry']),
        # previousClose * issuedSize scaled by 1e9 and rounded to 2 decimals.
        ('mCap data', lambda m: round((m['priceInfo']['previousClose'] * m['securityInfo']['issuedSize'])/1000000000, 2)),
        ('company Name', lambda m: m['info']['companyName']),
    ]
    values = [np.nan] * len(field_readers)
    incomplete = False
    for position, (label, read_field) in enumerate(field_readers):
        try:
            values[position] = read_field(meta)
        except (KeyError, TypeError):
            # TypeError covers a None / non-dict payload or a None operand,
            # which previously escaped the KeyError handler and aborted the run.
            print('{} {} is not available'.format(ticker, label))
            incomplete = True
            break
    ticker_meta.append([ticker] + values)
    return incomplete

In [11]:
start_time = time.time()
# send multiple concurrent requests using concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    # Remember which ticker each future belongs to so that a failed download
    # can still be attributed to its symbol.
    future_to_ticker = {executor.submit(get_url_content, ticker): ticker for ticker in tickers_list}
    results = list(future_to_ticker)
    for i,future in enumerate(concurrent.futures.as_completed(results)):
        try:
            ticker, soup, meta = future.result()
        except Exception as exc:
            # future.result() re-raises whatever the worker raised; one bad
            # request must not discard the tickers that are still pending.
            failed_ticker = future_to_ticker[future]
            print(i, failed_ticker)
            print('Fetch failed for {}: {}'.format(failed_ticker, exc))
            unavailable_tickers.append(failed_ticker)
            continue
        print(i, ticker)
        ticker_article_response = ticker_article_fetch(i, ticker, soup)
        if ticker_article_response:
            # No news on the page: record the ticker and skip the metadata step.
            unavailable_tickers.append(ticker)
            print('skipping meta check for {}'.format(ticker))
            continue
        ticker_meta_response = ticker_meta_fetch(i,ticker,meta)
        if ticker_meta_response:
            # Some metadata field was missing; the partial row is still stored.
            unavailable_tickers.append(ticker)
end_time = time.time()

0 ADANIENT
Fetching Article
No of articles: 50
Fetching meta
1 ADANIPORTS
Fetching Article
No of articles: 50
Fetching meta
2 ASIANPAINT
Fetching Article
No of articles: 50
Fetching meta
3 BAJFINANCE
Fetching Article
No of articles: 50
Fetching meta
4 AXISBANK
Fetching Article
No of articles: 50
Fetching meta
5 BAJAJ-AUTO
Fetching Article
No of articles: 50
Fetching meta
6 APOLLOHOSP
Fetching Article
No of articles: 50
Fetching meta
7 BHARTIARTL
Fetching Article
No of articles: 50
Fetching meta
8 BRITANNIA
Fetching Article
No of articles: 50
Fetching meta
9 CIPLA
Fetching Article
No of articles: 50
Fetching meta
10 COALINDIA
Fetching Article
No of articles: 50
Fetching meta
11 DRREDDY
Fetching Article
No of articles: 50
Fetching meta
12 BAJAJFINSV
Fetching Article
No of articles: 50
Fetching meta
13 EICHERMOT
Fetching Article
No of articles: 50
Fetching meta
14 BPCL
Fetching Article
No of articles: 50
Fetching meta
15 DIVISLAB
Fetching Article
No of articles: 46
Fetching meta
16 HCLTEC

In [12]:
# Tickers that had no news or were missing at least one metadata field.
print(unavailable_tickers)

['M&M']


In [13]:
# calculate and print the time taken to send requests
time_taken = end_time - start_time
print("Time taken to send requests: {:.2f} seconds".format(time_taken))

Time taken to send requests: 53.07 seconds


In [14]:
# Tabulate the scraped rows; column order matches the lists appended to article_data.
article_columns = ['Ticker', 'Headline', 'Date', 'Time']
articles_df = pd.DataFrame(data=article_data, columns=article_columns)

In [15]:
# Preview the scraped headlines.
articles_df

Unnamed: 0,Ticker,Headline,Date,Time
0,ADANIENT,Adani Enterprises informs about media release,2023/01/28,15:08
1,ADANIENT,"Adani Enterprises raises Rs 5,985 crore from a...",2023/01/27,11:28
2,ADANIENT,Adani Enterprises submits media statement,2023/01/25,16:33
3,ADANIENT,Adani group planning to spin off businesses of...,2023/01/23,11:11
4,ADANIENT,Adani Enterprises’ arm incorporates JV Company,2023/01/21,17:16
...,...,...,...,...
2429,INDUSINDBK,Indusind Bank - Quaterly Results,2022/07/20,16:13
2430,INDUSINDBK,Indusind Bank - Quaterly Results,2022/07/20,16:13
2431,INDUSINDBK,Indusind Bank - Quaterly Results,2022/07/20,16:13
2432,INDUSINDBK,IndusInd Bank gets nod to raise funds upto Rs ...,2022/07/19,09:44


In [19]:
# Count of distinct tickers that produced at least one article row.
# dropna=False keeps the count identical to len(Series.unique()).
articles_df['Ticker'].nunique(dropna=False)

49

In [21]:
# Sentiment Analysis
print('Performing Sentiment Analysis')
vader = SentimentIntensityAnalyzer()
# Score every headline in articles_df; polarity_scores returns one dict per
# headline, and the list of dicts becomes one row per headline.
headline_scores = [vader.polarity_scores(headline) for headline in articles_df['Headline']]
art_scores_df = pd.DataFrame(headline_scores)

Performing Sentiment Analysis


In [22]:
# Preview the per-headline sentiment scores.
art_scores_df

Unnamed: 0,neg,neu,pos,compound
0,0.0,1.0,0.0,0.0
1,0.0,1.0,0.0,0.0
2,0.0,1.0,0.0,0.0
3,0.0,1.0,0.0,0.0
4,0.0,1.0,0.0,0.0
...,...,...,...,...
2429,0.0,1.0,0.0,0.0
2430,0.0,1.0,0.0,0.0
2431,0.0,1.0,0.0,0.0
2432,0.0,1.0,0.0,0.0


In [16]:
# Tabulate the per-ticker metadata; column order matches the rows in ticker_meta.
meta_columns = ['Ticker', 'Sector', 'Industry', 'Market Cap', 'Company Name']
ticker_meta_df = pd.DataFrame(data=ticker_meta, columns=meta_columns)

In [23]:
# Attach the sentiment scores to their headlines by row index.
# Only the score columns are taken from art_scores_df, so re-running this cell
# after art_scores_df has already been merged gives the same frame again
# instead of duplicated Ticker_x / Ticker_y ... columns.
score_columns = ['neg', 'neu', 'pos', 'compound']
art_scores_df = pd.merge(
    articles_df,
    # reindex (rather than [...]) also tolerates an empty, column-less score frame
    art_scores_df.reindex(columns=score_columns),
    left_index=True,
    right_index=True,
)

In [24]:
# Preview headlines together with their sentiment scores.
art_scores_df

Unnamed: 0,Ticker,Headline,Date,Time,neg,neu,pos,compound
0,ADANIENT,Adani Enterprises informs about media release,2023/01/28,15:08,0.0,1.0,0.0,0.0
1,ADANIENT,"Adani Enterprises raises Rs 5,985 crore from a...",2023/01/27,11:28,0.0,1.0,0.0,0.0
2,ADANIENT,Adani Enterprises submits media statement,2023/01/25,16:33,0.0,1.0,0.0,0.0
3,ADANIENT,Adani group planning to spin off businesses of...,2023/01/23,11:11,0.0,1.0,0.0,0.0
4,ADANIENT,Adani Enterprises’ arm incorporates JV Company,2023/01/21,17:16,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...
2429,INDUSINDBK,Indusind Bank - Quaterly Results,2022/07/20,16:13,0.0,1.0,0.0,0.0
2430,INDUSINDBK,Indusind Bank - Quaterly Results,2022/07/20,16:13,0.0,1.0,0.0,0.0
2431,INDUSINDBK,Indusind Bank - Quaterly Results,2022/07/20,16:13,0.0,1.0,0.0,0.0
2432,INDUSINDBK,IndusInd Bank gets nod to raise funds upto Rs ...,2022/07/19,09:44,0.0,1.0,0.0,0.0
