[Reference](https://dswharshit.medium.com/building-a-structured-financial-newsfeed-using-spacy-and-streamlit-d19736fdd70c)

# Step 1: Extracting the trending stocks news data

In [1]:
import requests

# Download the Economic Times stocks RSS feed.
# The timeout keeps the cell from hanging indefinitely on a stalled
# connection, and raise_for_status() stops the notebook here on an HTTP error
# instead of letting an error page reach the parser.
resp = requests.get(
    "https://economictimes.indiatimes.com/markets/stocks/rssfeeds/2146842.cms",
    timeout=30,
)
resp.raise_for_status()

In [2]:
from bs4 import BeautifulSoup

# The feed is XML, so parse it with the XML feature set.
soup = BeautifulSoup(resp.content, features='xml')
# find_all is the current method name; findAll survives only as a legacy alias.
soup.find_all('title')

[<title>Stocks-Markets-Economic Times</title>,
 <title>Economic Times</title>,
 <title>Wall Street opens lower on slowing China growth, inflation worries</title>,
 <title>NCLT gives nod to creditors, shareholders of Reliance Retail to hold meetings for proposed Future Group deal</title>,
 <title>Market Watch: Should you be cautious now as market turns frothy?</title>,
 <title>Biggest gainers &amp; losers of the day: Paras Defence soars, Antony Waste tanks 11%</title>,
 <title>Market Movers: Has the DMart gravy turn finally come to a halt?</title>,
 <title>UltraTech Cement Q2 Results: Net profit rises 7.6% YoY to Rs 1,300 cr, misses Street estimates</title>,
 <title>Day Trading Guide: 2 stock recommendations for Tuesday</title>,
 <title>Tech View: Nifty50 in overbought zone; consolidation looks imminent</title>,
 <title>F&amp;O: Regular higher lows for Nifty50 hold promise, but VIX spike a worry</title>,
 <title>Stock market update: Nifty Bank index  advances  0.87%</title>,
 <title>Sto

# Step 2: Extracting entities from the headlines

In [4]:
# Download spaCy's small English model (en_core_web_sm) via a shell escape.
!python -m spacy download en_core_web_sm

Collecting en_core_web_sm==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz (12.0 MB)
     |████████████████████████████████| 12.0 MB 12.5 MB/s 
✔ Download and installation successful
You can now load the model via spacy.load('en_core_web_sm')


In [6]:
import spacy

In [7]:
# Load the en_core_web_sm pipeline; `nlp` is the callable that turns a string
# into a processed spaCy document in the cells that follow.
nlp = spacy.load("en_core_web_sm")

In [14]:
# Keep every <title> element of the feed for the NLP steps.
# Note: in the listing printed earlier, the first two entries are the feed's
# own titles ("Stocks-Markets-Economic Times", "Economic Times"), not news
# headlines, so real headlines start at index 2.
# find_all is the current method name; findAll is only a legacy alias.
headlines = soup.find_all('title')

In [16]:
# Tokenise one sample headline. The element is printed as-is for reference,
# while only its text content is handed to the spaCy pipeline.
print(headlines[4])
processed_hline = nlp(headlines[4].text)

# One token per line.
for token in processed_hline:
    print(token.text)

<title>Market Watch: Should you be cautious now as market turns frothy?</title>
Market
Watch
:
Should
you
be
cautious
now
as
market
turns
frothy
?


In [15]:
# Part-of-speech view of the sample headline. The pipeline receives the tag's
# text, not the <title> element itself; the element is printed for reference.
print(headlines[4])
processed_hline = nlp(headlines[4].text)

# Token text followed by its coarse part-of-speech tag.
for token in processed_hline:
    print(f"{token.text} ----- {token.pos_}")

<title>Market Watch: Should you be cautious now as market turns frothy?</title>
Market ----- PROPN
Watch ----- PROPN
: ----- PUNCT
Should ----- VERB
you ----- PRON
be ----- AUX
cautious ----- ADJ
now ----- ADV
as ----- SCONJ
market ----- NOUN
turns ----- VERB
frothy ----- ADJ
? ----- PUNCT


In [17]:
# Dependency view of the sample headline. The pipeline receives the tag's
# text, not the <title> element itself; the element is printed for reference.
print(headlines[4])
processed_hline = nlp(headlines[4].text)

# Token text followed by its syntactic dependency label.
for token in processed_hline:
    print(f"{token.text} ----- {token.dep_}")

<title>Market Watch: Should you be cautious now as market turns frothy?</title>
Market ----- compound
Watch ----- ROOT
: ----- punct
Should ----- aux
you ----- nsubj
be ----- ROOT
cautious ----- acomp
now ----- advmod
as ----- mark
market ----- nsubj
turns ----- advcl
frothy ----- acomp
? ----- punct


## Entity extraction

In [18]:
# Import the visualiser explicitly rather than relying on `import spacy`
# happening to expose the `displacy` submodule as an attribute.
from spacy import displacy

# Draw the dependency parse of the sample headline inline in the notebook;
# 'distance' sets the horizontal spacing between words.
displacy.render(processed_hline, style='dep', jupyter=True, options={'distance': 120})

In [19]:
# Gather the text of every entity labelled ORG, headline by headline.
# Duplicates are kept and the feed order is preserved.
companies = []
for title in headlines:
    doc = nlp(title.text)
    companies.extend(ent.text for ent in doc.ents if ent.label_ == 'ORG')

# Step 3: Named Entity Linking

In [21]:
# %pip installs into the environment of the running kernel (a `!pip` shell
# escape may hit a different interpreter). The version is pinned to the one
# the recorded run resolved so the notebook stays reproducible.
%pip install yfinance==0.1.63

Collecting yfinance
  Downloading yfinance-0.1.63.tar.gz (26 kB)
Collecting lxml>=4.5.1
  Downloading lxml-4.6.3-cp37-cp37m-manylinux2014_x86_64.whl (6.3 MB)
     |████████████████████████████████| 6.3 MB 2.7 MB/s 
Building wheels for collected packages: yfinance
  Building wheel for yfinance (setup.py) ... done
  Created wheel for yfinance: filename=yfinance-0.1.63-py2.py3-none-any.whl size=23918 sha256=9242faefd0be4645f5d8c039a26a9b9414c4500982146cf9187e95104dbda677
  Stored in directory: /root/.cache/pip/wheels/fe/87/8b/7ec24486e001d3926537f5f7801f57a74d181be25b11157983
Successfully built yfinance
Installing collected packages: lxml, yfinance
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
Successfully installed lxml-4.6.3 yfinance-0.1.63


In [23]:
import pandas as pd
import yfinance as yf

## market attributes read from the yfinance info mapping for every matched stock
MARKET_FIELDS = ['currentPrice', 'dayHigh', 'dayLow', 'forwardPE', 'dividendYield']

## collect various market attributes of a stock
stock_dict = {
    'Org': [],
    'Symbol': [],
    'currentPrice': [],
    'dayHigh': [],
    'dayLow': [],
    'forwardPE': [],
    'dividendYield': []
}

# NOTE(review): `stocks_df` is not defined anywhere in the visible part of this
# notebook. It must be a DataFrame with 'Company Name' and 'Symbol' columns and
# has to be loaded before this cell runs. The table recorded under this cell is
# empty, which is what a swallowed NameError on every iteration would produce.

## for each company look it up and gather all market data on it
for company in companies:
    try:
        # Literal substring match: the entity text is not a regex, and rows
        # with a missing company name must count as "no match".
        matches = stocks_df[
            stocks_df['Company Name'].str.contains(company, regex=False, na=False)
        ]
        if matches.empty:
            continue
        first_match = matches.iloc[0]
        symbol = first_match['Symbol']
        # ".NS" is appended to the symbol to form the ticker passed to yfinance.
        stock_info = yf.Ticker(symbol + ".NS").info
        # Build the whole row first so a failure can never leave the column
        # lists with different lengths; absent attributes become None.
        row = {'Org': first_match['Company Name'], 'Symbol': symbol}
        row.update({field: stock_info.get(field) for field in MARKET_FIELDS})
    except Exception as exc:
        # Stay best-effort (one bad lookup must not abort the loop) but report
        # the reason instead of silently producing an empty table.
        print(f"Skipping {company!r}: {exc!r}")
        continue
    for column, value in row.items():
        stock_dict[column].append(value)

## create a dataframe to display the buzzing stocks
pd.DataFrame(stock_dict)

Unnamed: 0,Org,Symbol,currentPrice,dayHigh,dayLow,forwardPE,dividendYield


# Step 4: Building a web app using Streamlit

In [24]:
# Use the explicit %pip magic instead of relying on automagic for a bare
# `pip`, and pin the version the recorded run resolved for reproducibility.
%pip install streamlit==1.0.0

Collecting Streamlit
  Downloading streamlit-1.0.0-py2.py3-none-any.whl (8.3 MB)
     |████████████████████████████████| 8.3 MB 2.6 MB/s 
Collecting pydeck>=0.1.dev5
  Downloading pydeck-0.7.0-py2.py3-none-any.whl (4.3 MB)
     |████████████████████████████████| 4.3 MB 36.3 MB/s 
Collecting watchdog
  Downloading watchdog-2.1.6-py3-none-manylinux2014_x86_64.whl (76 kB)
     |████████████████████████████████| 76 kB 5.0 MB/s 
Collecting gitpython!=3.1.19
  Downloading GitPython-3.1.24-py3-none-any.whl (180 kB)
     |████████████████████████████████| 180 kB 73.0 MB/s 
Collecting validators
  Downloading validators-0.18.2-py3-none-any.whl (19 kB)
Collecting blinker
  Downloading blinker-1.4.tar.gz (111 kB)
     |████████████████████████████████| 111 kB 69.3 MB/s 
Collecting base58
  Downloading base58-2.1.0-py3-none-any.whl (5.6 kB)
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.7-py3-none-any.whl (63 kB)
     |████████████████████████████████| 63 kB 1.6 MB/s 
C

In [26]:
import pandas as pd
import requests
import spacy
import streamlit as st
from bs4 import BeautifulSoup

import yfinance as yf

In [27]:
# Set the heading of the Streamlit page.
# NOTE(review): ":zap:" is presumably rendered by Streamlit as an emoji
# shortcode, and Streamlit elements presumably only display when this code is
# launched with `streamlit run` rather than from a notebook kernel — confirm.
st.title('Buzzing Stocks :zap:')