In [49]:
import requests
from io import StringIO
import pandas as pd
from datetime import datetime

import asyncio
from asyncio import AbstractEventLoop

import aiohttp
import bs4
from colorama import Fore

# Ewald's fix for asyncio not working in Jupyter
# (Ref: https://github.com/erdewit/nest_asyncio)
import nest_asyncio
nest_asyncio.apply()

In [50]:
# Scrapes pages from NSE
url1 = 'https://www.nseindia.com/content/fo/fo_mktlots.csv'
url2 = 'https://www.nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?symbol='

In [51]:
def get_lots():
    '''Get the lots with expiry dates
    Arg: None
    Returns: lots dataframe with expiry as YYYYMM''' 

    url = 'https://www.nseindia.com/content/fo/fo_mktlots.csv'
    req = requests.get(url)
    data = StringIO(req.text)
    lots_df = pd.read_csv(data)

    lots_df = lots_df[list(lots_df)[1:5]]

    # strip whitespace from columns and make it lower case
    lots_df.columns = lots_df.columns.str.strip().str.lower() 

    # strip all string contents of whitespaces
    lots_df = lots_df.applymap(lambda x: x.strip() if type(x) is str else x)

    # remove 'Symbol' row
    lots_df = lots_df[lots_df.symbol != 'Symbol']

    # convert the columns to expiry date string with year and month
    lots_df = lots_df.rename(columns={d: datetime.strftime(datetime.strptime(d, '%b-%y').date(), '%Y%m') 
                            for d in list(lots_df)[1:]})

    return lots_df.reset_index(drop=True)

In [58]:
# get the symbols
nse_symbols = sorted(list(get_lots().symbol.unique()))

In [80]:
headers = { 
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36', 
'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
'Accept-Language' : 'en-US,en;q=0.5',
'Accept-Encoding' : 'gzip', 
'DNT' : '1', # Do Not Track Request Header 
'Connection' : 'close'
}
def main():
    # Create loop
    loop = asyncio.get_event_loop()
    loop.run_until_complete(get_title_range(loop))
    print("Done.")

async def get_html(symbol: str) -> str:
    print(f"Getting HTML for symbol {symbol}", flush=True)
    
    # Make this async with aiohttp's ClientSession
    url = url2+symbol
    
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(url) as resp:
            resp.raise_for_status()
            await asyncio.sleep(1)
            return await resp.text()

def get_title(html: str, symbol: str) -> str:
    print(Fore.CYAN + f"Getting TITLE for symbol {symbol}", flush=True)
    soup = bs4.BeautifulSoup(html, 'html.parser')
    header = soup.select_one('h1')
    if not header:
        return "MISSING"
    
    return header.text.strip()
        
async def get_title_range(loop: AbstractEventLoop):
    tasks = []
    for n in nse_symbols:
        tasks.append((loop.create_task(get_html(n)), n))
        
    for task, n in tasks:
        html = await task
        title = get_title(html, n)
        print(Fore.WHITE + f"Title found: {title}", flush=True)
        
if __name__ == '__main__':
    main()
    

Getting HTML for symbol ACC
Getting HTML for symbol ADANIENT
Getting HTML for symbol ADANIPORTS
Getting HTML for symbol ADANIPOWER
Getting HTML for symbol AJANTPHARM
Getting HTML for symbol ALBK
Getting HTML for symbol AMARAJABAT
Getting HTML for symbol AMBUJACEM
Getting HTML for symbol APOLLOHOSP
Getting HTML for symbol APOLLOTYRE
Getting HTML for symbol ARVIND
Getting HTML for symbol ASHOKLEY
Getting HTML for symbol ASIANPAINT
Getting HTML for symbol AUROPHARMA
Getting HTML for symbol AXISBANK
Getting HTML for symbol BAJAJ-AUTO
Getting HTML for symbol BAJAJFINSV
Getting HTML for symbol BAJFINANCE
Getting HTML for symbol BALKRISIND
Getting HTML for symbol BANKBARODA
Getting HTML for symbol BANKINDIA
Getting HTML for symbol BANKNIFTY
Getting HTML for symbol BATAINDIA
Getting HTML for symbol BEL
Getting HTML for symbol BEML
Getting HTML for symbol BERGEPAINT
Getting HTML for symbol BHARATFIN
Getting HTML for symbol BHARATFORG
Getting HTML for symbol BHARTIARTL
Getting HTML for symbol BH

ClientResponseError: 403, message='Forbidden'

In [55]:
import aiohttp
import asyncio

async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()

async def main():
    async with aiohttp.ClientSession() as session:
        html = await fetch(session, 'http://python.org')
        print(html)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())

<!doctype html>
<!--[if lt IE 7]>   <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9">   <![endif]-->
<!--[if IE 7]>      <html class="no-js ie7 lt-ie8 lt-ie9">          <![endif]-->
<!--[if IE 8]>      <html class="no-js ie8 lt-ie9">                 <![endif]-->
<!--[if gt IE 8]><!--><html class="no-js" lang="en" dir="ltr">  <!--<![endif]-->

<head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">

    <link rel="prefetch" href="//ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js">

    <meta name="application-name" content="Python.org">
    <meta name="msapplication-tooltip" content="The official home of the Python Programming Language">
    <meta name="apple-mobile-web-app-title" content="Python.org">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black">

    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="HandheldFriendly" conte

In [94]:
nse_url_base = "http://www.nseindia.com/live_market/dynaContent/live_watch/"
option_chain_url = nse_url_base + "option_chain/optionKeys.jsp?&instrument=OPTSTK&symbol="
headers = { 
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36', 
'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
'Accept-Language' : 'en-US,en;q=0.5',
'Accept-Encoding' : 'gzip', 
'DNT' : '1', # Do Not Track Request Header 
'Connection' : 'close'
}

symbol = 'ACC'
u = option_chain_url + symbol

chainhtml = requests.get(u, headers=headers).content
chain = pd.read_html(chainhtml)[1][:-1]  # read the first table and drop the total
chain.columns=chain.columns.droplevel(0) # drop the first row of the header
chain = chain.drop('Chart', 1)           # drop the charts

# rename the columns
chain = chain.rename(columns={'OI': 'pOI',
 'Chng in OI':'pOI_Chng',
 'Volume': 'pVolume',
 'IV': 'pIV',
 'LTP': 'pLTP',
 'Net Chng': 'pNetChng',
 'BidQty': 'pBidQty',
 'BidPrice': 'pBid',
 'AskPrice': 'pAsk',
 'AskQty': 'pAskQty',
 'Strike Price': 'undPrice',
 'BidQty': 'cBidQty',
 'BidPrice': 'cBid',
 'AskPrice': 'cAsk',
 'AskQty': 'cAskQty',
 'Net Chng': 'cNetChng',
 'LTP': 'cLTP',
 'IV': 'cIV',
 'Volume': 'cVolume',
 'Chng in OI': 'cOI_Chng',
 'OI': 'cOI'})

chain = chain.iloc[2:] # remove the first two rows

# convert all to numeric
chain = chain.apply(pd.to_numeric, errors = 'coerce')

In [97]:
chainhtml

b'\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\r\n<html xmlns="http://www.w3.org/1999/xhtml">\r\n<head>\r\n<!-- Global site tag (gtag.js) - Google Analytics -->\r\n<script async src="https://www.googletagmanager.com/gtag/js?id=UA-108453261-1"></script>\r\n<script>\r\n  window.dataLayer = window.dataLayer || [];\r\n  function gtag(){dataLayer.push(arguments);}\r\n  gtag(\'js\', new Date());\r\n\r\n  gtag(\'config\', \'UA-108453261-1\');\r\n</script>\r\n<meta http-equiv="X-UA-Compatible" content="IE=8" />\r\n\r\n\r\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\r\n<title>NSE - National Stock Exchange of India Ltd.</title>\r\n<script type="text/javascript">\r\nvar page=["livewth_optch","liveMarket"];\r\n</script>\r\n\r\n<script type="text/javascript" src="/common/js/jquery-1.4.4.min.js"></script>\r\n<script type="text/javascript" src="/common/js/

In [None]:
from IPython.display import SVG
# help(SVG)
SVG(filename=r"C:\Users\kashir\Downloads\icon.svg")


<'svg>
<'ellipse style="fill:#00ff00;stroke:#000000;" cx="50" cy="50" rx="48" ry="48">
<'ellipse>
<'/svg>