# Scrape Screener
## Jupyter notebook that scrapes stocks data from Screener.in 

* Sheet 1: Top Ratios 
* Sheet 2: Quarterly Results
* Sheet 3: Profit & Loss
* Sheet 4: Compounded Sales Growth
* Sheet 5: Compounded Profit Growth
* Sheet 6: Stock Price CAGR 
* Sheet 7: Return on Equity
* Sheet 8: Balance Sheet
* Sheet 9: Cash Flows
* Sheet 10: Ratios
* Sheet 11: Shareholding Pattern

### 1.1 Imports: 

In [1]:
import datetime
import io
import os
import re
import time

import pandas as pd
import requests as rq
from bs4 import BeautifulSoup


>  ⚠️ <span style="color:red"> **Update the CSV file path below to choose which stock list is read:**</span>

In [2]:
# Candidate input lists. Both assignments run, so the LAST one in this cell
# is the value that remains in `input_csv_file`.

# Option 1: personal watchlist
input_csv_file = "../Input/stocks-watchlisted.csv"

# Option 2: PSU stocks
input_csv_file = "../Input/stocks-psu.csv"


In [3]:
# Stock list actually used by the cells below: the watchlist CSV.
input_csv_file = "../Input/stocks-watchlisted.csv"

### 1.2 Read Stock List: 

In [4]:
# Load the stock list. The scrape loop reads the "Screener Stock Symbol"
# and "Url Segment" columns from each row of this frame.
df = pd.read_csv(filepath_or_buffer=input_csv_file)

# Preview the first three rows.
df.head(n=3)

Unnamed: 0,Screener Stock Symbol,Url Segment
0,526433,consolidated
1,532407,consolidated
2,544021,consolidated


### 1.3.1 Function that transforms Unordered list to dataframe: 

In [5]:
def csvfy(lines):
    """Turn the text lines of the "top ratios" list into a one-row DataFrame.

    Each ratio is looked up at a fixed position in ``lines`` and matched
    against a label-specific regular expression anchored at the start of the
    line. Thousands separators (``,``) are stripped from the captured number.

    Parameters
    ----------
    lines : sequence of str
        Whitespace-normalised text of each list item, in page order.
        May be empty or shorter than expected (e.g. when the list was not
        found on the page); missing positions yield ``"NaN"``.

    Returns
    -------
    pandas.DataFrame
        A single row whose values are strings: the captured number, or the
        literal string ``"NaN"`` when the line is absent or does not match.
        Column names are kept exactly as before (including the historical
        "Cores" spelling) because they end up as headers in the output files.
    """
    lines = list(lines or [])

    # The High / Low line carries two numbers; it is matched with one pattern
    # and the two columns pick different capture groups.
    high_low_pattern = r"High \/ Low ₹ ([0-9,.-]+) \/ ([0-9,.]+)"

    # (output column, position in `lines`, pattern, capture group)
    field_specs = [
        ("Market Cap in Cores Rupees", 0, r"Market Cap ₹ ([0-9,.-]+) Cr.", 1),
        ("Current Price in Rupees",    1, r"Current Price ₹ ([0-9,.-]+)",  1),
        ("High in Rupees",             2, high_low_pattern,                1),
        ("Low in Rupees",              2, high_low_pattern,                2),
        ("Stock PE",                   3, r"Stock P\/E ([0-9,.-]+)",       1),
        ("Book Value in Rupees",       4, r"Book Value ₹ ([0-9,.-]+)",     1),
        ("Dividend Yield %",           5, r"Dividend Yield ([0-9,.-]+) %", 1),
        ("ROCE %",                     6, r"ROCE ([0-9,.-]+) %",           1),
        ("ROE %",                      7, r"ROE ([0-9,.-]+) %",            1),
        ("Face Value in Rupees",       8, r"Face Value ₹ ([0-9,.-]+)",     1),
    ]

    record = {}
    for column, position, pattern, group in field_specs:
        # Guard against a short/empty list instead of raising IndexError.
        text = lines[position] if position < len(lines) else ""
        found = re.match(pattern, text)
        record[column] = found.group(group).replace(",", "") if found else "NaN"

    return pd.DataFrame([record])

### 1.3.2 Scrape and create reports: 

In [6]:
# Scrape every stock listed in `df` and write one Excel workbook per stock
# (one sheet per page section) into a timestamped output directory.

REQUEST_DELAY_SECONDS = 1     # pause after each page request
REQUEST_TIMEOUT_SECONDS = 30  # fail instead of hanging on a dead connection

# Regex for a non-breaking space followed by a literal "+", which is stripped
# from the cell values of several tables.
NBSP_PLUS = r"\u00A0\+"

# One entry per table, in the positional order the tables are read from the
# page: (sheet name, new label for the 'Unnamed: 0' column or None,
#        regex to strip from cell values or None).
# NOTE(review): relies on the page listing its tables in exactly this order,
# as the positional indexing always did — confirm if the site layout changes.
TABLE_SPECS = [
    ("Quarterly Results",        "Quarterly Results",    NBSP_PLUS),
    ("Profit & Loss",            "Profit and Loss",      NBSP_PLUS),
    ("Compounded Sales Growth",  None,                   ":"),
    ("Compounded Profit Growth", None,                   ":"),
    ("Stock Price CAGR",         None,                   ":"),
    ("Return on Equity",         None,                   ":"),
    ("Balance Sheet",            "Balance Sheet",        NBSP_PLUS),
    ("Cash Flows",               "Cash Flows",           NBSP_PLUS),
    ("Ratios",                   "Ratios",               None),
    ("Shareholding Pattern",     "Shareholding Pattern", NBSP_PLUS),
]

dir_path = "../Output/RawReports-01/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
os.makedirs(dir_path, exist_ok=True)

for _, row in df.iterrows():
    # str() keeps URL/file-name building working even if pandas parsed the
    # symbol column as numbers (e.g. a list containing only BSE codes).
    stock_symbol = str(row["Screener Stock Symbol"])
    url_segment = row["Url Segment"]

    url = "https://www.screener.in/company/" + stock_symbol + "/"
    if url_segment == "consolidated":
        url += "consolidated/"

    print("Loading Page for ", stock_symbol)

    # Download the page once; the same HTML feeds both the table parser and
    # the top-ratios parser below.
    response = rq.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    time.sleep(REQUEST_DELAY_SECONDS)

    tables = pd.read_html(io.StringIO(response.text))
    if len(tables) < len(TABLE_SPECS):
        raise ValueError(
            "Expected at least %d tables for %s but found %d (%s)"
            % (len(TABLE_SPECS), stock_symbol, len(tables), url)
        )

    # The Top Ratios section is an unordered list, not a table, so it is
    # parsed manually with BeautifulSoup.
    soup = BeautifulSoup(response.content, "html.parser")
    lines = []
    for ratios_list in soup.find_all("ul", id="top-ratios"):
        for item in ratios_list.find_all("li"):
            # Collapse every whitespace run (for str patterns `\s` also
            # matches non-breaking spaces) into a single space.
            lines.append(re.sub(r"\s+", " ", item.text).strip())

    # Assemble (sheet name, frame) pairs in workbook order.
    sheets = [("Top Ratios", csvfy(lines))]
    for position, (sheet_name, first_column_label, cleanup_pattern) in enumerate(TABLE_SPECS):
        table = tables[position]
        if first_column_label is not None:
            table = table.rename(columns={"Unnamed: 0": first_column_label})
        if cleanup_pattern is not None:
            table = table.replace(cleanup_pattern, "", regex=True)
        sheets.append((sheet_name, table))

    # The context manager saves and closes the workbook even if a sheet fails.
    with pd.ExcelWriter(dir_path + stock_symbol + ".xlsx", engine="xlsxwriter") as writer:
        for sheet_name, frame in sheets:
            frame.to_excel(writer, sheet_name=sheet_name, index=False)

print("All Done!")



Loading Page for  526433
Loading Page for  532407
Loading Page for  544021
Loading Page for  AARTIDRUGS
Loading Page for  AARTIIND
Loading Page for  AARTIPHARM
Loading Page for  AARTISURF
Loading Page for  ABB
Loading Page for  ACC
Loading Page for  ADANIENT
Loading Page for  AFFLE
Loading Page for  ALKYLAMINE
Loading Page for  ANGELONE
Loading Page for  ARE&M
Loading Page for  ASAHIINDIA
Loading Page for  ASHOKLEY
Loading Page for  ASIANPAINT
Loading Page for  ASTRAL
Loading Page for  AXISBANK
Loading Page for  BAJAJ-AUTO
Loading Page for  BAJAJELEC
Loading Page for  BAJAJFINSV
Loading Page for  BAJFINANCE
Loading Page for  BALAMINES
Loading Page for  BATAINDIA
Loading Page for  BEL
Loading Page for  BERGEPAINT
Loading Page for  BHARTIARTL
Loading Page for  BOROLTD
Loading Page for  BRITANNIA
Loading Page for  BSE
Loading Page for  CAMS
Loading Page for  CAMPUS
Loading Page for  CDSL
Loading Page for  CENTUM
Loading Page for  CGPOWER
Loading Page for  COCHINSHIP
Loading Page for  COFO

In [2]:
# Scratch check: this company URL has no 'consolidated' segment, so the
# message below is printed.
if 'https://www.screener.in/company/DMART/'.find('consolidated') == -1:
    print('Standalone')

Standalone


In [8]:
# Preview of the YYYYMMDD-HHMMSS timestamp format that names the output directory.
f"{datetime.datetime.now():%Y%m%d-%H%M%S}"

'20240831-095829'

In [7]:
import datetime