# In [1]:
import warnings
warnings.filterwarnings('ignore')
import os
import requests
import xlsxwriter
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from msedge.selenium_tools import Edge, EdgeOptions

# Launch a Chromium-based Edge session and open the Screener home page.
options = EdgeOptions()
options.use_chromium = True
driver = Edge(options=options)
driver.get('https://www.screener.in')

# Block until the user has manually navigated to the company page they want;
# the script scrapes whatever page is loaded when Enter is pressed.
input("Press Enter after you have searched for a company and the desired company page has loaded")

# Parse the rendered HTML once so the sections below can query a static
# snapshot of the page through ``soup``.
soup = BeautifulSoup(driver.page_source, 'html.parser')

'''---------------------------------------------------------------------------------
                            Storing Company Name
---------------------------------------------------------------------------------'''

# The text of the page's <h1> heading is used as the company name.
company = driver.find_element_by_xpath('/html/body/div/div[1]/h1').text

'''---------------------------------------------------------------------------------
                            Storing Company About Section
---------------------------------------------------------------------------------'''

# Read the "about" paragraph and discard its final three characters.
# NOTE(review): presumably those three characters are a trailing marker the
# page appends to the blurb - confirm against the live page.
about_paragraph = driver.find_element_by_xpath('//*[@id="top"]/div[3]/div[1]/div[1]/div[2]/p')
about_text = about_paragraph.text[:-3]

'''---------------------------------------------------------------------------------
                            Storing Company Profile Section
---------------------------------------------------------------------------------'''

# Collect every <li> of the "top-ratios" list as a name -> value pair; runs of
# whitespace inside a value are collapsed to single spaces.
top_ratios = soup.find('ul', {'id': 'top-ratios'})
ratio_values = {
    li.find('span', {'class': 'name'}).text.strip():
        ' '.join(li.find('span', {'class': 'value'}).text.split())
    for li in top_ratios.find_all('li')
}

# Build the profile frame directly in its final shape: the "About" text first,
# then one row per ratio, with the label in 'index' and the text in 'Value'.
profile_labels = ['About', *ratio_values]
profile_values = [about_text, *ratio_values.values()]
df_data_dict = pd.DataFrame({'index': profile_labels, 'Value': profile_values})

# Company name goes in the first column of every exported frame.
df_data_dict.insert(0, 'Company', company)

# ---------------------------------------------------------------------------------
#        Storing Company's Financial Tables (one long-format frame per section)
# ---------------------------------------------------------------------------------


def _scrape_section_table(page, section_id, company_name, *,
                          drop_last_row=False, strip_symbols=True):
    """Return the first table of a page section as a long-format DataFrame.

    The table inside ``<section id=section_id>`` is read into a wide frame
    (one column per ``<th>``), then melted so each output row holds a single
    cell: ``Company``, ``Row`` (the row label), ``Column`` (the header) and
    ``Value``.

    Args:
        page: Parsed BeautifulSoup document to search.
        section_id: ``id`` attribute of the ``<section>`` holding the table.
        company_name: Value written to the ``Company`` column of every row.
        drop_last_row: Discard the final table row before reshaping.
        strip_symbols: Remove ``,`` and ``%`` from the text values (row
            labels are left untouched).

    Raises:
        AttributeError: If the section or its table is not found on the page.
        KeyError: If the table has no header with empty text; that header
            marks the row-label column.
    """
    table = page.find('section', {'id': section_id}).find('table')
    headers = [th.text.strip() for th in table.find_all('th')]

    records = []
    for tr in table.find_all('tr')[1:]:  # first <tr> is the header row
        # Keep every cell, recording blanks as NaN. Dropping blank cells
        # would shift the remaining values left under the wrong headers.
        records.append([td.text.strip() or np.nan for td in tr.find_all('td')])

    wide = pd.DataFrame(records, columns=headers)
    if drop_last_row:
        wide = wide[:-1]

    # The label column has an empty header. Parking it in the index keeps
    # the symbol stripping below away from the labels (e.g. a '%' in a label).
    wide = wide.set_index("")
    if strip_symbols:
        wide = wide.apply(
            lambda col: col.str.replace(',', '').str.replace('%', '')
            if col.dtype == 'object' else col
        )

    long_form = (
        wide.reset_index()
        .rename(columns={'': 'Row'})
        .melt(id_vars='Row', var_name='Column', value_name='Value')
    )
    long_form.insert(0, 'Company', company_name)
    return long_form


# Everything below works on the static ``soup`` snapshot, so the browser is
# closed first: a parsing error can then no longer leave it running.
driver.quit()

# Quarterly results.
# NOTE(review): the last row is dropped; presumably it is a non-numeric
# footer row - confirm against the live page.
df_quarters = _scrape_section_table(soup, 'quarters', company, drop_last_row=True)

# Profit & loss
df_profit_loss = _scrape_section_table(soup, 'profit-loss', company)

# Balance sheet
df_balance_sheet = _scrape_section_table(soup, 'balance-sheet', company)

# Cash flow
df_cash_flow = _scrape_section_table(soup, 'cash-flow', company)

# Ratios
df_ratios = _scrape_section_table(soup, 'ratios', company)

# Shareholding pattern: values are stored exactly as shown on the page, so
# ',' and '%' are not stripped for this section.
df_shareholding = _scrape_section_table(soup, 'shareholding', company, strip_symbols=False)

'''---------------------------------------------------------------------------------
    Storing Top 20 Articles about the searched company from Money Control Website
---------------------------------------------------------------------------------'''

def company_search_phrase(name):
    """Return *name* without a trailing ' Ltd' suffix and outer whitespace.

    ``str.strip(' Ltd')`` is not used because it removes any of the
    characters ' ', 'L', 't', 'd' from both ends, which mangles names such
    as "Lupin Ltd" (-> "upin").
    """
    suffix = ' Ltd'
    phrase = name.strip()
    if phrase.endswith(suffix):
        phrase = phrase[:-len(suffix)]
    return phrase.strip()


def get_top_moneycontrol_articles(query, num_articles=20):
    """Fetch up to *num_articles* article titles and links for a tag page.

    Args:
        query: Tag text inserted into the Moneycontrol tag URL.
        num_articles: Maximum number of articles to return.

    Returns:
        DataFrame with columns ``Article No`` (1-based), ``Title`` and
        ``Link``; it has no rows when nothing usable is found.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
    """
    url = f"https://www.moneycontrol.com/news/tags/{query}.html"
    # A timeout keeps the script from hanging forever on a stalled server;
    # the status check avoids scraping an error page as if it were results.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    page = BeautifulSoup(response.content, "html.parser")

    top_articles = []
    for item in page.find_all("li", class_="clearfix"):
        if len(top_articles) >= num_articles:
            break
        heading = item.find("h2")
        anchor = item.find("a")
        # Skip list items that are not articles, so one such item does not
        # abort the whole scrape.
        if heading is None or anchor is None or not anchor.get("href"):
            continue
        top_articles.append({
            "Article No": len(top_articles) + 1,
            "Title": heading.text.strip(),
            "Link": anchor["href"],
        })
    return pd.DataFrame(top_articles, columns=["Article No", "Title", "Link"])


# Obtain the top 20 articles of the company. This step is best-effort: any
# failure is reported and the rest of the report is still produced.
try:
    articles_df = get_top_moneycontrol_articles(company_search_phrase(company), num_articles=20)
except Exception as e:
    print(f"Articles not available ({e})")
else:
    if articles_df.empty:
        print("Articles not available")
    else:
        # Set the Company Name as First column
        articles_df.insert(0, 'Company', company)

        # The export step writes cell values only, so the column names are
        # carried as the first data row of this frame.
        header_row = pd.DataFrame(
            [list(articles_df.columns)],
            columns=articles_df.columns,
            index=[len(articles_df)],
        )
        # ``top_articles`` is bound only on success, so a failed or empty
        # scrape never leaves a half-built frame for the export step.
        top_articles = pd.concat([header_row, articles_df])

'''---------------------------------------------------------------------------------
                            Converting into excel report
---------------------------------------------------------------------------------'''

# (report title, name of the variable holding that report's DataFrame).
# A frame is looked up by name so that a section which never got created
# (e.g. articles that could not be fetched) is exported as ``None`` and
# skipped instead of raising NameError.
report_sources = [
    ('COMPANY PROFILE', 'df_data_dict'),
    ('QUARTERLY RESULTS', 'df_quarters'),
    ('PROFIT & LOSS', 'df_profit_loss'),
    ('BALANCE SHEET', 'df_balance_sheet'),
    ('CASH FLOWS', 'df_cash_flow'),
    ('RATIOS', 'df_ratios'),
    ('SHAREHOLDING PATTERN', 'df_shareholding'),
    ('TOP ARTICLES', 'top_articles'),
]
# Captured outside the comprehension: inside it, locals() would be the
# comprehension's own scope.
available = locals()
dfs = [(title, available.get(var_name)) for title, var_name in report_sources]

directory = "Screener Individual Data"
os.makedirs(directory, exist_ok=True)

# One workbook per report, each with a single sheet holding the frame's cell
# values (no header row is added here).
for title, df in dfs:
    if df is None:
        continue
    file_name = os.path.join(directory, title + ".xlsx")
    # The context manager closes (and thereby saves) the workbook even if a
    # write raises part-way through.
    with xlsxwriter.Workbook(file_name) as workbook:
        worksheet = workbook.add_worksheet('Sheet1')
        for row_num, row_data in enumerate(df.values):
            # Missing values (None/NaN) are written as empty cells.
            cleaned_row_data = ["" if pd.isnull(x) else x for x in row_data]
            worksheet.write_row(row_num, 0, cleaned_row_data)

# (notebook cell output) Press Enter after you have searched for a company and the desired company page has loaded
