In [141]:
import requests
from bs4 import BeautifulSoup
import json

# URL of the webpage
url = 'https://www.screener.in/company/SUNPHARMA/consolidated/'

# Send HTTP request and get the HTML response
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Save raw HTML in a variable
    html = response.text

    # Parse HTML content
    soup = BeautifulSoup(html, 'html.parser')

    # Rest of your code...
else:
    print(f"Failed to retrieve webpage. Status code: {response.status_code}")

In [142]:
import json
from bs4 import BeautifulSoup

def extract_company_data(html):
    """
    Extract company data from the given HTML.

    Args:
        html (str): The HTML content to parse.

    Returns:
        dict: A dictionary containing the extracted company data.
    """

    try:
        # Parse HTML content
        soup = BeautifulSoup(html, 'html.parser')

        # Find the company-info div
        company_info_div = soup.find('div', id='top')
        if company_info_div is None:
            print("Company info div not found.")
            return None

        # Extract company information
        company_name = company_info_div.find('h1', class_='margin-0')
        company_name = company_name.text.strip() if company_name else "Company name not found."

        # Extract stock price and percentage change
        stock_price_div = company_info_div.find('div', class_='font-size-18')
        if stock_price_div:
            stock_price = stock_price_div.find('span').text.strip().replace('\n', '').replace('₹', '').replace('¹', '').replace('�','').replace('â‚', '').strip() + ' INR'
            percentage_change = stock_price_div.find('span', class_='font-size-12').text.strip()
        else:
            stock_price = "Stock price not found."
            percentage_change = "Percentage change not found."

        # Extract market cap, current price, high/low, stock P/E, book value, dividend yield, ROCE, ROE, and face value
        ratios_div = company_info_div.find('ul', id='top-ratios')
        if ratios_div:
            ratios = {}
            for ratio in ratios_div.find_all('li'):
                name = ratio.find('span', class_='name').text.strip()
                value = ratio.find('span', class_='value').text.strip().replace('\n', '').replace('₹', '').replace('¹', '').replace('�','').replace('â‚', '').strip()
                # Remove extra whitespaces
                value = ' '.join(value.split())
                if 'Cr' in value:
                    value = value + ' INR'
                elif value.replace('.','',1).isdigit():
                    value = value + ' INR'
                ratios[name] = value
        else:
            ratios = {"Ratios not found." : "No data available."}

        # Extract about, key points, and company links
        company_data = {}
        company_profile_div = company_info_div.find('div', class_='company-profile')
        if company_profile_div:
            about = company_profile_div.find('div', class_='sub').text.strip().replace('\n', ' ') if company_profile_div.find('div', class_='sub') else "About not found."
            key_points = company_profile_div.find('div', class_='sub commentary').text.strip() if company_profile_div.find('div', class_='sub commentary') else "Key points not found."
            company_links = [link.get('href') for link in company_profile_div.find_all('a')]
            company_data["about_and_key_points"] = f"About: {about} Key_points: {key_points}"
            company_data["company_links"] = company_links
        else:
            company_data["about_and_key_points"] = "About not found. Key_points not found."
            company_data["company_links"] = []

        # Return extracted data
        return {
            'company_name': company_name,
            'stock_price': stock_price,
            'percentage_change': percentage_change,
            'ratios': ratios,
            **company_data
        }
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
    

# Extract company data
company_data = extract_company_data(html)
# Print extracted data
if company_data is not None:
    for key, value in company_data.items():
        print(f"{key.capitalize()}: {value}")
# Store extracted data in JSON format
if company_data is not None:
    with open('json/company_data.json', 'w') as f:
        json.dump(company_data, f, indent=4)

    print("Data stored in company_data.json")
else:
    print("No data extracted.")

Company_name: Sun Pharmaceuticals Industries Ltd
Stock_price: 1,809 INR
Percentage_change: -2.66%
Ratios: {'Market Cap': '4,33,946 Cr. INR', 'Current Price': '1,809', 'High / Low': '1,960 / 1,140', 'Stock P/E': '38.7 INR', 'Book Value': '288 INR', 'Dividend Yield': '0.75 %', 'ROCE': '17.3 %', 'ROE': '16.7 %', 'Face Value': '1.00 INR'}
About_and_key_points: About: Sun Pharmaceutical Industries Ltd is engaged in the business of manufacturing, developing and marketing a wide range of branded and generic formulations and Active Pharma Ingredients (APIs). The company and its subsidiaries has various manufacturing facilities spread across the world with trading and other incidental and related activities extending to global market.[1] It is the largest pharmaceutical company in India.[2] Key_points: Key points not found.
Company_links: ['https://www.bseindia.com/bseplus/AnnualReport/524715/69342524715.pdf#page=193', 'https://sunpharma.com/wp-content/uploads/2021/07/SPIL-IR-Presentation-June-

In [143]:
import json
from bs4 import BeautifulSoup

def extract_quarters_data(html):
    """
    Extract quarters data from the given HTML.

    Args:
        html (str): The HTML content to parse.

    Returns:
        dict: A dictionary containing the extracted quarters data.
    """

    # Parse HTML content
    soup = BeautifulSoup(html, 'html.parser')

    # Find the quarters section
    quarters_section = soup.find('section', id='quarters')
    if quarters_section is None:
        print("Quarters section not found.")
        return None

    # Find the table
    table = quarters_section.find('table', class_='data-table')
    if table is None:
        print("Table not found.")
        return None

    # Extract table headers
    headers = [th.text.strip() for th in table.find_all('th')][1:]  # Skip the first column

    # Extract table data
    data = []
    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        cols = [col.text.strip().replace(' ', '').replace('+', '').replace('<span class="blue-icon">', '').replace('</span>', '').encode('ascii', 'ignore').decode('ascii') for col in cols][1:]  # Skip the first column
        data.append([col for col in cols if col]) # Get rid of empty values

    # Create a dictionary with the extracted data
    categories = ['Sales', 'Expenses', 'Operating Profit', 'OPM %', 'Other Income', 'Interest', 'Depreciation', 'Profit before tax', 'Tax %', 'Net Profit', 'EPS in Rs']
    quarters_data = {}
    for i, header in enumerate(headers):
        quarters_data[header] = {}
        for j, category in enumerate(categories):
            if j < len(data) and i < len(data[j]):
                quarters_data[header][category] = data[j][i]
            else:
                quarters_data[header][category] = 'null'

    return quarters_data


# Extract quarters data
quarters_data = extract_quarters_data(html)

# Store extracted data in JSON format
if quarters_data is not None:
    with open('json/quarters_data.json', 'w') as f:
        json.dump(quarters_data, f, indent=4)

    print("Data stored in quarters_data.json")
else:
    print("No data extracted.")

Data stored in quarters_data.json


In [144]:
import json
from bs4 import BeautifulSoup

def extract_data(html):

    # Parse HTML content
    soup = BeautifulSoup(html, 'html.parser')

    # Extract Profit & Loss data
    profit_loss_section = soup.find('section', id='profit-loss')
    if profit_loss_section is None:
        print("Profit & Loss section not found.")
        return None

    table = profit_loss_section.find('table', class_='data-table')
    if table is None:
        print("Table not found.")
        return None

    headers = [th.text.strip() for th in table.find_all('th')][1:]  
    data = []
    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        cols = [col.text.strip().replace(' ', '').replace('+', '').replace('<span class="blue-icon">', '').replace('</span>', '').encode('ascii', 'ignore').decode('ascii') for col in cols][1:]  
        data.append([col for col in cols if col]) 

    categories = ['Sales', 'Expenses', 'Operating Profit', 'OPM %', 'Other Income', 'Interest', 'Depreciation', 'Profit before tax', 'Tax %', 'Net Profit', 'EPS in Rs']
    profit_loss_data = {}
    for i, header in enumerate(headers):
        profit_loss_data[header] = {}
        for j, category in enumerate(categories):
            if j < len(data) and i < len(data[j]):
                profit_loss_data[header][category] = data[j][i]
            else:
                profit_loss_data[header][category] = 'null'

    # Extract Compounded Growth data
    growth_tables = soup.find_all('table', class_='ranges-table')
    if not growth_tables:
        print("Growth tables not found.")
        return None

    compounded_sales_growth = {}
    compounded_profit_growth = {}
    stock_price_cagr = {}
    return_on_equity = {}

    for i, table in enumerate(growth_tables):
        rows = table.find_all('tr')[1:]  
        for row in rows:
            cols = row.find_all('td')
            metric = cols[0].text.strip()
            value = cols[1].text.strip()
            if i == 0:
                compounded_sales_growth[metric] = value
            elif i == 1:
                compounded_profit_growth[metric] = value
            elif i == 2:
                stock_price_cagr[metric] = value
            elif i == 3:
                return_on_equity[metric] = value

    compounded_growth_data = {
        'Compounded Sales Growth': compounded_sales_growth,
        'Compounded Profit Growth': compounded_profit_growth,
        'Stock Price CAGR': stock_price_cagr,
        'Return on Equity': return_on_equity
    }

    # Combine data into a single dictionary
    data = {
        'Profit & Loss': profit_loss_data,
        'Compounded Growth': compounded_growth_data
    }

    return data


# Extract data
data = extract_data(html)

# Store extracted data in JSON format
if data is not None:
    with open('json/profit_loss_data.json', 'w') as f:
        json.dump(data, f, indent=4)

    print("Data stored in profit_loss_data.json")
else:
    print("No data extracted.")

Data stored in profit_loss_data.json


  def setup(self, parent=None, previous_element=None, next_element=None,


In [145]:
import json
from bs4 import BeautifulSoup

def extract_balance_sheet_data(html):

    # Parse HTML content
    soup = BeautifulSoup(html, 'html.parser')

    # Find the balance sheet section
    balance_sheet_section = soup.find('section', id='balance-sheet')
    if balance_sheet_section is None:
        print("Balance Sheet section not found.")
        return None

    # Find the table
    table = balance_sheet_section.find('table', class_='data-table')
    if table is None:
        print("Table not found.")
        return None

    # Extract table headers
    headers = [th.text.strip() for th in table.find_all('th')][1:]  

    # Extract table data
    data = []
    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        cols = [col.text.strip().replace(' ', '').replace('+', '').replace('<span class="blue-icon">', '').replace('</span>', '').replace('\u00a0+', '').encode('ascii', 'ignore').decode('ascii') for col in cols][1:]  
        data.append([col for col in cols if col]) 

    # Create a dictionary with the extracted data
    balance_sheet_data = {}
    categories = [row.find_all('td')[0].text.strip().replace('\u00a0+', '') for row in table.find_all('tr')[1:]]
    for i, header in enumerate(headers):
        balance_sheet_data[header] = {}
        for j, category in enumerate(categories):
            if j < len(data) and i < len(data[j]):
                balance_sheet_data[header][category] = data[j][i]
            else:
                balance_sheet_data[header][category] = 'null'

    return balance_sheet_data


# Extract balance sheet data
balance_sheet_data = extract_balance_sheet_data(html)


# Store extracted data in JSON format
if balance_sheet_data is not None:
    with open('json/balance_sheet_data.json', 'w') as f:
        json.dump(balance_sheet_data, f, indent=4)

    print("Data stored in balance_sheet_data.json")
else:
    print("No data extracted.")

Data stored in balance_sheet_data.json


In [146]:
import json
from bs4 import BeautifulSoup

def extract_cash_flows_data(html):

    # Parse HTML content
    soup = BeautifulSoup(html, 'html.parser')

    # Find the cash flows section
    cash_flows_section = soup.find('section', id='cash-flow')
    if cash_flows_section is None:
        print("Cash Flows section not found.")
        return None

    # Find the table
    table = cash_flows_section.find('table', class_='data-table')
    if table is None:
        print("Table not found.")
        return None

    # Extract table headers
    headers = [th.text.strip() for th in table.find_all('th')][1:]  

    # Extract table data
    data = []
    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        cols = [col.text.strip().replace(' ', '').replace('+', '').replace('<span class="blue-icon">', '').replace('</span>', '').replace('\u00a0+', '').encode('ascii', 'ignore').decode('ascii') for col in cols][1:]  
        data.append([col for col in cols if col]) 

    # Create a dictionary with the extracted data
    cash_flows_data = {}
    categories = [row.find_all('td')[0].text.strip().replace('\u00a0+', '') for row in table.find_all('tr')[1:]]
    for i, header in enumerate(headers):
        cash_flows_data[header] = {}
        for j, category in enumerate(categories):
            if j < len(data) and i < len(data[j]):
                cash_flows_data[header][category] = data[j][i]
            else:
                cash_flows_data[header][category] = 'null'

    return cash_flows_data


# Extract cash flows data
cash_flows_data = extract_cash_flows_data(html)


# Store extracted data in JSON format
if cash_flows_data is not None:
    with open('json/cash_flows_data.json', 'w') as f:
        json.dump(cash_flows_data, f, indent=4)

    print("Data stored in cash_flows_data.json")
else:
    print("No data extracted.")

Data stored in cash_flows_data.json


In [147]:
import json
from bs4 import BeautifulSoup

def extract_ratios_data(html):

    # Parse HTML content
    soup = BeautifulSoup(html, 'html.parser')

    # Find the ratios section
    ratios_section = soup.find('section', id='ratios')
    if ratios_section is None:
        print("Ratios section not found.")
        return None

    # Find the table
    table = ratios_section.find('table', class_='data-table')
    if table is None:
        print("Table not found.")
        return None

    # Extract table headers
    headers = [th.text.strip() for th in table.find_all('th')][1:]  

    # Extract table data
    data = []
    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        cols = [col.text.strip().replace(' ', '').replace('%', '').encode('ascii', 'ignore').decode('ascii') for col in cols][1:]  
        data.append([col for col in cols if col]) 

    # Create a dictionary with the extracted data
    ratios_data = {}
    categories = [row.find_all('td')[0].text.strip() for row in table.find_all('tr')[1:]]
    for i, header in enumerate(headers):
        ratios_data[header] = {}
        for j, category in enumerate(categories):
            if j < len(data) and i < len(data[j]):
                ratios_data[header][category] = data[j][i]
            else:
                ratios_data[header][category] = 'null'

    return ratios_data


# Extract ratios data
ratios_data = extract_ratios_data(html)


# Store extracted data in JSON format
if ratios_data is not None:
    with open('json/ratios_data.json', 'w') as f:
        json.dump(ratios_data, f, indent=4)

    print("Data stored in ratios_data.json")
else:
    print("No data extracted.")

Data stored in ratios_data.json


In [148]:
import json
from bs4 import BeautifulSoup

def extract_shareholding_data(html):

    # Parse HTML content
    soup = BeautifulSoup(html, 'html.parser')

    # Find the shareholding section
    shareholding_section = soup.find('section', id='shareholding')
    if shareholding_section is None:
        print("Shareholding section not found.")
        return None

    # Find the quarterly and yearly tables
    quarterly_table = shareholding_section.find('div', id='quarterly-shp').find('table')
    yearly_table = shareholding_section.find('div', id='yearly-shp').find('table')

    # Extract quarterly data
    quarterly_data = {}
    quarterly_headers = [th.text.strip() for th in quarterly_table.find_all('th')][1:]  
    quarterly_rows = quarterly_table.find_all('tr')[1:]
    for row in quarterly_rows:
        cols = row.find_all('td')
        category = cols[0].text.strip().replace('-', '').replace('+', '').replace('\u00a0', '')
        data = [col.text.strip().replace('-', '').replace('+', '').replace('\u00a0', '') for col in cols[1:]]  
        quarterly_data[category] = {}
        for i, header in enumerate(quarterly_headers):
            quarterly_data[category][header] = data[i]

    # Extract yearly data
    yearly_data = {}
    yearly_headers = [th.text.strip() for th in yearly_table.find_all('th')][1:]  
    yearly_rows = yearly_table.find_all('tr')[1:]
    for row in yearly_rows:
        cols = row.find_all('td')
        category = cols[0].text.strip().replace('-', '').replace('+', '').replace('\u00a0', '')
        data = [col.text.strip().replace('-', '').replace('+', '').replace('\u00a0', '') for col in cols[1:]]  
        yearly_data[category] = {}
        for i, header in enumerate(yearly_headers):
            yearly_data[category][header] = data[i]

    # Create a dictionary with the extracted data
    shareholding_data = {
        'Quarterly': quarterly_data,
        'Yearly': yearly_data
    }

    return shareholding_data


# Extract shareholding data
shareholding_data = extract_shareholding_data(html)


# Store extracted data in JSON format
if shareholding_data is not None:
    with open('json/shareholding_data.json', 'w') as f:
        json.dump(shareholding_data, f, indent=4)

    print("Data stored in shareholding_data.json")
else:
    print("No data extracted.")

Data stored in shareholding_data.json


In [152]:
import json
import os

# Define the path to the JSON files
json_folder = 'json'
companies_folder = 'companies'

# Define the list of JSON files
json_files = [
    'company_data.json',
    'quarters_data.json',
    'balance_sheet_data.json',
    'cash_flows_data.json',
    'profit_loss_data.json',
    'ratios_data.json',
    'shareholding_data.json'
]

# Load company name from company_data.json

# Load company data
with open(os.path.join(json_folder, 'company_data.json'), 'r') as f:
    company_data = json.load(f)
    company_name = company_data['company_name']

# Create a dictionary to store the combined data
combined_data = {}

# Iterate through the JSON files
json_files = [f for f in os.listdir(json_folder) if f.endswith('.json') and f != 'company_data.json']

for filename in json_files:
    with open(os.path.join(json_folder, filename), 'r') as f:
        data = json.load(f)
        combined_data[filename.replace('.json', '')] = data

# Add company name to combined data
combined_data['company_name'] = company_name

# Create a combined JSON file with company name
company_file_path = os.path.join(companies_folder, f'{company_name}.json')

with open(company_file_path, 'w') as f:
    json.dump(combined_data, f, indent=4)

print(f"Combined data stored in {company_file_path}")

Combined data stored in companies\Sun Pharmaceuticals Industries Ltd.json


In [150]:
# import csv
# import json
# import os

# # Define the path to the JSON folder
# json_folder = 'json'

# # Load company data from JSON file
# with open(os.path.join(json_folder, 'company_data.json')) as f:
#     company_data = json.load(f)

# # Load quarters data from JSON file
# with open(os.path.join(json_folder, 'quarters_data.json')) as f:
#     quarters_data = json.load(f)
    
# # Extract company data

# def print_company_data(company_data):
#     for key, value in company_data.items():
#         print(f"{key.capitalize()}: {value}")

# def print_quarters_data(quarters_data):
#     for key, value in quarters_data.items():
#         print(f"{key}:")
#         for k, v in value.items():
#             print(f"  {k}: {v}")
            
# def store_company_data_in_csv(company_data, filename):
#     with open(filename, 'w', newline='') as csvfile:
#         writer = csv.writer(csvfile)
        
#         # Store company data
#         writer.writerow(["Category", "Key", "Value"])  # header
#         for key, value in company_data.items():
#             if key not in ["ratios", "company_links", "about_and_key_points"]:
#                 writer.writerow(["Company", key, value])
        
#         # Store ratios
#         writer.writerow(["Category", "Key", "Value"])  # header
#         for ratio_key, ratio_value in company_data["ratios"].items():
#             writer.writerow(["Ratios", ratio_key, ratio_value])
        
#         # Store company links
#         writer.writerow(["Category", "Key", "Value"])  # header
#         for link_key, link_value in enumerate(company_data["company_links"]):
#             writer.writerow(["Company Links", f"Link {link_key+1}", link_value])
        
#         # Store about and key points
#         writer.writerow(["Category", "Key", "Value"])  # header
#         about_and_key_points = company_data["about_and_key_points"].split("Key_points: ")
#         about = about_and_key_points[0]
#         key_points = about_and_key_points[1]
#         writer.writerow(["About", "", about])
#         writer.writerow(["Key Points", "", key_points])



# def store_quarters_data_in_csv(quarters_data, filename):
#     with open(filename, 'w', newline='') as csvfile:
#         writer = csv.writer(csvfile)
        
#         # Write header
#         categories = list(quarters_data.keys())
#         quarters = list(quarters_data[categories[0]].keys())
#         writer.writerow(["Category"] + quarters)  # header
        
#         # Write data
#         for category in categories:
#             row = [category]
#             for quarter in quarters:
#                 if quarter in quarters_data[category]:
#                     row.append(quarters_data[category][quarter])
#                 else:
#                     row.append("null")  # Use "null" as a placeholder
#             writer.writerow(row)
            

# def store_data_in_csv(company_data, quarters_data, filename):
#     with open(filename, 'w', newline='') as csvfile:
#         writer = csv.writer(csvfile)
        
#         # Store company data
#         writer.writerow(["Category", "Key", "Value"])  # header
#         for key, value in company_data.items():
#             if key not in ["ratios", "company_links", "about_and_key_points"]:
#                 writer.writerow(["Company", key, value])
        
#         # Store ratios
#         for ratio_key, ratio_value in company_data["ratios"].items():
#             writer.writerow(["Ratios", ratio_key, ratio_value])
        
#         # Store company links
#         for link_key, link_value in enumerate(company_data["company_links"]):
#             writer.writerow(["Company Links", f"Link {link_key+1}", link_value])
        
#         # Store about and key points
#         about_and_key_points = company_data["about_and_key_points"].split("Key_points: ")
#         about = about_and_key_points[0]
#         key_points = about_and_key_points[1]
#         writer.writerow(["About", "", about])
#         writer.writerow(["Key Points", "", key_points])
        
#         # Store quarters data
#         writer.writerow(["Quarters Data"])  # header
#         categories = list(quarters_data.keys())
#         quarters = list(quarters_data[categories[0]].keys())
#         writer.writerow(["Category"] + quarters)  # header
#         for category in categories:
#             row = [category]
#             for quarter in quarters:
#                 if quarter in quarters_data[category]:
#                     row.append(quarters_data[category][quarter])
#                 else:
#                     row.append("null")  # Use "null" as a placeholder
#             writer.writerow(row)

# def load_json_data(file_name):
#     with open(os.path.join(json_folder, file_name)) as f:
#         return json.load(f)

# # Load company data
# company_data = load_json_data('company_data.json')

# # Load quarters data
# quarters_data = load_json_data('quarters_data.json')
# # Print the data
# if company_data is not None:
#     print_company_data(company_data)
# if quarters_data is not None:
#     print_quarters_data(quarters_data)

# # Store the data in a CSV file
# if company_data is not None or quarters_data is not None:
#     store_data_in_csv(company_data, quarters_data, 'company_data.csv')

In [151]:
# import json
# from bs4 import BeautifulSoup
# from requests_html import HTMLSession
# import asyncio
# from requests_html import AsyncHTMLSession


# async def simulate_button_clicks(url):
#     session = AsyncHTMLSession()
#     r = await session.get(url)
#     await r.html.arender()

#     # Find buttons and simulate clicks
#     buttons = r.html.find('.button-plain')
#     for button in buttons:
#         # Check if button has onclick attribute
#         if button.attrs.get('onclick'):
#             # Execute JavaScript onclick code
#             await r.html.arender(script=button.attrs.get('onclick'))
#         # Check if button has href attribute
#         elif button.attrs.get('href'):
#             # Send GET request to href URL
#             r = await session.get(button.attrs.get('href'))
#             await r.html.arender()

#     return r.html.html


# def extract_shareholding_data(html_shareholder):
#     # Parse HTML content
#     soup = BeautifulSoup(html_shareholder, 'html.parser')

#     # Find the shareholding section
#     shareholding_section = soup.find('section', id='shareholding')
#     if shareholding_section is None:
#         print("Shareholding section not found.")
#         return None

#     # Find the quarterly and yearly tables
#     quarterly_table = shareholding_section.find('div', id='quarterly-shp').find('table')
#     yearly_table = shareholding_section.find('div', id='yearly-shp').find('table')

#     # Extract quarterly data
#     quarterly_data = {}
#     quarterly_headers = [th.text.strip() for th in quarterly_table.find_all('th')][1:]  
#     quarterly_rows = quarterly_table.find_all('tr')

#     for row in quarterly_rows:
#         cols = row.find_all('td')
#         if len(cols) < 2:
#             continue

#         category = cols[0].text.strip().replace('-', '').replace('+', '').replace('\u00a0', '')
#         if category == '':
#             continue

#         data = [col.text.strip().replace('-', '').replace('+', '').replace('\u00a0', '') for col in cols[1:]]  
#         quarterly_data[category] = {}
#         for i, header in enumerate(quarterly_headers):
#             if i < len(data):
#                 quarterly_data[category][header] = data[i]

#     # Extract yearly data
#     yearly_data = {}
#     yearly_headers = [th.text.strip() for th in yearly_table.find_all('th')][1:]  
#     yearly_rows = yearly_table.find_all('tr')

#     for row in yearly_rows:
#         cols = row.find_all('td')
#         if len(cols) < 2:
#             continue

#         category = cols[0].text.strip().replace('-', '').replace('+', '').replace('\u00a0', '')
#         if category == '':
#             continue

#         data = [col.text.strip().replace('-', '').replace('+', '').replace('\u00a0', '') for col in cols[1:]]  
#         yearly_data[category] = {}
#         for i, header in enumerate(yearly_headers):
#             if i < len(data):
#                 yearly_data[category][header] = data[i]

#     # Create a dictionary with the extracted data
#     shareholding_data = {
#         'Quarterly': quarterly_data,
#         'Yearly': yearly_data
#     }

#     return shareholding_data


# # Simulate button clicks and extract shareholding data
# url = 'https://www.screener.in/company/SUNPHARMA/consolidated/'
# html_shareholder = asyncio.run(simulate_button_clicks(url))

# if html_shareholder:
#     shareholding_data = extract_shareholding_data(html_shareholder)
    
#     # Store extracted data in JSON format
#     if shareholding_data is not None:
#         with open('json/shareholding_data.json', 'w') as f:
#             json.dump(shareholding_data, f, indent=4)

#         print("Data stored in shareholding_data.json")
#     else:
#         print("No data extracted.")
# else:
#     print("Failed to retrieve HTML content.")