In [4]:
!pip install -U --quiet requests beautifulsoup4 selenium ollama

In [5]:
#https://www.screener.in/company/RELIANCE/consolidated/

import pandas as pd            #to work with the data frames
import requests                #to request webpages for scrapping 
from bs4 import BeautifulSoup  #to parse html
from selenium import webdriver #to scrappages that require javascript
from selenium.webdriver.common.by import By #to select html elements by their attributes
from selenium.webdriver.chrome.service import Service   #to use the chrome driver
from selenium.webdriver.chrome.options import Options #to set options for the chrome driver
from webdriver_manager.chrome import ChromeDriverManager #to manage the chrome driver

## Getting user input and building the Screener link for it

In [6]:
# Example value: symbol = 'RELIANCE'
# Normalise the ticker typed by the user: trim surrounding whitespace and upper-case it.
symbol = input("Enter symbol of the company : ").strip().upper()
if not symbol:
    # An empty symbol would otherwise build ".../company//consolidated/" and the
    # scraping cells would run against the wrong page.
    raise ValueError("No company symbol entered.")

def get_screener_link(sym):
    """Build the Screener.in consolidated-page URL for a company symbol.

    Args:
    - sym: Company symbol (e.g. 'RELIANCE'). Converted with str() and
      percent-encoded so characters such as '&', '/', '?', '#' or spaces
      cannot alter the structure of the URL.

    Returns:
    - str: URL of the form https://www.screener.in/company/<symbol>/consolidated/
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    # safe='' also encodes '/', so the symbol always stays a single path segment.
    encoded_sym = quote(str(sym), safe='')
    sym_url = f"https://www.screener.in/company/{encoded_sym}/consolidated/"
    return sym_url
    
# Screener URL for the symbol entered above; the Selenium cell loads this `url`.
url = get_screener_link(symbol)

## Web scraping the Screener page

In [7]:
# Set up Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Ensure GUI is off
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

# Set up the WebDriver
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

try:
    # Navigate to the URL and capture the rendered HTML for the parsing cells below
    driver.get(url)
    page_source = driver.page_source
finally:
    # Always close the WebDriver, even if navigation fails, so no headless
    # Chrome process is left running.
    driver.quit()




#### Extracting Company website

In [8]:
from bs4 import BeautifulSoup

def extract_company_website(html_content):
    """Return the href of the first link inside the 'company-links' div.

    Args:
    - html_content: HTML of the company page, parsed with 'html.parser'.

    Returns:
    - The 'href' value of the first <a href=...> found inside
      <div class="company-links">, or None when the div or the link is missing.
    """
    parsed_page = BeautifulSoup(html_content, 'html.parser')

    # Guard clause: no 'company-links' container on the page.
    links_container = parsed_page.find('div', class_='company-links')
    if not links_container:
        return None

    # Guard clause: the container holds no anchor carrying an href.
    first_anchor = links_container.find('a', href=True)
    if not first_anchor:
        return None

    return first_anchor['href']

# Pull the company website out of the HTML captured by the Selenium cell.
# `company_website` is None when the page has no 'company-links' link.
company_website = extract_company_website(page_source)
print(f"Company Website: {company_website}")


Company Website: https://www.shreecement.com/


### Getting annual report links

In [9]:

# Parse the HTML
soup = BeautifulSoup(page_source, 'html.parser')

# The page carries several `ul.list-links` blocks; this cell reads the second
# one (index 1), which held the annual reports in the captured run.
# NOTE(review): the selection is position-based — confirm if the page layout changes.
ANNUAL_REPORTS_LIST_INDEX = 1
link_lists = soup.find_all('ul', class_='list-links')

if len(link_lists) > ANNUAL_REPORTS_LIST_INDEX:
    ul = link_lists[ANNUAL_REPORTS_LIST_INDEX]
    # Extract all <a> tags within the selected `ul`
    links = ul.find_all('a', href=True)
else:
    # Fewer lists than expected (e.g. unknown symbol or error page): report it
    # instead of failing with an IndexError.
    ul = None
    links = []
    print(f"Annual reports list not found ({len(link_lists)} 'list-links' block(s) on the page).")

# Print the links and their text
for link in links:
    href = link['href']
    # A separator keeps text from nested tags apart
    # ("Financial Year 2024 from bse" rather than "Financial Year 2024from bse").
    text = link.get_text(" ", strip=True)
    print(f"Link: {href}, Text: {text}")


Link: https://www.bseindia.com/stockinfo/AnnPdfOpen.aspx?Pname=fe3c32f3-1652-436c-8c50-27fd0e77e5c8.pdf, Text: Financial Year 2024from bse
Link: https://www.bseindia.com/stockinfo/AnnPdfOpen.aspx?Pname=\b2acbd8a-8f1f-403c-b7d4-c63a7b4bf443.pdf, Text: Financial Year 2023from bse
Link: https://www.bseindia.com/bseplus/AnnualReport/500387/73445500387.pdf, Text: Financial Year 2022from bse
Link: https://www.bseindia.com/bseplus/AnnualReport/500387/68964500387.pdf, Text: Financial Year 2021from bse
Link: https://www.bseindia.com/bseplus/AnnualReport/500387/5003870320.pdf, Text: Financial Year 2020from bse
Link: https://www.bseindia.com/bseplus/AnnualReport/500387/5003870319.pdf, Text: Financial Year 2019from bse
Link: https://www.bseindia.com/bseplus/AnnualReport/500387/5003870318.pdf, Text: Financial Year 2018from bse
Link: https://www.bseindia.com/bseplus/AnnualReport/500387/5003870317.pdf, Text: Financial Year 2017from bse
Link: https://www.bseindia.com/bseplus/AnnualReport/500387/500387

### Fetching from company website

In [10]:
import requests
from bs4 import BeautifulSoup

# Fetch the sitemap XML
# rstrip('/') avoids a double slash when the scraped website already ends with '/'.
sitemap_xml = b""
sitemap_url = None
if company_website:
    sitemap_url = f"{company_website.rstrip('/')}/sitemap.xml"
    try:
        response = requests.get(sitemap_url, timeout=30)
        response.raise_for_status()  # an HTTP error page is not a sitemap
        sitemap_xml = response.content
    except requests.exceptions.RequestException as e:
        # Same best-effort handling as the governance scraper: report and continue
        # with an empty sitemap.
        print(f"Error fetching the sitemap: {e}")
else:
    print("No company website available; skipping the sitemap lookup.")

# Parse the XML
soup = BeautifulSoup(sitemap_xml, 'xml')

# Extract all <loc> tags which contain the URLs
urls = [loc.get_text() for loc in soup.find_all('loc')]

# Keep only the sitemap entries whose URL contains one of these keywords
investor_keywords = ['corporate-governance']
filtered_links = [
    page_url for page_url in urls
    if any(keyword in page_url for keyword in investor_keywords)
]

# Print the URLs
# The loop variable is deliberately not called `url`, so the Screener link held
# in the global `url` is not overwritten.
for page_url in filtered_links:
    print(page_url)

https://www.shreecement.com/investors/corporate-governance


In [11]:
def extract_governance_links(url):
    """
    Extracts governance-related links from the page at the specified URL.

    Every <a href=...> on the page is inspected; a link is kept when its
    visible text (lower-cased) contains one of the governance keywords.
    Only the given page is scanned — linked pages are not followed.

    Args:
    - url (str): The URL of the webpage to scrape.

    Returns:
    - list: A list of dictionaries with 'text' (lower-cased link text) and
      'link' (href resolved against `url`, so relative links become absolute).
      An empty list is returned, after printing the error, when the request fails.
    """
    from urllib.parse import urljoin  # local import keeps this cell self-contained

    try:
        # Fetch the webpage content; the timeout stops a stalled server from
        # hanging the notebook.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise error for bad status codes

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find all 'a' tags with 'href' attribute
        links = soup.find_all('a', href=True)

        # Keywords related to governance
        governance_keywords = [
            "governance", "policy", "compliance", "audit", "code of conduct",
            "board of directors", "committee", "ethics", "regulations"
        ]

        # Filter links based on governance keywords
        governance_links = []
        for link in links:
            link_text = link.get_text(strip=True).lower()
            # Resolve relative hrefs ("/uploads/x.pdf") against the page URL so
            # every returned link can be fetched directly; absolute hrefs pass through.
            link_href = urljoin(url, link['href'])

            # Check if the link text contains any governance-related keyword
            if any(keyword in link_text for keyword in governance_keywords):
                governance_links.append({'text': link_text, 'link': link_href})

        return governance_links

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return []

def fetch_governance_links_from_multiple_pages(filtered_links):
    """
    Collects governance-related links from every page in `filtered_links`.

    Each URL is announced with a progress line and then passed to
    extract_governance_links; the per-page results are concatenated in
    input order.

    Args:
    - filtered_links (list): URLs of the pages to scrape.

    Returns:
    - list: All dictionaries ('text' and 'link' keys) gathered across the pages.
    """
    collected = []

    for page_url in filtered_links:
        print(f"Fetching governance links from: {page_url}")
        collected += extract_governance_links(page_url)

    return collected



# Scrape every page selected from the sitemap and pool the matching links.
governance_links_from_all_pages = fetch_governance_links_from_multiple_pages(filtered_links)

# Show each hit as "Text: <link text>, Link: <href>".
for item in governance_links_from_all_pages:
    print(f"Text: {item['text']}, Link: {item['link']}")


Fetching governance links from: https://www.shreecement.com/investors/corporate-governance
Text: board of directors, Link: https://www.shreecement.com/about-us#board-directors
Text: corporate governance, Link: https://www.shreecement.com/investors/corporate-governance
Text: disclosures under regulation 46 and 62 of sebi (lodr) regulations, Link: https://www.shreecement.com/investors/disclosure-regulation
Text: corporate governance report june 2024, Link: https://www.shreecement.com/uploads/cleanupload/Corporate Governance Report June 2024.pdf
Text: corporate governance report september 2024, Link: https://www.shreecement.com/uploads/cleanupload/Corporate Governance Report September 2024.pdf
Text: corporate governance report december 2024, Link: https://www.shreecement.com/uploads/cleanupload/Corporate_Governance_Report_December_2024.pdf
Text: corporate governance report march 2024, Link: https://www.shreecement.com/uploads/cleanupload/Corporate Governance Report March 2024.pdf
Text: cg

In [12]:
import json

# Serialise the collected governance links (a list of {'text', 'link'} dicts)
# to a JSON string, pretty-printed with a two-space indent.
governance_links_json = json.dumps(governance_links_from_all_pages, indent=2)

# Print the JSON string
print(governance_links_json)

[
  {
    "text": "board of directors",
    "link": "https://www.shreecement.com/about-us#board-directors"
  },
  {
    "text": "corporate governance",
    "link": "https://www.shreecement.com/investors/corporate-governance"
  },
  {
    "text": "disclosures under regulation 46 and 62 of sebi (lodr) regulations",
    "link": "https://www.shreecement.com/investors/disclosure-regulation"
  },
  {
    "text": "corporate governance report june 2024",
    "link": "https://www.shreecement.com/uploads/cleanupload/Corporate Governance Report June 2024.pdf"
  },
  {
    "text": "corporate governance report september 2024",
    "link": "https://www.shreecement.com/uploads/cleanupload/Corporate Governance Report September 2024.pdf"
  },
  {
    "text": "corporate governance report december 2024",
    "link": "https://www.shreecement.com/uploads/cleanupload/Corporate_Governance_Report_December_2024.pdf"
  },
  {
    "text": "corporate governance report march 2024",
    "link": "https://www.shreece

## Running ChatPDF

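CHATPDF_API = '<redacted: keep the ChatPDF key out of the notebook, e.g. in an environment variable, and revoke the key that was previously written here>'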

https://www.shreecement.com/uploads/cleanupload/Corporate_Governance_Report_December_2024.pdf

https://www.bseindia.com/xml-data/corpfiling/AttachHis/c83496de-aca9-48d8-b4b5-642ed791ee9e.pdf

In [45]:
import getpass
import os

import requests

# The ChatPDF key is read from the environment (or prompted for) instead of
# being stored in the notebook. The key previously committed here must be revoked.
chatpdf_api_key = os.environ.get("CHATPDF_API_KEY") or getpass.getpass("ChatPDF API key: ")

headers = {
  'x-api-key': chatpdf_api_key,
  'Content-Type': 'application/json'
}
# PDF that ChatPDF should fetch and index
data = {'url': 'https://www.shreecement.com/uploads/cleanupload/Corporate_Governance_Report_December_2024.pdf'}

# The timeout keeps the cell from hanging if the API does not answer.
response = requests.post(
    'https://api.chatpdf.com/v1/sources/add-url', headers=headers, json=data, timeout=60)

if response.status_code == 200:
    # The source ID identifies the uploaded PDF in later chat requests.
    print('Source ID:', response.json()['sourceId'])
else:
    print('Status:', response.status_code)
    print('Error:', response.text)

Source ID: src_F9AU1fH7LT0nQesvfsE18


In [48]:

import getpass
import os

import requests

# The ChatPDF key is read from the environment (or prompted for) instead of
# being stored in the notebook. The key previously committed here must be revoked.
chatpdf_api_key = os.environ.get("CHATPDF_API_KEY") or getpass.getpass("ChatPDF API key: ")

headers = {
    'x-api-key': chatpdf_api_key,
    "Content-Type": "application/json",
}


# NOTE(review): the source ID is pasted from the output of the add-url cell;
# it has to be updated by hand whenever a different PDF is uploaded.
data = {
    'sourceId': "src_F9AU1fH7LT0nQesvfsE18",
    'messages': [
        {
            'role': "user",
            'content': "List all the people on the board along with their date of appintment, date of reappointment, gender and their category. Give answer in JSON format.",
        }
    ]
}

# The timeout keeps the cell from hanging if the API does not answer.
response = requests.post(
    'https://api.chatpdf.com/v1/chats/message', headers=headers, json=data, timeout=120)

if response.status_code == 200:
    print('Result:', response.json()['content'])
else:
    print('Status:', response.status_code)
    print('Error:', response.text)

Result: Here is the information in JSON format:

```json
[
    {
        "name": "Hari Mohan Bangur",
        "date_of_appointment": "31-07-1992",
        "date_of_reappointment": null,
        "gender": "Male",
        "category": "Executive/Chairperson"
    },
    {
        "name": "Prashant Bangur",
        "date_of_appointment": "23-08-2012",
        "date_of_reappointment": null,
        "gender": "Male",
        "category": "Executive"
    },
    {
        "name": "Neeraj Akhoury",
        "date_of_appointment": "14-10-2022",
        "date_of_reappointment": null,
        "gender": "Male",
        "category": "Executive/MD"
    },
    {
        "name": "Uma Ghurka",
        "date_of_appointment": "11-11-2019",
        "date_of_reappointment": "11-11-2024",
        "gender": "Female",
        "category": "Non-Executive - Independent Director"
    },
    {
        "name": "Sanjiv Krishnaji Shelgikar",
        "date_of_appointment": "05-08-2015",
        "date_of_reappointment": "05

In [48]:

import getpass
import os

import requests

# The ChatPDF key is read from the environment (or prompted for) instead of
# being stored in the notebook. The key previously committed here must be revoked.
chatpdf_api_key = os.environ.get("CHATPDF_API_KEY") or getpass.getpass("ChatPDF API key: ")

headers = {
    'x-api-key': chatpdf_api_key,
    "Content-Type": "application/json",
}


# NOTE(review): this source ID is not produced by any cell visible in this
# notebook — confirm which uploaded PDF it refers to.
data = {
    "referenceSources": True,
    'sourceId': "src_IPMHJb1iEnGuhZUNHyiCQ",
    'messages': [
        {
            'role': "user",
            'content': "Has it mentioned that there are different class of shares? who owns them and how is it structured?",
        }
    ]
}

# The timeout keeps the cell from hanging if the API does not answer.
response = requests.post(
    'https://api.chatpdf.com/v1/chats/message', headers=headers, json=data, timeout=120)

if response.status_code == 200:
    print('Result:', response.json()['content'])
else:
    print('Status:', response.status_code)
    print('Error:', response.text)

Result: The provided pages do not explicitly mention different classes of shares. However, they do outline the shareholding pattern as of March 31, 2024, which includes categories such as Promoters & Promoters Group and Public Shareholdings. 

The structure is as follows:

1. **Promoters & Promoters Group**: 
   - Individual (Indian Nationals): 1,91,93,320 shares (12.29%)
   - Individual (Foreign National): 1,00,40,486 shares (6.43%)
   - Body Corporate: 2,39,58,225 shares (15.34%)
   - Promoters Group: 3,25,530 shares (0.21%)
   - **Total**: 5,35,17,561 shares (34.27%)

2. **Public Shareholdings**:
   - Mutual Funds: 4,73,28,947 shares (30.31%)
   - Alternate Investment Funds: 25,60,332 shares (1.64%)
   - Foreign Portfolio Investor (Corporate): 2,00,63,290 shares (12.85%)
   - Financial Institutions: 6,32,622 shares (0.41%)
   - Individuals: 2,67,33,101 shares (17.12%)
   - Others: 53,34,248 shares (3.4%)
   - **Total**: 10,26,52,540 shares (65.73%)

3. **Non-Promoter - Non Public Sh

In [None]:

import getpass
import os

import requests

# The ChatPDF key is read from the environment (or prompted for) instead of
# being stored in the notebook. The key previously committed here must be revoked.
chatpdf_api_key = os.environ.get("CHATPDF_API_KEY") or getpass.getpass("ChatPDF API key: ")

headers = {
    'x-api-key': chatpdf_api_key,
    "Content-Type": "application/json",
}


# NOTE(review): this source ID is not produced by any cell visible in this
# notebook — confirm which uploaded PDF it refers to.
data = {
    "referenceSources": True,
    'sourceId': "src_IPMHJb1iEnGuhZUNHyiCQ",
    'messages': [
        {
            'role': "user",
            'content': "Can you list all related party transactions and whom is it given? Also add a column if the recepient is part of board of directors or key management personeel or any subcidiary.",
        }
    ]
}

# The timeout keeps the cell from hanging if the API does not answer.
response = requests.post(
    'https://api.chatpdf.com/v1/chats/message', headers=headers, json=data, timeout=120)

if response.status_code == 200:
    print('Result:', response.json()['content'])
else:
    print('Status:', response.status_code)
    print('Error:', response.text)

Result: The source does not contain a comprehensive list of all related party transactions along with the specific recipients and their affiliations with the board of directors, key management personnel, or subsidiaries. However, it does mention that during the fiscal 2024, the Company had material contracts or arrangements with TD Power Systems Europe GmbH, which is a wholly owned subsidiary of the Company. 

Here are the details available:

| Related Party                     | Nature of Transaction | Recipient Type                     |
|-----------------------------------|-----------------------|------------------------------------|
| TD Power Systems Europe GmbH      | Sale and purchase transactions | Wholly owned subsidiary            |

For further details on specific transactions, including amounts and terms, please refer to Annexure 2 of the Directors Report in Form AOC 2 and note No.43 to the Annual Accounts, as mentioned in the document [P27]. 

If you need more specific inf