### Importing necessary libraries

In [3]:
import time
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

### Data Collection

In [6]:
class IPO_Data:
    def __init__(self,driver,year,wait_time):
        self.driver = driver
        self.year = year
        self.wait_time_in_sec = wait_time
        self.wait = WebDriverWait(self.driver,self.wait_time_in_sec)
        self.sme_year_next_click = {2019:4, 2020:4, 2021:5, 2022:7, 2023:9, 2024:9, 2025:6}
        self.mainboard_year_next_click = {2021:5, 2022:4, 2023:5, 2024:6}
        self.year_next_click = {2019:5, 2020:4, 2021:7, 2022:8, 2023:9, 2024:9,2025:7}
        

    def wait_to_load(self):
        page_title = self.driver.title
        try:
            self.wait.until(
                lambda d:d.execute_script("return document.readyState") == 'complete'
            )
    
        except:
                print(f"The given \'{page_title}\' page is not fuly loaded in given time.")


    def get_sme_ipo_data(self):
        """Scrape the yearly IPO screener tables with the SME filter applied.

        Returns:
            list[dict]: one record per table row, every record carrying
            ``'Issue type': 'SME'``.
        """
        return self._collect_listing_data(
            segment_xpath='//*[@id="ipo-section"]/main/div[2]/div[1]/div/div[2]/div/div[2]/a',
            issue_type='SME',
            stable_checks=10,
        )


    def get_mainboard_ipo_data(self):
        """Scrape the yearly IPO screener tables with the main-board filter applied.

        Returns:
            list[dict]: one record per table row, every record carrying
            ``'Issue type': 'MainBoard'``.
        """
        return self._collect_listing_data(
            segment_xpath='//*[@id="ipo-section"]/main/div[2]/div[1]/div/div[2]/div/div[3]/a',
            issue_type='MainBoard',
            stable_checks=6,
        )


    def _collect_listing_data(self, segment_xpath, issue_type, stable_checks):
        """Shared implementation of the SME / main-board listing scrapers.

        For every year in ``range(2019, self.year)`` the screener page is opened,
        the filter link at ``segment_xpath`` is clicked, the page-size select is
        set to 'All', and the listing table is parsed from the page source.

        Args:
            segment_xpath: XPath of the filter link to click after the page loads.
            issue_type: value stored under ``'Issue type'`` in every record.
            stable_checks: number of consecutive one-second polls with an
                unchanged row count after which the table is treated as loaded.

        Returns:
            list[dict]: records in page order, years ascending. A year whose
            table cannot be read is reported and skipped instead of aborting
            the whole run.
        """
        table_xpath = '//*[@id="ipo-section"]/main/div[2]/div[2]/div[1]/table'
        rows_xpath = table_xpath + '/tbody/tr'
        select_xpath = '//*[@id="ipo-section"]/main/div[2]/div[2]/div[2]/div/div/select'
        # Columns read from cells 1..13; cell 0 is the company name.
        value_columns = (
            'LTP',
            'Market Cap(in CR)',
            'Listing date',
            'Issue size',
            'Issue price',
            'QIB subscription',
            'HNI subscription',
            'Retail subscription',
            'Total subscription',
            'Listing open',
            'Listing close',
            'Listing gain',
            'Current gain',
        )
        required_cells = len(value_columns) + 1

        listing_data = []
        for year in range(2019, self.year):
            self.driver.get(f'https://trendlyne.com/ipo/screener/year-{year}/')
            self.wait_to_load()
            self.wait.until(EC.presence_of_element_located((By.XPATH, table_xpath)))

            segment_button = self.wait.until(
                EC.element_to_be_clickable((By.XPATH, segment_xpath))
            )
            segment_button.click()
            self.wait_to_load()

            page_size = self.wait.until(
                EC.element_to_be_clickable((By.XPATH, select_xpath))
            )
            Select(page_size).select_by_visible_text('All')

            # Poll once per second (at most 20 times) until the row count has been
            # unchanged for `stable_checks` polls in a row; presumably rows keep
            # arriving for a while after 'All' is selected.
            current_count = 0
            last_count = 0
            stable_count = 0
            for _ in range(20):
                current_count = len(self.driver.find_elements(By.XPATH, rows_xpath))
                if current_count == last_count:
                    stable_count += 1
                    if stable_count >= stable_checks:
                        break
                else:
                    stable_count = 0
                    last_count = current_count
                time.sleep(1)
            print(f"{year}: {current_count} companies loaded.")

            self.wait_to_load()

            # Take a fresh page source on every attempt: re-parsing the same HTML
            # can never turn a failed parse into a successful one.
            table_rows = None
            for _ in range(5):
                try:
                    self.wait.until(
                        EC.presence_of_element_located((By.TAG_NAME, 'table'))
                    )
                    soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                    table_rows = (
                        soup.find('div', class_='recent_detail')
                        .find('table')
                        .find('tbody')
                        .find_all('tr')
                    )
                    break
                except Exception:
                    # Wait timeout, or a missing div/table/tbody (AttributeError on None).
                    time.sleep(1)

            if table_rows is None:
                print(f"Year {year}: listing table could not be read, skipping this year.")
                continue

            company_counter = 0
            for row in table_rows:
                cells = row.find_all('td')
                if len(cells) < required_cells:
                    # Not a full data row (for example a placeholder row); indexing
                    # it would raise IndexError.
                    continue

                record = {'Company': cells[0].text, 'Issue type': issue_type}
                for column, cell in zip(value_columns, cells[1:]):
                    record[column] = cell.text
                listing_data.append(record)
                company_counter += 1
            print(f"Year {year} data is collected. Total {company_counter} no. of companies data are collected.")

        return listing_data


    def get_annual_financial_data(self):
        """Scrape the table shown right after opening each company's Fundamentals tab.

        Returns:
            list[dict]: one record per table row with keys ``'Company'``,
            ``'Indicator'`` and one key per period column listed below
            (``np.nan`` where the company's table has no such column).
        """
        universal_header = ["CAGR 3 Yrs","CAGR 5 Yrs","TTM","Mar '25","Mar '24","Mar '23","Mar '22","Mar '21","Mar '20","Mar '19","Mar '18","Mar '17","Mar '16","Mar '15"]
        return self._collect_fundamental_tables(universal_header)




    def get_balance_sheet_data(self):
        """Scrape each company's 'Balance Sheet' table from the Fundamentals tab.

        Returns:
            list[dict]: one record per table row with keys ``'Company'``,
            ``'Indicator'`` and one key per period column listed below
            (``np.nan`` where the company's table has no such column).
        """
        universal_header = ["CAGR 3 Yrs","CAGR 5 Yrs","Mar '25","Mar '24","Mar '23","Mar '22","Mar '21","Mar '20","Mar '19","Mar '18","Mar '17","Mar '16","Mar '15"]
        return self._collect_fundamental_tables(
            universal_header,
            sub_tab_label='Balance Sheet',
            sub_tab_positions=range(3, 7),
        )




    def get_financial_ratio(self):
        """Scrape each company's 'Financial Ratios' table from the Fundamentals tab.

        Returns:
            list[dict]: one record per table row with keys ``'Company'``,
            ``'Indicator'`` and one key per period column listed below
            (``np.nan`` where the company's table has no such column).
        """
        universal_header = ["CAGR 3 Yrs","CAGR 5 Yrs","Mar '25","Mar '24","Mar '23","Mar '22","Mar '21","Mar '20","Mar '19","Mar '18","Mar '17","Mar '16","Mar '15"]
        return self._collect_fundamental_tables(
            universal_header,
            sub_tab_label='Financial Ratios',
            sub_tab_positions=range(3, 8),
        )


    def _collect_fundamental_tables(self, universal_header, sub_tab_label=None, sub_tab_positions=()):
        """Shared driver loop for the per-company fundamentals scrapers.

        For every year in ``range(2019, self.year)`` the screener page is opened
        with page size 'All', the company links are read from the listing, and
        each company page is opened in a new tab, scraped and closed again.

        Args:
            universal_header: ordered column labels every record should carry.
            sub_tab_label: text of the sub-tab to click inside the Fundamentals
                section, or ``None`` to scrape the table shown by default.
            sub_tab_positions: ``div[...]`` indices probed for that sub-tab.

        Returns:
            list[dict]: records of all companies whose table could be read.
            Companies whose tab or table fails to load are reported and skipped
            rather than recorded with another company's data.
        """
        table_xpath = '//*[@id="ipo-section"]/main/div[2]/div[2]/div[1]/table'
        rows_xpath = table_xpath + '/tbody/tr'
        select_xpath = '//*[@id="ipo-section"]/main/div[2]/div[2]/div[2]/div/div/select'

        collected = []
        for year in range(2019, self.year):
            self.driver.get(f'https://trendlyne.com/ipo/screener/year-{year}/')
            self.wait_to_load()
            self.wait.until(EC.presence_of_element_located((By.XPATH, table_xpath)))

            page_size = self.wait.until(
                EC.element_to_be_clickable((By.XPATH, select_xpath))
            )
            Select(page_size).select_by_visible_text('All')

            # Poll once per second (at most 20 times) until the row count has been
            # unchanged for 6 polls in a row.
            listing_rows = []
            last_count = 0
            stable_count = 0
            for _ in range(20):
                listing_rows = self.driver.find_elements(By.XPATH, rows_xpath)
                if len(listing_rows) == last_count:
                    stable_count += 1
                    if stable_count >= 6:
                        break
                else:
                    stable_count = 0
                    last_count = len(listing_rows)
                time.sleep(1)
            print(f"{year}: {len(listing_rows)} companies loaded.")

            self.wait_to_load()

            # Read every name/link before leaving the page, so no listing element
            # has to stay valid while company tabs are opened and closed.
            companies = self._read_company_links(listing_rows, rows_xpath)
            main_tab = self.driver.current_window_handle

            company_counter = 0
            for company_name, link in companies:
                time.sleep(2)  # same per-company pause as before

                known_handles = set(self.driver.window_handles)
                self.driver.execute_script('window.open(arguments[0]);', link)
                new_handles = [
                    handle for handle in self.driver.window_handles
                    if handle not in known_handles
                ]
                if not new_handles:
                    print(f"Skipping {company_name}: no new tab was opened.")
                    continue

                self.driver.switch_to.window(new_handles[0])
                try:
                    self.wait_to_load()
                    records = self._scrape_fundamental_table(
                        company_name, universal_header, sub_tab_label, sub_tab_positions
                    )
                finally:
                    # Close the company tab and return to the listing even when
                    # scraping raised, so tabs do not pile up.
                    self.driver.close()
                    self.driver.switch_to.window(main_tab)
                self.wait_to_load()

                if records is not None:
                    collected.extend(records)
                    company_counter += 1

            print(f"Year {year} data is collected. Total {company_counter} no. of companies data are collected.")

        return collected


    def _read_company_links(self, listing_rows, rows_xpath, retries=2):
        """Return ``(company_name, href)`` pairs from the first cell of each listing row.

        Args:
            listing_rows: row elements already located on the listing page.
            rows_xpath: XPath used to re-locate the rows after a failed read.
            retries: extra attempts per row after the first failure.

        Rows whose link cannot be read after all attempts, or that have no
        ``href``, are reported and left out.
        """
        companies = []
        for position in range(len(listing_rows)):
            entry = None
            for attempt in range(retries + 1):
                try:
                    if attempt:
                        # The earlier references may be stale; locate the rows again
                        # and pick the same position.
                        listing_rows = self.driver.find_elements(By.XPATH, rows_xpath)
                    first_cell = listing_rows[position].find_element(By.TAG_NAME, 'td')
                    link = first_cell.find_element(By.TAG_NAME, 'a').get_attribute('href')
                    entry = (first_cell.text.strip(), link)
                    break
                except Exception as error:
                    print(f"[Retryng stale issue] {error}")
                    time.sleep(1)

            if entry is None or not entry[1]:
                print(f"Skipping row {position + 1}")
                continue
            companies.append(entry)
        return companies


    def _scrape_fundamental_table(self, company_name, universal_header, sub_tab_label, sub_tab_positions):
        """Scrape one fundamentals table from the company page in the current tab.

        Returns:
            list[dict] | None: the parsed records, or ``None`` when the
            Fundamentals tab, the requested sub-tab or the table itself could not
            be reached. The caller then skips the company instead of storing
            unrelated data under its name.
        """
        fundamentals_xpath = '//*[@id="ipo-section"]/main/nav/div/a[2]'
        for _ in range(5):
            try:
                self.wait.until(
                    EC.presence_of_element_located((By.XPATH, fundamentals_xpath))
                ).click()
                break
            except Exception:
                time.sleep(1)
        else:
            print(f"Skipping {company_name}: Fundamentals tab could not be opened.")
            return None

        if sub_tab_label is not None:
            clicked = False
            for _ in range(5):
                try:
                    for position in sub_tab_positions:
                        sub_tab = self.wait.until(
                            EC.presence_of_element_located(
                                (By.XPATH, f'//*[@id="fundamental_tables"]/div/div[1]/div/div[1]/div/div[{position}]')
                            )
                        )
                        if sub_tab.text.strip() == sub_tab_label:
                            sub_tab.click()
                            clicked = True
                            break
                except Exception:
                    time.sleep(1)
                else:
                    # Every probed position was readable; retrying would not change that.
                    break
            if not clicked:
                # Without this guard the default table would be stored as if it
                # were the requested one.
                print(f"Skipping {company_name}: '{sub_tab_label}' tab could not be opened.")
                return None

        header_value = None
        body_rows = None
        for _ in range(10):
            time.sleep(1)  # give the table time to render before reading the source
            soup = BeautifulSoup(self.driver.page_source, 'lxml')
            table = soup.find('table', class_='table tl-react-table-v7 table-striped')
            thead = table.find('thead') if table is not None else None
            tbody = table.find('tbody') if table is not None else None
            header_row = thead.find('tr') if thead is not None else None
            if header_row is not None and tbody is not None:
                header_value = [cell.text.strip() for cell in header_row.find_all('th')]
                body_rows = tbody.find_all('tr')
                break
            time.sleep(1)
        else:
            print(f"Skipping {company_name}: fundamentals table did not load.")
            return None

        records = []
        for table_row in body_rows:
            cells = table_row.find_all('td')
            label = cells[0].find('div', class_='indicator-value-container') if cells else None
            if label is None:
                # Row without an indicator label; nothing meaningful to record.
                continue

            record = {
                'Company': company_name,
                'Indicator': label.get_text(),
            }
            # Value cells start at index 2 and are consumed in `universal_header`
            # order for the labels present in this table's header (same alignment
            # rule as before); NOTE(review): this assumes the page lists its
            # columns in that same order — confirm against the live table.
            value_index = 2
            for head in universal_header:
                if head in header_value:
                    if value_index < len(cells):
                        record[head] = cells[value_index].get_text()
                    else:
                        record[head] = np.nan
                    value_index += 1
                else:
                    record[head] = np.nan
            records.append(record)

        return records



    def get_cash_flow(self):
        """Scrape the 'Cash Flow' fundamentals table of every company on the yearly IPO screener.

        For each year in ``range(2019, self.year)`` the Trendlyne IPO screener page is
        opened, its page-size selector is switched to 'All', and every listed company
        is opened in a temporary browser tab where the Fundamentals -> 'Cash Flow'
        table is parsed.

        Returns:
            list[dict]: one dict per (company, indicator) table row with the keys
            'Company', 'Indicator' and every entry of ``universal_header``; columns
            missing from a company's table are filled with ``np.nan``.
        """
        cash_flow_data = []
        universal_header = ["CAGR 3 Yrs","CAGR 5 Yrs","Mar '25","Mar '24","Mar '23","Mar '22","Mar '21","Mar '20","Mar '19","Mar '18","Mar '17","Mar '16","Mar '15"]
        table_xpath = '//*[@id="ipo-section"]/main/div[2]/div[2]/div[1]/table'
        rows_xpath = table_xpath + '/tbody/tr'

        for year in range(2019, self.year):
            self.driver.get(f'https://trendlyne.com/ipo/screener/year-{year}/')
            self.wait_to_load()

            self.wait.until(
                EC.presence_of_element_located((By.XPATH, table_xpath))
            )

            select_feild = self.wait.until(
                EC.element_to_be_clickable((By.XPATH,'//*[@id="ipo-section"]/main/div[2]/div[2]/div[2]/div/div/select'))
            )
            # show every company of the year on a single page
            Select(select_feild).select_by_visible_text('All')

            company_count = self._cash_flow_wait_for_stable_rows(rows_xpath)
            print(f"{year}: {company_count} companies loaded.")

            self.wait_to_load()
            company_counter = 0

            # Rows are addressed by position and re-located on every iteration, so
            # no WebElement reference has to survive the tab switches in between.
            for row_index in range(company_count):
                time.sleep(2)

                entry = self._cash_flow_company_link(rows_xpath, row_index)
                if entry is None:
                    print(f"Skipping row {row_index + 1}")
                    continue

                company_name, link = entry
                records = self._cash_flow_scrape_company(company_name, link, universal_header)
                if records:
                    cash_flow_data.extend(records)
                    # only companies that actually yielded data are counted
                    company_counter += 1

            print(f"Year {year} data is collected. Total {company_counter} no. of companies data are collected.")

        return cash_flow_data

    def _cash_flow_wait_for_stable_rows(self, rows_xpath, max_checks=20, stable_checks=6):
        """Poll the listing once per second until its row count stops changing; return the last count."""
        last_count = 0
        stable_count = 0
        current_count = 0

        for _ in range(max_checks):  # at most max_checks polls, one second apart
            current_count = len(self.driver.find_elements(By.XPATH, rows_xpath))

            if current_count == last_count:
                stable_count += 1
                if stable_count >= stable_checks:  # unchanged for stable_checks polls in a row
                    break
            else:
                stable_count = 0
                last_count = current_count

            time.sleep(1)

        return current_count

    def _cash_flow_company_link(self, rows_xpath, row_index, attempts=3):
        """Return ``(company_name, href)`` for the listing row at ``row_index`` (0-based), or None if unreadable."""
        cell_xpath = f'({rows_xpath})[{row_index + 1}]/td[1]'  # XPath positions are 1-based

        for attempt in range(1, attempts + 1):
            try:
                cell = self.wait.until(
                    EC.presence_of_element_located((By.XPATH, cell_xpath))
                )
                self.wait_to_load()
                link = cell.find_element(By.TAG_NAME, 'a').get_attribute('href')
                company_name = cell.text.strip()
                if link:
                    return company_name, link
                print(f"[Retry] Row {row_index + 1} has no link yet (attempt {attempt}/{attempts}).")
            except Exception as e:
                print(f"[Retry] Could not read row {row_index + 1} (attempt {attempt}/{attempts}): {e}")

        return None

    def _cash_flow_scrape_company(self, company_name, link, universal_header):
        """Open ``link`` in a temporary tab, parse its Cash Flow table and always return to the listing tab."""
        main_handle = self.driver.current_window_handle
        known_handles = set(self.driver.window_handles)

        self.driver.execute_script('window.open(arguments[0]);', link)
        new_handles = [handle for handle in self.driver.window_handles if handle not in known_handles]
        if not new_handles:
            print(f"Could not open a new tab for '{company_name}'.")
            return []

        self.driver.switch_to.window(new_handles[-1])
        try:
            self.wait_to_load()

            if not self._cash_flow_open_tab():
                # parsing now would record whichever table is shown by default
                print(f"'Cash Flow' tab could not be opened for '{company_name}'.")
                return []

            parsed = self._cash_flow_read_table()
            if parsed is None:
                print(f"Cash flow table did not load for '{company_name}'.")
                return []

            header_cells, body_rows = parsed
            return self._cash_flow_build_records(company_name, header_cells, body_rows, universal_header)
        finally:
            # close the temporary tab even on failure so tabs do not pile up
            self.driver.close()
            self.driver.switch_to.window(main_handle)
            self.wait_to_load()

    def _cash_flow_open_tab(self):
        """Click 'Fundamentals' and then its 'Cash Flow' sub-tab; return True only if 'Cash Flow' was clicked."""
        for _ in range(5):
            try:
                fundamental = self.wait.until(
                    EC.presence_of_element_located((By.XPATH,'//*[@id="ipo-section"]/main/nav/div/a[2]'))
                )
                fundamental.click()
                break
            except Exception:
                time.sleep(1)

        for _ in range(5):
            try:
                # the sub-tab position varies, so candidates 3..7 are matched by label
                for i in range(3, 8):
                    candidate = self.wait.until(
                        EC.presence_of_element_located((By.XPATH,f'//*[@id="fundamental_tables"]/div/div[1]/div/div[1]/div/div[{i}]'))
                    )
                    if candidate.text.strip() == 'Cash Flow':
                        candidate.click()
                        return True
            except Exception:
                time.sleep(1)
            else:
                # every candidate was present but none is labelled 'Cash Flow'
                return False

        return False

    def _cash_flow_read_table(self):
        """Poll the page source until the fundamentals table is rendered; return ``(th cells, body rows)`` or None."""
        for _ in range(10):
            time.sleep(1)
            soup = BeautifulSoup(self.driver.page_source, 'lxml')
            table = soup.find('table', class_='table tl-react-table-v7 table-striped')

            thead = table.find('thead') if table is not None else None
            tbody = table.find('tbody') if table is not None else None
            header_row = thead.find('tr') if thead is not None else None

            if header_row is not None and tbody is not None:
                return header_row.find_all('th'), tbody.find_all('tr')

            time.sleep(1)

        return None

    @staticmethod
    def _cash_flow_build_records(company_name, header_cells, body_rows, universal_header):
        """Convert parsed table rows into dicts keyed by 'Company', 'Indicator' and ``universal_header``."""
        header_value = [cell.text.strip() for cell in header_cells]
        records = []

        for body_row in body_rows:
            cells = body_row.find_all('td')
            indicator_div = cells[0].find('div', class_='indicator-value-container') if cells else None
            if indicator_div is None:
                # not an indicator row; nothing to record
                continue

            record = {
                'Company': company_name,
                'Indicator': indicator_div.get_text(),
            }

            # Values are read positionally starting at the third cell, in the order
            # of universal_header (assumes the page lists columns in that order - TODO confirm).
            value_index = 2
            for head in universal_header:
                if head in header_value and value_index < len(cells):
                    record[head] = cells[value_index].get_text()
                    value_index += 1
                else:
                    record[head] = np.nan

            records.append(record)

        return records

In [6]:
# Launch Chrome, run the selected scraper, and always release the browser afterwards.
driver = webdriver.Chrome()

try:
    driver.maximize_window()

    # year=2026 is an exclusive upper bound (years 2019..2025); 10 is the explicit-wait timeout in seconds
    ipos = IPO_Data(driver,2026,10)

    # Other datasets exposed by IPO_Data; enable the one(s) needed for this run.
    # sme_data = ipos.get_sme_ipo_data()
    # mainboard_data = ipos.get_mainboard_ipo_data()
    # annual_resutl = ipos.get_annual_financial_data()
    # balance_sheet = ipos.get_balance_sheet_data()
    # financial_ratio = ipos.get_financial_ratio()
    cash_flow = ipos.get_cash_flow()
finally:
    # quit even when scraping raises, so no Chrome/chromedriver process is left running
    driver.quit()

2019: 62 companies loaded.
Year 2019 data is collected. Total 62 no. of companies data are collected.
2020: 44 companies loaded.
Year 2020 data is collected. Total 44 no. of companies data are collected.
2021: 118 companies loaded.
Year 2021 data is collected. Total 118 no. of companies data are collected.
2022: 146 companies loaded.
Year 2022 data is collected. Total 146 no. of companies data are collected.
2023: 238 companies loaded.
Year 2023 data is collected. Total 238 no. of companies data are collected.
2024: 340 companies loaded.
Year 2024 data is collected. Total 340 no. of companies data are collected.
2025: 126 companies loaded.
Year 2025 data is collected. Total 126 no. of companies data are collected.


In [8]:
# Turn the scraped list of per-indicator dicts into a table and preview its first 40 rows.
df = pd.DataFrame(data=cash_flow)
df.iloc[:40]

Unnamed: 0,Company,Indicator,CAGR 3 Yrs,CAGR 5 Yrs,Mar '25,Mar '24,Mar '23,Mar '22,Mar '21,Mar '20,Mar '19,Mar '18,Mar '17,Mar '16,Mar '15
0,Prince Pipes,Cash from Operating Act. Ann.,-,-,119.0,32.9,360.2,-19.5,307.3,-154.7,218.6,160.9,94.6,87.9,
1,Prince Pipes,PBT Ann.,-44.1%,-17.1%,58.9,243.8,164.8,336.9,299.2,150.6,111.3,95.3,85.0,40.7,22.4
2,Prince Pipes,Interest Ann.,-11.3%,-21.8%,9.7,6.5,11.0,13.9,20.7,33.2,36.3,35.4,35.8,35.3,37.4
3,Prince Pipes,Tax Ann.,-43.6%,-16.2%,15.7,61.3,43.4,87.5,77.3,38.1,29.2,21.8,19.8,9.6,5.2
4,Prince Pipes,Cash from Investing Act. Ann.,-,-,-234.7,-125.8,-148.1,20.5,-85.6,-110.7,-105.1,-170.3,-67.9,-50.8,
5,Prince Pipes,Cash from Financing Act. Ann.,75.4%,-14.1%,119.8,47.5,-122.2,22.2,-213.3,256.6,-104.9,6.9,-24.1,-37.1,
6,Prince Pipes,Net Cash Flow Ann.,-43.9%,-,4.1,-45.3,89.9,23.2,8.4,-8.8,8.7,-2.5,2.7,0.0,
7,Prince Pipes,Cash Plus Cash Eqv. Begin Of Year Ann.,108.5%,53.6%,76.1,121.5,31.6,8.4,0.1,8.9,0.2,2.7,0.1,8.6,
8,Prince Pipes,Cash Plus Cash Eqv. End Of Year Ann.,36.4%,280.9%,80.2,76.1,121.5,31.6,8.4,0.1,8.9,0.2,2.7,8.6,
9,DC Infotech and Comms,Cash from Operating Act. Ann.,-,-,-3.8,-7.6,-11.5,9.6,1.2,-6.5,0.4,0.9,-3.6,,


In [10]:
# Number of distinct company names present in the scraped data.
df['Company'].nunique()

1074

In [12]:
# Save the scraped table to disk; the DataFrame index is written as the first (unnamed) column.
df.to_csv(path_or_buf='Cash-Flow-Data-from-2019-2025.csv')