In [33]:
import pdfplumber

# Print the extracted text and raw tables of selected PDF pages for inspection.
pdf_path = "C:/Users/Lenovo/Desktop/One Big Folder/PDF/Financial Statements.pdf"
with pdfplumber.open(pdf_path) as pdf:
    # 1-based page numbers (as a PDF viewer shows them); converted to the
    # 0-based index used by `pdf.pages` inside the loop.
    page_numbers = [215]
    page_count = len(pdf.pages)
    for page_num in page_numbers:
        # Guard the 1-based -> 0-based conversion: page 0 would become index -1
        # and silently select the last page, while a number past the end would
        # fail with a bare IndexError.
        if not 1 <= page_num <= page_count:
            print(f"Skipping page {page_num}: PDF has {page_count} pages")
            continue
        page = pdf.pages[page_num - 1]  # Convert to 0-based index

        # Extract text
        text = page.extract_text()
        print(f"Page {page.page_number} Text:\n{text}\n")

        # Extract tables (each table is printed as the raw list of rows)
        tables = page.extract_tables()
        for i, table in enumerate(tables):
            print(f"Page {page.page_number} Table {i+1}:\n{table}\n")

Page 215 Text:
BRAC Bank PLC. and its subsidiaries BRAC Bank PLC. and its subsidiaries
Consolidated Profit and Loss Account Consolidated Profit and Loss Account
For the year ended 31 December 2024 For the year ended 31 December 2024
2024 2023 2024 2023
Particulars Note Particulars Note
Taka Taka Taka Taka
Interest income 25 58,573,509,702 41,863,962,190 Appropriations:
Interest paid on deposits and borrowing etc. 26 42,119,865,188 21,315,015,978 Statutory reserve 1,725,268,197 1,164,617,439
Net interest income 16,453,644,514 20,548,946,212 General reserve - -
Investment income 27 28,812,533,328 12,686,698,490 Dividend 3,217,658,447 2,244,877,986
Commission, exchange and brokerage 28 18,812,230,165 13,038,891,842 Start-up Fund 121,361,698 73,036,047
Other operating income 29 252,335,865 172,424,955 5,064,288,342 3,482,531,472
47,877,099,358 25,898,015,287 Retained surplus 40,311,362,920 33,197,686,658
Total operating income (a) 64,330,743,872 46,446,961,499
Salaries and allowances 17,38

In [34]:
import pdfplumber
import pandas as pd
import os

# Extract every table on one PDF page, clean it into a DataFrame, and save each
# table (plus a combined file) as CSV.
pdf_path = "C:/Users/Lenovo/Desktop/One Big Folder/PDF/Financial Statements.pdf"
output_folder = "C:/Users/Lenovo/Desktop/One Big Folder/MS Office/Excel/financial_tables"
os.makedirs(output_folder, exist_ok=True)

# 0-based position in `pdf.pages`, so index 215 is the PDF's page 216.
# NOTE(review): the earlier comments here spoke of "page 81 / index 80", which
# did not match the code. If page 215 was intended, this must be 214 - confirm.
PAGE_INDEX = 215

# Cell values that stand for "no value" in the extracted tables.
BLANK_MARKERS = ["", "-"]


def _unique_headers(header_row):
    """Return one non-empty, unique column label per header cell.

    Empty/None cells become ``column_<position>`` and repeated labels get a
    numeric suffix. Duplicate labels would make ``df[label]`` return a
    DataFrame instead of a Series and can make ``pd.concat`` fail.
    """
    labels = []
    used = set()
    for position, raw in enumerate(header_row, start=1):
        base = raw if raw not in (None, "") else f"column_{position}"
        label, suffix = base, 1
        while label in used:
            suffix += 1
            label = f"{base}_{suffix}"
        used.add(label)
        labels.append(label)
    return labels


def _clean_number_text(cell):
    """Strip percent signs and thousands separators from a string cell.

    A value wrapped in parentheses, e.g. ``(1,234)``, is treated as an
    accounting-style negative. Non-string cells are returned untouched.
    """
    if not isinstance(cell, str):
        return cell
    text = cell.replace("%", "").replace(",", "").strip()
    if text.startswith("(") and text.endswith(")"):
        text = "-" + text[1:-1]
    return text


def _to_numeric_or_keep(column):
    """Convert a column to numbers, or return it unchanged if nothing parses.

    ``errors="coerce"`` turns every unparseable cell into NaN, so a purely
    textual column would be wiped out; such a column is kept as is.
    """
    cleaned = column.map(_clean_number_text)
    converted = pd.to_numeric(cleaned, errors="coerce")
    if cleaned.notna().any() and not converted.notna().any():
        return column
    return converted


# Initialize a list to store all DataFrames
all_dataframes = []

with pdfplumber.open(pdf_path) as pdf:
    page = pdf.pages[PAGE_INDEX]

    # Extract tables
    tables = page.extract_tables()

    # Process each table
    for i, table in enumerate(tables):
        # Skip tables that have a header row but no data rows
        if len(table) <= 1:
            print(f"Skipping Table {i+1}: Only contains headers")
            continue

        # First row is the header, the rest are data rows
        df = pd.DataFrame(table[1:], columns=_unique_headers(table[0]))

        # Turn blank markers into missing values FIRST, then drop empty rows;
        # otherwise rows made only of "" / "-" would survive the dropna.
        df = df.mask(df.isin(BLANK_MARKERS))
        df = df.dropna(how="all")

        # Convert every column after the first (label) column to numeric
        # where possible, handling percentages and thousands separators.
        for col in df.columns[1:]:
            df[col] = _to_numeric_or_keep(df[col])

        # Add a column to identify the table
        df["Table"] = f"Table {i+1}"

        # Append to list of DataFrames
        all_dataframes.append(df)

        # Save individual table to CSV
        df.to_csv(f"{output_folder}/table_{i+1}.csv", index=False)
        print(f"Saved Table {i+1} to {output_folder}/table_{i+1}.csv")
        print(f"Table {i+1}:\n", df, "\n")

    # Combine all DataFrames into a single DataFrame (optional)
    if all_dataframes:
        combined_df = pd.concat(all_dataframes, ignore_index=False)
        # Reorder columns to have 'Table' first
        cols = ["Table"] + [col for col in combined_df.columns if col != "Table"]
        combined_df = combined_df[cols]
        # Save combined DataFrame to CSV
        combined_df.to_csv(f"{output_folder}/combined_financial_tables.csv", index=False)
        print(f"Saved combined DataFrame to {output_folder}/combined_financial_tables.csv")
        print("Combined DataFrame:\n", combined_df)

Saved Table 1 to C:/Users/Lenovo/Desktop/One Big Folder/MS Office/Excel/financial_tables/table_1.csv
Table 1:
                                          latoT  451,304,020,78 748,841,5  \
0                    gnillortnoc-noN\ntseretni                       NaN   
1                            deniateR sgninrae                       NaN   
2      ycnerruc\nnoitalsnart\nevreser\nngieroF                       NaN   
3                         eulav\nevreser\nriaF                       NaN   
4  .tvoG\nnoitaulaveR\nseitiruces\nno\nevreser                       NaN   
5                 noitazilauqe\ndnediviD\ndnuf                       NaN   
6                           evreser\nyrotutatS                       NaN   
7                               muimerp\nerahS                       NaN   
8                            latipac\npu\ndiaP                       NaN   
9                                  sralucitraP                       NaN   

   100,255,520,78  - 967,922,667 167,018,303 - )090,