In [5]:
from pathlib import Path

# --- User Input for Company Folder ---
# Root directory that contains one sub-folder per company.
project_root = Path(r"D:\Visual Studio Projects\Financial Statement Data Retriever")

company_folder_name = input("Enter the company folder name (e.g., PVIAM): ").strip()

if not company_folder_name:
    # An empty name would make company_base_path equal to the project root,
    # so later cells would read from and write into the wrong directory.
    # Stop here instead of continuing with a misleading path.
    raise ValueError("No company folder name entered. Please run this cell again and provide a name.")

print(f"Company folder set to: {company_folder_name}")

# Base path used by the following cells to locate this company's files.
company_base_path = project_root / company_folder_name

# --- User Input for Year Range ---
# periods_to_process ends up as a list of year strings; it is left empty when
# the input is not numeric or when the start year is after the end year.
try:
    start_year = int(input("Enter start year (e.g., 2021): ").strip())
    end_year = int(input("Enter end year (e.g., 2024): ").strip())

    # Generate list of years as strings from start to end (inclusive)
    periods_to_process = [str(year) for year in range(start_year, end_year + 1)]

    if not periods_to_process:
        print("No periods generated. Please check if the start year is less than or equal to the end year.")
    else:
        print(f"Periods set for processing: {periods_to_process}")
except ValueError:
    # int() rejected one of the entries; fall back to an empty period list.
    print("Invalid input. Please enter numeric values for years.")
    periods_to_process = []

Company folder set to: CALM
Periods set for processing: ['2020', '2021', '2022', '2023', '2024', '2025']


## <span style='color:blue'>Merging Excel Files Together</span>

In [6]:
# ...existing code...
import pandas as pd
import numpy as np
from pathlib import Path
import os

# Use the dynamically set company_base_path
# This cell relies on two names defined by the setup cell:
#   company_base_path  - Path of the selected company folder
#   periods_to_process - list of year strings to load
base_dir = company_base_path / "excel_statements"
period_statements_dir = company_base_path / "period_statements"

# Ensure the period_statements directory exists
period_statements_dir.mkdir(parents=True, exist_ok=True)

# Read in the datasets: one "<period>_statements.xlsx" workbook per period.
# Missing or unreadable files are reported and skipped.
financial_statements = []
for period in periods_to_process:
    statement_path = base_dir / f"{period}_statements.xlsx"
    if not statement_path.exists():
        print(f'Warning: Excel file not found for period {period} at {statement_path}. Skipping this period.')
        continue
    try:
        df_statement = pd.read_excel(statement_path)
    except Exception as e:
        print(f'Error reading {statement_path}: {e}. Skipping this period.')
        continue
    financial_statements.append(df_statement)

# Number of periods that were actually loaded.
found_files_count = len(financial_statements)

if found_files_count > 0:
    print(f'Successfully read in {found_files_count} years of financial statements in Excel \n')
else:
    print(f'No financial statements were successfully read from Excel files. Please check paths and file existence.')
    # Execution continues; the concatenation step below is skipped when
    # financial_statements is empty.


# Begin merging the datasets together based on criteria
print(f"{f' BEGINNING CONCATENATING EACH PERIODS ':=^100} ")

# Expected row count after concatenation, used as a sanity check below.
row_length = sum(len(frame) for frame in financial_statements)

if financial_statements:  # Only concatenate if there's data to concatenate
    concatenated_df = pd.concat(financial_statements, ignore_index=True)

    if len(concatenated_df) == row_length:
        print(f'1) SUCCESS: Concatenated successfully dataframes from all periods. Total rows: {len(concatenated_df)}')
    else:
        print(f'1) ERROR: There are missing rows or an issue during concatenation. Expected {row_length} rows, got {len(concatenated_df)}.')

    # --- Apply proper casing to 'statement_type' column ---
    has_statement_type = 'statement_type' in concatenated_df.columns
    if has_statement_type:
        raw_types = concatenated_df['statement_type']
        # Title-case the values but keep missing entries missing; a plain
        # astype(str) would turn them into the literal text 'Nan' and create
        # a bogus statement type further down.
        concatenated_df['statement_type'] = raw_types.astype(str).str.title().where(raw_types.notna())
        print("Applied proper casing to 'statement_type' column.")

    # --- Save the full concatenated_df to Excel ---
    print("\n--- Saving Full Concatenated DataFrame ---")
    full_concatenated_output_path = period_statements_dir / "all_periods_concatenated.xlsx"
    try:
        concatenated_df.to_excel(full_concatenated_output_path, index=False)
        print(f"Successfully saved full concatenated DataFrame to: {full_concatenated_output_path}")
    except Exception as e:
        print(f"ERROR: Could not save full concatenated DataFrame: {e}")
    print("------------------------------------------")

    # --- Separate by statement_type and save to individual Excel files ---
    print(f"\n{f' SEPARATING BY STATEMENT TYPE AND SAVING ':=^100} ")

    if has_statement_type:
        missing_type_rows = int(concatenated_df['statement_type'].isna().sum())
        if missing_type_rows:
            print(f"WARNING: {missing_type_rows} row(s) have no 'statement_type' and are only included in the full concatenated file.")
        # Missing values are dropped so they do not become a file of their own.
        unique_statement_types = concatenated_df['statement_type'].dropna().unique()
    else:
        # Without the column there is nothing to split on; fall through to the
        # "no types" message instead of raising KeyError.
        print("WARNING: Column 'statement_type' is missing from the concatenated data.")
        unique_statement_types = []

    if len(unique_statement_types) > 0:
        print(f"Found {len(unique_statement_types)} unique statement types: {', '.join(unique_statement_types)}")
        for st_type in unique_statement_types:
            # Filter the concatenated DataFrame for the current statement type
            df_filtered = concatenated_df[concatenated_df['statement_type'] == st_type].copy()

            # Each statement type goes to "<statement type>.xlsx"
            output_file_path = period_statements_dir / f"{st_type}.xlsx"

            # Save to Excel; a failure for one type does not stop the others
            try:
                df_filtered.to_excel(output_file_path, index=False)
                print(f"  - Successfully saved '{st_type}' to: {output_file_path}")
            except Exception as e:
                print(f"  - ERROR: Could not save '{st_type}' to {output_file_path}: {e}")
    else:
        print("No unique 'statement_type' found in the concatenated data. No individual files created.")

    print("\n--- Statement Separation and Saving Complete ---")

else:
    print("No dataframes were available for concatenation. Skipping separation by statement type.")

Successfully read in 6 years of financial statements in Excel 

1) SUCCESS: Concatenated successfully dataframes from all periods. Total rows: 545
Applied proper casing to 'statement_type' column.

--- Saving Full Concatenated DataFrame ---
Successfully saved full concatenated DataFrame to: D:\Visual Studio Projects\Financial Statement Data Retriever\CALM\period_statements\all_periods_concatenated.xlsx
------------------------------------------

Found 3 unique statement types: Income Statement, Balance Sheet, Statement Of Cash Flows
  - Successfully saved 'Income Statement' to: D:\Visual Studio Projects\Financial Statement Data Retriever\CALM\period_statements\Income Statement.xlsx
  - Successfully saved 'Balance Sheet' to: D:\Visual Studio Projects\Financial Statement Data Retriever\CALM\period_statements\Balance Sheet.xlsx
  - Successfully saved 'Statement Of Cash Flows' to: D:\Visual Studio Projects\Financial Statement Data Retriever\CALM\period_statements\Statement Of Cash Flows.xl