In [2]:
#Aubay Azzarouk 06/03/25 start
#Gen AI - Financial Analysis under the scope of a junior data scientist. 
#Prioritizing modular and reproducible code. 
# -----------------------------------------------------------------------------

# PHASE 1: 
#Setting up env to aid analysis of extracted financial data from the SEC Edgar (10-K filings - Apple, Tesla, Microsoft).


import pandas as pd # for data manipulation
import os  # handles os interactions for paths, directory 
from pathlib import Path # handles input/output locations of file system paths. 

In [12]:
# -----------------------------------------------------------------------------
# PHASE 2: Data Ingestion

# Reads manually extracted financial data (10-K figures) from excel converted to csv into Python for analysis. 
# The dataset contains the last 3 fiscal years for Apple, Tesla, and Microsoft, 
# Key metrics: Revenue, Net Income, Assets, Liabilities, and Cash Flow from Operations.

# -----------------------------------------------------------------------------

# Define the path to the source file (manually extracted from SEC EDGAR filings).
    # IMPORTANT: Adjust this path if running on a different machine or directory.
xlsx_path = 'Downloads/10K_Financial_Metrics_GenAI_BCG.csv'

# Define an output directory for cleaned/processed files, charts, or exports.
# Using pathlib ensures compatibility across operating systems.
out_dir = Path("outputs")

# Create the output directory if it does not already exist.
# exist_ok=True → no error if the folder already exists.
# parents=True → create parent folders automatically if missing.
out_dir.mkdir(exist_ok=True, parents=True)

# load the CSV into a pandas DataFrame.
# load the entire dataset to inspect structure first.
df = pd.read_csv(xlsx_path)
# preview
df.head()




Unnamed: 0,Company,Year,Total Revenue,Net Income,Total Assets,Total Liabilities,Cash Flow from Ops
0,Microsoft Corp.,2024,245122,88136,512163.0,243686.0,118458
1,Microsoft Corp.,2023,211915,72361,411976.0,205753.0,87582
2,Microsoft Corp.,2022,198270,72738,,,89035
3,"Tesla, inc.",2024,97690,7091,122070.0,48390.0,14923
4,"Tesla, inc.",2023,96773,14997,106618.0,43009.0,13256


In [15]:
# -----------------------------------------------------------------------------
# PHASE 3: Fin. Analysis w/ pd: 
# -----------------------------------------------------------------------------
# Objective:
#   - Clean numeric columns (remove commas, convert to floats)
#   - Compute derived metrics:
#         Revenue Growth (%), Net Income Growth (%),
#         Net Profit Margin (%), Operating CF Margin (%), Leverage
#   - Build summary views for latest year and multi-year trends
# -----------------------------------------------------------------------------

# --- 1) Clean numerics ---
num_cols = ["Total Revenue", "Net Income", "Total Assets",
            "Total Liabilities", "Cash Flow from Ops"]
for col in num_cols:
    df[col] = (
        df[col].astype(str)          # ensure string
              .str.replace(",", "", regex=False)  # strip commas
              .str.strip()
    )
    df[col] = pd.to_numeric(df[col], errors="coerce")  # convert to float

# --- 2) Derived metrics ---
df = df.sort_values(["Company", "Year"]).reset_index(drop=True)

# YoY growth (within each company)
df["Revenue Growth (%)"] = df.groupby("Company")["Total Revenue"].pct_change() * 100
df["Net Income Growth (%)"] = df.groupby("Company")["Net Income"].pct_change() * 100

# Profitability & leverage ratios
df["Net Profit Margin (%)"]   = (df["Net Income"] / df["Total Revenue"]) * 100
df["Operating CF Margin (%)"] = (df["Cash Flow from Ops"] / df["Total Revenue"]) * 100
df["Leverage (Liab/Assets)"]  = (df["Total Liabilities"] / df["Total Assets"])

# --- 3) Build summary views ---
# Latest year summary per company
latest_summary = (
    df.sort_values(["Company", "Year"])
      .groupby("Company")
      .tail(1)
      .reset_index(drop=True)
)

# Multi-year trend view (2022–2024 by company)
trend_view = df.copy()

# --- 4) Preview results ---
#print("=== Latest Year Summary ===")
#display(latest_summary)

print("\n=== Trend View (2022–2024) in the Trillions (USD) ===")
display(trend_view)




=== Trend View (2022–2024) in the Trillions (USD) ===


Unnamed: 0,Company,Year,Total Revenue,Net Income,Total Assets,Total Liabilities,Cash Flow from Ops,Revenue Growth (%),Net Income Growth (%),Net Profit Margin (%),Operating CF Margin (%),Leverage (Liab/Assets)
0,Apple Inc.,2022,394328,99903,,,122151,,,25.335,30.977004,
1,Apple Inc.,2023,383285,99995,143566.0,290437.0,110543,-2.800461,0.092089,26.088942,28.840941,2.023021
2,Apple Inc.,2024,391035,93736,152987.0,308030.0,118254,2.021994,-6.259313,23.971256,30.241283,2.013439
3,Microsoft Corp.,2022,198270,72738,,,89035,,,36.686337,44.905936,
4,Microsoft Corp.,2023,211915,72361,411976.0,205753.0,87582,6.88203,-0.518299,34.146238,41.328835,0.49943
5,Microsoft Corp.,2024,245122,88136,512163.0,243686.0,118458,15.669962,21.800417,35.955973,48.32614,0.475798
6,"Tesla, inc.",2022,81462,12556,,,14724,,,15.413322,18.074685,
7,"Tesla, inc.",2023,96773,14997,106618.0,43009.0,13256,18.795267,19.440905,15.497091,13.698036,0.403393
8,"Tesla, inc.",2024,97690,7091,122070.0,48390.0,14923,0.947578,-52.71721,7.258675,15.275873,0.396412


In [16]:
df.to_csv(out_dir / "financials_analysis.csv", index=False)