## Import Required Libraries

In [2]:
#

# STEP 1: Import Core Libraries
#
import pandas as pd
# pandas: The "Swiss Army Knife" of data manipulation in Python
# Used for: Reading CSVs, creating DataFrames, data wrangling, aggregations
import numpy as np
# numpy: Numerical computing library for mathematical operations
# Used for: Array operations, calculations, statistical functions
import matplotlib.pyplot as plt
# matplotlib: The foundational plotting library for creating static visualizations
# Used for: Bar charts, line plots, scatter plots, and custom figure layouts
import seaborn as sns
# seaborn: High-level statistical visualization library built on matplotlib
# Used for: Heatmaps, regression plots, and aesthetically pleasing charts
# EXPLANATION:
# These four libraries form the backbone of data science in Python.
# In an enterprise setting (like UOB), you would also import:
# - sqlalchemy (for database connections)
# - plotly (for interactive web-based dashboards)
# - scikit-learn (for machine learning models)

## Set Visualization Style & Configuration

In [3]:
# Plot appearance settings shared by every chart in this notebook.
# They are collected as named constants so they can be tuned in one place.

# matplotlib style sheet (clean white background with grid lines).
# NOTE: this is the style name shipped with matplotlib 3.6 and later.
# Other built-in options: 'ggplot', 'bmh', 'fivethirtyeight', 'dark_background'
PLOT_STYLE = 'seaborn-v0_8-whitegrid'

# Default seaborn color cycle: "husl" gives evenly spaced, harmonious hues.
SEABORN_PALETTE = "husl"

# Figure resolution in dots per inch; raise it for sharper exported images.
FIGURE_DPI = 100

# Apply the theme first, then the palette, then the resolution.
plt.style.use(PLOT_STYLE)
sns.set_palette(SEABORN_PALETTE)
plt.rcParams.update({'figure.dpi': FIGURE_DPI})

# WHY THIS MATTERS:
# Setting these once keeps every chart below visually consistent.
# For an actual UOB presentation you would swap in the bank's brand colors.
# Example values (confirm against the official brand guidelines):
# - Primary: Navy Blue (#001F3F)
# - Secondary: Gold (#FFD700)
# - Accent: Teal (#008B8B)

## Loading Data File for analysis

In [6]:
# Load the UOB financials CSV into a pandas DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
csv_path = "uob_financials_2019_2024.csv"

# index_col='Year' turns the 'Year' column into the row index (row labels),
# which makes selecting and comparing data by year straightforward.
uob_df = pd.read_csv(csv_path, index_col='Year')

# Sanity check: confirm the load worked and report the table dimensions.
# For this file the shape is (6, 5), i.e. 6 rows and 5 columns.
print(
    "✓ Data loaded successfully!",
    f"Shape: {uob_df.shape}",
    "\nFirst few rows:",
    sep="\n",
)

# .head() returns the first 5 rows by default. Use the preview to check that
# the column names are right and the values look plausible before analysing:
# in banking, data integrity is always verified first.
display(uob_df.head())

✓ Data loaded successfully!
Shape: (6, 5)

First few rows:


Unnamed: 0_level_0,Net_Profit_SGD_M,ROE_Pct,CET1_Ratio_Pct,Dividend_Per_Share_SGD,Dividend_Payout_Ratio_Pct
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019,4343,11.6,14.3,1.3,50.0
2020,2915,7.4,14.7,0.78,45.0
2021,4075,10.2,13.5,1.2,49.0
2022,4573,11.9,13.3,1.35,49.0
2023,5714,14.2,15.6,1.7,50.0


## Data Type Verification

In [8]:
# Inspect the schema: one dtype per column.
column_types = uob_df.dtypes

# Count missing entries per column (isna() flags NaN/None cells; sum() totals them).
missing_per_column = uob_df.isna().sum()

# Each report is printed as a heading followed by the per-column Series.
print("\nData Types:", column_types, sep="\n")

print("\nMissing Values:", missing_per_column, sep="\n")

# Expected result: every missing-value count is zero (no gaps in the data).
# If gaps did appear, typical remedies (each returns a new DataFrame) are:
# - Forward fill:  uob_df.ffill()
# - Backward fill: uob_df.bfill()
# - Drop rows:     uob_df.dropna()
# (fillna(method='ffill'/'bfill') is deprecated in recent pandas releases;
#  prefer the dedicated .ffill() / .bfill() methods.)



Data Types:
Net_Profit_SGD_M               int64
ROE_Pct                      float64
CET1_Ratio_Pct               float64
Dividend_Per_Share_SGD       float64
Dividend_Payout_Ratio_Pct    float64
dtype: object

Missing Values:
Net_Profit_SGD_M             0
ROE_Pct                      0
CET1_Ratio_Pct               0
Dividend_Per_Share_SGD       0
Dividend_Payout_Ratio_Pct    0
dtype: int64


## Data Wrangling & Feature Engineering 
Part 1: Calculating Year-over-Year Growth Rates

In [14]:
# Feature engineering: add year-over-year growth and retention columns to uob_df.
# Re-running this cell is safe: each new column is derived only from the
# original CSV columns, so the results are the same every time.

# pct_change() compares each row with the row directly above it, so the rows
# must be in ascending year order for the result to mean "year over year".
assert uob_df.index.is_monotonic_increasing, "uob_df must be sorted by Year (ascending)"

# Year-over-Year (YoY) profit growth, in percent.
# Formula: ((Current Year - Previous Year) / Previous Year) * 100
# Example for 2020: ((2915 - 4343) / 4343) * 100 = -32.9%
# (the sharp drop coincides with the pandemic year).
# The first year (2019) has no predecessor, so its growth is NaN.
# fill_method=None: do NOT forward-fill gaps before computing the change.
# The implicit forward-fill default is deprecated in pandas and would turn a
# missing year into a misleading 0% growth figure; with None, a gap stays NaN.
uob_df['Profit_Growth_Pct'] = (
    uob_df['Net_Profit_SGD_M'].pct_change(fill_method=None) * 100
)

# Year-over-Year dividend growth, in percent.
# This tracks how fast shareholder payouts are changing:
# - 2020 is -40% (this notebook attributes it to the MAS dividend cap that
#   year; confirm against the data source).
# - 2024 is about +35%, a strong increase in payouts.
uob_df['Dividend_Growth_Pct'] = (
    uob_df['Dividend_Per_Share_SGD'].pct_change(fill_method=None) * 100
)

# Retention Ratio = 100% - Payout Ratio.
# This is the share of profit the bank keeps rather than pays out, used for:
# - Building capital (the CET1 buffer)
# - Funding growth initiatives
# - Absorbing future losses
# Example: the 50% payout ratio in 2019 means a 50% retention ratio, i.e.
# half of that year's profit was kept in the bank.
uob_df['Retention_Ratio_Pct'] = 100 - uob_df['Dividend_Payout_Ratio_Pct']

# Show the raw figures next to their growth rates for a quick visual check.
print("\nWrangled Data with Growth Metrics:")
display(
    uob_df[
        [
            'Net_Profit_SGD_M',
            'Profit_Growth_Pct',
            'Dividend_Per_Share_SGD',
            'Dividend_Growth_Pct',
        ]
    ]
)


Wrangled Data with Growth Metrics:


Unnamed: 0_level_0,Net_Profit_SGD_M,Profit_Growth_Pct,Dividend_Per_Share_SGD,Dividend_Growth_Pct
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019,4343,,1.3,
2020,2915,-32.880497,0.78,-40.0
2021,4075,39.794168,1.2,53.846154
2022,4573,12.220859,1.35,12.5
2023,5714,24.950798,1.7,25.925926
2024,6059,6.037802,2.3,35.294118
