In [8]:
import pandas as pd
import os
import sys

# Add the parent directory to Python's search path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Ensure correct import path (if running inside Jupyter)
from scripts.load_data import load_data  # Ensure lowercase 'scripts'


In [4]:
# Define the Excel file path
file_path = "data/Common Data Warehouse-Orginal.xlsx"

# Load merged dataset
final_df = load_data(file_path)

# Display first few rows
final_df.head()


Unnamed: 0,Agmt ID,GL Acct ID,Source,Cust ID,Amount,Customer ID,Local Customer ID,Customer Name,NACE Code L1,NACE Name L1,...,Amortization Method,GL Account ID,GL Account,GL Name,Group GL Acct ID,GL Account Type,GL Account Name,Group GL Account ID,Group GL Account,Group GL Name
0,1000162418540,1000000008710,229|ADJ,1000007793706,0.0,1000007793706,990597391,HWTYLL LYXLXGHYTYR HWLDXNG ZS,L,REAL ESTATE ACTIVITIES,...,,1000000008710,100400,"Loans non-demand, other",1000000002728,1,Assets,1000000002728,G100400,"Loans non-demand, AC"
1,1000376865946,1000000013691,229,1000009653088,-5198.52,1000009653088,4216007625,,A,"AGRICULTURE, FORESTRY AND FISHING",...,IRREGULAR (CURRENCY LOANS),1000000013691,104413,"Accrued interest income - Loans, L&R - delta",1000000103203,1,Assets,1000000103203,G100601,"Accrued interest income, AC"
2,1000205191664,1000000008710,229|ADJ,1000044609208,0.0,1000044609208,997510461,FWRQS MZRKYRXNG HWLDXNG ZS,K,FINANCIAL AND INSURANCE ACTIVITIES,...,AMORTIZATION FREE (BULLET),1000000008710,100400,"Loans non-demand, other",1000000002728,1,Assets,1000000002728,G100400,"Loans non-demand, AC"
3,1000329416423,1000000008710,229|ADJ,1000009657652,0.0,1000009657652,4333246983,STYNHØJ HWLDXNG Z/S,M,"PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES",...,IRREGULAR (CURRENCY LOANS),1000000008710,100400,"Loans non-demand, other",1000000002728,1,Assets,1000000002728,G100400,"Loans non-demand, AC"
4,1000239743539,1000000008710,229|ADJ,1000007699415,0.0,1000007699415,936370446,SYRVX ZS,G,WHOLESALE AND RETAIL TRADE,...,AMORTIZATION FREE (BULLET),1000000008710,100400,"Loans non-demand, other",1000000002728,1,Assets,1000000002728,G100400,"Loans non-demand, AC"


In [5]:
# Separate Total Assets and Off-Balance Items
total_assets_df = final_df[final_df["Group GL Name"] != "Off Balance"].groupby("Sector Name").agg(
    Total_Assets=("Amount", "sum")
).reset_index()

off_balance_df = final_df[final_df["Group GL Name"] == "Off Balance"].groupby("Sector Name").agg(
    Off_Balance_Amount=("Amount", "sum"),
    Off_Balance_Count=("Group GL Name", "count")
).reset_index()

# Merge results
sector_analysis = total_assets_df.merge(
    off_balance_df, on="Sector Name", how="left"
).fillna(0)

# Display Results
sector_analysis


Unnamed: 0,Sector Name,Total_Assets,Off_Balance_Amount,Off_Balance_Count
0,Central Banks,-457756.5,0.0,0.0
1,Credit institutions,-4278055000.0,-62011190000.0,11.0
2,General governments,-1003070000.0,-867418000.0,30.0
3,Households,-413853700.0,-11516030.0,7.0
4,Non financial corporations,-41152740000.0,-22332380000.0,1100.0
5,Other financial corporations,-3107687000.0,-1427112000.0,54.0


In [6]:
# Save results to Excel
sector_analysis.to_excel("data/Sector_Analysis.xlsx", index=False)
print("Sector Assets Analysis saved to data/Sector_Analysis.xlsx")


Sector Assets Analysis saved to data/Sector_Analysis.xlsx


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Plot Total Assets by Sector
plt.figure(figsize=(12, 6))
sns.barplot(x="Total_Assets", y="Sector Name", data=sector_analysis, palette="Blues_r")
plt.xlabel("Total Assets ($)")
plt.ylabel("Customer Sector")
plt.title("Total Assets per Customer Sector")
plt.show()

# Plot Off-Balance Amount by Sector
plt.figure(figsize=(12, 6))
sns.barplot(x="Off_Balance_Amount", y="Sector Name", data=sector_analysis, palette="Reds_r")
plt.xlabel("Off-Balance Sheet Amount ($)")
plt.ylabel("Customer Sector")
plt.title("Off-Balance Sheet Amount per Customer Sector")
plt.show()
