In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv

# Load credentials from .env file
load_dotenv()

DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")

# Create database connection URL (for MySQL; change driver if needed)
db_url = f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"

# Create SQLAlchemy engine
engine = create_engine(db_url)

In [17]:
# Step 1: Load base tables from DB
financialrequests = pd.read_sql("SELECT Id, CreatorId FROM financialrequests", engine)
financialrequestinstallments = pd.read_sql("SELECT FinancialRequestId, DueDate, DueAmount, AcutalPaidAmount FROM financialrequestinstallments", engine)
abpusers = pd.read_sql("SELECT Id, UserName, Name, Surname, ManagerId FROM abpusers", engine)
abpuserroles = pd.read_sql("SELECT UserId, RoleId FROM abpuserroles", engine)
abproles = pd.read_sql("SELECT Id, Name FROM abproles", engine)

In [18]:
# Step 2: Investigator loans (CTE 1)
investigator_loans = financialrequests.rename(columns={
    "Id": "FinancialRequestId",
    "CreatorId": "InvestigatorId"
})[["FinancialRequestId", "InvestigatorId"]]
investigator_loans

Unnamed: 0,FinancialRequestId,InvestigatorId
0,08dc27d1-d7d8-4936-8404-6fb0aa29fdce,
1,08dc2876-69f0-4742-8c19-d1a10e7e7e64,
2,08dc3082-5d90-450b-8126-90e8d91484d2,
3,08dc3084-88be-4b0f-847e-1fd78bc98571,
4,08dc3087-e126-41f6-8150-c450cee85778,
...,...,...
210,3a0a3a82-e81d-c69b-0a25-eeca3ee2fcd9,
211,3a0a3a85-d5c3-58e5-b7c1-b3ed7b540d4f,
212,3a0a3a8c-4180-8b27-6169-2e48ff9a6ada,
213,3a0a3c36-4c80-cdc4-22ba-28546e8d677a,


In [19]:
# Step 3: Installments (CTE 2)
installments = financialrequestinstallments.copy()
installments["OutstandingAmount"] = installments["DueAmount"] - installments["AcutalPaidAmount"].fillna(0)
installments

Unnamed: 0,FinancialRequestId,DueDate,DueAmount,AcutalPaidAmount,OutstandingAmount
0,08dd3bb0-a253-415b-8fb0-f181c814eafc,2025-01-23,500.0,0.0,500.0
1,08dd3bb0-a253-415b-8fb0-f181c814eafc,2025-02-23,500.0,0.0,500.0
2,08dd3bb0-a253-415b-8fb0-f181c814eafc,2025-03-23,500.0,0.0,500.0
3,08dd3bb0-a253-415b-8fb0-f181c814eafc,2025-04-23,500.0,0.0,500.0
4,08dd3bb0-a253-415b-8fb0-f181c814eafc,2025-05-23,500.0,0.0,500.0
...,...,...,...,...,...
851,08ddf138-d6ce-450f-8a45-83c4f1ff6c13,2025-12-01,265.0,0.0,265.0
852,08ddf138-d6ce-450f-8a45-83c4f1ff6c13,2026-01-01,265.0,0.0,265.0
853,08ddf138-d6ce-450f-8a45-83c4f1ff6c13,2026-02-01,265.0,0.0,265.0
854,08ddf138-d6ce-450f-8a45-83c4f1ff6c13,2026-03-01,265.0,0.0,265.0


In [20]:
# Step 4: Join investigator_loans with users
df = investigator_loans.merge(
    abpusers.add_prefix("investigator_"),
    left_on="InvestigatorId",
    right_on="investigator_Id",
    how="inner"
)
df

Unnamed: 0,FinancialRequestId,InvestigatorId,investigator_Id,investigator_UserName,investigator_Name,investigator_Surname,investigator_ManagerId
0,3a08d703-fa37-56d4-cdd2-794b2ab38241,3a074590-6ced-5665-38bd-a50c9105bf36,3a074590-6ced-5665-38bd-a50c9105bf36,sobhy,Mahmoud,Sobhy,
1,3a08d726-ba65-0faa-720e-839c2bbd779b,3a074590-6ced-5665-38bd-a50c9105bf36,3a074590-6ced-5665-38bd-a50c9105bf36,sobhy,Mahmoud,Sobhy,


In [21]:
# Step 5: Supervisors
df = df.merge(
    abpusers.add_prefix("supervisor_"),
    left_on="investigator_ManagerId",
    right_on="supervisor_Id",
    how="left"
)
df

Unnamed: 0,FinancialRequestId,InvestigatorId,investigator_Id,investigator_UserName,investigator_Name,investigator_Surname,investigator_ManagerId,supervisor_Id,supervisor_UserName,supervisor_Name,supervisor_Surname,supervisor_ManagerId
0,3a08d703-fa37-56d4-cdd2-794b2ab38241,3a074590-6ced-5665-38bd-a50c9105bf36,3a074590-6ced-5665-38bd-a50c9105bf36,sobhy,Mahmoud,Sobhy,,,,,,
1,3a08d726-ba65-0faa-720e-839c2bbd779b,3a074590-6ced-5665-38bd-a50c9105bf36,3a074590-6ced-5665-38bd-a50c9105bf36,sobhy,Mahmoud,Sobhy,,,,,,


In [22]:
# Step 6: Roles
df = df.merge(
    abpuserroles,
    left_on="InvestigatorId",
    right_on="UserId",
    how="left"
).merge(
    abproles.rename(columns={"Id": "RoleId", "Name": "RoleName"}),
    on="RoleId",
    how="left"
)
df

Unnamed: 0,FinancialRequestId,InvestigatorId,investigator_Id,investigator_UserName,investigator_Name,investigator_Surname,investigator_ManagerId,supervisor_Id,supervisor_UserName,supervisor_Name,supervisor_Surname,supervisor_ManagerId,UserId,RoleId,RoleName
0,3a08d703-fa37-56d4-cdd2-794b2ab38241,3a074590-6ced-5665-38bd-a50c9105bf36,3a074590-6ced-5665-38bd-a50c9105bf36,sobhy,Mahmoud,Sobhy,,,,,,,3a074590-6ced-5665-38bd-a50c9105bf36,3a0677ed-84c1-81f1-8bdc-df4582e31505,Development
1,3a08d726-ba65-0faa-720e-839c2bbd779b,3a074590-6ced-5665-38bd-a50c9105bf36,3a074590-6ced-5665-38bd-a50c9105bf36,sobhy,Mahmoud,Sobhy,,,,,,,3a074590-6ced-5665-38bd-a50c9105bf36,3a0677ed-84c1-81f1-8bdc-df4582e31505,Development


In [23]:
# Step 7: Installments join
df = df.merge(
    installments,
    on="FinancialRequestId",
    how="left"
)
df

Unnamed: 0,FinancialRequestId,InvestigatorId,investigator_Id,investigator_UserName,investigator_Name,investigator_Surname,investigator_ManagerId,supervisor_Id,supervisor_UserName,supervisor_Name,supervisor_Surname,supervisor_ManagerId,UserId,RoleId,RoleName,DueDate,DueAmount,AcutalPaidAmount,OutstandingAmount
0,3a08d703-fa37-56d4-cdd2-794b2ab38241,3a074590-6ced-5665-38bd-a50c9105bf36,3a074590-6ced-5665-38bd-a50c9105bf36,sobhy,Mahmoud,Sobhy,,,,,,,3a074590-6ced-5665-38bd-a50c9105bf36,3a0677ed-84c1-81f1-8bdc-df4582e31505,Development,,,,
1,3a08d726-ba65-0faa-720e-839c2bbd779b,3a074590-6ced-5665-38bd-a50c9105bf36,3a074590-6ced-5665-38bd-a50c9105bf36,sobhy,Mahmoud,Sobhy,,,,,,,3a074590-6ced-5665-38bd-a50c9105bf36,3a0677ed-84c1-81f1-8bdc-df4582e31505,Development,,,,


In [24]:
# # Step 8: Apply filters (roles.Name LIKE 'Picc%')
# df = df[df["RoleName"].str.startswith("Picc", na=False)]
# df

In [25]:
# Step 9: Aggregations (GROUP BY + PAR30 calculation)
now = pd.Timestamp.now()
df["is_par30"] = np.where(
    (df["DueDate"] < now - pd.Timedelta(days=30)) & (df["OutstandingAmount"] > 0),
    df["OutstandingAmount"],
    0
)
df

Unnamed: 0,FinancialRequestId,InvestigatorId,investigator_Id,investigator_UserName,investigator_Name,investigator_Surname,investigator_ManagerId,supervisor_Id,supervisor_UserName,supervisor_Name,supervisor_Surname,supervisor_ManagerId,UserId,RoleId,RoleName,DueDate,DueAmount,AcutalPaidAmount,OutstandingAmount,is_par30
0,3a08d703-fa37-56d4-cdd2-794b2ab38241,3a074590-6ced-5665-38bd-a50c9105bf36,3a074590-6ced-5665-38bd-a50c9105bf36,sobhy,Mahmoud,Sobhy,,,,,,,3a074590-6ced-5665-38bd-a50c9105bf36,3a0677ed-84c1-81f1-8bdc-df4582e31505,Development,,,,,0.0
1,3a08d726-ba65-0faa-720e-839c2bbd779b,3a074590-6ced-5665-38bd-a50c9105bf36,3a074590-6ced-5665-38bd-a50c9105bf36,sobhy,Mahmoud,Sobhy,,,,,,,3a074590-6ced-5665-38bd-a50c9105bf36,3a0677ed-84c1-81f1-8bdc-df4582e31505,Development,,,,,0.0


In [26]:
agg_df = df.groupby([
    "InvestigatorId",
    "investigator_UserName",
    "investigator_Name",
    "investigator_Surname",
    "supervisor_Id",
    "supervisor_UserName",
    "supervisor_Name",
    "supervisor_Surname",
    "RoleName"
]).agg(
    TotalLoansProcessed=("FinancialRequestId", "nunique"),
    PAR30_Numerator=("is_par30", "sum"),
    PAR30_Denominator=("OutstandingAmount", "sum")
).reset_index()
agg_df

Unnamed: 0,InvestigatorId,investigator_UserName,investigator_Name,investigator_Surname,supervisor_Id,supervisor_UserName,supervisor_Name,supervisor_Surname,RoleName,TotalLoansProcessed,PAR30_Numerator,PAR30_Denominator


In [27]:
# Step 10: Compute PAR30_Percentage
agg_df["PAR30_Percentage"] = (agg_df["PAR30_Numerator"] * 100.0 /
                              agg_df["PAR30_Denominator"].replace(0, np.nan))
agg_df

Unnamed: 0,InvestigatorId,investigator_UserName,investigator_Name,investigator_Surname,supervisor_Id,supervisor_UserName,supervisor_Name,supervisor_Surname,RoleName,TotalLoansProcessed,PAR30_Numerator,PAR30_Denominator,PAR30_Percentage


In [28]:
# Step 11: Rename / format columns
final_df = agg_df.rename(columns={
    "investigator_UserName": "InvestigatorUserName",
    "investigator_Name": "InvestigatorName",
    "investigator_Surname": "InvestigatorSurname",
    "RoleName": "InvestigatorRole",
    "supervisor_Id": "SupervisorId",
    "supervisor_UserName": "SupervisorUserName",
    "supervisor_Name": "SupervisorName",
    "supervisor_Surname": "SupervisorSurname"
})
final_df

Unnamed: 0,InvestigatorId,InvestigatorUserName,InvestigatorName,InvestigatorSurname,SupervisorId,SupervisorUserName,SupervisorName,SupervisorSurname,InvestigatorRole,TotalLoansProcessed,PAR30_Numerator,PAR30_Denominator,PAR30_Percentage


In [29]:
# Combine full names
final_df["InvestigatorFullName"] = final_df["InvestigatorName"] + " " + final_df["InvestigatorSurname"]
final_df["SupervisorFullName"] = final_df["SupervisorName"] + " " + final_df["SupervisorSurname"]

# Reorder columns
final_df = final_df[[
    "InvestigatorId",
    "InvestigatorUserName",
    "InvestigatorFullName",
    "InvestigatorRole",
    "SupervisorId",
    "SupervisorUserName",
    "SupervisorFullName",
    "TotalLoansProcessed",
    "PAR30_Percentage"
]].sort_values("PAR30_Percentage", ascending=False)
final_df

Unnamed: 0,InvestigatorId,InvestigatorUserName,InvestigatorFullName,InvestigatorRole,SupervisorId,SupervisorUserName,SupervisorFullName,TotalLoansProcessed,PAR30_Percentage
