## Previous Application Data Aggregation

Applicants may have submitted multiple loan applications in the past.
This notebook aggregates previous application records at the applicant
level to capture historical credit demand and approval behavior.


In [3]:
import pandas as pd

# Read the previous-application records from the cleaned CSV file, then
# preview the first five rows as the cell output.
prev_app = pd.read_csv(
    filepath_or_buffer="../data/home-credit-default-risk/previous_application_cleaned.csv"
)
prev_app.head(n=5)


Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,...,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
0,2030495,271877,Consumer loans,1730.43,17145.0,17145.0,0.0,17145.0,SATURDAY,15,...,Connectivity,12.0,middle,POS mobile with interest,365243.0,-42.0,300.0,-42.0,-37.0,0.0
1,2802425,108129,Cash loans,25188.615,607500.0,679671.0,1638.0,607500.0,THURSDAY,11,...,XNA,36.0,low_action,Cash X-Sell: low,365243.0,-134.0,916.0,365243.0,365243.0,1.0
2,2523466,122040,Cash loans,15060.735,112500.0,136444.5,1638.0,112500.0,TUESDAY,11,...,XNA,12.0,high,Cash X-Sell: high,365243.0,-271.0,59.0,365243.0,365243.0,1.0
3,2819243,176158,Cash loans,47041.335,450000.0,470790.0,1638.0,450000.0,MONDAY,7,...,XNA,12.0,middle,Cash X-Sell: middle,365243.0,-482.0,-152.0,-182.0,-177.0,1.0
4,1784265,202054,Cash loans,31924.395,337500.0,404055.0,1638.0,337500.0,THURSDAY,9,...,XNA,24.0,high,Cash Street: high,365243.0,-831.0,-361.0,-537.0,-499.0,0.0


In [4]:
# Integer indicator columns derived from NAME_CONTRACT_STATUS:
# 1 where the status equals the given label exactly, 0 otherwise.
prev_app["is_approved"] = prev_app["NAME_CONTRACT_STATUS"].eq("Approved").astype(int)
prev_app["is_rejected"] = prev_app["NAME_CONTRACT_STATUS"].eq("Refused").astype(int)


In [5]:
# Collapse the previous applications to one row per SK_ID_CURR.
# Each output column is declared as (source column, aggregation) via NamedAgg;
# reset_index() turns the SK_ID_CURR group key back into an ordinary column.
prev_agg = (
    prev_app
    .groupby("SK_ID_CURR")
    .agg(**{
        # Number of non-null SK_ID_PREV values in the group.
        "prev_app_count": pd.NamedAgg(column="SK_ID_PREV", aggfunc="count"),
        # Sums of the 0/1 indicator columns, i.e. how many rows carried each flag.
        "approved_count": pd.NamedAgg(column="is_approved", aggfunc="sum"),
        "rejected_count": pd.NamedAgg(column="is_rejected", aggfunc="sum"),
        # Per-group means of the numeric columns.
        "avg_loan_amount": pd.NamedAgg(column="AMT_CREDIT", aggfunc="mean"),
        "avg_annuity": pd.NamedAgg(column="AMT_ANNUITY", aggfunc="mean"),
        "avg_decision_days": pd.NamedAgg(column="DAYS_DECISION", aggfunc="mean"),
    })
    .reset_index()
)


In [6]:
# Fraction of each applicant's counted previous applications that were approved.
prev_agg["approval_rate"] = prev_agg["approved_count"].div(prev_agg["prev_app_count"])


In [7]:
# Sanity-check the aggregated frame.
# A notebook cell renders only its final expression, so the shape and the
# per-column null counts are printed explicitly; as bare expressions they
# would be evaluated and silently discarded.
print("shape:", prev_agg.shape)
print("null counts per column:")
print(prev_agg.isnull().sum())

# Last expression: rich preview of the first rows.
prev_agg.head()


Unnamed: 0,SK_ID_CURR,prev_app_count,approved_count,rejected_count,avg_loan_amount,avg_annuity,avg_decision_days,approval_rate
0,100001,1,1,0,23787.0,3951.0,-1740.0,1.0
1,100002,1,1,0,179055.0,9251.775,-606.0,1.0
2,100003,3,3,0,484191.0,56553.99,-1305.0,1.0
3,100004,1,1,0,20106.0,5357.25,-815.0,1.0
4,100005,2,1,0,20076.75,8031.6,-536.0,0.5


In [8]:
# Write the applicant-level aggregates to CSV without the DataFrame index.
prev_agg.to_csv(
    path_or_buf="../data/previous_application_aggregated.csv",
    index=False,
)
