In [1]:
import pandas as pd

In [33]:
# Row-level feature engineering on the customer panel.
# Thresholds for the binary flags below.
HIGH_UTILISATION_THRESHOLD = 0.8  # Utilisation above this sets is_high_utilization
RAPID_GROWTH_THRESHOLD = 0.1      # Utilisation rise above this sets is_rapid_credit_growth


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, giving NaN instead of +/-inf where the denominator is 0."""
    return numerator / denominator.where(denominator != 0)


df = pd.read_csv("customer_panel_test_merge.csv")

# Period-over-period changes must follow Week order within each customer, so
# the diffs are taken on a sorted view. The resulting Series keep the original
# row labels and align back onto `df` without reordering its rows.
by_customer = df.sort_values(["CustomerID", "Week"]).groupby("CustomerID")

df["balance_to_limit"] = _safe_ratio(df["TotalBalance"], df["Limit"])
# The first observation of each customer has no predecessor; its change is set to 0.
df["credit_utilization_change"] = by_customer["Utilisation"].diff().fillna(0)
df["missed_payment_flag"] = (df["PaymentRatio"] == 0).astype(int)
df["payment_ratio_change"] = by_customer["PaymentRatio"].diff().fillna(0)
# Same per-customer Utilisation diff as `credit_utilization_change`; the column
# is kept under its own name so existing consumers still find it.
df["utilization_trend"] = df["credit_utilization_change"]
df["total_accounts"] = df["NumCredit"] + df["NumChecking"] + df["NumSavings"]
# The +1 keeps the denominator non-zero for customers without savings accounts.
df["credit_to_savings_ratio"] = df["NumCredit"] / (df["NumSavings"] + 1)
df["balance_to_salary"] = _safe_ratio(df["TotalBalance"], df["AnnualSalary"])
df["limit_to_salary"] = _safe_ratio(df["Limit"], df["AnnualSalary"])
df["payment_to_salary_ratio"] = _safe_ratio(df["PaymentRatio"], df["AnnualSalary"])
df["is_high_utilization"] = (df["Utilisation"] > HIGH_UTILISATION_THRESHOLD).astype(int)
# Flags rows whose HardInquiries exceed the median over all rows in the file.
df["is_high_inquiry_user"] = (df["HardInquiries"] > df["HardInquiries"].median()).astype(int)
df["is_rapid_credit_growth"] = (
    df["credit_utilization_change"] > RAPID_GROWTH_THRESHOLD
).astype(int)

In [34]:
# Per-customer summary statistics: one row per CustomerID, with each output
# column named "<source column>_<statistic>".
summary_spec = {
    "Utilisation": ("mean", "std", "max"),
    "PaymentRatio": ("mean", "std"),
    "HardInquiries": ("sum", "mean"),
    "balance_to_limit": ("mean", "max"),
}
agg_features = (
    df.groupby("CustomerID")
    .agg(**{
        f"{column}_{stat}": (column, stat)
        for column, stats in summary_spec.items()
        for stat in stats
    })
    .reset_index()
)

# Attach each customer's aggregates to every one of that customer's rows.
complete = df.merge(agg_features, how="left", on="CustomerID")

In [22]:
# Display the merged frame: the panel rows plus the per-customer aggregate columns.
# NOTE(review): this cell's execution count ([22]) is lower than the cells that
# build `complete` ([33], [34]), so the output shown may come from an earlier
# kernel state — re-run top to bottom to confirm.
complete

Unnamed: 0.1,Unnamed: 0,CustomerID,Week,Utilisation,PaymentRatio,HardInquiries,DefaultLabel,TotalBalance,Limit,NumCredit,...,is_rapid_credit_growth,Utilisation_mean,Utilisation_std,Utilisation_max,PaymentRatio_mean,PaymentRatio_std,HardInquiries_sum,HardInquiries_mean,balance_to_limit_mean,balance_to_limit_max
0,0,C000005,1,0.0,0.0,1,0,300.342311,,0,...,0,0.0,0.0,0.0,0.0,0.0,12,0.461538,,
1,1,C000005,2,0.0,0.0,0,0,300.342311,,0,...,0,0.0,0.0,0.0,0.0,0.0,12,0.461538,,
2,2,C000005,3,0.0,0.0,1,0,300.342311,,0,...,0,0.0,0.0,0.0,0.0,0.0,12,0.461538,,
3,3,C000005,4,0.0,0.0,0,0,300.342311,,0,...,0,0.0,0.0,0.0,0.0,0.0,12,0.461538,,
4,4,C000005,5,0.0,0.0,0,0,300.342311,,0,...,0,0.0,0.0,0.0,0.0,0.0,12,0.461538,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13296,13296,C001000,23,0.0,0.0,1,0,1187.182519,,0,...,0,0.0,0.0,0.0,0.0,0.0,11,0.407407,,
13297,13297,C001000,24,0.0,0.0,0,0,1187.182519,,0,...,0,0.0,0.0,0.0,0.0,0.0,11,0.407407,,
13298,13298,C001000,25,0.0,0.0,0,0,1187.182519,,0,...,0,0.0,0.0,0.0,0.0,0.0,11,0.407407,,
13299,13299,C001000,26,0.0,0.0,0,0,1187.182519,,0,...,0,0.0,0.0,0.0,0.0,0.0,11,0.407407,,


In [24]:
# List every column of the merged frame (raw, engineered and aggregate features).
complete.columns

Index(['Unnamed: 0', 'CustomerID', 'Week', 'Utilisation', 'PaymentRatio',
       'HardInquiries', 'DefaultLabel', 'TotalBalance', 'Limit', 'NumCredit',
       'NumChecking', 'NumSavings', 'Age', 'Tenure', 'CreditScore', 'HomeCity',
       'AnnualSalary', 'balance_to_limit', 'credit_utilization_change',
       'missed_payment_flag', 'payment_ratio_change', 'utilization_trend',
       'total_accounts', 'credit_to_savings_ratio', 'balance_to_salary',
       'limit_to_salary', 'payment_to_salary_ratio', 'is_high_utilization',
       'is_high_inquiry_user', 'is_rapid_credit_growth', 'Utilisation_mean',
       'Utilisation_std', 'Utilisation_max', 'PaymentRatio_mean',
       'PaymentRatio_std', 'HardInquiries_sum', 'HardInquiries_mean',
       'balance_to_limit_mean', 'balance_to_limit_max'],
      dtype='object')

In [35]:
# Export the merged feature table. index=False keeps the positional row index
# out of the file; otherwise each write/read round trip adds another
# "Unnamed: 0[.N]" column, as seen in the frames displayed in this notebook.
complete.to_csv("Additional_test.csv", index=False)

In [25]:
# Load "Additional_features_c2.csv" and display it.
# NOTE(review): this rebinds `df`, so the engineered panel built earlier in the
# notebook is no longer reachable under that name after this cell runs.
# NOTE(review): the several "Unnamed: 0[.N]" columns in the output look like
# row indexes saved by earlier to_csv calls — confirm and drop them if unneeded.
df = pd.read_csv("Additional_features_c2.csv")
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,CustomerID,Week,Utilisation,PaymentRatio,HardInquiries,DefaultLabel,TotalBalance,Limit,...,is_rapid_credit_growth,Utilisation_mean,Utilisation_std,Utilisation_max,PaymentRatio_mean,PaymentRatio_std,HardInquiries_sum,HardInquiries_mean,balance_to_limit_mean,balance_to_limit_max
0,0,0,C000005,1,0.0,0.0,1,0,300.342311,,...,0,0.0,0.0,0.0,0.0,0.0,12,0.461538,,
1,1,1,C000005,2,0.0,0.0,0,0,300.342311,,...,0,0.0,0.0,0.0,0.0,0.0,12,0.461538,,
2,2,2,C000005,3,0.0,0.0,1,0,300.342311,,...,0,0.0,0.0,0.0,0.0,0.0,12,0.461538,,
3,3,3,C000005,4,0.0,0.0,0,0,300.342311,,...,0,0.0,0.0,0.0,0.0,0.0,12,0.461538,,
4,4,4,C000005,5,0.0,0.0,0,0,300.342311,,...,0,0.0,0.0,0.0,0.0,0.0,12,0.461538,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13296,13296,13296,C001000,23,0.0,0.0,1,0,1187.182519,,...,0,0.0,0.0,0.0,0.0,0.0,11,0.407407,,
13297,13297,13297,C001000,24,0.0,0.0,0,0,1187.182519,,...,0,0.0,0.0,0.0,0.0,0.0,11,0.407407,,
13298,13298,13298,C001000,25,0.0,0.0,0,0,1187.182519,,...,0,0.0,0.0,0.0,0.0,0.0,11,0.407407,,
13299,13299,13299,C001000,26,0.0,0.0,0,0,1187.182519,,...,0,0.0,0.0,0.0,0.0,0.0,11,0.407407,,


In [27]:
# Reload the raw panel (rebinding `df`) and attach the per-customer aggregates
# to each row. The row-level engineered columns are not recomputed here, so
# `complete2` holds only the raw columns plus the aggregate columns.
# NOTE(review): relies on `agg_features` left in the kernel by an earlier cell.
df = pd.read_csv("customer_panel_test_merge.csv")
complete2 = df.merge(agg_features, how="left", on="CustomerID")

In [28]:
# Export the raw-plus-aggregates table. index=False keeps the positional row
# index out of the file, so reloading it does not add an "Unnamed: 0" column.
# NOTE(review): this writes to the same path as the earlier `complete.to_csv`
# cell, so on a top-to-bottom run it replaces that export — confirm which
# table "Additional_test.csv" is meant to contain.
complete2.to_csv("Additional_test.csv", index=False)