In [1]:
import pandas as pd

df = pd.read_csv("../data/cleaned_transactions.csv")
df['TransactionDate'] = pd.to_datetime(df['TransactionDate'])


In [None]:
#Built customer level profile
customer_profile = (
    df
    .groupby('CustomerID')
    .agg(
        Total_Transactions=('TransactionID', 'count'),
        Avg_Transaction_Amount=('TransactionAmount', 'mean'),
        Avg_Account_Balance=('AccountBalance', 'mean'),
        Avg_RiskScore=('RiskScore', 'mean'),
        Avg_Tenure_Months=('TenureMonths', 'mean')
    )
    .reset_index()
)

customer_profile.head()


Unnamed: 0,CustomerID,Total_Transactions,Avg_Transaction_Amount,Avg_Account_Balance,Avg_RiskScore,Avg_Tenure_Months
0,CUST1042,8,47720.753365,80435.16753,0.4403,99.875
1,CUST1114,8,48225.32074,60127.893845,0.495418,120.75
2,CUST1121,4,99551.279378,83213.472732,0.432653,96.0
3,CUST1189,3,39113.13963,63648.890237,0.509515,144.333333
4,CUST1223,2,33888.2329,88186.197605,0.506309,106.5


In [None]:
#Added credit rating
credit_rating = (
    df
    .groupby('CustomerID')['CreditRating']
    .agg(lambda x: x.mode().iloc[0])
    .reset_index()
)

customer_profile = customer_profile.merge(
    credit_rating,
    on='CustomerID',
    how='left'
)

customer_profile.head()


Unnamed: 0,CustomerID,Total_Transactions,Avg_Transaction_Amount,Avg_Account_Balance,Avg_RiskScore,Avg_Tenure_Months,CreditRating
0,CUST1042,8,47720.753365,80435.16753,0.4403,99.875,309
1,CUST1114,8,48225.32074,60127.893845,0.495418,120.75,349
2,CUST1121,4,99551.279378,83213.472732,0.432653,96.0,411
3,CUST1189,3,39113.13963,63648.890237,0.509515,144.333333,677
4,CUST1223,2,33888.2329,88186.197605,0.506309,106.5,320


In [None]:
#Created Risk Segments
def risk_segment(score):
    if score <= 3:
        return 'Low Risk'
    elif score <= 6:
        return 'Medium Risk'
    else:
        return 'High Risk'

customer_profile['Risk_Segment'] = customer_profile['Avg_RiskScore'].apply(risk_segment)

customer_profile[['CustomerID', 'Avg_RiskScore', 'Risk_Segment']].head()


Unnamed: 0,CustomerID,Avg_RiskScore,Risk_Segment
0,CUST1042,0.4403,Low Risk
1,CUST1114,0.495418,Low Risk
2,CUST1121,0.432653,Low Risk
3,CUST1189,0.509515,Low Risk
4,CUST1223,0.506309,Low Risk


In [5]:
#Customer distribution by risk
risk_distribution = customer_profile['Risk_Segment'].value_counts().reset_index()
risk_distribution.columns = ['Risk_Segment', 'Customer_Count']

risk_distribution


Unnamed: 0,Risk_Segment,Customer_Count
0,Low Risk,188


In [6]:
#Saving outputs
customer_profile.to_excel(
    "../excel_outputs/Task3_Customer_Profile.xlsx",
    index=False
)

risk_distribution.to_excel(
    "../excel_outputs/Task3_Risk_Distribution.xlsx",
    index=False
)

print("Task 3 Excel files saved successfully")


Task 3 Excel files saved successfully


## Insights
-Customers differed significantly in transaction volume, balance stability, and tenure.
-High-risk customers generally exhibited irregular transaction behavior and unstable balances.
-Customer segmentation enables targeted monitoring and personalized financial strategies.