In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.multioutput import MultiOutputClassifier
from xgboost import XGBClassifier

In [35]:
# Load the dataset
df = pd.read_excel('/content/Customer.xlsx', 'Customer')
df.head()

Unnamed: 0,AID,Name,Gender,Age,Occupation,Income,Marital Status,Credit Score,Home Loan,Vehicle Loan,...,Car Insurance,Home Insurance,Stocks Investments,Bonds,Real-Estates,Commodities,Retirement Policy,Child Policy,Term Policy,Endowment Policy
0,121,Lassi,Male,22,Student,50000,Single,0.0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,122,Rani,Female,35,Job,800000,Married,7.5,1,1,...,1,1,1,1,1,1,0,1,1,0
2,123,Vijay,Male,42,Business,1200000,Married,8.5,1,1,...,1,1,1,1,1,1,1,1,1,0
3,124,Sunita,Others,30,Job,600000,Single,6.8,1,1,...,1,1,1,1,1,1,0,0,1,0
4,125,Arjun,Male,55,Business,1500000,Married,9.2,1,1,...,1,1,1,1,1,1,1,1,1,1


In [36]:
# Remove 'Name' and 'AID' columns
df = df.drop(columns=['Name', 'AID'])
# One-hot encode categorical features
df = pd.get_dummies(df, columns=['Gender', 'Marital Status', 'Occupation'])
df.head()

Unnamed: 0,Age,Income,Credit Score,Home Loan,Vehicle Loan,Personal Loan,Education Loan,Equity Funds,Debt Funds,Hybrid Funds,...,Gender_Female,Gender_Male,Gender_Others,Marital Status_Divorced,Marital Status_Married,Marital Status_Single,Occupation_Business,Occupation_Job,Occupation_Retired,Occupation_Student
0,22,50000,0.0,0,0,0,1,1,0,0,...,False,True,False,False,False,True,False,False,False,True
1,35,800000,7.5,1,1,0,0,1,1,1,...,True,False,False,False,True,False,False,True,False,False
2,42,1200000,8.5,1,1,1,0,1,1,1,...,False,True,False,False,True,False,True,False,False,False
3,30,600000,6.8,1,1,0,0,1,1,1,...,False,False,True,False,False,True,False,True,False,False
4,55,1500000,9.2,1,1,1,0,1,1,1,...,False,True,False,False,True,False,True,False,False,False


In [37]:
# Scale the Credit Score, Age, and Income
scaler = StandardScaler()
df[['Credit Score', 'Age', 'Income']] = scaler.fit_transform(df[['Credit Score', 'Age', 'Income']])
df.head()

Unnamed: 0,Age,Income,Credit Score,Home Loan,Vehicle Loan,Personal Loan,Education Loan,Equity Funds,Debt Funds,Hybrid Funds,...,Gender_Female,Gender_Male,Gender_Others,Marital Status_Divorced,Marital Status_Married,Marital Status_Single,Occupation_Business,Occupation_Job,Occupation_Retired,Occupation_Student
0,-1.38358,-1.906645,-2.78596,0,0,0,1,1,0,0,...,False,True,False,False,False,True,False,False,False,True
1,-0.331065,0.366385,0.347935,1,1,0,0,1,1,1,...,True,False,False,False,True,False,False,True,False,False
2,0.235673,1.578668,0.765787,1,1,1,0,1,1,1,...,False,True,False,False,True,False,True,False,False,False
3,-0.735878,-0.239756,0.055438,1,1,0,0,1,1,1,...,False,False,True,False,False,True,False,True,False,False
4,1.288188,2.48788,1.058284,1,1,1,0,1,1,1,...,False,True,False,False,True,False,True,False,False,False


In [38]:
# Define features and targets
features = ['Age', 'Income', 'Credit Score', 'Gender_Female', 'Gender_Male', 'Gender_Others',
            'Marital Status_Divorced', 'Marital Status_Married', 'Marital Status_Single',
            'Occupation_Business', 'Occupation_Job', 'Occupation_Retired', 'Occupation_Student']

targets = ['Home Loan', 'Vehicle Loan', 'Personal Loan', 'Education Loan', 'Equity Funds', 'Debt Funds',
           'Hybrid Funds', 'Index Funds', 'Short-term FD', 'Long-term FD', 'Tax-savings FD',
           'Senior Citizen FD', 'Health Insurance', 'Life Insurance', 'Car Insurance', 'Home Insurance',
           'Stocks Investments', 'Bonds', 'Real-Estates', 'Commodities', 'Retirement Policy',
           'Child Policy', 'Term Policy', 'Endowment Policy']

X = df[features]
y = df[targets]


In [39]:
# Ensure all target values are binary and start from 0
for target in targets:
    label_encoder = LabelEncoder()
    y[target] = label_encoder.fit_transform(y[target])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[target] = label_encoder.fit_transform(y[target])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[target] = label_encoder.fit_transform(y[target])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[target] = label_encoder.fit_transform(y[target])
A value is trying to be set on a copy of a slice fro

In [40]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [41]:
# Initialize MultiOutputClassifier with XGBClassifier
xgb_model = MultiOutputClassifier(XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss'))

# Train the model
xgb_model.fit(X_train, y_train)

# Make predictions
xgb_pred = xgb_model.predict(X_test)

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

In [47]:
from sklearn.metrics import accuracy_score


# Calculate overall accuracy
overall_accuracy = accuracy_score(y_test, xgb_pred)

# Print overall accuracy
print(f"Overall Accuracy: {overall_accuracy:.4f}")


Overall Accuracy: 0.6667


In [48]:

# Calculate and display individual metrics for each target column for the XGBoost model
for i, column in enumerate(targets):
    accuracy = accuracy_score(y_test[column], xgb_pred[:, i])
    precision = precision_score(y_test[column], xgb_pred[:, i], zero_division=0)
    recall = recall_score(y_test[column], xgb_pred[:, i], zero_division=0)
    f1 = f1_score(y_test[column], xgb_pred[:, i], zero_division=0)

    print(f"Metrics for {column}:")
    print(f"  Accuracy: {accuracy:.4f}")
    print(f"  Precision: {precision:.4f}")
    print(f"  Recall: {recall:.4f}")
    print(f"  F1-Score: {f1:.4f}")
    print()

# Example of making recommendations for a new customer
new_customer = pd.DataFrame({
    'Age': [65],
    'Income': [650000],
    'Credit Score': [8],  # Use the original value before scaling
    'Gender_Female': [0],
    'Gender_Male': [1],
    'Gender_Others': [0],
    'Marital Status_Divorced': [0],
    'Marital Status_Married': [1],
    'Marital Status_Single': [0],
    'Occupation_Business': [1],
    'Occupation_Job': [0],
    'Occupation_Retired': [0],
    'Occupation_Student': [0]
})

Metrics for Home Loan:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1-Score: 1.0000

Metrics for Vehicle Loan:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1-Score: 1.0000

Metrics for Personal Loan:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1-Score: 1.0000

Metrics for Education Loan:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1-Score: 1.0000

Metrics for Equity Funds:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1-Score: 1.0000

Metrics for Debt Funds:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1-Score: 1.0000

Metrics for Hybrid Funds:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1-Score: 1.0000

Metrics for Index Funds:
  Accuracy: 1.0000
  Precision: 0.0000
  Recall: 0.0000
  F1-Score: 0.0000

Metrics for Short-term FD:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1-Score: 1.0000

Metrics for Long-term FD:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  

In [49]:
# Scale the features of the new customer using the previously fitted scaler
new_customer[['Credit Score', 'Age', 'Income']] = scaler.transform(new_customer[['Credit Score', 'Age', 'Income']])

# Get predictions for the new customer
xgb_recommendations = xgb_model.predict(new_customer)[0]

# Print recommendations
print("Recommendations from XGBoost:")
for target, recommendation in zip(targets, xgb_recommendations):
    print(f"{target}: {'1' if recommendation == 1 else '0'}")


Recommendations from XGBoost:
Home Loan: 1
Vehicle Loan: 1
Personal Loan: 0
Education Loan: 0
Equity Funds: 1
Debt Funds: 1
Hybrid Funds: 1
Index Funds: 0
Short-term FD: 1
Long-term FD: 1
Tax-savings FD: 1
Senior Citizen FD: 0
Health Insurance: 0
Life Insurance: 0
Car Insurance: 1
Home Insurance: 1
Stocks Investments: 0
Bonds: 1
Real-Estates: 1
Commodities: 1
Retirement Policy: 1
Child Policy: 1
Term Policy: 1
Endowment Policy: 0
