In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report


In [2]:
# Load the dataset
data = pd.read_csv('customer.csv')



In [3]:
# Drop non-feature columns
data_cleaned = data.drop(columns=['AID', 'Name'])

# Encode categorical variables
data_encoded = pd.get_dummies(data_cleaned, columns=['Gender', 'Occupation', 'Marital Status'])



In [4]:
# Define features and targets
features = [
    'Age', 'Income', 'Credit Score', 
    'Gender_Female', 'Gender_Male', 'Gender_Others',
    'Occupation_Business', 'Occupation_Job', 'Occupation_Retired', 'Occupation_Student',
    'Marital Status_Divorced', 'Marital Status_Married', 'Marital Status_Single'
]

target = [
    'Home Loan', 'Vehicle Loan', 'Personal Loan', 'Education Loan', 'Equity Funds',
    'Debt Funds', 'Hybrid Funds', 'Index Funds', 'Short-term FD', 'Long-term FD',
    'Tax-savings FD', 'Senior Citizen FD', 'Health Insurance', 'Life Insurance', 
    'Car Insurance', 'Home Insurance', 'Stocks Investments', 'Bonds', 'Real-Estates', 
    'Commodities', 'Retirement Policy', 'Child Policy', 'Term Policy', 'Endowment Policy'
]



In [5]:
# Split data into features and targets
X = data_encoded[features]
y = data_encoded[target]



In [6]:
# Standardize numeric features
scaler = StandardScaler()
X[['Age', 'Income', 'Credit Score']] = scaler.fit_transform(X[['Age', 'Income', 'Credit Score']])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[['Age', 'Income', 'Credit Score']] = scaler.fit_transform(X[['Age', 'Income', 'Credit Score']])


In [7]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [8]:
# Train the model
rf = RandomForestClassifier(n_estimators=100, random_state=42)
model = MultiOutputClassifier(rf)
model.fit(X_train, y_train)



In [9]:
# Make predictions
y_pred = model.predict(X_test)

In [10]:
from sklearn.metrics import accuracy_score


# Calculate overall accuracy
overall_accuracy = accuracy_score(y_test, y_pred)

# Print overall accuracy
print(f"Overall Accuracy: {overall_accuracy:.4f}")

Overall Accuracy: 0.7619


In [11]:
# Evaluate the model
for i, col in enumerate(target):
    print(f"Evaluating: {col}")
    print(classification_report(y_test[col], y_pred[:, i]))

Evaluating: Home Loan
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      1.00      1.00        15

    accuracy                           1.00        21
   macro avg       1.00      1.00      1.00        21
weighted avg       1.00      1.00      1.00        21

Evaluating: Vehicle Loan
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      1.00      1.00        15

    accuracy                           1.00        21
   macro avg       1.00      1.00      1.00        21
weighted avg       1.00      1.00      1.00        21

Evaluating: Personal Loan
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      1.00      1.00        12

    accuracy                           1.00        21
   macro avg       1.00      1.00      1.00        21
w

In [12]:
# Example: Take input from user for testing
def get_user_input():
    # Example input - adjust as needed
    return pd.DataFrame({
        'Age': [70],
        'Income': [100000],
        'Credit Score': [8],
        'Gender_Female': [0],
        'Gender_Male': [1],
        'Gender_Others': [0],
        'Occupation_Business': [0],
        'Occupation_Job': [0],
        'Occupation_Retired': [1],
        'Occupation_Student': [0],
        'Marital Status_Divorced': [0],
        'Marital Status_Married': [1],
        'Marital Status_Single': [0]
    }).reindex(columns=features, fill_value=0)

In [13]:

# Standardize the new customer data
new_customer = get_user_input()
new_customer[['Age', 'Income', 'Credit Score']] = scaler.transform(new_customer[['Age', 'Income', 'Credit Score']])

In [14]:
# Predict for the new customer
predictions = model.predict(new_customer)

In [15]:
# Format the recommendations
recommendations_df = pd.DataFrame(predictions, columns=target).iloc[0]
print("Recommended Services and Scores:")
for service, score in recommendations_df.items():
    print(f"{service}: {score:.2f}")

Recommended Services and Scores:
Home Loan: 0.00
Vehicle Loan: 0.00
Personal Loan: 0.00
Education Loan: 0.00
Equity Funds: 1.00
Debt Funds: 1.00
Hybrid Funds: 1.00
Index Funds: 1.00
Short-term FD: 0.00
Long-term FD: 1.00
Tax-savings FD: 1.00
Senior Citizen FD: 1.00
Health Insurance: 1.00
Life Insurance: 1.00
Car Insurance: 1.00
Home Insurance: 1.00
Stocks Investments: 1.00
Bonds: 1.00
Real-Estates: 1.00
Commodities: 1.00
Retirement Policy: 1.00
Child Policy: 1.00
Term Policy: 1.00
Endowment Policy: 1.00
