In [4]:
import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt


In [5]:
# Load the customer dataset from the working directory.
df = pd.read_csv("extended_customer_prediction_data.csv")

# Target: the purchased product. Features: every other column except Name.
y = df['ProductBought']
X = df.drop(columns=['Name', 'ProductBought'])

# Feature columns handed to CatBoost as categorical.
categorical_features = ['Region', 'Language', 'Profession', 'OwnsSmartphone', 'MaritalStatus']

# 80/20 hold-out split; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Wrap both splits in Pools so the categorical columns are declared once each.
train_pool = Pool(X_train, y_train, cat_features=categorical_features)
test_pool = Pool(X_test, y_test, cat_features=categorical_features)

# Fit the classifier silently (verbose=0) and predict on the hold-out split.
model = CatBoostClassifier(
    iterations=300,
    depth=10,
    learning_rate=0.05,
    verbose=0,
)
model.fit(train_pool)
y_pred = model.predict(test_pool)

# Persist the fitted model so later cells can reload it from disk.
model.save_model("catboost_model.cbm")

# Headline accuracy, followed by the class distribution of the full dataset.
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print(df['ProductBought'].value_counts())

# Full-precision accuracy, then the support-weighted multi-class metrics.
print("Accuracy:", accuracy_score(y_test, y_pred))
for metric_name, metric_fn in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f"{metric_name}:", metric_fn(y_test, y_pred, average='weighted'))





Model Accuracy: 0.90
ProductBought
Loan               28
Credit Card        24
Savings Account    14
Insurance          10
Digital Wallet     10
Mutual Fund        10
Name: count, dtype: int64
Accuracy: 0.9
Precision: 0.9400000000000001
Recall: 0.9
F1 Score: 0.8958333333333333


In [19]:
import pandas as pd
from catboost import CatBoostClassifier, Pool

# Categorical feature columns passed to the model. "Name" is deliberately not
# listed: the training cell drops Name before fitting, so it is not a model
# feature and must not be declared as one here.
cat_features = [
    "Region", "Language", "Profession",
    "OwnsSmartphone", "MaritalStatus"
]

# Load the trained model saved by the training cell.
model = CatBoostClassifier()
model.load_model("catboost_model.cbm")

# Collect the customer's details interactively.
print("Enter customer details:\n")

name = input("Name: ")
region = input("Region (Village/City/Metro): ")
language = input("Language (Hindi/English/Kannada/Tamil): ")
profession = input("Profession (Student/Businessman/Shopkeeper/Teacher/Farmer): ")
income_str = input("Monthly Income (e.g., 85000): ")
age_str = input("Age (e.g., 45): ")
owns_smartphone = input("Owns Smartphone? (Yes/No): ")
internet_hours_str = input("Internet Usage Hours per day (e.g., 3): ")
marital_status = input("Marital Status (Single/Married): ")

# Convert the numeric fields. On bad input, raise instead of calling exit():
# inside a Jupyter kernel exit() shuts the kernel down and discards all
# notebook state, whereas an exception only stops this cell.
try:
    income = float(income_str)
    age = int(age_str)
    internet_hours = float(internet_hours_str)
except ValueError as err:
    raise ValueError(
        "⚠️ Please enter valid numeric values for income, age, and internet usage hours."
    ) from err

# Full customer record, including Name, kept for display or later cells.
customer_data = pd.DataFrame([{
    "Name": name,
    "Region": region,
    "Language": language,
    "Profession": profession,
    "Income": income,
    "Age": age,
    "OwnsSmartphone": owns_smartphone,
    "InternetUsageHours": internet_hours,
    "MaritalStatus": marital_status
}])

# Build the model input the same way the training cell built X: without Name.
input_pool = Pool(customer_data.drop(columns=["Name"]), cat_features=cat_features)

# Predict. The result is indexed [0][0] to get the single label for the one
# input row.
prediction = model.predict(input_pool)
print(f"\n🎯 Recommended Product: {prediction[0][0]}")

Enter customer details:




🎯 Recommended Product: Credit Card


In [21]:
import pandas as pd
import ast

# Take the label produced by the prediction cell and trim surrounding
# whitespace so it can be compared against the CSV.
predicted_product = str(prediction[0][0]).strip()

csv_path = "product_relation_dataset.csv"

# Read the product-relation table (the first row is the header) and strip
# MainProduct so stray spaces cannot break the comparison below.
csv_df = pd.read_csv(csv_path)
csv_df['MainProduct'] = csv_df['MainProduct'].str.strip()

# Case-insensitive match of the predicted product against MainProduct.
is_match = csv_df['MainProduct'].str.lower() == predicted_product.lower()
match_row = csv_df[is_match]

if match_row.empty:
    print(f"\n⚠️ No related products found for '{predicted_product}'. Check spelling and CSV content.")
else:
    # AdditionalProducts stores a list written as text; literal_eval parses
    # it into a Python list without executing arbitrary code.
    related_products = ast.literal_eval(match_row.iloc[0]['AdditionalProducts'])
    print(f"\n🎯 Predicted Product: {predicted_product}")
    print("\n📦 Other digital products often bought together:")
    for product in related_products:
        print(f"- {product}")



🎯 Predicted Product: Credit Card

📦 Other digital products often bought together:
- Loan
- Savings Account
- Demat Account
