In [4]:
import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt


In [5]:
df = pd.read_csv("extended_customer_prediction_data.csv")

X = df.drop(columns = ['Name', 'ProductBought'])
y = df['ProductBought']

categorical_features = ['Region', 'Language', 'Profession', 'OwnsSmartphone', 'MaritalStatus']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

train_pool = Pool(X_train, y_train, cat_features = categorical_features)
test_pool = Pool(X_test, y_test, cat_features = categorical_features)

model = CatBoostClassifier(iterations = 300, depth = 10, learning_rate = 0.05, verbose = 0)
model.fit(train_pool)

y_pred = model.predict(test_pool)


model.save_model("catboost_model.cbm")



accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print(df['ProductBought'].value_counts())

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average='weighted'))
print("Recall:", recall_score(y_test, y_pred, average='weighted'))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))





Model Accuracy: 0.90
ProductBought
Loan               28
Credit Card        24
Savings Account    14
Insurance          10
Digital Wallet     10
Mutual Fund        10
Name: count, dtype: int64
Accuracy: 0.9
Precision: 0.9400000000000001
Recall: 0.9
F1 Score: 0.8958333333333333


In [12]:
import pandas as pd
from catboost import CatBoostClassifier, Pool

# Define categorical columns based on your dataset
cat_features = [
    "Name", "Region", "Language", "Profession",
    "OwnsSmartphone", "MaritalStatus"
]

# Load trained model
model = CatBoostClassifier()
model.load_model("catboost_model.cbm")

# Get user input
print("Enter customer details:\n")

name = input("Name: ")
region = input("Region (Village/City/Metro): ")
language = input("Language (Hindi/English/Kannada/Tamil): ")
profession = input("Profession (Student/Businessman/Shopkeeper/Teacher/Farmer): ")
income_str = input("Monthly Income (e.g., 85000): ")
age_str = input("Age (e.g., 45): ")
owns_smartphone = input("Owns Smartphone? (Yes/No): ")
internet_hours_str = input("Internet Usage Hours per day (e.g., 3): ")
marital_status = input("Marital Status (Single/Married): ")

# Convert numeric fields safely
try:
    income = float(income_str)
    age = int(age_str)
    internet_hours = float(internet_hours_str)
except ValueError:
    print("⚠️ Please enter valid numeric values for income, age, and internet usage hours.")
    exit()

# Prepare DataFrame for prediction
customer_data = pd.DataFrame([{
    "Name": name,
    "Region": region,
    "Language": language,
    "Profession": profession,
    "Income": income,
    "Age": age,
    "OwnsSmartphone": owns_smartphone,
    "InternetUsageHours": internet_hours,
    "MaritalStatus": marital_status
}])

# Create a Pool for CatBoost
input_pool = Pool(customer_data, cat_features=cat_features)

# Predict
prediction = model.predict(input_pool)
print(f"\n🎯 Recommended Product: {prediction[0]}")

Enter customer details:




🎯 Recommended Product: ['Digital Wallet']


In [None]:
import pandas as pd
import ast
from catboost import CatBoostClassifier, Pool
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# ------------------------- Load Trained CatBoost Model -------------------------
cat_features = [
    "Name", "Region", "Language", "Profession",
    "OwnsSmartphone", "MaritalStatus"
]

# Load model
model = CatBoostClassifier()
model.load_model("catboost_model.cbm")

# ------------------------- Get User Input -------------------------
print("Enter customer details:\n")

name = input("Name: ")
region = input("Region (Village/City/Metro): ")
language = input("Language (Hindi/English/Kannada/Tamil): ")
profession = input("Profession (Student/Businessman/Shopkeeper/Teacher/Farmer): ")
income_str = input("Monthly Income (e.g., 85000): ")
age_str = input("Age (e.g., 45): ")
owns_smartphone = input("Owns Smartphone? (Yes/No): ")
internet_hours_str = input("Internet Usage Hours per day (e.g., 3): ")
marital_status = input("Marital Status (Single/Married): ")

# Convert numeric inputs safely
try:
    income = float(income_str)
    age = int(age_str)
    internet_hours = float(internet_hours_str)
except ValueError:
    print(" Please enter valid numeric values for income, age, and internet usage hours.")
    exit()

# Prepare DataFrame for CatBoost prediction
customer_data = pd.DataFrame([{
    "Name": name,
    "Region": region,
    "Language": language,
    "Profession": profession,
    "Income": income,
    "Age": age,
    "OwnsSmartphone": owns_smartphone,
    "InternetUsageHours": internet_hours,
    "MaritalStatus": marital_status
}])

input_pool = Pool(customer_data, cat_features=cat_features)
prediction = model.predict(input_pool)
predicted_product = str(prediction[0])

print(f"\n  Recommended Product: {predicted_product}")

# ------------------------- Apriori-based Suggestions -------------------------

# Define valid digital products (used by your Gromo app)
digital_products = [
    "Life Insurance",
    "Health Insurance",
    "Credit Card",
    "Demat Account",
    "Loan",
    "Mutual Fund",
    "Savings Account",
    "Personal Accident Cover",
    "Car Insurance",
    "Bike Insurance"
]

# Load association transaction data
df = pd.read_csv("association_transactions.csv")
df['Products'] = df['Products'].apply(ast.literal_eval)

# Encode transactions
te = TransactionEncoder()
te_ary = te.fit(df['Products']).transform(df['Products'])
df_trans = pd.DataFrame(te_ary, columns=te.columns_)

# Generate frequent itemsets and rules
frequent_itemsets = apriori(df_trans, min_support=0.01, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.05)
strong_rules = rules[(rules['lift'] > 1.0) & (rules['leverage'] > 0)]

# Filter based on the predicted product
related = strong_rules[strong_rules['antecedents'].apply(lambda x: predicted_product in list(x))]
all_suggested = pd.Series([item for s in related['consequents'] for item in s]).unique()

# Filter to show only digital products (excluding the predicted one)
# filtered_suggestions = [p for p in all_suggested if p in digital_products and p != predicted_product]


# if filtered_suggestions:
#     print("\n Other digital products often bought together:")
#     for product in filtered_suggestions:
#         print("-", product)
# else:
#     print("\n No related digital product found for this recommendation.")

related = strong_rules[strong_rules['antecedents'].apply(lambda x: predicted_product in list(x))]

if related.empty:
    # fallback: show products frequently bought together with predicted_product (support-wise)
    candidate_sets = frequent_itemsets[frequent_itemsets['itemsets'].apply(lambda x: predicted_product in x and len(x) > 1)]
    if not candidate_sets.empty:
        all_suggested = pd.Series([item for s in candidate_sets['itemsets'] for item in s if item != predicted_product]).unique()
    else:
        all_suggested = []
else:
    all_suggested = pd.Series([item for s in related['consequents'] for item in s]).unique()

filtered_suggestions = [p for p in all_suggested if p in digital_products and p != predicted_product]

if not filtered_suggestions:
    # final fallback: suggest top N digital products overall (excluding predicted)
    filtered_suggestions = [p for p in digital_products if p != predicted_product][:5]

print("\n Other digital products often bought together:")
for product in filtered_suggestions:
    print("-", product)



Enter customer details:


🎯 Recommended Product: ['Digital Wallet']

🛍️ Other digital products often bought together:
- Life Insurance
- Health Insurance
- Credit Card
- Demat Account
- Loan
