In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_auc_score, classification_report
)
from sklearn.utils import resample
import joblib



df = pd.read_excel("product_purchase_.xlsx")
print("Shape:", df.shape)
print(df.head())



Shape: (200, 6)
   TimeOnSite   Age  Gender  AdsClicked  PreviousPurchases  Purchase
0    8.467391  41.0  Female         8.0                3.0       1.0
1   18.816166  41.0  Female         7.0                1.0       0.0
2    7.457470  18.0    Male         1.0                NaN       1.0
3   11.136754  63.0    Male         NaN                2.0       0.0
4    5.102389  55.0  Female         3.0                2.0       1.0


In [3]:
df.drop_duplicates(inplace=True)
df.fillna(df.median(numeric_only=True), inplace=True)
df.fillna(df.mode().iloc[0], inplace=True)


if 'Gender' in df.columns:
    le = LabelEncoder()
    df['Gender'] = le.fit_transform(df['Gender'].astype(str))
else:
    le = None

In [4]:

target_col = None
for c in df.columns:
    if c.lower() in ['purchase', 'bought', 'target', 'label']:
        target_col = c
        break
if target_col is None:
    raise ValueError(" No target column found! Add a 'Purchase' or 'Bought' column.")


if df[target_col].nunique() > 2:
    df[target_col] = (df[target_col] > 0).astype(int)



print("\nClass distribution before balancing:")
print(df[target_col].value_counts())

df_majority = df[df[target_col] == 0]
df_minority = df[df[target_col] == 1]

if len(df_minority) > 0 and len(df_majority) / len(df_minority) > 1.5:
    df_minority_upsampled = resample(
        df_minority,
        replace=True,
        n_samples=len(df_majority),
        random_state=42
    )
    df = pd.concat([df_majority, df_minority_upsampled])
    print("\n Dataset balanced using upsampling.")
else:
    print("\n Dataset already balanced or nearly balanced.")


Class distribution before balancing:
Purchase
0    113
1     87
Name: count, dtype: int64

 Dataset already balanced or nearly balanced.


In [5]:


X = df.drop(columns=[target_col])
y = df[target_col]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)




scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [6]:


log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X_train_scaled, y_train)

tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train, y_train)




y_pred_log = log_reg.predict(X_test_scaled)
y_pred_tree = tree.predict(X_test)

In [7]:


def evaluate_model(name, y_true, y_pred):
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    auc = roc_auc_score(y_true, y_pred)

    print(f"\n {name} PERFORMANCE")
    print(f"Accuracy  : {acc:.4f}")
    print(f"Precision : {prec:.4f}")
    print(f"Recall    : {rec:.4f}")
    print(f"F1-score  : {f1:.4f}")
    print(f"ROC-AUC   : {auc:.4f}")
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("\nClassification Report:\n", classification_report(y_true, y_pred))


    return acc, prec, rec, f1, auc




evaluate_model("Logistic Regression", y_test, y_pred_log)
evaluate_model("Decision Tree", y_test, y_pred_tree)




results = pd.DataFrame({
    "Model": ["Logistic Regression", "Decision Tree"],
    "Accuracy": [
        accuracy_score(y_test, y_pred_log),
        accuracy_score(y_test, y_pred_tree)
    ],
    "F1 Score": [
        f1_score(y_test, y_pred_log),
        f1_score(y_test, y_pred_tree)
    ]
})

print("\nMODEL COMPARISON SUMMARY")
print(results)
best_model = results.loc[results['Accuracy'].idxmax(), 'Model']
print(f"\nBest Performing Model: {best_model}")




 Logistic Regression PERFORMANCE
Accuracy  : 0.6250
Precision : 0.5714
Recall    : 0.2500
F1-score  : 0.3478
ROC-AUC   : 0.5625
Confusion Matrix:
 [[21  3]
 [12  4]]

Classification Report:
               precision    recall  f1-score   support

           0       0.64      0.88      0.74        24
           1       0.57      0.25      0.35        16

    accuracy                           0.62        40
   macro avg       0.60      0.56      0.54        40
weighted avg       0.61      0.62      0.58        40


 Decision Tree PERFORMANCE
Accuracy  : 0.5750
Precision : 0.4706
Recall    : 0.5000
F1-score  : 0.4848
ROC-AUC   : 0.5625
Confusion Matrix:
 [[15  9]
 [ 8  8]]

Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.62      0.64        24
           1       0.47      0.50      0.48        16

    accuracy                           0.57        40
   macro avg       0.56      0.56      0.56        40
weighted avg       0.58

In [8]:


joblib.dump(log_reg, "model.joblib")
joblib.dump(scaler, "scaler.joblib")
if le:
    joblib.dump(le, "le_gender.joblib")

In [None]:
import sys
import subprocess

try:
    import gradio as gr
except ModuleNotFoundError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio"])
    import gradio as gr

import numpy as np
import joblib

model = joblib.load("model.joblib")
scaler = joblib.load("scaler.joblib")
le = joblib.load("le_gender.joblib")

def predict_purchase(time_on_site, age, gender, ads_clicked, prev_purchases):
    if gender not in le.classes_:
        return "‚ùå Invalid gender. Choose Male, Female, or Other."
    g = le.transform([gender])[0]
    X = np.array([[time_on_site, age, g, ads_clicked, prev_purchases]])
    X_scaled = scaler.transform(X)
    prob = model.predict_proba(X_scaled)[0][1]
    pred = model.predict(X_scaled)[0]
    result = "‚úÖ Will Buy" if pred == 1 else "‚ùå Will Not Buy"
    return f"{result} (Probability: {prob:.2f})"

interface = gr.Interface(
    fn=predict_purchase,
    inputs=[
        gr.Number(label="Time on Website (minutes)"),
        gr.Number(label="Age"),
        gr.Radio(["Male", "Female", "Other"], label="Gender"),
        gr.Number(label="Ads Clicked"),
        gr.Number(label="Previous Purchases")
    ],
    outputs="text",
    title="üõí Product Purchase Likelihood Predictor",
    description="Enter customer details to predict if they will buy the product.",
    allow_flagging="never"
)

interface.launch()


  from .autonotebook import tqdm as notebook_tqdm


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




