In [1]:
# 1. Import Libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import joblib

In [2]:
# 2. Load Dataset
df = pd.read_csv('../data/customer_data.csv')

In [3]:
# 3. Handle Missing Values
imputer = SimpleImputer(strategy='mean')
df[['Age', 'Purchase_Amount', 'Review_Rating', 'Shopping_Experience']] = imputer.fit_transform(
    df[['Age', 'Purchase_Amount', 'Review_Rating', 'Shopping_Experience']]
)


In [4]:
# 4. Encode Categorical Variables
df['Gender'] = LabelEncoder().fit_transform(df['Gender'])
df['Subscription_Status'] = LabelEncoder().fit_transform(df['Subscription_Status'])
df['Discount_Applied'] = LabelEncoder().fit_transform(df['Discount_Applied'])
df['Product_Category'] = LabelEncoder().fit_transform(df['Product_Category'])

In [5]:
# 5. Features & Target
X = df.drop('Product_Category', axis=1)
y = df['Product_Category']

In [6]:
# 6. Scale Numerical Features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [8]:
# 📌 11. Balance Dataset with SMOTE
smote = SMOTE(random_state=42, k_neighbors=1)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)


In [9]:
# 8. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42
)

In [10]:
# 9. K-Means Clustering
kmeans = KMeans(n_clusters=3, random_state=42)
df['Cluster'] = kmeans.fit_predict(X_scaled)
joblib.dump(kmeans, '../models/kmeans_model.pkl')

['../models/kmeans_model.pkl']

In [11]:
# 10. KNN Classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
print('KNN Classification Report:\n', classification_report(y_test, y_pred_knn))
joblib.dump(knn, '../models/knn_model.pkl')

KNN Classification Report:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       0.00      0.00      0.00         2
           2       0.00      0.00      0.00         0

    accuracy                           0.33         3
   macro avg       0.17      0.33      0.22         3
weighted avg       0.17      0.33      0.22         3



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


['../models/knn_model.pkl']

In [12]:
# 11. Decision Tree
dtree = DecisionTreeClassifier()
dtree.fit(X_train, y_train)
y_pred_tree = dtree.predict(X_test)
print('Decision Tree Report:\n', classification_report(y_test, y_pred_tree))
joblib.dump(dtree, '../models/decision_tree_model.pkl')

Decision Tree Report:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      0.50      0.67         2

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3



['../models/decision_tree_model.pkl']

In [13]:
# 12. Logistic Regression
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
y_pred_log = log_reg.predict(X_test)
print('Logistic Regression Report:\n', classification_report(y_test, y_pred_log))
joblib.dump(log_reg, '../models/logistic_regression_model.pkl')

Logistic Regression Report:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      0.50      0.67         2

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3



['../models/logistic_regression_model.pkl']

In [14]:
# 13. Save Scaler
joblib.dump(scaler, '../models/scaler.pkl')

print("Models trained & saved successfully!")

Models trained & saved successfully!


In [18]:
import os
import joblib

# ✅ Create models folder if needed
if not os.path.exists('models'):
    os.makedirs('models')

# ✅ Save your trained models
joblib.dump(knn, 'models/knn_model.pkl')
joblib.dump(kmeans, 'models/kmeans_model.pkl')
joblib.dump(scaler, 'models/scaler.pkl')

print("All models saved successfully in /models/")


All models saved successfully in /models/
