In [17]:
pip install imbalanced-learn

Note: you may need to restart the kernel to use updated packages.


In [20]:
pip install --upgrade scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [21]:
pip install --upgrade imbalanced-learn

Collecting imbalanced-learn
  Obtaining dependency information for imbalanced-learn from https://files.pythonhosted.org/packages/9d/41/721fec82606242a2072ee909086ff918dfad7d0199a9dfd4928df9c72494/imbalanced_learn-0.13.0-py3-none-any.whl.metadata
  Downloading imbalanced_learn-0.13.0-py3-none-any.whl.metadata (8.8 kB)
Collecting sklearn-compat<1,>=0.1 (from imbalanced-learn)
  Obtaining dependency information for sklearn-compat<1,>=0.1 from https://files.pythonhosted.org/packages/f0/a8/ad69cf130fbd017660cdd64abbef3f28135d9e2e15fe3002e03c5be0ca38/sklearn_compat-0.1.3-py3-none-any.whl.metadata
  Downloading sklearn_compat-0.1.3-py3-none-any.whl.metadata (18 kB)
Downloading imbalanced_learn-0.13.0-py3-none-any.whl (238 kB)
   ---------------------------------------- 0.0/238.4 kB ? eta -:--:--
   ------ -------------------------------- 41.0/238.4 kB 991.0 kB/s eta 0:00:01
   ---------------------------------------  235.5/238.4 kB 2.9 MB/s eta 0:00:01
   -------------------------------------

# TRAIN

In [5]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import pickle
import os

# Load the raw event log.
# The file location and the row cap are named settings so the notebook can be
# run on another machine without editing the read_csv call: set the
# ECOM_DATA_PATH environment variable to point at a different copy of the CSV.
DATA_PATH = os.environ.get(
    "ECOM_DATA_PATH",
    r"C:\Users\mehme\OneDrive\Desktop\w1991607_MehmetSezer_E.C\ecom\store\data\ecommerce_behavior_data.csv",
)
N_ROWS = 10000  # number of rows read from the top of the file

df = pd.read_csv(
    DATA_PATH,
    nrows=N_ROWS,
    usecols=['user_id', 'category_code', 'event_type']
)

# category
# (prefix of the raw category_code, storefront category it belongs to)
_CATEGORY_BY_PREFIX = (
    ('electronics', 'Electronics'),
    ('appliances', 'Home&Living'),
    ('furniture', 'Home&Living'),
    ('apparel', 'Fashion'),
    ('sport', 'Sports&Outdoor'),
    ('toys', 'Toys&Hobbies'),
    ('automotive', 'Automotive&DIY'),
    ('beauty', 'Health&Personal Care'),
    ('books', 'Books&Stationery'),
)


def map_category(category_code):
    """Translate a raw category code into a storefront category name.

    Args:
        category_code: Raw code string, or a missing value (NaN / None).

    Returns:
        The storefront category whose prefix the code starts with, or
        'unknown' when the code is missing or matches no known prefix.
    """
    if not pd.isna(category_code):
        for prefix, label in _CATEGORY_BY_PREFIX:
            if category_code.startswith(prefix):
                return label
    return 'unknown'

df['mapped_category'] = df['category_code'].apply(map_category)

# Drop events whose category could not be mapped
df = df[df['mapped_category'] != 'unknown']

# One row per user: number of events per event_type, plus the category the
# user interacted with most often
user_features = df.groupby(['user_id', 'event_type']).size().unstack(fill_value=0)
user_features['favorite_category'] = df.groupby('user_id')['mapped_category'].agg(lambda x: x.value_counts().idxmax())

# Fill in any missing event_type columns
for col in ['view', 'cart', 'purchase']:
    if col not in user_features.columns:
        user_features[col] = 0

# Separate features
X = user_features[['view', 'cart', 'purchase']]
y = user_features['favorite_category']

# SMOTE
# SMOTE searches for k_neighbors neighbours inside every class it oversamples,
# so each class needs more than k_neighbors members, and there must be at least
# two classes. Checking only the total number of rows lets a rare category
# crash fit_resample, so the guard is based on the smallest class instead and
# k_neighbors is reduced when a class is small (it stays at the default of 5
# whenever every class has at least 6 users).
class_sizes = y.value_counts()
smallest_class = int(class_sizes.min()) if len(class_sizes) else 0
if len(class_sizes) >= 2 and smallest_class >= 2:
    smote = SMOTE(random_state=42, k_neighbors=min(5, smallest_class - 1))
    X_resampled, y_resampled = smote.fit_resample(X, y)
    user_features_balanced = pd.DataFrame(X_resampled, columns=['view', 'cart', 'purchase'])
    user_features_balanced['favorite_category'] = y_resampled.values
else:
    # Not enough data to synthesize samples; continue with the original users
    user_features_balanced = user_features.copy()

# Let's standardize the features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(user_features_balanced[['view', 'cart', 'purchase']])

# Clustering with KMeans
kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
user_features_balanced['cluster'] = kmeans.fit_predict(scaled_features)

# Global popular categories, used to pad a recommendation list that is too short
# NOTE(review): 'Automotive&DIY' can be produced by map_category but is not
# listed here - confirm whether that omission is intentional.
global_popular_categories = ['Electronics', 'Home&Living', 'Fashion', 'Sports&Outdoor', 'Toys&Hobbies', 'Health&Personal Care', 'Books&Stationery']

# Category suggestion function for the user
def recommend_categories_for_user(view, cart, purchase, favorite_category,
                                  kmeans_model=None, scaler_model=None,
                                  max_categories=7):
    """Rank categories for a user described by their activity counts.

    The user is assigned to a KMeans cluster; the favorite categories of the
    users in that cluster are ranked by frequency, and the list is padded from
    `global_popular_categories` when the cluster offers too few.

    Args:
        view: Number of products viewed.
        cart: Number of products added to the cart.
        purchase: Number of products purchased.
        favorite_category: The user's stated favorite category. It is put
            first when the cluster ranking does not already contain it;
            otherwise it keeps its rank within the cluster.
        kmeans_model: Fitted clustering model to use. Defaults to the
            module-level `kmeans`.
        scaler_model: Fitted scaler to use. Defaults to the module-level
            `scaler`.
        max_categories: Maximum number of categories returned.

    Returns:
        A list of distinct category names, at most `max_categories` long.
    """
    # Fall back to the models fitted earlier in this notebook
    active_kmeans = kmeans if kmeans_model is None else kmeans_model
    active_scaler = scaler if scaler_model is None else scaler_model

    # The scaler was fitted on a DataFrame with named columns; passing the same
    # column names avoids scikit-learn's "X does not have valid feature names"
    # warning that a bare nested list triggers.
    user_row = pd.DataFrame([[view, cart, purchase]], columns=['view', 'cart', 'purchase'])
    user_input = active_scaler.transform(user_row)
    cluster = active_kmeans.predict(user_input)[0]

    # Favorite categories of the users in the same cluster, most frequent first
    cluster_users = user_features_balanced[user_features_balanced['cluster'] == cluster]
    top_categories = cluster_users['favorite_category'].value_counts().index.tolist()

    # Let's highlight the favorite category
    if favorite_category not in top_categories:
        top_categories.insert(0, favorite_category)

    # Cluster ranking first, then the global list as filler; dict.fromkeys
    # removes repeats while keeping the first occurrence of each category.
    candidates = top_categories + list(global_popular_categories)
    return list(dict.fromkeys(candidates))[:max_categories]

#Get data from user
def _read_count(prompt):
    """Ask with `prompt` until the reply is a non-negative whole number."""
    while True:
        reply = input(prompt)
        try:
            value = int(reply)
        except ValueError:
            print("Please enter a whole number.")
            continue
        if value < 0:
            print("Please enter a number that is zero or greater.")
            continue
        return value


def get_user_input():
    """Interactively collect the activity counts and favorite category.

    Invalid replies (non-numeric or negative counts, categories that are not in
    the printed list) are rejected and asked again instead of raising or being
    passed on. The category is matched ignoring case and surrounding spaces
    and returned in its canonical spelling.

    Returns:
        A tuple `(view, cart, purchase, favorite_category)` where the first
        three items are ints and the last is one of the listed category names.
    """
    valid_categories = (
        "Electronics", "Home&Living", "Sports&Outdoor", "Toys&Hobbies",
        "Fashion", "Health&Personal Care", "Books&Stationery",
    )
    canonical_by_folded = {name.casefold(): name for name in valid_categories}

    print("Please enter the following information: ")
    view = _read_count("How many products viewed?: ")
    cart = _read_count("How many products did you add to your cart?: ")
    purchase = _read_count("How many products did he buy?: ")

    print("\nEnter one of the following categories: ")
    print(", ".join(valid_categories))
    while True:
        reply = input("Enter your favorite category: ").strip().casefold()
        if reply in canonical_by_folded:
            favorite_category = canonical_by_folded[reply]
            break
        print("Unknown category, please choose one from the list above.")

    return view, cart, purchase, favorite_category

# run
# Ask the visitor for their numbers, then rank the categories for them.
user_answers = get_user_input()
view, cart, purchase, favorite_category = user_answers
recommended_categories = recommend_categories_for_user(*user_answers)

# Numbered list, starting at 1
print("\nRecommended categories for the user:")
for idx, category in enumerate(recommended_categories, start=1):
    print(f"{idx}. {category}")


def save_model(kmeans_model, scaler_model, directory="models"):
    """Pickle the clustering model and the scaler together into one file.

    The file is written to `<directory>/ecommerce_recommendation_model.pkl` as
    a dict with the keys 'kmeans' and 'scaler'; an existing file is
    overwritten.

    Args:
        kmeans_model: Fitted clustering model to store.
        scaler_model: Fitted scaler to store.
        directory: Target folder; created (with parents) when missing. An
            empty string means the current working directory.
    """
    # exist_ok=True creates the folder only when it is missing, without a
    # separate exists() check that could race with another process. An empty
    # path is skipped because os.makedirs('') raises.
    if directory:
        os.makedirs(directory, exist_ok=True)

    # saving
    model_filename = os.path.join(directory, "ecommerce_recommendation_model.pkl")
    with open(model_filename, 'wb') as f:
        pickle.dump({'kmeans': kmeans_model, 'scaler': scaler_model}, f)
    print(f"Model ve scaler '{model_filename}' saved.")

# Persist the fitted clustering model and scaler to the "models" folder
save_model(kmeans, scaler, directory="models")

# Function to load the model
def load_model(directory="models"):
    """Read the pickled model bundle back from `directory`.

    Expects `<directory>/ecommerce_recommendation_model.pkl` to hold a mapping
    with the keys 'kmeans' and 'scaler'.

    Args:
        directory: Folder that contains the pickle file.

    Returns:
        A tuple `(kmeans, scaler)` taken from the stored mapping.
    """
    pickle_path = os.path.join(directory, "ecommerce_recommendation_model.pkl")

    # SECURITY: unpickling can execute arbitrary code, so only load files that
    # were written by this project.
    with open(pickle_path, 'rb') as handle:
        bundle = pickle.load(handle)

    kmeans_model = bundle['kmeans']
    scaler_model = bundle['scaler']
    return kmeans_model, scaler_model

# load the model
kmeans_loaded, scaler_loaded = load_model("models")
print("Model and scaler loaded successfully.")

# recommend_categories_for_user reads the module-level `kmeans` and `scaler`.
# Point those names at the objects that were just read from disk; otherwise the
# call below would reuse the in-memory models and the saved file would never
# actually be exercised.
kmeans, scaler = kmeans_loaded, scaler_loaded
recommended_categories = recommend_categories_for_user(view, cart, purchase, favorite_category)

print("\nRecommended categories for the user (uploaded model):")
for idx, category in enumerate(recommended_categories, 1):
    print(f"{idx}. {category}")



Please enter the following information: 
How many products viewed?: 50
How many products did you add to your cart?: 10
How many products did he buy?: 3

Enter one of the following categories: 
Electronics, Home&Living, Sports&Outdoor, Toys&Hobbies, Fashion, Health&Personal Care, Books&Stationery
Enter your favorite category: Fashion

Recommended categories for the user:
1. Fashion
2. Electronics
3. Home&Living
4. Sports&Outdoor
5. Toys&Hobbies
6. Health&Personal Care
7. Books&Stationery
Model ve scaler 'models\ecommerce_recommendation_model.pkl' saved.
Model and scaler loaded successfully.

Recommended categories for the user (uploaded model):
1. Fashion
2. Electronics
3. Home&Living
4. Sports&Outdoor
5. Toys&Hobbies
6. Health&Personal Care
7. Books&Stationery


