In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv("/content/drive/MyDrive/NextBuy : your perfect purchase/ratings_Beauty.csv")
df = df.drop('Timestamp', axis=1) #Dropping timestamp

In [4]:
counts = df['UserId'].value_counts()
df_filtered = df[df['UserId'].isin(counts[counts >= 50].index)]
print('The number of observations in the final data =', len(df_filtered))
print('Number of unique USERS in the final data = ', df_filtered['UserId'].nunique())
print('Number of unique PRODUCTS in the final data = ', df_filtered['ProductId'].nunique())

The number of observations in the final data = 29559
Number of unique USERS in the final data =  361
Number of unique PRODUCTS in the final data =  17228


In [7]:
pip install scikit-surprise


Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━[0m [32m112.6/154.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357279 sha256=777e8e059d4e82897d3ba05e838fdb58bd14b10925255e6568d23a92625d558f
  Stored in directory: /root/.cache/pip/wheels/4b/3f/d

In [27]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
import random


# Define the Reader object for Surprise
reader = Reader(rating_scale=(1, 5))

# Load data into Surprise Dataset
surprise_data = Dataset.load_from_df(df_filtered[["UserId", "ProductId", "Rating"]], reader)

# Split the dataset into training and testing
trainset, testset = train_test_split(surprise_data, test_size=0.2, random_state=42)

# Define the SVD model
model = SVD()

# Train the model
model.fit(trainset)

# Evaluate the model
predictions = model.test(testset)
print(f"RMSE: {accuracy.rmse(predictions)}")

# Function to recommend products for a given UserId
def recommend_products(user_id, model, trainset, n=5):
    # Get all product IDs
    product_ids = df["ProductId"].unique()

    # Predict ratings for all products not rated by the user
    user_products = trainset.ur[trainset.to_inner_uid(user_id)] if user_id in trainset.ur else []
    rated_products = [trainset.to_raw_iid(iid) for iid, _ in user_products]
    unrated_products = [pid for pid in product_ids if pid not in rated_products]

    predictions = [
        (product, model.predict(user_id, product).est) for product in unrated_products
    ]

    # Sort by predicted rating
    predictions.sort(key=lambda x: x[1], reverse=True)
    return predictions[:n]

# Recommend top 5 products for user 'A22ZFXQE8AWPEP'
user_id = "A22ZFXQE8AWPEP"
recommendations = recommend_products(user_id, model, trainset, n=5)
print(f"Top recommendations for user {user_id}: {recommendations}")


RMSE: 1.4795
RMSE: 1.4794653221143623
Top recommendations for user A22ZFXQE8AWPEP: [('0205616461', 2.9175), ('0558925278', 2.9175), ('0733001998', 2.9175), ('0737104473', 2.9175), ('0762451459', 2.9175)]


In [26]:
# Generate predictions for the testset
predictions = model.test(testset)

# Accuracy Metrics: RMSE and MAE
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)

# Precision Calculation (Precision@k)
def precision_at_k(predictions, k=10, threshold=1):
    """Compute precision at k for the predictions."""
    user_est_true = {}
    for uid, iid, true_r, est, _ in predictions:
        user_est_true.setdefault(uid, []).append((est, true_r))

    precisions = []
    for uid, user_ratings in user_est_true.items():
        # Sort predictions by estimated rating in descending order
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        # Take top k predictions
        top_k = user_ratings[:k]
        # Calculate precision
        relevant = sum((true_r >= threshold) for _, true_r in top_k)
        precisions.append(relevant / k)

    return sum(precisions) / len(precisions)

# Calculate precision@k
precision = precision_at_k(predictions, k=10)

# Accuracy Calculation (Hit Ratio or Top-k Accuracy)
def accuracy_at_k(predictions, k=10, threshold=1):
    """Compute accuracy at k for the predictions (hit ratio)."""
    user_est_true = {}
    for uid, iid, true_r, est, _ in predictions:
        user_est_true.setdefault(uid, []).append((est, true_r))

    hits = 0
    for uid, user_ratings in user_est_true.items():
        # Sort predictions by estimated rating in descending order
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        # Take top k predictions
        top_k = user_ratings[:k]
        # Count hits (relevant product in the top k)
        if any(true_r >= threshold for _, true_r in top_k):
            hits += 1

    # Return hit ratio (accuracy at k)
    return hits / len(user_est_true)

# Calculate accuracy@k
accuracy_k = accuracy_at_k(predictions, k=10)

# Print results
print(f"Precision@10: {precision}")
print(f"Accuracy@10 (Hit Ratio): {accuracy_k}")


RMSE: 1.4805
MAE:  1.2759
Precision@10: 0.8800000000000001
Accuracy@10 (Hit Ratio): 1.0


In [29]:
pip install implicit

Collecting implicit
  Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl.metadata (6.1 kB)
Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl (8.9 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/8.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/8.9 MB[0m [31m5.1 MB/s[0m eta [36m0:00:02[0m[2K   [91m━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/8.9 MB[0m [31m22.9 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━[0m [32m5.4/8.9 MB[0m [31m52.0 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m8.9/8.9 MB[0m [31m70.8 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m8.9/8.9 MB[0m [31m70.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9

In [32]:
counts = df['UserId'].value_counts()
df_final = df[df['UserId'].isin(counts[counts >= 50].index)]
print('The number of observations in the final data =', len(df_final))
print('Number of unique USERS in the final data = ', df_final['UserId'].nunique())
print('Number of unique PRODUCTS in the final data = ', df_final['ProductId'].nunique())

#Creating the interaction matrix of products and users based on ratings and replacing NaN value with 0
final_ratings_matrix = df_final.pivot(index = 'UserId', columns ='ProductId', values = 'Rating').fillna(0)
print('Shape of final_ratings_matrix: ', final_ratings_matrix.shape)

#Finding the number of non-zero entries in the interaction matrix
given_num_of_ratings = np.count_nonzero(final_ratings_matrix)
print('given_num_of_ratings = ', given_num_of_ratings)

#Finding the possible number of ratings as per the number of users and products
possible_num_of_ratings = final_ratings_matrix.shape[0] * final_ratings_matrix.shape[1]
print('possible_num_of_ratings = ', possible_num_of_ratings)

#Density of ratings
density = (given_num_of_ratings/possible_num_of_ratings)
density *= 100
print ('density: {:4.2f}%'.format(density))

final_ratings_matrix.head()

The number of observations in the final data = 29559
Number of unique USERS in the final data =  361
Number of unique PRODUCTS in the final data =  17228
Shape of final_ratings_matrix:  (361, 17228)
given_num_of_ratings =  29559
possible_num_of_ratings =  6219308
density: 0.48%


ProductId,0762451459,1304482596,1304482685,1304495396,1304511111,1304511138,1304622665,1304624498,1304651029,130466578X,...,B00L3LB0IG,B00L3LBUGI,B00L4I5PJS,B00L5BXG36,B00L5KTZ0K,B00L7BX4FE,B00LG63DOM,B00LH81A0I,B00LLPT4HI,B00LMOT7X4
UserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A100WO06OQR8BQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A1047EDJ84IMAS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A10G136JEISLVR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A10Y59HW4O47N0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A110PQTEI6THU7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
df_filtered = df_final

In [42]:
import implicit
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Assuming df_filtered is preloaded with columns ["UserId", "ProductId", "Rating"]

# Convert DataFrame to matrix format (user-item matrix)
df_filtered['Rating'] = df_filtered['Rating'].astype(float)  # Ensure ratings are floats

# Prepare the data for implicit (convert UserId and ProductId to index)
user_mapping = {user: idx for idx, user in enumerate(df_filtered['UserId'].unique())}
item_mapping = {item: idx for idx, item in enumerate(df_filtered['ProductId'].unique())}

df_filtered['UserId'] = df_filtered['UserId'].map(user_mapping)
df_filtered['ProductId'] = df_filtered['ProductId'].map(item_mapping)

# Create the user-item interaction matrix (matrix of ratings)
user_item_matrix = df_filtered.pivot(index='UserId', columns='ProductId', values='Rating').fillna(0)

# Convert to a sparse matrix (necessary for implicit)
from scipy.sparse import csr_matrix
user_item_matrix = csr_matrix(user_item_matrix.values)

# Train the ALS model using implicit
model = implicit.als.AlternatingLeastSquares(factors=50, regularization=0.1, iterations=30)
model.fit(user_item_matrix)

# Generate top-N recommendations for each user using the ALS model
def get_top_n_recommendations(model, user_item_matrix, n=100):
    recommendations = {}

    # For each user, get the top n recommendations
    for user_id in range(user_item_matrix.shape[0]):
        recommended_items = model.recommend(user_id, user_item_matrix[user_id], N=n)
        recommendations[user_id] = [item[0] for item in recommended_items]  # Extract product IDs

    return recommendations

# Get top 10 recommendations for each user
top_n_recommendations = get_top_n_recommendations(model, user_item_matrix, n=100)

# Evaluate the predictions: Precision@k, Accuracy@k, and confusion matrix
def precision_at_k(recommendations, df_filtered, k=100, threshold=1.0):
    """Calculate precision at k"""
    precisions = []
    for user_id, recommended_items in recommendations.items():
        # Get actual ratings for the user
        user_ratings = df_filtered[df_filtered['UserId'] == user_id]

        # Check how many of the top-k recommended items are relevant (i.e., rating >= threshold)
        relevant_items = sum((user_ratings[user_ratings['ProductId'] == item]['Rating'].values >= threshold).any() for item in recommended_items)
        precisions.append(relevant_items / k)

    return np.mean(precisions)

# Calculate precision@k
precision = precision_at_k(top_n_recommendations, df_filtered, k=100, threshold=1.0)

# Accuracy Calculation (Hit Ratio or Top-k Accuracy)
def accuracy_at_k(recommendations, df_filtered, k=100, threshold=1.0):
    """Compute accuracy at k for the predictions (hit ratio)."""
    hits = 0
    for user_id, recommended_items in recommendations.items():
        # Get actual ratings for the user
        user_ratings = df_filtered[df_filtered['UserId'] == user_id]

        # Check if any recommended item has a rating >= threshold
        if any(user_ratings[user_ratings['ProductId'] == item]['Rating'].values >= threshold for item in recommended_items):
            hits += 1

    return hits / len(recommendations)

# Calculate accuracy@k
accuracy_k = accuracy_at_k(top_n_recommendations, df_filtered, k=100, threshold=1.0)

# Confusion Matrix Plot
def plot_confusion_matrix(recommendations, df_filtered, threshold=1.0):
    """Generate a confusion matrix plot."""
    y_true = []
    y_pred = []

    # Iterate over each user and their top recommendations
    for user_id, recommended_items in recommendations.items():
        user_ratings = df_filtered[df_filtered['UserId'] == user_id]

        for item in recommended_items:
            # Check if the item has been rated and if the rating is above the threshold
            true_rating = user_ratings[user_ratings['ProductId'] == item]['Rating'].values
            if true_rating.size > 0:
                true_r = true_rating[0]
                y_true.append(1 if true_r >= threshold else 0)  # 1 if relevant, 0 if not
                y_pred.append(1)  # 1 if predicted relevant (since all recommendations are relevant)

    # Check if y_true and y_pred are non-empty before calculating confusion matrix
    if len(y_true) > 0 and len(y_pred) > 0:
        cm = confusion_matrix(y_true, y_pred)

        # Plot confusion matrix
        plt.figure(figsize=(6, 6))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Not Relevant", "Relevant"], yticklabels=["Not Relevant", "Relevant"])
        plt.xlabel("Predicted")
        plt.ylabel("True")
        plt.title("Confusion Matrix for Recommendations")
        plt.show()
    else:
        print("No relevant items found for confusion matrix.")

# Plot confusion matrix for the recommendations
plot_confusion_matrix(top_n_recommendations, df_filtered, threshold=1)

# Print Results
print(f"Precision@10: {precision}")
print(f"Accuracy@10 (Hit Ratio): {accuracy_k}")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Rating'] = df_filtered['Rating'].astype(float)  # Ensure ratings are floats
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['UserId'] = df_filtered['UserId'].map(user_mapping)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['ProductId'] = df_filtered['ProductId'].

  0%|          | 0/30 [00:00<?, ?it/s]

  if any(user_ratings[user_ratings['ProductId'] == item]['Rating'].values >= threshold for item in recommended_items):


No relevant items found for confusion matrix.
Precision@10: 0.0
Accuracy@10 (Hit Ratio): 0.0


In [43]:
import implicit
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Assuming df_filtered is preloaded with columns ["UserId", "ProductId", "Rating"]

# Convert DataFrame to matrix format (user-item matrix)
df_filtered['Rating'] = df_filtered['Rating'].astype(float)  # Ensure ratings are floats

# Prepare the data for implicit (convert UserId and ProductId to index)
user_mapping = {user: idx for idx, user in enumerate(df_filtered['UserId'].unique())}
item_mapping = {item: idx for idx, item in enumerate(df_filtered['ProductId'].unique())}

df_filtered['UserId'] = df_filtered['UserId'].map(user_mapping)
df_filtered['ProductId'] = df_filtered['ProductId'].map(item_mapping)

# Create the user-item interaction matrix (matrix of ratings)
user_item_matrix = df_filtered.pivot(index='UserId', columns='ProductId', values='Rating').fillna(0)

# Convert to a sparse matrix (necessary for implicit)
from scipy.sparse import csr_matrix
user_item_matrix = csr_matrix(user_item_matrix.values)

# Train the ALS model using implicit
model = implicit.als.AlternatingLeastSquares(factors=50, regularization=0.1, iterations=30)
model.fit(user_item_matrix)

# Generate top-N recommendations for each user using the ALS model
def get_top_n_recommendations(model, user_item_matrix, n=100):
    recommendations = {}

    # For each user, get the top n recommendations
    for user_id in range(user_item_matrix.shape[0]):
        recommended_items = model.recommend(user_id, user_item_matrix[user_id], N=n)
        recommendations[user_id] = [item[0] for item in recommended_items]  # Extract product IDs

    return recommendations

# Get top 100 recommendations for each user
top_n_recommendations = get_top_n_recommendations(model, user_item_matrix, n=100)

# Evaluate the predictions: Precision@k, Accuracy@k, and confusion matrix
def precision_at_k(recommendations, df_filtered, k=100, threshold=4.0):
    """Calculate precision at k"""
    precisions = []
    for user_id, recommended_items in recommendations.items():
        # Get actual ratings for the user
        user_ratings = df_filtered[df_filtered['UserId'] == user_id]

        # Check how many of the top-k recommended items are relevant (i.e., rating >= threshold)
        relevant_items = sum((user_ratings[user_ratings['ProductId'] == item]['Rating'].values >= threshold).any() for item in recommended_items)
        precisions.append(relevant_items / k)

    return np.mean(precisions)

# Calculate precision@k
precision = precision_at_k(top_n_recommendations, df_filtered, k=100, threshold=4.0)

# Accuracy Calculation (Hit Ratio or Top-k Accuracy)
def accuracy_at_k(recommendations, df_filtered, k=100, threshold=4.0):
    """Compute accuracy at k for the predictions (hit ratio)."""
    hits = 0
    for user_id, recommended_items in recommendations.items():
        # Get actual ratings for the user
        user_ratings = df_filtered[df_filtered['UserId'] == user_id]

        # Check if any recommended item has a rating >= threshold
        if any(user_ratings[user_ratings['ProductId'] == item]['Rating'].values >= threshold for item in recommended_items):
            hits += 1

    return hits / len(recommendations)

# Calculate accuracy@k
accuracy_k = accuracy_at_k(top_n_recommendations, df_filtered, k=100, threshold=4.0)

# Confusion Matrix Plot
def plot_confusion_matrix(recommendations, df_filtered, threshold=4.0):
    """Generate a confusion matrix plot."""
    y_true = []
    y_pred = []

    # Iterate over each user and their top recommendations
    for user_id, recommended_items in recommendations.items():
        user_ratings = df_filtered[df_filtered['UserId'] == user_id]

        for item in recommended_items:
            # Check if the item has been rated and if the rating is above the threshold
            true_rating = user_ratings[user_ratings['ProductId'] == item]['Rating'].values
            if true_rating.size > 0:
                true_r = true_rating[0]
                y_true.append(1 if true_r >= threshold else 0)  # 1 if relevant, 0 if not
                y_pred.append(1)  # 1 if predicted relevant (since all recommendations are relevant)

    # Check if y_true and y_pred are non-empty before calculating confusion matrix
    if len(y_true) > 0 and len(y_pred) > 0:
        cm = confusion_matrix(y_true, y_pred)

        # Plot confusion matrix
        plt.figure(figsize=(6, 6))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Not Relevant", "Relevant"], yticklabels=["Not Relevant", "Relevant"])
        plt.xlabel("Predicted")
        plt.ylabel("True")
        plt.title("Confusion Matrix for Recommendations")
        plt.show()
    else:
        print("No relevant items found for confusion matrix.")

# Plot confusion matrix for the recommendations
plot_confusion_matrix(top_n_recommendations, df_filtered, threshold=4.0)

# Print Results
print(f"Precision@100: {precision}")
print(f"Accuracy@100 (Hit Ratio): {accuracy_k}")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Rating'] = df_filtered['Rating'].astype(float)  # Ensure ratings are floats
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['UserId'] = df_filtered['UserId'].map(user_mapping)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['ProductId'] = df_filtered['ProductId'].

  0%|          | 0/30 [00:00<?, ?it/s]

  if any(user_ratings[user_ratings['ProductId'] == item]['Rating'].values >= threshold for item in recommended_items):


No relevant items found for confusion matrix.
Precision@100: 0.0
Accuracy@100 (Hit Ratio): 0.0


In [73]:
import implicit
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Assuming df_filtered is preloaded with columns ["UserId", "ProductId", "Rating"]

# Convert DataFrame to matrix format (user-item matrix)
df_filtered['Rating'] = df_filtered['Rating'].astype(float)  # Ensure ratings are floats

# Prepare the data for implicit (convert UserId and ProductId to index)
user_mapping = {user: idx for idx, user in enumerate(df_filtered['UserId'].unique())}
item_mapping = {item: idx for idx, item in enumerate(df_filtered['ProductId'].unique())}

df_filtered['UserId'] = df_filtered['UserId'].map(user_mapping)
df_filtered['ProductId'] = df_filtered['ProductId'].map(item_mapping)

# Create the user-item interaction matrix (matrix of ratings)
user_item_matrix = df_filtered.pivot(index='UserId', columns='ProductId', values='Rating').fillna(0)

# Convert to a sparse matrix (necessary for implicit)
from scipy.sparse import csr_matrix
user_item_matrix = csr_matrix(user_item_matrix.values)

# Train the ALS model using implicit
model = implicit.als.AlternatingLeastSquares(factors=50, regularization=0.1, iterations=300)
model.fit(user_item_matrix)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Rating'] = df_filtered['Rating'].astype(float)  # Ensure ratings are floats
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['UserId'] = df_filtered['UserId'].map(user_mapping)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['ProductId'] = df_filtered['ProductId'].

  0%|          | 0/300 [00:00<?, ?it/s]

In [69]:
# Generate top-N recommendations for each user using the ALS model
def get_top_n_recommendations(model, user_item_matrix, n=100):
    recommendations = {}

    # For each user, get the top n recommendations
    for user_id in range(user_item_matrix.shape[0]):
        recommended_items = model.recommend(user_id, user_item_matrix[user_id], N=n)
        recommendations[user_id] = [item[0] for item in recommended_items]  # Extract product IDs

    return recommendations

# Get top 5 recommendations for each user
top_n_recommendations = get_top_n_recommendations(model, user_item_matrix, n=5)
print(top_n_recommendations )

{0: [14428, 0.6274052], 1: [428, 0.21543236], 2: [10400, 0.68802005], 3: [7613, 0.5074364], 4: [361, 0.42129558], 5: [509, 0.38867068], 6: [6587, 0.75204396], 7: [7616, 0.71334815], 8: [11015, 0.42829704], 9: [4498, 0.616676], 10: [7613, 0.41019118], 11: [4961, 0.13681795], 12: [3284, 0.4411508], 13: [428, 0.5541724], 14: [6926, 0.52293545], 15: [14596, 0.35026467], 16: [12582, 0.79402816], 17: [7164, 0.5173761], 18: [14594, 0.7077342], 19: [1111, 0.79471684], 20: [1969, 0.6142772], 21: [361, 0.57589734], 22: [8229, 0.54617095], 23: [428, 0.40278733], 24: [14601, 1.0317107], 25: [5138, 0.390365], 26: [16573, 0.8133471], 27: [4028, 0.4660818], 28: [14421, 1.0008128], 29: [14585, 0.5580976], 30: [3680, 0.6309422], 31: [11259, 0.51106596], 32: [9129, 0.69173956], 33: [13024, 0.47320268], 34: [9606, 0.5870061], 35: [6970, 0.4348214], 36: [12267, 0.6422789], 37: [14582, 0.80249894], 38: [1674, 0.3999472], 39: [8847, 0.56084675], 40: [1030, 0.47897455], 41: [7616, 0.5325122], 42: [6329, 0.57

Score: This is the predicted rating or preference score for the user-product pair, indicating how much the model believes the user will like the item. This score is typically based on the latent factors the model has learned during training (such as user and product preferences).
<br>

Score Interpretation:
Higher score: Indicates that the product is more relevant or preferable for the user.<br>
Lower score: Indicates that the product is less relevant or less preferred for the user.

In [78]:
import random

# Function to get recommendations for a specific user ID
def get_user_recommendations(user_id, model, user_item_matrix, n=2):
    """Generate top-N recommendations for a specific user."""
    recommended_items = model.recommend(user_id, user_item_matrix[user_id], N=n)
    return recommended_items

# Select two random user IDs from the dataset
random_user_ids = random.sample(df_filtered['UserId'].unique().tolist(), 4)

# Get recommendations for each random user
for user_id in random_user_ids:
    recommended_items = get_user_recommendations(user_id, model, user_item_matrix, n=2)  # Request recommendation
    print(f"Recommendations for User {user_id}:")
    for rec in recommended_items:
        item_id, score = rec[0], rec[1]  # Unpack the item ID and score from the tuple

        # Check if the item_id exists in the item_mapping and handle if not
        if item_id in item_mapping.values():
            # Reverse the product ID mapping to get the original ProductId
            original_product_id = [key for key, value in item_mapping.items() if value == item_id][0]
            print(f"Product ID: {original_product_id}")
    print("\n")


Recommendations for User 31:
Product ID: 4161


Recommendations for User 357:
Product ID: 12123


Recommendations for User 206:
Product ID: 11031


Recommendations for User 190:
Product ID: 4600


