In [1]:
import numpy as np
import pandas as pd
import pickle
import re
import string
import emoji

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.model_selection import train_test_split 
from sklearn.metrics import mean_squared_error, mean_absolute_error, precision_score, recall_score, f1_score
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.preprocessing import MinMaxScaler

ModuleNotFoundError: No module named 'emoji'

In [None]:
# Location of the Excel workbook with the input records, relative to the working directory.
data_path = './assets/data_dummy.xlsx'
# Later cells read the columns user_id, laundry_id, rating and sentiment_score from this frame.
data = pd.read_excel(data_path)

# Preview a reproducible random sample of 20 rows (fixed random_state) as a sanity check.
display(data.sample(n=20, random_state=5))

In [None]:
def createPivot(data, fillVal = None):
  """Build a user x laundry rating matrix from long-format rows.

  Rows are indexed by `user_id`, columns by `laundry_id`, and each cell holds
  the `rating` for that pair (pivot_table's default aggregation, the mean, is
  applied when a pair occurs more than once).

  Args:
    data: DataFrame with `user_id`, `laundry_id` and `rating` columns.
    fillVal: Value written into cells with no rating. When None (default)
      those cells are left as NaN.

  Returns:
    The pivoted DataFrame.
  """
  ratings_matrix = data.pivot_table(values='rating', index='user_id', columns='laundry_id')
  if fillVal is None:
    return ratings_matrix
  return ratings_matrix.fillna(fillVal)

In [None]:
# Hold-out configuration: fixed seed, 30% of the rows go to the test split
random_state = 10
test_size = 0.30

# Split data into train and test sets, then keep only the test rows whose
# user also appears in the training split
train, test = train_test_split(data, random_state=random_state, test_size=test_size)
test = test.loc[test['user_id'].isin(train['user_id'])]

# User x laundry rating matrices with unrated cells filled with 0
train_pivot, test_pivot = createPivot(train, 0), createPivot(test, 0)

# Training mask: 0 where the user already rated the laundry (rating >= 1),
# 1 everywhere else, including pairs that never occur in the training split
train_binary = createPivot(
  train.assign(rating=np.where(train['rating'] >= 1, 0, 1)),
  1,
)

# Test mask: 1 where the user rated the laundry in the test split, 0 elsewhere
test_binary = createPivot(
  test.assign(rating=np.where(test['rating'] >= 1, 1, 0)),
  0,
)

In [None]:
# Per-user mean rating, computed over rated cells only (unrated cells are NaN)
mean = np.nanmean(createPivot(train), axis=1)

# Centre every user's ratings on that user's own mean; unrated cells become 0
train_subtracted_by_mean = createPivot(train).sub(mean, axis=0).fillna(0)

# User-user similarity: cosine similarity of the mean-centred rating vectors
correlation = 1 - pairwise_distances(train_subtracted_by_mean, metric='cosine')
correlation[np.isnan(correlation)] = 0

# Label both axes of the similarity matrix with the user ids
correlation_df = pd.DataFrame(
  correlation,
  index=train_subtracted_by_mean.index,
  columns=train_subtracted_by_mean.index.tolist(),
)

correlation_df.shape, train_pivot.shape

In [None]:
# Similarity-weighted sum of the training ratings for every (user, laundry) pair
predicted_rating = correlation.dot(train_pivot.to_numpy())

# Zero out the laundries each user already rated, so that only unseen items
# keep a score; the result carries train_binary's user/laundry labels
final_rating = train_binary * predicted_rating

final_rating

In [None]:
# Filter user correlation only for users in the test set.
# Rows AND columns are selected with test_pivot.index so that their order is
# exactly the row order of test_pivot: np.dot below multiplies positionally,
# so an arbitrary order (such as the iteration order of a set) would pair each
# correlation with the wrong user's ratings.
correlation_test_df = correlation_df.loc[test_pivot.index, test_pivot.index]

# Get test user predicted ratings: similarity-weighted sum of the test
# ratings, kept only at the (user, laundry) cells rated in the test split
test_user_predicted_ratings = np.dot(correlation_test_df, test_pivot)
test_user_predicted_ratings *= test_binary.to_numpy()
# Non-positive scores (masked cells included) count as "no prediction" and
# are excluded from both the scaling and the metrics
test_user_predicted_ratings[test_user_predicted_ratings <= 0] = np.nan

# Scale the predicted ratings between 1 and 5
# (MinMaxScaler scales each column, i.e. each laundry_id, independently and
# passes NaN through unchanged)
scaler = MinMaxScaler(feature_range=(1, 5))
test_user_predicted_ratings = scaler.fit_transform(test_user_predicted_ratings)

# Metric evaluation against the unfilled test pivot (NaN where not rated);
# the nan-aware means skip every cell lacking either a rating or a prediction
actual_ratings = createPivot(test)

mse = np.nanmean((actual_ratings - test_user_predicted_ratings)**2)
rmse = np.sqrt(mse)
mae = np.nanmean(np.abs(actual_ratings - test_user_predicted_ratings))

print("[Metric Evaluation]")
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Absolute Error (MAE):", mae)

In [None]:
def getReviewData(x):
  """Return the mean `sentiment_score` of one laundry.

  Reads the notebook-level `data` frame and averages `sentiment_score` over
  the rows whose `laundry_id` equals `x`.
  """
  is_requested_laundry = data['laundry_id'] == x
  return data.loc[is_requested_laundry, 'sentiment_score'].mean()

def rankRecommendation(final_rating, user_id, data, no_rec):
  """Rank the top predicted laundries for a user by rating plus sentiment.

  The `no_rec` laundries with the highest predicted rating for `user_id` are
  selected first; they are then re-ordered by `product_ranking_score`, the sum
  of the predicted rating and the laundry's mean sentiment score.

  Args:
    final_rating: DataFrame of predicted ratings, one row per user id and one
      column per laundry id.
    user_id: Row label of the user to recommend for.
    data: DataFrame with `laundry_id` and `sentiment_score` columns; the mean
      sentiment per laundry is computed from it.
    no_rec: Number of recommendations to keep.

  Returns:
    DataFrame with columns `user_id`, `laundry_id`, `predicted_ratings`,
    `sentiment_score` and `product_ranking_score`, sorted by
    `product_ranking_score` in descending order. The same table is also
    displayed.

  Raises:
    KeyError: If `user_id` is not a row label of `final_rating`.
  """
  # Candidate set: the no_rec highest predicted ratings for this user
  top_predictions = final_rating.loc[user_id].sort_values(ascending=False).head(no_rec)

  recommendation_table = (
    top_predictions
    .rename_axis('laundry_id')
    .reset_index(name='predicted_ratings')
  )
  recommendation_table.insert(0, 'user_id', user_id)

  # Mean sentiment per laundry, taken from the `data` argument rather than a
  # notebook global; laundries without any review row map to NaN
  mean_sentiment = data.groupby('laundry_id')['sentiment_score'].mean()
  recommendation_table['sentiment_score'] = recommendation_table['laundry_id'].map(mean_sentiment)
  recommendation_table['product_ranking_score'] = (
    recommendation_table['predicted_ratings'] + recommendation_table['sentiment_score']
  )

  ranked_recommendations = (
    recommendation_table
    .sort_values(by='product_ranking_score', ascending=False)
    .head(no_rec)
  )
  display(ranked_recommendations)
  # Return the table so callers can keep working with it
  return ranked_recommendations

In [None]:
# Target user and the number of recommendations to produce.
user_id = 25
no_rec = 15

# rankRecommendation displays the ranked table itself; `recommendation_table`
# is bound to whatever the function returns.
recommendation_table = rankRecommendation(final_rating, user_id, data, no_rec)