In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity

from sklearn.metrics import mean_squared_error

In [None]:
# Load the interaction file. header=None makes pandas treat the first line as
# data, so the column names are assigned explicitly on the next statement.
df = pd.read_csv('interactions_information.csv', header=None)

# Name the five columns of the file.
df.columns = ['user_id', 'product_id', 'categorie', 'sub_categorie','rating']

# Independent deep copy of the raw frame (not referenced again in the cells visible here).
df_copy = df.copy(deep=True)

In [None]:
# Report the dimensions of the raw frame.
rows, columns = df.shape
print("No of rows = ", rows)
print("No of columns = ", columns)

In [None]:
df.info()

In [None]:
df.isna().sum()

In [None]:
# Share of rows at each rating value. normalize=True (previously passed as a
# bare positional 1) yields proportions instead of raw counts.
fig, ax = plt.subplots(figsize=(12, 6))
df['rating'].value_counts(normalize=True).plot(kind='bar', ax=ax)
# Label the figure so it can be read on its own.
ax.set(title='Distribution of ratings', xlabel='Rating', ylabel='Share of interactions')
plt.show()

In [None]:
# Number of distinct users and products before any filtering.
print('Number of unique USERS in Raw data = ', df['user_id'].nunique())
print('Number of unique ITEMS in Raw data = ', df['product_id'].nunique())

In [None]:
# Ten user_ids with the most rows in df (row count per user, largest first).
most_rated = df.groupby('user_id').size().sort_values(ascending=False)[:10]
most_rated

In [None]:
# Keep only the rows of users that have at least 50 rows in df.
# NOTE(review): 50 is a hard-coded activity threshold; consider a named constant.
counts = df['user_id'].value_counts()
df_final = df[df['user_id'].isin(counts[counts >= 50].index)]

In [None]:
# Size of the filtered data: rows, distinct users, distinct products.
print('The number of observations in the final data =', len(df_final))
print('Number of unique USERS in the final data = ', df_final['user_id'].nunique())
print('Number of unique PRODUCTS in the final data = ', df_final['product_id'].nunique())

In [None]:
# Collapse repeated (user_id, product_id) pairs into a single row holding their mean rating.
aggregated_df = df_final.groupby(['user_id', 'product_id'])['rating'].mean().reset_index()

In [None]:
# Wide user x product table of ratings. Pairs without a rating are filled with 0,
# and the functions below treat 0 as "no interaction" (they test `> 0` / `== 0`).
final_ratings_matrix = aggregated_df.pivot_table(index='user_id', columns='product_id', values='rating', fill_value=0)


In [None]:
# Density = percentage of user/product cells that hold a non-zero value.
given_num_of_ratings = np.count_nonzero(final_ratings_matrix)
possible_num_of_ratings = final_ratings_matrix.shape[0] * final_ratings_matrix.shape[1]
density = (given_num_of_ratings / possible_num_of_ratings) * 100

print('Shape of final_ratings_matrix:', final_ratings_matrix.shape)
print('given_num_of_ratings =', given_num_of_ratings)
print('possible_num_of_ratings =', possible_num_of_ratings)
print('density: {:4.2f}%'.format(density))

In [None]:
final_ratings_matrix.head()


In [None]:
# Replace the row labels with positions 0..n_rows-1: the helper column is added
# and then consumed by set_index, which also discards the previous row labels.
# NOTE(review): this mutates final_ratings_matrix in place.
final_ratings_matrix['user_index'] = np.arange(0, final_ratings_matrix.shape[0])
final_ratings_matrix.set_index(['user_index'], inplace=True)

# Actual ratings given by users
final_ratings_matrix.head()


In [None]:
# defining a function to get similar users
def similar_users(user_index, interactions_matrix):
    """Rank every other user by cosine similarity to ``user_index``.

    Parameters
    ----------
    user_index : label
        Row label (looked up with ``.loc``) of the user to compare against.
    interactions_matrix : pandas.DataFrame
        One row per user; each row is used as that user's rating vector.

    Returns
    -------
    (list, list)
        Row labels of all other users, ordered from most to least similar,
        and the matching similarity scores as plain floats in the same order.

    Raises
    ------
    KeyError
        If ``user_index`` is not a row label of ``interactions_matrix``.
    """
    # One vectorised call instead of one cosine_similarity call per row.
    target_row = interactions_matrix.loc[[user_index]].to_numpy()
    scores = cosine_similarity(target_row, interactions_matrix.to_numpy())[0]

    # Drop the target user from BOTH sequences by label. The previous version
    # removed the user by label but the score by position 0, which misaligned
    # the two lists whenever another user tied with the target at the top.
    labels = interactions_matrix.index
    is_other = labels != user_index
    other_labels = labels[is_other]
    other_scores = scores[is_other]

    # Stable descending sort: users with equal scores keep their row order.
    order = np.argsort(-other_scores, kind='stable')
    return other_labels[order].tolist(), other_scores[order].tolist()

In [None]:
similar = similar_users(3,final_ratings_matrix)[0][0:10]
similar


In [None]:
similar_users(3,final_ratings_matrix)[1][0:10]

In [None]:
similar = similar_users(152, final_ratings_matrix)[0][0:10]
similar

In [None]:
#Print the similarity score
similar_users(152,final_ratings_matrix)[1][0:10]

In [None]:
def recommendations(user_index, num_of_products, interactions_matrix):
    """Recommend products rated by the most similar users but not by the user.

    Similar users are visited from most to least similar (as returned by
    ``similar_users``); each contributes the products it rated (> 0) that have
    not been seen yet, in column order, until enough products are collected.

    Parameters
    ----------
    user_index : label
        Row label of the user to recommend for.
    num_of_products : int
        Maximum number of products to return.
    interactions_matrix : pandas.DataFrame
        User x product rating table where 0 means "not rated".

    Returns
    -------
    list
        Up to ``num_of_products`` column labels (product IDs).
    """
    # Saving similar users using the function similar_users defined above
    most_similar_users = similar_users(user_index, interactions_matrix)[0]

    all_products = interactions_matrix.columns

    # Products the target user already interacted with
    already_rated = (interactions_matrix.loc[user_index] > 0).to_numpy()
    observed_interactions = set(all_products[already_rated])

    # Named `recommended` so the local no longer shadows this function's name.
    recommended = []
    for similar_user in most_similar_users:
        if len(recommended) >= num_of_products:
            break

        rated_by_similar = (interactions_matrix.loc[similar_user] > 0).to_numpy()
        # Walk the columns in order instead of going through list(set(...)),
        # whose ordering is arbitrary and made the truncated result unstable.
        for product_id in all_products[rated_by_similar]:
            if product_id not in observed_interactions:
                observed_interactions.add(product_id)
                recommended.append(product_id)

    return recommended[:num_of_products]

In [None]:
recommendations(3,5,final_ratings_matrix)

In [None]:
recommendations(152,5,final_ratings_matrix)

In [None]:
from scipy.sparse import csr_matrix
# CSR copy of the rating values only; row/column labels are not carried over,
# so users and products are addressed by position in this matrix.
final_ratings_sparse = csr_matrix(final_ratings_matrix.values)



In [None]:
from scipy.sparse.linalg import svds

# k is the number of latent features. With the default solver svds requires
# 1 <= k <= min(n_rows, n_cols) - 1, so the desired 250 is capped to what the
# matrix allows instead of raising on smaller data sets.
n_latent_features = min(250, min(final_ratings_sparse.shape) - 1)
U, s, Vt = svds(final_ratings_sparse, k = n_latent_features)

# Construct diagonal array in SVD
sigma = np.diag(s)

In [None]:
U.shape

In [None]:
sigma.shape


In [None]:
Vt.shape

In [None]:
# Rebuild a dense rating matrix from the truncated SVD factors (U * sigma * Vt).
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt)

# Predicted ratings
# NOTE(review): abs() turns negative reconstructed values into positive scores,
# which can rank them above small positive ones; confirm this is intended
# rather than e.g. clipping at 0.
preds_df = pd.DataFrame(abs(all_user_predicted_ratings), columns = final_ratings_matrix.columns)
# NOTE(review): not the last expression of the cell, so this head() is not displayed.
preds_df.head()
# Sparse copy of the predictions, addressed by position like final_ratings_sparse.
preds_matrix = csr_matrix(preds_df.values)

In [None]:
import numpy as np

def recommend_items(user_index, interactions_matrix, preds_matrix, num_recommendations):
    """Print the top predicted products the user has not interacted with.

    ``interactions_matrix`` and ``preds_matrix`` are indexed positionally with
    ``[user_index, :]`` and converted with ``.toarray()``. Products whose
    actual rating is 0 are kept, ordered by predicted rating (highest first),
    and the first ``num_recommendations`` are printed. Nothing is returned.
    """
    # Dense 1-D views of the user's actual and predicted rating rows
    actual = interactions_matrix[user_index, :].toarray().ravel()
    predicted = preds_matrix[user_index, :].toarray().ravel()

    # Product positions act as the index of the comparison table
    positions = pd.Index(np.arange(len(actual)), name='Recommended Products')
    scores = pd.DataFrame(
        {'user_ratings': actual, 'user_predictions': predicted},
        index=positions,
    )

    # A zero actual rating means the user has not interacted with the product
    unseen = scores[scores['user_ratings'] == 0]

    # Highest predicted rating first
    ranked = unseen.sort_values(by='user_predictions', ascending=False)

    print('\nBelow are the recommended products for user(user_id = {}):\n'.format(user_index))
    print(ranked['user_predictions'].head(num_recommendations))

In [None]:
#Enter 'user index' and 'num_recommendations' for the user
recommend_items(1,final_ratings_sparse,preds_matrix,5)


In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity

from sklearn.metrics import mean_squared_error

In [None]:
# NOTE(review): this cell repeats the data-loading cell near the top of the
# notebook; running it again re-reads the file and resets df.
# header=None makes pandas treat the first line as data, so the column names
# are assigned explicitly on the next statement.
df = pd.read_csv('interactions_information.csv', header=None)

# Name the five columns of the file.
df.columns = ['user_id', 'product_id', 'categorie', 'sub_categorie','rating']

# Independent deep copy of the raw frame (not referenced again in the cells visible here).
df_copy = df.copy(deep=True)

In [None]:
# Report the dimensions of the raw frame.
rows, columns = df.shape
print("No of rows = ", rows)
print("No of columns = ", columns)

In [None]:
df.info()

In [None]:
df.isna().sum()

In [None]:
# Share of rows at each rating value. normalize=True (previously passed as a
# bare positional 1) yields proportions instead of raw counts.
fig, ax = plt.subplots(figsize=(12, 6))
df['rating'].value_counts(normalize=True).plot(kind='bar', ax=ax)
# Label the figure so it can be read on its own.
ax.set(title='Distribution of ratings', xlabel='Rating', ylabel='Share of interactions')
plt.show()

In [None]:
# Number of distinct users and products before any filtering.
print('Number of unique USERS in Raw data = ', df['user_id'].nunique())
print('Number of unique ITEMS in Raw data = ', df['product_id'].nunique())

In [None]:
# Ten user_ids with the most rows in df (row count per user, largest first).
most_rated = df.groupby('user_id').size().sort_values(ascending=False)[:10]
most_rated

In [None]:
# Keep only the rows of users that have at least 50 rows in df.
# NOTE(review): 50 is a hard-coded activity threshold; consider a named constant.
counts = df['user_id'].value_counts()
df_final = df[df['user_id'].isin(counts[counts >= 50].index)]

In [None]:
# Size of the filtered data: rows, distinct users, distinct products.
print('The number of observations in the final data =', len(df_final))
print('Number of unique USERS in the final data = ', df_final['user_id'].nunique())
print('Number of unique PRODUCTS in the final data = ', df_final['product_id'].nunique())

In [None]:
# Collapse repeated (user_id, product_id) pairs into a single row holding their mean rating.
aggregated_df = df_final.groupby(['user_id', 'product_id'])['rating'].mean().reset_index()

In [None]:
# Wide user x product table of ratings. Pairs without a rating are filled with 0,
# and the functions below treat 0 as "no interaction" (they test `> 0` / `== 0`).
final_ratings_matrix = aggregated_df.pivot_table(index='user_id', columns='product_id', values='rating', fill_value=0)


In [None]:
# Density = percentage of user/product cells that hold a non-zero value.
given_num_of_ratings = np.count_nonzero(final_ratings_matrix)
possible_num_of_ratings = final_ratings_matrix.shape[0] * final_ratings_matrix.shape[1]
density = (given_num_of_ratings / possible_num_of_ratings) * 100

print('Shape of final_ratings_matrix:', final_ratings_matrix.shape)
print('given_num_of_ratings =', given_num_of_ratings)
print('possible_num_of_ratings =', possible_num_of_ratings)
print('density: {:4.2f}%'.format(density))

In [None]:
final_ratings_matrix.head()


In [None]:
# Replace the row labels with positions 0..n_rows-1: the helper column is added
# and then consumed by set_index, which also discards the previous row labels.
# NOTE(review): this mutates final_ratings_matrix in place.
final_ratings_matrix['user_index'] = np.arange(0, final_ratings_matrix.shape[0])
final_ratings_matrix.set_index(['user_index'], inplace=True)

# Actual ratings given by users
final_ratings_matrix.head()


In [None]:
# defining a function to get similar users
def similar_users(user_index, interactions_matrix):
    """Rank every other user by cosine similarity to ``user_index``.

    Parameters
    ----------
    user_index : label
        Row label (looked up with ``.loc``) of the user to compare against.
    interactions_matrix : pandas.DataFrame
        One row per user; each row is used as that user's rating vector.

    Returns
    -------
    (list, list)
        Row labels of all other users, ordered from most to least similar,
        and the matching similarity scores as plain floats in the same order.

    Raises
    ------
    KeyError
        If ``user_index`` is not a row label of ``interactions_matrix``.
    """
    # One vectorised call instead of one cosine_similarity call per row.
    target_row = interactions_matrix.loc[[user_index]].to_numpy()
    scores = cosine_similarity(target_row, interactions_matrix.to_numpy())[0]

    # Drop the target user from BOTH sequences by label. The previous version
    # removed the user by label but the score by position 0, which misaligned
    # the two lists whenever another user tied with the target at the top.
    labels = interactions_matrix.index
    is_other = labels != user_index
    other_labels = labels[is_other]
    other_scores = scores[is_other]

    # Stable descending sort: users with equal scores keep their row order.
    order = np.argsort(-other_scores, kind='stable')
    return other_labels[order].tolist(), other_scores[order].tolist()

In [None]:
similar_users(3,final_ratings_matrix)[1][0:10]

In [None]:
similar = similar_users(152, final_ratings_matrix)[0][0:10]
similar

In [None]:
#Print the similarity score
similar_users(152,final_ratings_matrix)[1][0:10]

In [None]:
def recommendations(user_index, num_of_products, interactions_matrix):
    """Recommend products rated by the most similar users but not by the user.

    Similar users are visited from most to least similar (as returned by
    ``similar_users``); each contributes the products it rated (> 0) that have
    not been seen yet, in column order, until enough products are collected.

    Parameters
    ----------
    user_index : label
        Row label of the user to recommend for.
    num_of_products : int
        Maximum number of products to return.
    interactions_matrix : pandas.DataFrame
        User x product rating table where 0 means "not rated".

    Returns
    -------
    list
        Up to ``num_of_products`` column labels (product IDs).
    """
    # Saving similar users using the function similar_users defined above
    most_similar_users = similar_users(user_index, interactions_matrix)[0]

    all_products = interactions_matrix.columns

    # Products the target user already interacted with
    already_rated = (interactions_matrix.loc[user_index] > 0).to_numpy()
    observed_interactions = set(all_products[already_rated])

    # Named `recommended` so the local no longer shadows this function's name.
    recommended = []
    for similar_user in most_similar_users:
        if len(recommended) >= num_of_products:
            break

        rated_by_similar = (interactions_matrix.loc[similar_user] > 0).to_numpy()
        # Walk the columns in order instead of going through list(set(...)),
        # whose ordering is arbitrary and made the truncated result unstable.
        for product_id in all_products[rated_by_similar]:
            if product_id not in observed_interactions:
                observed_interactions.add(product_id)
                recommended.append(product_id)

    return recommended[:num_of_products]

In [None]:
recommendations(3,5,final_ratings_matrix)

In [None]:
recommendations(152,5,final_ratings_matrix)

In [None]:
from scipy.sparse import csr_matrix
# CSR copy of the rating values only; row/column labels are not carried over,
# so users and products are addressed by position in this matrix.
final_ratings_sparse = csr_matrix(final_ratings_matrix.values)



In [None]:
from scipy.sparse.linalg import svds

# k is the number of latent features. With the default solver svds requires
# 1 <= k <= min(n_rows, n_cols) - 1, so the desired 250 is capped to what the
# matrix allows instead of raising on smaller data sets.
n_latent_features = min(250, min(final_ratings_sparse.shape) - 1)
U, s, Vt = svds(final_ratings_sparse, k = n_latent_features)

# Construct diagonal array in SVD
sigma = np.diag(s)

In [None]:
U.shape

In [None]:
sigma.shape


In [None]:
Vt.shape

In [None]:
# Rebuild a dense rating matrix from the truncated SVD factors (U * sigma * Vt).
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt)

# Predicted ratings
# NOTE(review): abs() turns negative reconstructed values into positive scores,
# which can rank them above small positive ones; confirm this is intended
# rather than e.g. clipping at 0.
preds_df = pd.DataFrame(abs(all_user_predicted_ratings), columns = final_ratings_matrix.columns)
# NOTE(review): not the last expression of the cell, so this head() is not displayed.
preds_df.head()
# Sparse copy of the predictions, addressed by position like final_ratings_sparse.
preds_matrix = csr_matrix(preds_df.values)

In [None]:
import numpy as np

def recommend_items(user_index, interactions_matrix, preds_matrix, num_recommendations):
    """Print the top predicted products the user has not interacted with.

    ``interactions_matrix`` and ``preds_matrix`` are indexed positionally with
    ``[user_index, :]`` and converted with ``.toarray()``. Products whose
    actual rating is 0 are kept, ordered by predicted rating (highest first),
    and the first ``num_recommendations`` are printed. Nothing is returned.
    """
    # Dense 1-D views of the user's actual and predicted rating rows
    actual = interactions_matrix[user_index, :].toarray().ravel()
    predicted = preds_matrix[user_index, :].toarray().ravel()

    # Product positions act as the index of the comparison table
    positions = pd.Index(np.arange(len(actual)), name='Recommended Products')
    scores = pd.DataFrame(
        {'user_ratings': actual, 'user_predictions': predicted},
        index=positions,
    )

    # A zero actual rating means the user has not interacted with the product
    unseen = scores[scores['user_ratings'] == 0]

    # Highest predicted rating first
    ranked = unseen.sort_values(by='user_predictions', ascending=False)

    print('\nBelow are the recommended products for user(user_id = {}):\n'.format(user_index))
    print(ranked['user_predictions'].head(num_recommendations))

In [None]:
#Enter 'user index' and 'num_recommendations' for the user
recommend_items(1,final_ratings_sparse,preds_matrix,5)


In [None]:

recommend_items(100,final_ratings_sparse,preds_matrix,10)

In [None]:
# Re-applies positional row labels 0..n_rows-1 (same statements as an earlier
# cell): the helper column is added and then consumed by set_index.
# NOTE(review): mutates final_ratings_matrix in place.
final_ratings_matrix['user_index'] = np.arange(0, final_ratings_matrix.shape[0])
final_ratings_matrix.set_index(['user_index'], inplace=True)

# Actual ratings given by users
final_ratings_matrix.head()

In [None]:
# Column-wise mean of the rating matrix: one value per product.
# NOTE(review): unrated cells are stored as 0 (fill_value=0 in the pivot), so
# they are included in these means; confirm that is intended.
average_rating = final_ratings_matrix.mean()
average_rating.head()


In [None]:
preds_df.head()


In [None]:
# Rebuild the matrix from aggregated_df so that its row labels are user_id
# values again (an earlier cell replaced them with positions).
# NOTE(review): this reuses the name final_ratings_matrix for a differently
# indexed table; cells that expect positional labels must not run after it.
final_ratings_matrix = aggregated_df.pivot_table(index='user_id', columns='product_id', values='rating', fill_value=0)

# Column position -> product_id, used to translate positions back to product IDs.
product_id_mapping = {idx: product_id for idx, product_id in enumerate(final_ratings_matrix.columns)}


In [None]:
def recommend_items_with_user_id(user_id, interactions_matrix, preds_matrix, num_recommendations, product_id_mapping, final_ratings_matrix):
    """Print the top predicted, not-yet-rated products for a user given by ID.

    Parameters
    ----------
    user_id : label
        Row label of the user in ``final_ratings_matrix``.
    interactions_matrix, preds_matrix : sparse matrices
        Actual and predicted ratings, indexed positionally with
        ``[user_index, :]``; rows must be in the same order as the rows of
        ``final_ratings_matrix``.
    num_recommendations : int
        Maximum number of products to print.
    product_id_mapping : mapping
        Column position -> product ID.
    final_ratings_matrix : pandas.DataFrame
        Used only to translate ``user_id`` into a row position.

    Raises
    ------
    KeyError
        If ``user_id`` is not a row label of ``final_ratings_matrix``.
    """
    # Translate the user ID into the row position shared by both sparse
    # matrices. (A leftover debug lookup of the hard-coded ID 'AG1' used to
    # run first and raised KeyError on any data set without that user.)
    user_index = final_ratings_matrix.index.get_loc(user_id)

    user_ratings = interactions_matrix[user_index, :].toarray().reshape(-1)
    user_predictions = preds_matrix[user_index, :].toarray().reshape(-1)

    temp = pd.DataFrame({'user_ratings': user_ratings, 'user_predictions': user_predictions})
    temp['Recommended Products'] = np.arange(len(user_ratings))
    temp = temp.set_index('Recommended Products')

    # Keep products the user has not rated (0), best predicted rating first
    temp = temp.loc[temp.user_ratings == 0]
    temp = temp.sort_values('user_predictions', ascending=False)

    recommended_product_indices = temp.index[:num_recommendations]

    print(f'\nBelow are the recommended products for user (user_id = {user_id}, user_index = {user_index}):')

    # Both branches of the former if/else printed the same line, so the
    # per-product lookup that fed it was dropped.
    for product_index in recommended_product_indices:
        print(f"Product ID: {product_id_mapping[product_index]}")

def find_user_id_for_product(user_index, product_id, final_ratings_matrix):
    """Return the user's row label if they rated ``product_id``, else ``None``.

    ``user_index`` is a row position in ``final_ratings_matrix``; a stored
    value different from 0 counts as an interaction.
    """
    # Row label sitting at the requested position
    owner = final_ratings_matrix.index[user_index]

    # Value stored for this (user, product) pair
    rating = final_ratings_matrix.loc[owner, product_id]

    return owner if rating != 0 else None

In [None]:
# avg_preds is otherwise only assigned in a later cell, so on a fresh
# top-to-bottom run this cell raised NameError. Compute it here from preds_df.
avg_preds = preds_df.mean()

# Side-by-side per-product means of the actual and the predicted ratings.
rmse_df = pd.concat([average_rating, avg_preds], axis=1)

rmse_df.columns = ['Avg_actual_ratings', 'Avg_predicted_ratings']

rmse_df.head()

In [None]:
# RMSE between the per-product average actual and average predicted ratings.
# The square root is taken explicitly because the `squared=False` argument is
# deprecated and later removed in newer scikit-learn releases.
RMSE = np.sqrt(mean_squared_error(rmse_df['Avg_actual_ratings'], rmse_df['Avg_predicted_ratings']))
print(f'RMSE SVD Model = {RMSE} \n')


In [None]:
user_id = 'AG81'  # Change this to the desired user_id (a row label of final_ratings_matrix)
num_recommendations = 10  # Change this to the desired number of recommendations
# Debug aid: show the Python type of user_id.
user_id_type = type(user_id).__name__
print(f"User ID Type: {user_id_type}")
# NOTE(review): needs final_ratings_matrix to be indexed by user_id; other cells
# re-index it by position in place, so this depends on cell execution order.
recommend_items_with_user_id(user_id, final_ratings_sparse, preds_matrix, num_recommendations, product_id_mapping, final_ratings_matrix)


In [None]:
# Re-index the user-item matrix by position (0..n_rows-1) and show its first rows.
# NOTE(review): this discards the user_id row labels restored by the re-pivot
# cell above, so a user_id lookup on final_ratings_matrix no longer works after
# this cell has run (TODO confirm the intended cell order).

final_ratings_matrix['user_index'] = np.arange(0, final_ratings_matrix.shape[0])
final_ratings_matrix.set_index(['user_index'], inplace=True)

# Actual ratings given by users
final_ratings_matrix.head()

In [None]:
# Calculate the average rating for each product (column-wise mean).
# NOTE(review): unrated cells are stored as 0 (fill_value=0 in the pivot), so
# they are included in these means; confirm that is intended.
average_rating = final_ratings_matrix.mean()
average_rating.head()


In [None]:
# Display the predicted ratings DataFrame again
preds_df.head()

In [None]:
# Calculate the average predicted rating for each product (column-wise mean of preds_df).
avg_preds=preds_df.mean()
avg_preds.head()


In [None]:
# Create a DataFrame to compare average actual and predicted ratings:
# one row per product, with the two per-product means side by side.

rmse_df = pd.concat([average_rating, avg_preds], axis=1)

rmse_df.columns = ['Avg_actual_ratings', 'Avg_predicted_ratings']

rmse_df.head()

In [None]:
# Calculate RMSE for the SVD model, comparing the per-product average actual
# ratings with the per-product average predicted ratings.
# The square root is taken explicitly because the `squared=False` argument is
# deprecated and later removed in newer scikit-learn releases.
RMSE = np.sqrt(mean_squared_error(rmse_df['Avg_actual_ratings'], rmse_df['Avg_predicted_ratings']))
print(f'RMSE SVD Model = {RMSE} \n')