In [48]:
# Import Python Libraries
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
import random

In [49]:
# Read data and perform basic analysis
def read_data(file_paths):
    data_frames = []
    for file_path in file_paths:
        data_frames.append(pd.read_csv(file_path, sep=',', encoding="utf-8"))
    return data_frames

In [50]:
def calculate_sparsity(matrix):
    total_elements = matrix.size
    zero_elements = np.count_nonzero(matrix == 0)
    sparsity = zero_elements / total_elements
    return sparsity

In [51]:
# Collaborative Filtering using Stochastic Gradient Descent
class SGDRecommender:
    def __init__(self,   n_epochs=10):
        self.n_epochs = n_epochs


    def fit(self, R, K, lamda=0.02, gamma=0.001):
        M,N = R.shape
        P = np.random.rand(M,K)
        Q = np.random.rand(K,N)


        # Compute initial RMSE
        rmse = np.sqrt(self.error(R, P, Q, lamda) / len(R.data))
        print("Initial RMSE:", rmse)


        # SGD optimization
        for epoch in range(self.n_epochs):
            for ui in range(len(R.data)):
                rui = R.data[ui]
                u = R.row[ui]
                i = R.col[ui]
                if rui > 0:
                    eui = rui - np.dot(P[u,:], Q[:,i])
                    P[u,:] += gamma * 2 * (eui * Q[:,i] - lamda * P[u,:])
                    Q[:,i] += gamma * 2 * (eui * P[u,:] - lamda * Q[:,i])
            rmse = np.sqrt(self.error(R, P, Q, lamda) / len(R.data))
            if rmse < 0.5:
                break
        print("Final RMSE:", rmse)
        return P, Q

    def error(self, R, P, Q, lamda=0.02):
        ratings = R.data
        rows = R.row
        cols = R.col
        e = 0
        for ui in range(len(ratings)):
            rui = ratings[ui]
            u = rows[ui]
            i = cols[ui]
            if rui > 0:
                e += pow(rui - np.dot(P[u,:], Q[:,i]), 2) + lamda * (pow(np.linalg.norm(P[u,:]), 2) + pow(np.linalg.norm(Q[:,i]), 2))
        return e



In [52]:
# Collaborative Filtering
def collaborative_filtering(ratings_pivot, K=3, gamma=0.0007, lamda=0.01):
    R = coo_matrix(ratings_pivot.values)

    print("Shape of coo_matrix:", R.shape)


    recommender = SGDRecommender()
    P, Q = recommender.fit(R, K=K, gamma=gamma, lamda=lamda)
    all_user_ratings = np.matmul(P, Q)
    all_user_ratings_df = pd.DataFrame(np.round(all_user_ratings, 4), columns=ratings_pivot.columns, index=ratings_pivot.index)
    return all_user_ratings_df

In [53]:
# Recommendation for a specific user
def recommend_for_user(user_id, all_user_ratings_df):

    all_user_ratings_df_transposed = all_user_ratings_df.transpose()

    user_ratings = all_user_ratings_df_transposed[user_id].sort_values(ascending=False)

    recommendations = user_ratings

    return recommendations

In [54]:
def filter_rated_recommendations(recommendations, user_id, ratings_pivot):
    user_ratings = ratings_pivot.loc[user_id]
    rated_products = user_ratings[user_ratings > 0].index
    filtered_recommendations = recommendations[~recommendations.index.isin(rated_products)].head(10)
    return filtered_recommendations


In [55]:
# File paths
file_paths = [r"softwareMerged.csv"]

# Read data
data_frames = read_data(file_paths)

In [56]:
# Collaborative Filtering
ratings = data_frames[0].sample(30000)  # Using Usersrating
ratings_pivot = ratings.pivot(index='User_ID', columns='Product_ID', values='Rating').fillna(0)
print(calculate_sparsity(ratings_pivot))
all_user_ratings_df = collaborative_filtering(ratings_pivot)


0.9998463426020534
Shape of coo_matrix: (29377, 6646)
Initial RMSE: 3.2914211204743093
Final RMSE: 2.660580298536864


# New Section

In [57]:
#recommend to a user
user_id = ratings.iloc[random.randint(1,100), ratings.columns.get_loc('User_ID')]

# Assuming user_ratings is a DataFrame containing all user ratings
recommendations = recommend_for_user(user_id, all_user_ratings_df)

top_ten_recommendations = filter_rated_recommendations(recommendations, user_id, ratings_pivot)

print("Top 10 Recommendations for User", user_id, ":", top_ten_recommendations)


Top 10 Recommendations for User A1ZYRECKENDIP8 : Product_ID
B00UB76290    4.4995
B00H9A60O4    4.4286
B00EZPXYP4    4.1255
B01637RFR4    4.1193
B00CTTEKJW    4.0184
B01617VQJ4    3.7694
B00EZQYC8G    3.7672
B00NG7JVSQ    3.6713
B01617VPUY    3.6371
B015724OVG    3.5355
Name: A1ZYRECKENDIP8, dtype: float64
