In [1]:
#author : Mohan Kancherla
#content : Latent factors recommendation

#importing numpy, csv, numpy's linalg module and matplotlib's pyplot
import numpy as np
import csv
from numpy import linalg as LA
import matplotlib.pyplot as plt

#assigning default values of iterations,k,reg factor, learning rate and data path from which it needs to be read
iterations =40
k=20
regularization_factor = 0.1
learning_rate = 0.01
data_path = r"C:\Users\mohan\Downloads\ratings.csv"

# defining calculate error with iteration, Q and P matrix values
def calculateError(i,Q,P):
    #assigning error variable as zero
    Error=0
    #opening file and splitting the values with delimiter comma
    with open(data_path) as file2:
        csv_reader2= csv.reader(file2,delimiter=',')
        #caluclating the square of normalized form of matices Q and P(movies and users)
        x = np.sum(LA.norm(Q,ord=2,axis=1)**2)
        y = np.sum(LA.norm(P,ord=2,axis=1)**2)
        
        #reading through each row of the file
        for row in csv_reader2:
            #assigning each element to variables
            movie_id2 = int(row[0])
            user_id2 = int(row[1])
            rating2 = int(row[2])
            #calculating the error with the rating from csv file and dot product of two vectors
            Error +=(rating2-np.dot(Q[movie_id2],np.transpose(P[user_id2])))**2
        
        #total error is defined as error + reg,factor(sum of square of norm form of P and Q)
        TotalError = Error+regularization_factor*(x+y)
    #closing the file to avoid corrupt of data
    file2.close()
    #printing out the iteration and total error
    print("Iteration {}, Error : {}".format(i, TotalError))
    #function returns total error value
    return TotalError

# defining function latent factor recommendation with the default variables as arguments
def latent_factor_recommnder(data_path, regularization_factor, learning_rate, iterations, k):
    #Taking Q and P matrices as null
    Q =np.array([])
    P =np.array([])
    #assiging total errors to list to plot the graph
    Latent_error=[]
    #looping through 40 iterations
    for i in range(1,iterations+1):
        #opening of file
        with open(data_path) as file:
            csv_reader = csv.reader(file, delimiter=',')
            # for each row of data , assign to variables
            for row in csv_reader:
                movie_id = int(row[0])
                user_id = int(row[1])
                rating = int(row[2])
                # if matrices Q and P length is zero, it will generate random values of size of respective variable+1,K
                if(len(Q)==0 and len(P)==0):
                    #generating random matrices values between 0,0.5
                    Q = np.random.uniform(0,np.sqrt(5/k),size=(movie_id+1,k))
                    P = np.random.uniform(0,np.sqrt(5/k),size=(user_id+1,k))
                else:
                    # if movie id, user id is already read and generated matrix, then we will compare the length
                    # to avoid generating of random matrices again 
                    # It will generate with the reduced length and append to the original matrix
                    if(len(Q)<movie_id+1):
                        z = (movie_id+1)-len(Q)
                        Q= np.append(Q,np.random.uniform(0,np.sqrt(5/k),size=(z,k)),axis=0)
                    if(len(P)<user_id+1):
                        z2 = (user_id+1)-len(P)
                        P = np.append(P,np.random.uniform(0,np.sqrt(5/k),size=(z2,k)),axis=0)
                # calculating E and updating of Q and P matrices values
                E = 2*(rating - np.dot(Q[movie_id], np.transpose(P[user_id])))
                Q[movie_id] = Q[movie_id]+(learning_rate*(E*P[user_id]-2*regularization_factor*Q[movie_id]))
                P[user_id]=P[user_id]+(learning_rate*(E*Q[movie_id]-2*regularization_factor*P[user_id]))
        #calling the caluculate error function and appending results to list
        Latent_error.append(calculateError(i,Q,P))
    # returning list of total error values for all iterations
    return Latent_error

# calling the latent factor recommender function with default values and saving the results into list
Error_Values = latent_factor_recommnder(data_path, regularization_factor, learning_rate, iterations, k)


# using matlab plot x-axis as iterations and y-axis as error values
plt.plot([i for i in range(1,41,1)], Error_Values)
plt.xlabel('Iteration')
plt.ylabel('Total Error')
plt.title('Iteration vs Total Error')
plt.show()

Iteration 1, Error : 180223.19774654426
Iteration 2, Error : 94918.19410013918
Iteration 3, Error : 89126.78276548113
Iteration 4, Error : 86325.87128213503
Iteration 5, Error : 84251.02190219729
Iteration 6, Error : 82348.55135546322
Iteration 7, Error : 80410.48135812854
Iteration 8, Error : 78365.21399484637
Iteration 9, Error : 76222.1458255956
Iteration 10, Error : 74036.62552737213
Iteration 11, Error : 71875.44100752118
Iteration 12, Error : 69792.77468754665
Iteration 13, Error : 67822.66502741279
Iteration 14, Error : 65981.95790855931
Iteration 15, Error : 64275.80896535968
Iteration 16, Error : 62702.187873913565
Iteration 17, Error : 61254.93192491905
Iteration 18, Error : 59925.73035909676
Iteration 19, Error : 58705.39278917285
Iteration 20, Error : 57584.62940714134
Iteration 21, Error : 56554.49829967157
Iteration 22, Error : 55606.63493463161
Iteration 23, Error : 54733.34784899109
Iteration 24, Error : 53927.6376769449
Iteration 25, Error : 53183.17530468959
Iteration

<Figure size 640x480 with 1 Axes>