In [None]:
from google.colab import files
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Open the Colab file picker; the result of files.upload() is kept in `uploaded`.
# NOTE(review): later cells read both "ua.base" and "ua.test" from the working
# directory, so presumably both files must be uploaded here -- confirm.
uploaded = files.upload()

Saving ua.test to ua.test


In [None]:
# Column names for the tab-separated, header-less rating files
# (presumably the MovieLens 100K "ua" train/test split -- confirm).
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']

ratings_train = pd.read_csv(
    "ua.base",
    sep='\t',
    names=r_cols,
    encoding='latin-1',
)
ratings_test = pd.read_csv(
    "ua.test",
    sep='\t',
    names=r_cols,
    encoding='latin-1',
)

# Last expression of the cell: show (rows, columns) of both frames.
ratings_train.shape, ratings_test.shape

((90570, 4), (9430, 4))

In [None]:
class MF():
    """Biased matrix factorization of a ratings matrix, trained with SGD.

    A rating R[i, j] is modelled as

        b + b_u[i] + b_i[j] + P[i, :] . Q[j, :]

    where b is the mean of the non-zero entries of R, b_u / b_i are per-row
    (user) and per-column (item) biases, and P / Q are K-dimensional latent
    factor matrices. Zero entries of R are treated as "not rated".
    """

    def __init__(self, R, K, alpha, beta, iterations):
        """Store the ratings matrix and the hyper-parameters.

        Arguments
        - R (ndarray)      : 2-D user x item rating matrix; 0 marks a missing rating
        - K (int)          : number of latent dimensions
        - alpha (float)    : SGD learning rate
        - beta (float)     : regularization strength
        - iterations (int) : number of passes over the observed ratings
        """
        self.R = R
        self.num_users, self.num_items = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations

    def train(self):
        """Initialise all parameters and run SGD for `self.iterations` epochs.

        Returns a list of (epoch_index, error) tuples, one per epoch, where
        `error` is the value of `self.mse()` after that epoch.
        """
        # Latent factors start as small Gaussian noise.
        self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))

        # Biases start at zero; the global bias is the mean observed rating.
        self.b_u = np.zeros(self.num_users)
        self.b_i = np.zeros(self.num_items)
        self.b = np.mean(self.R[np.where(self.R != 0)])

        # Training samples are the (row, col, rating) triples of positive
        # entries, in row-major order. np.nonzero avoids a Python-level loop
        # over every cell of the matrix.
        rows, cols = np.nonzero(self.R > 0)
        self.samples = [
            (i, j, self.R[i, j])
            for i, j in zip(rows.tolist(), cols.tolist())
        ]

        training_process = []
        for i in range(self.iterations):
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((i, mse))
            if (i+1) % 20 == 0:
                print("Iteration: %d ; error = %.4f" % (i+1, mse))

        # Return only after ALL epochs have run; returning from inside the
        # loop stops training after the first epoch.
        return training_process

    def mse(self):
        """Return the training error over the non-zero entries of R.

        Despite the method name, the value is the square root of the SUM of
        squared errors (not a mean); it is kept that way so values stay
        comparable with earlier runs.
        """
        xs, ys = self.R.nonzero()
        predicted = self.full_matrix()
        residuals = self.R[xs, ys] - predicted[xs, ys]
        return np.sqrt(np.sum(residuals ** 2))

    def sgd(self):
        """Run one SGD pass over `self.samples`, updating biases and factors in place."""
        for i, j, r in self.samples:
            prediction = self.get_rating(i, j)
            e = (r - prediction)

            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])

            # Snapshot the user factors so the item update uses the values
            # from BEFORE this step rather than the just-updated row.
            P_i = self.P[i, :].copy()
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i, :])
            self.Q[j, :] += self.alpha * (e * P_i - self.beta * self.Q[j, :])

    def get_rating(self, i, j):
        """Return the predicted rating for row (user) i and column (item) j."""
        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
        return prediction

    def full_matrix(self):
        """Return the full (num_users, num_items) matrix of predicted ratings."""
        # Uses self (not a module-level instance) so it works for any instance name.
        return (
            self.b
            + self.b_u[:, np.newaxis]
            + self.b_i[np.newaxis, :]
            + self.P.dot(self.Q.T)
        )

In [None]:
# Dense user x item rating matrix: one row per user_id, one column per movie_id;
# (user, movie) pairs without a rating become 0.
R = np.array(
    ratings_train
    .pivot(index='user_id', columns='movie_id', values='rating')
    .fillna(0)
)

In [None]:
# Fit the factorization model on R, then show the reconstructed rating matrix.
mf = MF(
    R,
    K=20,
    alpha=0.001,
    beta=0.01,
    iterations=100,
)
training_process = mf.train()

# Blank line, heading, matrix, blank line -- emitted with a single print call.
print("", "P x Q:", mf.full_matrix(), "", sep="\n")


P x Q:
[[3.65415284 3.50117246 3.51280469 ... 3.5317271  3.52987741 3.55360014]
 [3.65020559 3.5088183  3.48842602 ... 3.53835664 3.5395841  3.51868968]
 [3.59377979 3.47155941 3.46591823 ... 3.48524289 3.513019   3.49804276]
 ...
 [3.63302913 3.48981643 3.48792739 ... 3.53841347 3.52887867 3.53670806]
 [3.69538797 3.54518684 3.56168016 ... 3.57106007 3.5584781  3.56870694]
 [3.62115794 3.46328989 3.47166836 ... 3.48859651 3.49267591 3.53276549]]



In [None]:
# Number of distinct users / movies that appear in the training ratings.
n_users = len(ratings_train['user_id'].unique())
n_items = len(ratings_train['movie_id'].unique())

In [None]:
from sklearn.metrics.pairwise import pairwise_distances

# `data_matrix` was never assigned anywhere in this notebook, so this cell
# raised NameError on a fresh kernel. Bind it to R, the user x item rating
# matrix built from ratings_train in an earlier cell.
data_matrix = R

# pairwise_distances(metric='cosine') returns cosine DISTANCE (1 - cosine
# similarity). Convert it so that larger values really mean "more similar",
# which is how these matrices are used as weights when predicting.
user_similarity = 1 - pairwise_distances(data_matrix, metric='cosine')
item_similarity = 1 - pairwise_distances(data_matrix.T, metric='cosine')

In [None]:
def predict(ratings, similarity, type='user'):
    """Predict ratings as a similarity-weighted average (collaborative filtering).

    Parameters
    ----------
    ratings : ndarray
        2-D rating matrix with one row per user and one column per item.
    similarity : ndarray
        Square weight matrix: user x user when type == 'user',
        item x item when type == 'item'.
    type : {'user', 'item'}, default 'user'
        Which neighbourhood to use. (The name shadows the builtin `type`;
        it is kept for backward compatibility with existing callers.)

    Returns
    -------
    ndarray
        Predicted ratings, same shape as `ratings`.

    Raises
    ------
    ValueError
        If `type` is neither 'user' nor 'item'.
    """
    if type == 'user':
        mean_user_rating = ratings.mean(axis=1)
        # np.newaxis turns the per-user means into a column so they broadcast
        # against the rows of `ratings`.
        ratings_diff = (ratings - mean_user_rating[:, np.newaxis])
        # Weighted average of the neighbours' deviations from their own mean,
        # added back onto each user's mean; normalised by total |weight| per user.
        pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T
    elif type == 'item':
        # Weighted average of each user's ratings over similar items;
        # normalised by total |weight| per item (a row vector, one per column).
        pred = ratings.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
    else:
        # Without this branch, `pred` would be unbound at the return below.
        raise ValueError("type must be 'user' or 'item', got %r" % (type,))
    return pred

In [None]:
# User-based and item-based predictions for the same rating matrix.
user_prediction = predict(
    ratings=data_matrix,
    similarity=user_similarity,
    type='user',
)
item_prediction = predict(
    ratings=data_matrix,
    similarity=item_similarity,
    type='item',
)

In [None]:
user