In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def getData():
    r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
    ratings = pd.read_csv('u.data', sep='\t', names=r_cols,
                          encoding='latin-1')
    m_cols = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url']
    movies = pd.read_csv('u.item', sep='|', names=m_cols, usecols=range(5),
                         encoding='latin-1')
    movie_ratings = pd.merge(movies, ratings)
    temp = movie_ratings[['movie_id', 'user_id', 'rating']].copy()
    temp = temp.pivot_table(columns='movie_id', index='user_id', values='rating').copy()
    temp.index = ['User_'+str(int(i)) for i in temp.index]
    temp.columns = ['Filme_'+str(int(i)) for i in temp.columns]
    qtd_cols = 80
    R = temp.iloc[:, :qtd_cols]
    l=[]
    for i in range(1, R.shape[0]+1):
        if R.iloc[i-1, ].isnull().sum() >= (qtd_cols - 10):
            l.append(i)
    R = R.drop(["User_"+str(r) for r in l])
    R.index = ['User_'+str(int(i)) for i in range(R.shape[0])]
    return R

In [3]:
import time

class MatrixFactorization():
    
    def __init__(self, dataframe, K, steps, alpha, beta):
        self.df = dataframe
        self.K = K
        self.steps = steps
        self.alpha = alpha
        self.beta = beta
        
    def fit(self):
        t0 = time.time()
        
        R = self.df.values
        N, M = R.shape
        
        #inicio aleatorio
        P = np.random.rand(N,self.K)
        Q = np.random.rand(self.K,M)
        
        lista_erro_step = []
        
        #loop
        for step in range(self.steps):
            
            mse_total_step = 0
            #varrendo todas as entradas da matriz R
            for i in range(len(R)):
                for j in range(len(R[i])):
                    #validando se o valor associado está preenchido
                    if R[i][j] > 0:

                        #calculando o erro:
                        eij = R[i][j] - np.dot(P[i,:],Q[:,j])
                        mse_total_step += (eij)**2
                        #alterando os valores
                        for k in range(self.K):
                            P[i][k] = P[i][k] + self.alpha * ( 2 * eij * Q[k][j] - self.beta * P[i][k])
                            Q[k][j] = Q[k][j] + self.alpha * ( 2 * eij * P[i][k] - self.beta * Q[k][j])
                            
            lista_erro_step.append(mse_total_step)
            
        self.P = P
        self.Q = Q
        self.lista_erro_step = lista_erro_step
        t1 = time.time()
        print("Fatoração concluída. Tempo aproximado:", int((t1-t0)/60)+1, 'minuto(s).')
        
    def predict(self):
        return self.P.dot(self.Q)
    
    def print_MSE_steps(self):
        plt.figure(figsize=[15,6])
        plt.title("Custo total por Step", fontsize = 16, fontweight = 'bold')
        plt.xlabel("Step", fontsize = 14, fontweight = 'bold')
        plt.ylabel("Erro", fontsize = 14, fontweight = 'bold')
        plt.plot(range(1, 1+self.steps), self.lista_erro_step, c = 'blue', lw = 2)
        plt.grid()
        plt.show()

In [4]:
df = getData()
df

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,3.0,4.0,3.0,1.0,4.0,4.0,4.0,1.0,4.0,4.0
User_1,4.0,3.0,,,,,,,,,...,,,,,,,,,3.0,2.0
User_2,4.0,,,,,,2.0,4.0,4.0,,...,4.0,,,,,,,,3.0,
User_3,,,,5.0,,,5.0,5.0,5.0,4.0,...,5.0,5.0,3.0,,,,5.0,3.0,4.0,4.0
User_4,4.0,,,4.0,,,4.0,,4.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,3.0,,,3.0,,,4.0,,3.0,,...,,3.0,4.0,,,,,,3.0,2.0
User_363,2.0,4.0,,5.0,,,,,,,...,,3.0,,,,,,,,
User_364,4.0,,4.0,,,5.0,4.0,,4.0,,...,,,,,,,,,,
User_365,,,,2.0,,,4.0,5.0,3.0,,...,,,,,,,,,,


In [5]:
df.isnull().sum()

Filme_1     103
Filme_2     238
Filme_3     292
Filme_4     176
Filme_5     299
           ... 
Filme_76    313
Filme_77    225
Filme_78    337
Filme_79     96
Filme_80    299
Length: 80, dtype: int64

In [6]:
ratings = df.fillna(0).values
ratings

array([[5., 3., 4., ..., 1., 4., 4.],
       [4., 3., 0., ..., 0., 3., 2.],
       [4., 0., 0., ..., 0., 3., 0.],
       ...,
       [4., 0., 4., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 5., 2.]])

In [7]:
def train_test_split(ratings, qtd):
    test = np.zeros(ratings.shape)
    train = ratings.copy()
    for user in range(ratings.shape[0]):
        test_ratings = np.random.choice(ratings[user, :].nonzero()[0], 
                                        size=qtd, 
                                        replace=False)
        train[user, test_ratings] = 0.
        test[user, test_ratings] = ratings[user, test_ratings]
        
    return train, test

In [8]:
train, test = train_test_split(ratings, qtd = 2)

In [9]:
train, val = train_test_split(train, qtd = 2)

In [10]:
test

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 4., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]])

In [11]:
fat = MatrixFactorization(dataframe = pd.DataFrame(train, columns = df.columns, index = df.index), 
                          K = 5, steps = 5000, alpha = 0.0001, beta = 0.2)

In [12]:
fat.fit()

Fatoração concluída. Tempo aproximado: 12 minuto(s).


In [13]:
pd.DataFrame(fat.predict(), columns = df.columns, index = df.index).round(2)

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,3.81,3.07,3.32,3.39,2.91,3.29,3.97,3.73,4.70,4.07,...,3.67,3.00,2.78,2.11,3.51,3.34,3.09,1.66,3.81,2.75
User_1,3.68,3.15,2.20,3.66,2.98,2.90,3.73,3.98,3.59,3.25,...,3.62,2.92,3.32,2.67,2.84,2.97,3.14,2.40,3.72,2.72
User_2,3.05,2.62,2.05,3.18,2.09,2.77,3.15,3.36,3.77,3.35,...,3.27,2.34,2.44,1.38,2.82,2.31,2.71,1.60,3.13,2.47
User_3,4.20,3.67,2.91,4.34,3.67,3.59,4.72,4.79,4.12,4.23,...,3.73,3.59,3.76,3.74,2.96,3.75,3.37,2.54,4.44,3.37
User_4,4.10,3.47,2.64,3.98,3.34,3.25,4.17,4.35,4.08,3.70,...,3.97,3.27,3.60,2.97,3.21,3.39,3.44,2.55,4.13,2.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,3.30,2.55,1.89,2.91,2.57,1.64,3.22,3.11,3.23,2.28,...,3.34,2.29,2.90,2.16,2.44,2.77,2.72,2.38,2.97,2.15
User_363,3.83,3.66,2.11,4.80,3.01,3.90,4.56,5.11,4.13,4.61,...,3.76,3.32,3.60,3.04,2.77,3.02,3.35,2.40,4.33,3.71
User_364,4.08,3.63,2.87,4.14,3.29,4.13,4.15,4.56,4.28,4.29,...,3.94,3.47,3.48,2.81,3.52,3.24,3.53,2.15,4.38,3.13
User_365,3.00,2.84,2.52,3.46,2.74,3.48,3.79,3.88,3.25,4.00,...,2.43,2.91,2.59,3.00,2.17,2.83,2.35,1.38,3.55,2.82


In [14]:
df

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,3.0,4.0,3.0,1.0,4.0,4.0,4.0,1.0,4.0,4.0
User_1,4.0,3.0,,,,,,,,,...,,,,,,,,,3.0,2.0
User_2,4.0,,,,,,2.0,4.0,4.0,,...,4.0,,,,,,,,3.0,
User_3,,,,5.0,,,5.0,5.0,5.0,4.0,...,5.0,5.0,3.0,,,,5.0,3.0,4.0,4.0
User_4,4.0,,,4.0,,,4.0,,4.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,3.0,,,3.0,,,4.0,,3.0,,...,,3.0,4.0,,,,,,3.0,2.0
User_363,2.0,4.0,,5.0,,,,,,,...,,3.0,,,,,,,,
User_364,4.0,,4.0,,,5.0,4.0,,4.0,,...,,,,,,,,,,
User_365,,,,2.0,,,4.0,5.0,3.0,,...,,,,,,,,,,


In [15]:
k = np.arange(1,7,1)
steps = np.arange(100,6000,1000)
alpha  = [0.1,0.01,0.001,0.0001,0.00001,0.000001]
beta = np.arange(0.1,0.7,0.1)
s = []

for a in np.arange(0,6,1):
    s.append(a)
    s[a] =  MatrixFactorization(dataframe = pd.DataFrame(train, columns = df.columns, index = df.index), 
                          K = k[a], steps = steps[a], alpha = alpha[a], beta = beta[a])
    s[a].fit()
   
      



  mse_total_step += (eij)**2
  P[i][k] = P[i][k] + self.alpha * ( 2 * eij * Q[k][j] - self.beta * P[i][k])
  P[i][k] = P[i][k] + self.alpha * ( 2 * eij * Q[k][j] - self.beta * P[i][k])
  Q[k][j] = Q[k][j] + self.alpha * ( 2 * eij * P[i][k] - self.beta * Q[k][j])
  Q[k][j] = Q[k][j] + self.alpha * ( 2 * eij * P[i][k] - self.beta * Q[k][j])


Fatoração concluída. Tempo aproximado: 1 minuto(s).
Fatoração concluída. Tempo aproximado: 2 minuto(s).
Fatoração concluída. Tempo aproximado: 4 minuto(s).
Fatoração concluída. Tempo aproximado: 6 minuto(s).
Fatoração concluída. Tempo aproximado: 10 minuto(s).
Fatoração concluída. Tempo aproximado: 13 minuto(s).


In [16]:
dff1= pd.DataFrame(s[0].predict(), columns = df.columns, index = df.index).round(2)
dff2= pd.DataFrame(s[1].predict(), columns = df.columns, index = df.index).round(2)
dff3= pd.DataFrame(s[2].predict(), columns = df.columns, index = df.index).round(2)
dff4= pd.DataFrame(s[3].predict(), columns = df.columns, index = df.index).round(2)
dff5= pd.DataFrame(s[4].predict(), columns = df.columns, index = df.index).round(2)
dff6= pd.DataFrame(s[5].predict(), columns = df.columns, index = df.index).round(2)
    

In [17]:
dff1

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,,,,,,,,,,,...,,,,,,,,,,
User_1,,,,,,,,,,,...,,,,,,,,,,
User_2,,,,,,,,,,,...,,,,,,,,,,
User_3,,,,,,,,,,,...,,,,,,,,,,
User_4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,,,,,,,,,,,...,,,,,,,,,,
User_363,,,,,,,,,,,...,,,,,,,,,,
User_364,,,,,,,,,,,...,,,,,,,,,,
User_365,,,,,,,,,,,...,,,,,,,,,,


In [18]:
dff2

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,3.74,3.32,2.67,3.70,3.00,3.87,4.25,4.03,4.23,4.36,...,3.62,3.06,2.99,2.44,4.16,3.49,3.03,1.61,3.97,2.86
User_1,3.68,3.20,2.66,3.21,3.17,2.26,3.43,3.61,3.30,3.07,...,3.64,3.04,3.24,3.30,3.10,3.17,3.13,2.15,3.73,2.38
User_2,3.19,2.83,2.28,3.15,2.56,3.26,3.61,3.43,3.59,3.69,...,3.09,2.61,2.56,2.10,3.53,2.97,2.59,1.38,3.38,2.43
User_3,4.31,3.79,3.10,4.00,3.60,3.47,4.42,4.42,4.33,4.24,...,4.22,3.55,3.64,3.39,4.16,3.86,3.59,2.22,4.47,3.02
User_4,4.19,3.66,3.02,3.74,3.56,2.89,4.06,4.18,3.93,3.74,...,4.12,3.45,3.63,3.56,3.73,3.66,3.53,2.33,4.28,2.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,3.09,2.71,2.22,2.84,2.59,2.41,3.13,3.15,3.06,2.98,...,3.03,2.54,2.63,2.47,2.93,2.75,2.58,1.62,3.19,2.15
User_363,4.13,3.67,2.94,4.11,3.30,4.35,4.73,4.46,4.71,4.87,...,3.99,3.37,3.29,2.64,4.65,3.86,3.34,1.74,4.38,3.18
User_364,3.64,3.25,2.59,3.73,2.85,4.22,4.36,4.02,4.37,4.60,...,3.50,2.97,2.82,2.10,4.35,3.47,2.91,1.39,3.91,2.92
User_365,3.16,2.81,2.26,3.09,2.56,3.13,3.53,3.37,3.50,3.58,...,3.07,2.59,2.56,2.14,3.43,2.93,2.58,1.41,3.34,2.38


In [19]:
dff3

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,3.68,3.11,3.13,3.55,3.04,4.09,4.26,3.90,4.15,4.02,...,3.35,3.12,2.90,2.34,3.70,3.53,2.92,1.61,3.79,2.87
User_1,3.71,3.12,2.98,3.18,3.25,3.58,3.71,3.61,3.47,3.05,...,3.44,3.13,3.15,2.65,3.42,3.34,3.11,2.03,3.71,2.50
User_2,3.16,2.74,1.74,3.19,2.37,2.39,3.14,3.52,3.56,3.73,...,3.02,2.49,2.64,2.59,2.19,2.52,2.58,1.79,3.33,2.37
User_3,4.24,3.61,3.12,3.93,3.51,3.95,4.37,4.39,4.39,4.19,...,3.96,3.51,3.54,3.11,3.69,3.74,3.49,2.25,4.34,3.05
User_4,4.13,3.53,2.59,3.80,3.35,3.23,3.90,4.27,4.10,3.97,...,3.92,3.33,3.56,3.34,3.09,3.37,3.47,2.46,4.23,2.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,3.12,2.70,1.65,3.00,2.41,2.15,2.88,3.35,3.24,3.30,...,3.01,2.45,2.70,2.67,2.04,2.41,2.62,1.92,3.25,2.18
User_363,4.10,3.48,3.34,3.92,3.39,4.33,4.59,4.32,4.52,4.37,...,3.77,3.46,3.29,2.74,3.95,3.83,3.30,1.92,4.22,3.13
User_364,3.93,3.30,3.50,3.56,3.40,4.39,4.39,3.96,4.08,3.72,...,3.57,3.37,3.17,2.50,4.05,3.79,3.19,1.82,3.97,2.90
User_365,3.19,2.71,2.55,3.10,2.60,3.34,3.59,3.40,3.58,3.51,...,2.93,2.67,2.55,2.14,3.03,2.97,2.55,1.48,3.29,2.47


In [20]:
dff4

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,3.72,3.18,3.02,3.48,2.97,3.55,3.85,3.92,4.09,3.72,...,3.53,3.07,3.11,2.60,3.04,3.27,3.08,1.86,3.84,2.78
User_1,3.61,3.06,2.58,3.28,3.10,2.90,3.53,3.68,3.50,3.65,...,3.47,3.01,3.20,2.44,2.88,3.11,3.03,2.33,3.65,2.47
User_2,3.20,2.73,2.46,2.99,2.64,2.92,3.26,3.38,3.50,3.33,...,3.08,2.68,2.71,2.24,2.43,2.80,2.68,1.80,3.32,2.39
User_3,4.18,3.55,3.08,3.84,3.53,3.60,4.15,4.32,4.24,4.23,...,4.02,3.49,3.64,2.85,3.31,3.62,3.51,2.55,4.27,2.96
User_4,4.01,3.41,2.95,3.68,3.40,3.24,3.98,4.11,3.97,4.08,...,3.85,3.34,3.53,2.75,3.24,3.48,3.35,2.50,4.07,2.79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,3.03,2.57,2.31,2.79,2.51,2.31,3.05,3.08,2.98,3.08,...,2.88,2.51,2.66,2.12,2.56,2.65,2.50,1.82,3.04,2.09
User_363,4.16,3.53,3.19,3.82,3.43,3.65,4.18,4.28,4.22,4.10,...,3.95,3.44,3.59,2.83,3.48,3.62,3.46,2.36,4.23,2.94
User_364,4.09,3.47,3.11,3.72,3.37,4.05,4.08,4.24,4.20,3.86,...,3.88,3.36,3.49,2.68,3.42,3.53,3.44,2.24,4.20,2.91
User_365,3.33,2.84,2.59,3.10,2.73,2.80,3.39,3.46,3.49,3.43,...,3.18,2.77,2.87,2.35,2.69,2.92,2.76,1.89,3.40,2.41


In [21]:
dff5

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,3.80,3.20,2.85,3.47,3.14,3.39,3.79,3.91,3.87,3.82,...,3.64,3.14,3.32,2.54,2.96,3.28,3.17,2.33,3.85,2.70
User_1,3.48,2.94,2.59,3.14,2.86,3.10,3.45,3.61,3.54,3.49,...,3.31,2.89,2.99,2.33,2.90,3.03,2.89,2.17,3.53,2.41
User_2,3.26,2.76,2.42,2.96,2.71,2.94,3.25,3.36,3.31,3.23,...,3.11,2.73,2.86,2.27,2.62,2.87,2.72,2.03,3.32,2.28
User_3,4.11,3.51,3.11,3.77,3.49,3.70,4.11,4.25,4.20,4.11,...,3.96,3.42,3.63,2.76,3.16,3.58,3.45,2.58,4.20,2.92
User_4,3.87,3.26,2.94,3.57,3.17,3.40,3.86,3.98,3.96,3.97,...,3.72,3.15,3.37,2.46,2.94,3.26,3.23,2.33,3.92,2.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,2.95,2.51,2.22,2.72,2.48,2.71,2.96,3.06,3.05,2.97,...,2.85,2.45,2.62,2.06,2.28,2.57,2.48,1.84,3.01,2.14
User_363,3.88,3.30,2.95,3.57,3.28,3.41,3.87,3.98,3.93,3.89,...,3.73,3.20,3.43,2.49,2.85,3.32,3.24,2.39,3.95,2.76
User_364,3.91,3.29,2.87,3.54,3.21,3.54,3.89,4.02,3.97,3.86,...,3.72,3.28,3.43,2.77,3.18,3.45,3.25,2.41,3.97,2.74
User_365,3.24,2.69,2.40,2.93,2.55,2.85,3.21,3.34,3.30,3.30,...,3.06,2.65,2.74,2.12,2.72,2.76,2.67,1.95,3.25,2.28


In [22]:
dff6

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,4.03,3.21,3.16,3.51,3.11,2.57,4.08,4.08,3.95,3.52,...,3.81,3.21,3.42,1.84,1.73,3.21,3.19,2.08,4.03,2.70
User_1,2.76,2.22,2.12,2.29,2.14,1.96,2.65,2.54,2.48,2.32,...,2.53,2.24,2.14,1.34,0.92,2.18,2.22,1.40,2.68,1.75
User_2,3.28,2.44,2.47,2.83,2.75,2.07,3.27,3.43,3.14,3.21,...,3.15,2.59,2.90,1.73,1.43,2.21,2.75,1.47,3.30,2.15
User_3,4.00,3.39,3.29,3.63,3.04,2.55,4.18,4.37,4.19,3.69,...,3.91,3.19,3.65,1.81,1.85,3.18,3.17,2.02,4.25,2.93
User_4,3.77,2.96,2.93,3.30,2.74,2.41,3.80,3.72,3.78,2.96,...,3.53,2.99,3.06,1.84,1.55,3.13,2.95,1.99,3.67,2.38
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,2.91,2.70,2.49,2.75,2.24,2.10,2.80,2.91,2.95,2.58,...,2.77,2.51,2.67,1.55,1.00,2.55,2.48,1.38,3.14,2.20
User_363,3.83,3.19,3.14,3.51,2.73,2.36,4.09,4.26,4.20,3.30,...,3.77,3.01,3.45,1.79,1.84,3.11,2.95,1.99,4.03,2.72
User_364,2.94,2.56,2.50,2.79,2.18,1.74,3.13,3.22,3.22,2.56,...,2.88,2.37,2.77,1.17,1.49,2.64,2.28,1.63,3.13,2.23
User_365,3.17,2.72,2.52,2.69,2.65,2.34,3.01,3.08,2.81,3.11,...,2.97,2.64,2.72,1.54,1.06,2.35,2.67,1.46,3.30,2.26


In [23]:
df

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,3.0,4.0,3.0,1.0,4.0,4.0,4.0,1.0,4.0,4.0
User_1,4.0,3.0,,,,,,,,,...,,,,,,,,,3.0,2.0
User_2,4.0,,,,,,2.0,4.0,4.0,,...,4.0,,,,,,,,3.0,
User_3,,,,5.0,,,5.0,5.0,5.0,4.0,...,5.0,5.0,3.0,,,,5.0,3.0,4.0,4.0
User_4,4.0,,,4.0,,,4.0,,4.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,3.0,,,3.0,,,4.0,,3.0,,...,,3.0,4.0,,,,,,3.0,2.0
User_363,2.0,4.0,,5.0,,,,,,,...,,3.0,,,,,,,,
User_364,4.0,,4.0,,,5.0,4.0,,4.0,,...,,,,,,,,,,
User_365,,,,2.0,,,4.0,5.0,3.0,,...,,,,,,,,,,


comparando todos os resultados de cada datafreme que foi feito não parece ter tido uma diferença muito grande de um para outro, 
ate no que foi feito manualmente, neste caso tem que varia mais os valores do parametro?acrescenta mais valores? ou é assim msm 

In [35]:
fat = MatrixFactorization(dataframe = pd.DataFrame(val, columns = df.columns, index = df.index), 
                          K = 7, steps = 5100, alpha = 0.000001, beta = 0.7)

In [36]:
fat.fit()

Fatoração concluída. Tempo aproximado: 2 minuto(s).


In [37]:
pd.DataFrame(fat.predict(), columns = df.columns, index = df.index).round(2)

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,2.14,1.07,1.62,1.81,1.50,2.18,1.99,1.38,1.09,1.99,...,1.31,0.80,0.86,2.07,2.42,1.17,1.22,1.40,2.34,1.46
User_1,2.74,2.24,2.01,2.70,2.59,2.35,3.09,1.99,1.23,2.87,...,2.06,1.67,1.83,2.23,2.48,2.19,1.66,1.29,3.03,3.04
User_2,2.01,1.14,1.65,2.07,1.76,1.65,2.06,1.73,0.92,1.60,...,1.17,0.87,0.80,1.72,2.25,1.06,1.18,1.35,2.05,1.79
User_3,1.97,1.56,1.52,1.71,1.80,1.89,1.99,1.45,1.07,2.16,...,1.27,0.97,1.10,1.53,1.96,1.42,1.18,1.09,1.96,1.91
User_4,2.45,2.00,1.98,2.25,2.47,1.90,2.62,1.95,1.16,2.60,...,2.10,1.16,1.52,1.54,2.57,1.88,1.65,1.37,2.51,2.79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,2.47,1.79,2.05,2.23,2.23,1.44,2.49,1.68,1.08,2.36,...,2.08,1.45,1.54,1.52,2.05,2.00,1.37,1.20,2.42,2.62
User_363,1.92,1.06,1.58,1.93,1.60,1.31,1.91,1.49,0.81,1.49,...,1.29,0.96,0.87,1.53,1.91,1.17,1.05,1.16,1.96,1.74
User_364,2.46,2.04,1.89,2.08,2.29,2.08,2.44,1.79,1.22,2.68,...,2.04,1.15,1.55,1.57,2.47,1.96,1.57,1.30,2.50,2.68
User_365,1.85,1.10,1.63,2.03,1.72,0.74,2.10,1.36,0.67,1.42,...,1.39,1.32,1.05,1.37,1.37,1.35,0.92,0.91,1.86,1.91


In [38]:
fat = MatrixFactorization(dataframe = pd.DataFrame(test, columns = df.columns, index = df.index), 
                          K = 7, steps = 5100, alpha = 0.000001, beta = 0.7)

In [42]:
fat.fit()

Fatoração concluída. Tempo aproximado: 2 minuto(s).


In [43]:
pd.DataFrame(fat.predict(), columns = df.columns, index = df.index).round(2)

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,3.11,2.54,2.01,2.57,1.74,1.83,2.80,2.59,2.73,1.33,...,2.08,3.37,2.56,2.32,1.66,3.00,2.06,1.70,3.05,3.05
User_1,3.16,2.77,1.82,2.30,1.63,2.07,2.95,2.57,2.66,1.35,...,2.32,3.51,2.50,2.40,1.69,3.15,2.15,1.64,3.11,2.92
User_2,2.52,1.79,1.09,1.56,0.96,1.44,2.26,1.43,1.59,1.20,...,1.67,2.19,1.95,1.45,1.41,1.76,1.30,1.06,2.36,1.72
User_3,3.43,2.48,2.36,2.88,2.09,2.08,3.32,2.48,3.11,1.47,...,1.89,3.14,2.53,2.34,2.29,2.80,2.38,2.42,3.16,3.19
User_4,2.36,1.52,1.45,1.90,1.60,1.48,2.12,1.77,1.95,0.86,...,1.65,2.09,1.82,1.39,1.44,1.73,1.39,1.57,2.16,1.79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,1.84,1.36,1.24,1.72,0.86,0.90,1.55,1.61,1.69,0.70,...,0.97,1.71,1.63,1.37,1.15,1.58,1.29,1.18,2.01,1.95
User_363,2.66,2.10,1.46,1.89,0.99,1.48,2.32,2.01,2.03,1.12,...,2.14,2.59,2.41,2.02,1.41,2.49,1.61,1.02,2.86,2.02
User_364,2.19,2.09,1.81,1.85,1.22,1.46,2.33,1.86,2.37,1.01,...,1.40,2.51,1.67,2.12,1.43,2.65,1.95,1.44,2.19,2.45
User_365,3.85,2.58,2.20,2.97,2.11,2.15,3.43,2.53,2.91,1.73,...,2.34,3.46,2.99,2.26,2.22,2.77,2.12,2.12,3.49,3.09


In [44]:
fat = MatrixFactorization(dataframe = pd.DataFrame(df, columns = df.columns, index = df.index), 
                          K = 7, steps = 5100, alpha = 0.000001, beta = 0.7)

In [45]:
fat.fit()

Fatoração concluída. Tempo aproximado: 18 minuto(s).


In [46]:
 pd.DataFrame(fat.predict(), columns = df.columns, index = df.index).round(2)   

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,3.89,3.18,2.91,3.41,3.17,2.71,3.71,3.81,3.85,3.59,...,3.71,3.03,3.26,2.43,3.11,2.86,3.18,2.12,3.80,2.82
User_1,2.87,2.58,2.33,2.41,2.22,1.91,2.72,2.83,2.71,2.73,...,2.75,2.16,2.46,1.95,2.29,2.27,2.27,1.39,2.76,1.91
User_2,3.36,2.54,2.38,3.18,2.92,2.57,3.25,3.54,3.29,3.06,...,2.95,2.74,3.00,2.32,2.80,2.91,2.66,1.60,3.44,2.51
User_3,4.28,3.44,3.15,3.84,3.47,2.81,3.97,4.29,4.12,3.96,...,3.90,3.26,3.65,2.57,3.30,3.34,3.43,2.13,4.19,3.11
User_4,3.66,2.85,2.61,3.46,3.17,2.93,3.54,3.89,3.63,3.36,...,3.30,3.06,3.20,2.56,3.10,3.04,2.99,1.75,3.73,2.76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,3.12,2.59,2.18,2.88,2.74,2.43,3.21,3.30,3.32,2.76,...,3.07,2.66,2.74,2.15,2.36,2.47,2.72,1.75,3.52,2.26
User_363,3.42,2.97,2.65,3.15,2.96,3.06,3.60,3.70,3.56,3.24,...,3.37,2.98,3.14,2.88,3.07,2.98,2.84,1.68,3.68,2.33
User_364,4.01,3.06,2.97,3.61,3.46,3.12,4.07,3.96,4.10,3.64,...,3.80,3.22,3.55,2.84,3.45,3.25,3.19,2.31,4.10,2.89
User_365,2.61,1.90,1.79,2.61,2.38,2.33,2.81,2.80,2.95,2.48,...,2.52,2.28,2.46,1.95,2.33,2.20,2.21,1.48,2.90,1.95


In [49]:
df

Unnamed: 0,Filme_1,Filme_2,Filme_3,Filme_4,Filme_5,Filme_6,Filme_7,Filme_8,Filme_9,Filme_10,...,Filme_71,Filme_72,Filme_73,Filme_74,Filme_75,Filme_76,Filme_77,Filme_78,Filme_79,Filme_80
User_0,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,3.0,4.0,3.0,1.0,4.0,4.0,4.0,1.0,4.0,4.0
User_1,4.0,3.0,,,,,,,,,...,,,,,,,,,3.0,2.0
User_2,4.0,,,,,,2.0,4.0,4.0,,...,4.0,,,,,,,,3.0,
User_3,,,,5.0,,,5.0,5.0,5.0,4.0,...,5.0,5.0,3.0,,,,5.0,3.0,4.0,4.0
User_4,4.0,,,4.0,,,4.0,,4.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User_362,3.0,,,3.0,,,4.0,,3.0,,...,,3.0,4.0,,,,,,3.0,2.0
User_363,2.0,4.0,,5.0,,,,,,,...,,3.0,,,,,,,,
User_364,4.0,,4.0,,,5.0,4.0,,4.0,,...,,,,,,,,,,
User_365,,,,2.0,,,4.0,5.0,3.0,,...,,,,,,,,,,


k = np.arange(1,7,1)
steps = np.arange(100,6000,1000)
alpha  = [0.1,0.01,0.001,0.0001,0.00001,0.000001]
beta = np.arange(0.1,0.7,0.1)

eu usei estes parametros so que logico que o loop vai usar o primeiro valor de capa parametro depois o segundo de cada parametro 
era bom varia esta orden? ou ta certo ?