## Factorizacion de Matrices

Primero creemos una matriz de diseño inicial con valores aleatorios entre 0.1 y 0.9. Tambien creemos la funcion del error cuadratico medio

In [15]:
import numpy as np

class matrix_factorization():
    
    def __init__(self,data,features):
        self.data = data
        self.features = features
        self.user_count = data.shape[0]
        self.item_count = data.shape[1]
        self.user_features = np.random.uniform(low=0.1,high = 0.9, size = (self.user_count,self.features))
        self.item_features = np.random.uniform(low=0.1,high = 0.9, size = (self.features,self.item_count))
        
    def MSE(self):
        """
        Mean Squared Error function comparing dot product of user-feature row and feature-item column to user-item cell
        """
        
        matrix_product = np.matmul(self.user_features,self.item_features)
        return np.sum((self.data - matrix_product)**2)


    def single_gradient(self,user_row,item_col,wrt_user_idx = None, wrt_item_idx = None):
        """
        Calcula el gradiente de un unica celda usuario-item a una unica celda usuario-feature o item-feature
        """
        
        if wrt_user_idx !=None and wrt_item_idx !=None:
            return "Too many elements"
        elif wrt_user_idx ==None and wrt_item_idx ==None:
            return "insufficient elements"
        else:
            u_row = self.user_features[user_row,:]
            i_col = self.item_features[:,item_col]
            ui_rating = float(self.data[user_row,item_col])
            prediction = float(np.dot(u_row,i_col))
            
            if wrt_user_idx != None:
                row_elem = float(i_col[wrt_user_idx])
                gradient = 2*(ui_rating-prediction)*row_elem
            else:
                col_elem = float(u_row[wrt_item_idx])
                gradient = 2*(ui_rating-prediction)*col_elem
            return gradient
        
    def user_feature_gradient(self,user_row,wrt_user_idx):
        """
        Averages the gradients of a single user-item row with respect to a single user-feature parameter
        """
        
        summation = 0
        for col in range(0,self.item_count):
            summation += self.single_gradient(user_row = user_row,item_col=col,wrt_user_idx=wrt_user_idx)
        return summation/self.item_count
    
    def item_feature_gradient(self,item_col,wrt_item_idx):
        """
        Averages the gradients of a single user-item column with respect to a single feature-item parameter
        """
        
        summation = 0
        for row in range(0,self.user_count):
            summation += self.single_gradient(user_row = row,item_col=item_col,wrt_item_idx=wrt_item_idx)
        return summation/self.user_count
    
    def update_user_feature(self,learning_rate):
        """
        Updates every user-feature parameter according to supplied learning rate
        """
        for i in range(0, self.user_count):
            for j in range(0,self.features):
                self.user_features[i,j] += learning_rate*self.user_feature_gradient(user_row=i,wrt_user_idx=j)
        
    def update_item_feature(self,learning_rate):
        """
        Updates every feature-item parameter according to supplied learning rate
        """
        for i in range(0, self.features):
            for j in range(0,self.item_count):
                self.item_features[i,j] += learning_rate*self.item_feature_gradient(item_col=j,wrt_item_idx=i)

    ### Metodo de entrenamiento
    
    def train_model(self,learning_rate=0.1,iterations = 1000):
        """
        Trains model, outputting MSE cost/loss every 50 iterations, using supplied learning and iterations
        """
        for i in range(iterations):
            self.update_user_feature(learning_rate=learning_rate)
            self.update_item_feature(learning_rate=learning_rate)
            if i %50 ==0:
                print(self.MSE())
                print(i)
            

Este es un programa de cero, veamos que pasa si creamos matrices

In [17]:
d = np.array([[5,3,1],[1,3,5],[3,5,1]])
print(d)


[[5 3 1]
 [1 3 5]
 [3 5 1]]


In [18]:
d2 = matrix_factorization(d,2)
d2

<__main__.matrix_factorization at 0x1d4302053a0>

In [24]:
d2.item_features

array([[ 0.81761702,  1.6900671 ,  1.94598575],
       [ 1.87994545,  1.1742752 , -0.76401102]])

In [20]:
d2.train_model(learning_rate = .1)

51.37468224533326
0
3.538657154250437
50
3.5386552492239733
100
3.538655249223966
150
3.5386552492239662
200
3.5386552492239662
250
3.5386552492239662
300
3.5386552492239662
350
3.5386552492239662
400
3.5386552492239662
450
3.5386552492239662
500
3.5386552492239662
550
3.5386552492239662
600
3.5386552492239662
650
3.5386552492239662
700
3.5386552492239662
750
3.5386552492239662
800
3.5386552492239662
850
3.5386552492239662
900
3.5386552492239662
950


In [25]:
np.dot(d2.user_features,d2.item_features)

array([[4.28078016, 3.86133275, 0.55412635],
       [0.8201476 , 3.21539   , 4.88850218],
       [3.84015545, 3.99383559, 1.52084655]])

Si consideramos un feature (caracteristica adicional), entonces nos mejora la descomposicion

In [26]:
d = np.array([[5,3,1],[1,3,5],[3,5,1]])
print(d)
d2 = matrix_factorization(d,3)
d2.train_model(learning_rate = .1)

[[5 3 1]
 [1 3 5]
 [3 5 1]]
41.90386218428807
0
0.20127835127556626
50
6.948352666604495e-14
100
1.3501921386633245e-26
150
8.628166150854817e-31
200
1.0600318413907346e-30
250
1.0600318413907346e-30
300
8.628166150854817e-31
350
8.628166150854817e-31
400
1.0600318413907346e-30
450
1.0600318413907346e-30
500
8.628166150854817e-31
550
8.628166150854817e-31
600
1.0600318413907346e-30
650
1.0600318413907346e-30
700
8.628166150854817e-31
750
8.628166150854817e-31
800
1.0600318413907346e-30
850
1.0600318413907346e-30
900
8.628166150854817e-31
950


In [27]:
np.dot(d2.user_features,d2.item_features)

array([[5., 3., 1.],
       [1., 3., 5.],
       [3., 5., 1.]])

In [28]:
### Ahora en Keras

from __future__ import print_function, division
from builtins import range, input
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

import tensorflow

from tensorflow import keras

In [29]:
from keras.models import Model
from keras.layers import Input, Embedding, Dot, Add, Flatten
from keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, SGD

In [30]:
df = pd.read_csv('rating.csv')

In [31]:
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,110,1.0,1425941529
1,1,147,4.5,1425942435
2,1,858,5.0,1425941523
3,1,1221,5.0,1425941546
4,1,1246,5.0,1425941556


In [32]:
user = df['userId'].value_counts().index
map = {k:i for i, k in enumerate(user)}
df['userId'] = df['userId'].map(map)

In [None]:
mov = df['movieId'].value_counts().index
map = {k:i for i, k in enumerate(mov)}
df['movieId'] = df['movieId'].map(map)

In [None]:
N = df['userId'].max()
M = df['movieId'].max()

df.drop('timestamp', axis = 1, inplace = True)

N,M



In [None]:
n = 1000
m = 800

from collections import Counter

In [None]:
ucount = Counter(df['userId'])
mcount = Counter(df['movieId'])

uid = [u for u, c in ucount.most_common(n)]
mid = [u for u, c in mcount.most_common(m)]

In [None]:
newdf = df[df['userId'].isin(uid) & df['movieId'].isin(mid)]
newdf.head()

In [None]:
N = newdf['userId'].max()
M = newdf['movieId'].max()

user = newdf['userId'].value_counts().index
map = {k:i for i, k in enumerate(user)}
newdf['userId'] = newdf['userId'].map(map)


In [None]:
mov = newdf['movieId'].value_counts().index
map = {k:i for i, k in enumerate(mov)}
newdf['movieId'] = newdf['movieId'].map(map)

In [None]:
newdf = shuffle(newdf)
cutoff = int(0.8*len(newdf))
cutoff

In [None]:
train = newdf.iloc[: cutoff]
test = newdf.iloc[cutoff:]

In [None]:
K = 10
mu = newdf['rating'].mean()
epochs = 25
reg = 0.
N = 1000
M = 800