# SVD

## Import libraries

In [41]:
from lib.models import RecommendSystemModel

from typing import List, Any, Tuple,Union
from numpy.typing import NDArray
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# import tensorflow as tf

## Helper to extend a class across several Jupyter Notebook cells
Source: https://stackoverflow.com/questions/45161393/jupyter-split-classes-in-multiple-cells

In [42]:
import functools
def update_class(
    main_class=None, exclude=("__module__", "__name__", "__dict__", "__weakref__")
):
    """Class decorator. Adds all methods and members from the wrapped class to main_class

    The decorated name ends up bound to ``main_class`` (not to the wrapped
    class), so existing instances of ``main_class`` see the new members.

    Args:
    - main_class: class to which to append members. Defaults to the class with the same name as the wrapped class
      (looked up in this module's globals; a ``KeyError`` is raised if no such class exists yet)
    - exclude: black-list of members which should not be copied

    Returns:
    - a decorator that takes the wrapped class and returns ``main_class``
    """

    def decorates(main_class, exclude, appended_class):
        if main_class is None:
            main_class = globals()[appended_class.__name__]
        for k, v in appended_class.__dict__.items():
            if k in exclude:
                continue
            # A class body without a docstring still carries ``__doc__ = None``;
            # copying it would erase the docstring main_class already has.
            if k == "__doc__" and v is None:
                continue
            setattr(main_class, k, v)
        return main_class

    return functools.partial(decorates, main_class, exclude)

### Example

In [43]:
# First half of the demo class: only ``method1`` exists at this point.
class MyClass:
    def method1(self):
        print("method1")
# Instance created before the class is extended in the next cell.
me = MyClass()


In [44]:
# Second half of the demo: update_class() copies ``method2`` onto the
# MyClass defined in the previous cell instead of replacing that class.
@update_class()
class MyClass:
    def method2(self):
        print("method2")
# ``me`` was created before ``method2`` was added, yet both calls work.
me.method1()
me.method2()

method1
method2


## The ML model

In [45]:
class SVDModel(RecommendSystemModel):
    """Recommender that factorises the rating matrix into user and item latent factors.

    Only the constructor lives in this cell; the remaining methods are
    attached in later cells through ``@update_class()``.
    """

    def __init__(
        self,
        mode: str = None,
        features: int = None,
        lr: float = None,
        epochs: int = None,
        weight_decay: float = None,
        stopping: float = None,
        momentum: float = None,
    ) -> None:
        """Store the hyper-parameters, then run the base-class initialiser.

        Every argument is optional; ``None`` selects the default shown below.
        Numeric arguments are compared against ``None`` explicitly so that a
        deliberate ``0`` (e.g. ``weight_decay=0`` to switch regularisation
        off) is kept instead of being replaced by the default.

        Args:
            mode: SVD variant name. Default ``'funk'`` (also used for an
                empty string). Stored only; not read by the code in this file.
            features: number of latent features per user/item. Default 10.
            lr: learning rate of the gradient step. Default 0.0002.
            epochs: maximum number of training epochs. Default 101.
            weight_decay: regularisation factor. Default 0.02.
            stopping: early-stopping threshold on the training loss.
                Default 0.001.
            momentum: stored only; not read by the code in this file.
                Default 0.0.
        """
        # SVD mode
        self.mode: str = mode or 'funk'
        # Number of features
        self.features: int = 10 if features is None else features
        # Learning rate
        self.lr: float = 0.0002 if lr is None else lr
        # Number of total epochs
        self.epochs: int = 101 if epochs is None else epochs
        # The weight decay
        self.weight_decay: float = 0.02 if weight_decay is None else weight_decay
        # Early-stopping threshold
        self.stopping: float = 0.001 if stopping is None else stopping
        self.momentum: float = 0.0 if momentum is None else momentum

        # Declarations only (no value is assigned here): the latent matrices
        # are created when training starts.
        # User latent matrix
        self._P: NDArray
        # Item latent matrix
        self._Q: NDArray

        # Called last, exactly as before, so the base class initialises its
        # own attributes after the hyper-parameters are in place.
        super().__init__()

In [46]:
@update_class()
class SVDModel(RecommendSystemModel):
    def split(self, ratio_train_test: float, ratio_train_valid: float, tensor: bool = False) -> None:
        """Randomly partition the observed ratings into train / valid / test matrices.

        Each rating > 0 is first sent to the training pool with probability
        ``ratio_train_test`` (otherwise to ``self.test``); each pooled rating
        is then kept in ``self.train`` with probability ``ratio_train_valid``
        (otherwise it goes to ``self.valid``). All three results are dense
        nested lists with the same shape as the user-item matrix, holding 0.0
        wherever a rating was assigned elsewhere or is missing.

        Args:
            ratio_train_test: probability that a rating joins the train pool.
            ratio_train_valid: probability that a pooled rating stays in train.
            tensor: currently unused.

        Raises:
            ValueError: if a ratio is outside [0, 1] (raised by
                ``np.random.binomial``) or the user-item matrix is not 2-D.

        NOTE(review): assigning ``self.train`` hides the ``train`` method of
        this class on the instance; one of the two names should be changed.
        """
        # Look the converter up on the class: it is declared without ``self``,
        # so calling it through the instance would shift its arguments.
        # ``n_items`` is the attribute data_loader() sets (there is no ``n_movies``).
        dense = type(self).convertToUserItemMatrix(self.data, self.n_users, self.n_items)
        ratings = np.asarray(dense, dtype=float)
        if ratings.ndim != 2:
            raise ValueError('user-item matrix must be two-dimensional')

        # Coordinates and values of the observed (strictly positive) ratings.
        users, items = np.nonzero(ratings > 0)
        values = ratings[users, items]

        # One Bernoulli draw per observed rating for each of the two stages.
        in_pool = np.random.binomial(1, ratio_train_test, size=users.size).astype(bool)
        keep_train = np.random.binomial(1, ratio_train_valid, size=users.size).astype(bool)
        in_train = in_pool & keep_train
        in_valid = in_pool & ~keep_train

        def scatter(mask):
            # Dense matrix containing only the ratings selected by ``mask``.
            part = np.zeros_like(ratings)
            part[users[mask], items[mask]] = values[mask]
            return part.tolist()

        self.train = scatter(in_train)
        self.valid = scatter(in_valid)
        self.test = scatter(~in_pool)


In [47]:
@update_class()
class SVDModel(RecommendSystemModel):
    def data_loader(self, path:str=None, nrows:int=None, skiprows=None, data:pd.DataFrame=None, n_users: int = None, n_items = None) -> None:
        """Attach the rating data and the matrix dimensions to the model.

        Args:
            path: CSV file to read when ``data`` is not given.
            nrows: forwarded to ``pd.read_csv``.
            skiprows: forwarded to ``pd.read_csv``.
            data: ready-made data frame; takes precedence over ``path``.
            n_users: stored as ``self.n_users``.
            n_items: stored as ``self.n_items``.

        Raises:
            ValueError: if neither ``path`` nor ``data`` is provided.
        """
        # ``data is None`` rather than ``not data``: the truth value of a
        # DataFrame is ambiguous and raises.
        if data is None and not path:
            raise ValueError('Error: one of path or data frame should be provided')
        if data is None:
            self.data = pd.read_csv(path,low_memory=False,nrows=nrows,skiprows=skiprows)
        else:
            # Also covers the case where both are supplied, which previously
            # left ``self.data`` unassigned.
            self.data = data
        self.n_users = n_users
        self.n_items = n_items

In [48]:
@update_class()
class SVDModel(RecommendSystemModel):
    def train(self) -> Tuple[List[float], List[float], List[float]]:
        """Fit the latent factors with per-rating gradient steps.

        ``self._P`` (n_users x features) and ``self._Q`` (n_items x features)
        are re-initialised with small random values, then every rating > 0 in
        the training matrix triggers one ``optimize`` step per epoch. After
        each epoch the loss on the training and validation matrices is
        recorded. Training stops early once the training loss improves by
        less than ``self.stopping`` between two consecutive epochs.

        Returns:
            ``(loss_train, loss_valid, errors)``: the per-epoch training
            losses, the per-epoch validation losses, and every individual
            prediction error seen during training (all epochs concatenated).

        NOTE(review): inside this method ``self.train`` is the rating matrix
        stored by ``split``; that instance attribute hides this method, so
        ``instance.train()`` cannot be called once it is set. One of the two
        names should be changed.
        """
        loss_train = []
        loss_valid = []
        errors = []

        self._P = np.random.rand(self.n_users, self.features) * 0.1
        self._Q = np.random.rand(self.n_items, self.features) * 0.1

        for e in range(self.epochs):
            for id_user in range(self.n_users):
                for id_item in range(self.n_items):
                    if self.train[id_user][id_item] > 0:

                        predict = self.prediction(self._P, self._Q, id_user, id_item)

                        error = self.train[id_user][id_item] - predict
                        errors.append(error)

                        self.optimize(error, id_user, id_item, self.weight_decay)
            trainLoss = self.loss(self.train, self._P, self._Q)
            validLoss = self.loss(self.valid, self._P, self._Q)
            loss_train.append(trainLoss)
            loss_valid.append(validLoss)
            if e % 10 == 0:
                print('Epoch : ', "{:3.0f}".format(e+1), ' | Train :', "{:3.3f}".format(trainLoss), 
                    ' | Valid :', "{:3.3f}".format(validLoss))

            # Early stopping: needs a previous epoch to compare with, and
            # measures the improvement as previous minus current loss so that
            # a decreasing loss does not end training immediately.
            if len(loss_train) > 1 and (loss_train[-2] - trainLoss) < self.stopping:
                break
        return loss_train, loss_valid, errors
        # return super().learn_to_recommend(data, features, lr, epochs, weight_decay, stopping)

In [49]:
@update_class()
class SVDModel(RecommendSystemModel):
    @staticmethod
    def convertToUserItemMatrix(data, n_users, n_movies):
        """Build a dense user x movie rating matrix from rating records.

        Args:
            data: rating records whose first three fields are
                (user id, movie id, rating). Either a ``pd.DataFrame`` (its
                first three columns are used by position) or a sequence of
                records. Ids are 1-based.
            n_users: number of rows; records whose user id lies outside
                ``1..n_users`` are ignored.
            n_movies: number of columns.

        Returns:
            A list of ``n_users`` lists with ``n_movies`` ratings each, 0.0
            where no rating exists.

        Raises:
            IndexError: if a movie id is larger than ``n_movies``.
            NOTE(review): ids are used directly as positions, so they are
            assumed to be contiguous -- confirm for the data set in use.
        """
        # Iterating a DataFrame yields column names, not rows, so convert to
        # a plain 2-D array first.
        if isinstance(data, pd.DataFrame):
            records = data.to_numpy()
        else:
            records = np.asarray(data)

        matrix = np.zeros((n_users, n_movies))
        if records.size:
            user_ids = records[:, 0].astype(int)
            movie_ids = records[:, 1].astype(int)
            ratings = records[:, 2].astype(float)
            # Single pass over the records instead of one scan per user; also
            # handles users without any rating.
            known = (user_ids >= 1) & (user_ids <= n_users)
            matrix[user_ids[known] - 1, movie_ids[known] - 1] = ratings[known]
        return [list(row) for row in matrix]

In [50]:
@update_class()
class SVDModel(RecommendSystemModel):
    def prediction(self, P: NDArray, Q: NDArray, u: int, i: int) -> float:
        """Return the predicted rating of user ``u`` for item ``i``.

        Args:
            P: user latent matrix, one row per user.
            Q: item latent matrix, one row per item.
            u: row index of the user in ``P``.
            i: row index of the item in ``Q``.

        Returns:
            The dot product of the two latent vectors.
        """
        # Select the single row of each matrix; the slices ``P[u:]`` and
        # ``Q[:i]`` would select whole ranges of rows instead.
        return np.dot(P[u], Q[i])
        # print(321)
        # return super().prediction(P, Q, u, i)

In [51]:
@update_class()
class SVDModel(RecommendSystemModel):
    def loss(self, groundTruthData, P: NDArray, Q: NDArray) -> float:
        """Mean squared prediction error over the observed ratings.

        Args:
            groundTruthData: rating matrix indexed ``[user][item]``; only
                entries > 0 count as observed.
            P: user latent matrix.
            Q: item latent matrix.

        Returns:
            The mean of the squared differences between the observed ratings
            and ``self.prediction``; 0.0 when no rating is observed.
        """
        squaredErrors = 0.0
        numOfPrediction = 0

        for u in range(self.n_users):
            for i in range(self.n_items):
                if groundTruthData[u][i] > 0:
                    squaredErrors += pow(groundTruthData[u][i] - self.prediction(P, Q, u, i), 2)
                    numOfPrediction += 1

        # An empty split (e.g. no validation ratings) would otherwise divide
        # by zero.
        if numOfPrediction == 0:
            return 0.0
        return squaredErrors / numOfPrediction
        # return super().loss(data, P, Q)

In [52]:
@update_class()
class SVDModel(RecommendSystemModel):
    def optimize(self, error:float, id_user:int, id_item:int,weight_decay):
        """Apply one regularised gradient step for a single (user, item) rating.

        Args:
            error: observed rating minus predicted rating.
            id_user: row index of the user in ``self._P``.
            id_item: row index of the item in ``self._Q``.
            weight_decay: regularisation factor applied to both vectors.

        Both latent vectors are updated in place, scaled by ``self.lr``.
        """
        # The latent matrices hold one ROW per user / item, so the vectors
        # are addressed by row. The user vector is copied first so the item
        # update uses the same pre-step values as the user update.
        user_vec = self._P[id_user, :].copy()
        item_vec = self._Q[id_item, :]

        self._P[id_user, :] += self.lr * (error * item_vec - weight_decay * user_vec)
        self._Q[id_item, :] += self.lr * (error * user_vec - weight_decay * item_vec)

In [53]:
# Create the model with every hyper-parameter left at its constructor default.
svd = SVDModel()
# svd.svd()
# svd.learn_to_recommend(2)

In [54]:
# Directory that holds the MovieLens 25M CSV files
data_path = './data/MovieLens25M/'
# Load the ratings and movies tables in full
df_rating = pd.read_csv(data_path+'ratings.csv')
df_movies = pd.read_csv(data_path+'movies.csv')
# Number of distinct movies / users that appear in the ratings table
m_movies = df_rating['movieId'].nunique()
n_users = df_rating['userId'].nunique()
# Total number of rating rows
n_ratings = len(df_rating)

In [55]:
# Let the model read ratings.csv itself (the same file as df_rating above)
# and record the matrix dimensions computed in the previous cell.
svd.data_loader(path="./data/MovieLens25M/ratings.csv", n_items = m_movies, n_users=n_users)

In [56]:
# Sanity check: show the item count and every attribute currently stored on the model.
print(svd.n_items)
print(svd.__dict__)
# svd.n_items = 10

59047
{'mode': 'funk', 'features': 10, 'lr': 0.0002, 'epochs': 101, 'weight_decay': 0.02, 'stopping': 0.001, 'momentum': 0.0, 'data':           userId  movieId  rating   timestamp
0              1      296     5.0  1147880044
1              1      306     3.5  1147868817
2              1      307     5.0  1147868828
3              1      665     5.0  1147878820
4              1      899     3.5  1147868510
...          ...      ...     ...         ...
25000090  162541    50872     4.5  1240953372
25000091  162541    55768     2.5  1240951998
25000092  162541    56176     2.0  1240950697
25000093  162541    58559     4.0  1240953434
25000094  162541    63876     5.0  1240952515

[25000095 rows x 4 columns], 'valid': None, 'train': None, 'n_users': 162541, 'n_items': 59047}


In [38]:
# Each rating joins the train pool with probability 0.6 (otherwise test);
# each pooled rating stays in train with probability 0.8 (otherwise valid).
svd.split(0.6,0.8)

In [40]:
# ``test`` only exists after split() has completed; the recorded
# AttributeError below shows it had not been assigned when this cell ran.
print(svd.test)

AttributeError: 'SVDModel' object has no attribute 'test'