In [12]:
import msePipeline as mp

import numpy as np
from scipy import sparse
import pandas as pd

In [13]:
pipeline = mp.MSEPipeline()
pipeline.preprocess()
train, test, validation = pipeline.split_test_train()

The number of books in the train set: 10000, test set: 9999, val set: 9999. The number of users in the train set: 53425, test set: 26712, val set: 26712.


## FROM  [here](https://towardsdatascience.com/recommender-systems-matrix-factorization-using-pytorch-bd52f46aa199)

In [None]:
def get_params(df):
    
    uids = df.uid.unique().tolist()
    iids = df.iid.unique().tolist()
    num_uid = len(uids)
    num_iid = len(iids)
    
    return uids, iids, num_uid, num_iid

def create_embeddings(n, K, gamma = 7):
    """
    Creates a random numpy matrix of shape n, K with uniform values in (0, 11/K)
    n: number of items/users
    K: number of factors in the embedding 
    """
    return gamma*np.random.rand(n, K) / K

def create_sparse_matrix(df, rows, cols, column_name="rating"):
    """ Returns a sparse utility matrix""" 
    return sparse.csc_matrix((df[column_name].values,(df['uid'].values, df['iid'].values)),shape=(rows, cols))

def predict(df, emb_user, emb_anime):
    """ This function computes df["prediction"] without doing (U*V^T).
    
    Computes df["prediction"] by using elementwise multiplication of the corresponding embeddings and then 
    sum to get the prediction u_i*v_j. This avoids creating the dense matrix U*V^T.
    """
    df['prediction'] = np.sum(np.multiply(emb_anime[df['iid']],emb_user[df['uid']]), axis=1)
    return df

def cost(df, emb_user, emb_anime):
    """ Computes mean square error"""
    Y = create_sparse_matrix(df, emb_user.shape[0], emb_anime.shape[0])
    predicted = create_sparse_matrix(predict(df, emb_user, emb_anime), emb_user.shape[0], emb_anime.shape[0], 'prediction')
    return np.sum((Y-predicted).power(2))/df.shape[0]

def gradient(df, emb_user, emb_anime):
    """ Computes the gradient for user and anime embeddings"""
    lmbda = 0.002
    Y = create_sparse_matrix(df, emb_user.shape[0], emb_anime.shape[0])
    predicted = create_sparse_matrix(predict(df, emb_user, emb_anime), emb_user.shape[0], emb_anime.shape[0], 'prediction')
    delta =(Y-predicted)
    grad_user = (-2/df.shape[0])*(delta*emb_anime) + 2*lmbda*emb_user
    grad_anime = (-2/df.shape[0])*(delta.T*emb_user) + 2*lmbda*emb_anime
    return grad_user, grad_anime

def gradient_descent(df, emb_user, emb_anime, iterations=200, learning_rate=0.05, df_val=None, beta = 0.9, updates = True):
    """ 
    Computes gradient descent with momentum (0.9) for given number of iterations.
    emb_user: the trained user embedding
    emb_anime: the trained anime embedding
    """
    Y = create_sparse_matrix(df, emb_user.shape[0], emb_anime.shape[0])
    grad_user, grad_anime = gradient(df, emb_user, emb_anime)
    v_user = grad_user
    v_anime = grad_anime
    for i in range(iterations):
        grad_user, grad_anime = gradient(df, emb_user, emb_anime)
        v_user = beta*v_user + (1-beta)*grad_user
        v_anime = beta*v_anime + (1-beta)*grad_anime
        emb_user = emb_user - learning_rate*v_user
        emb_anime = emb_anime - learning_rate*v_anime
        if(not (i+1)%50) and (updates):
            print("\niteration", i+1, ":")
            print("train mse:",  cost(df, emb_user, emb_anime))
            if df_val is not None:
                print("validation mse:",  cost(df_val, emb_user, emb_anime))
    return emb_user, emb_anime, cost(df, emb_user, emb_anime), cost(df_val, emb_user, emb_anime)

In [None]:
uids, iids, num_uid, num_iid = get_params(train)

In [None]:
Y = create_sparse_matrix(train, num_uid, num_iid)
Y.todense()

In [None]:
emb_user = create_embeddings(num_uid, 4)
emb_anime = create_embeddings(num_iid, 4)
emb_user, emb_anime = gradient_descent(train, emb_user, emb_anime, iterations=350, learning_rate=0.1, df_val = validation)

In [None]:
from tqdm import tqdm

def sample_hyperparameters():

    return {
        "K": np.random.randint(10, 20),
        "lr": np.random.normal(0.05, 0.025),
        "beta": np.random.normal(0.9, 0.05),
        "gamma": np.random.randint(5, 15),
        "epochs": np.random.randint(50, 80)
        }      
def paramSearch(train, num_uid, num_iid, num_samples = 5):

        hyperparams = pd.DataFrame()
        print('Searching for optimal parameters...')
        for i in tqdm(range(num_samples)):
            # get a random sample of hyperparameters
            params = sample_hyperparameters()
            print(params)
            # train a model using those hyperparameters
            emb_user = create_embeddings(num_uid, params['K'], gamma = params['gamma'])
            emb_item = create_embeddings(num_iid, params['K'], gamma = params['gamma'])
            emb_user, emb_item, cost_train, cost_val = gradient_descent(train, 
                                                                         emb_user, 
                                                                         emb_item, 
                                                                         iterations=params['epochs'], 
                                                                         learning_rate=params['lr'],
                                                                         beta = params['beta'], 
                                                                         df_val = validation,
                                                                         updates = False)
            params['train_mse'] = cost_train
            params['val_mse'] = cost_val
            hyperparams = hyperparams.append(params, ignore_index=True)
        return hyperparams.sort_values(by = 'val_mse')

In [61]:
hp = paramSearch(train, num_uid, num_iid, num_samples = 10)

  0%|          | 0/10 [00:00<?, ?it/s]

Searching for optimal parameters...
{'K': 16, 'lr': 0.06911681421964877, 'beta': 0.8751939500399427, 'gamma': 5, 'epochs': 53}


 10%|█         | 1/10 [01:12<10:52, 72.52s/it]

{'K': 14, 'lr': 0.059983531559637523, 'beta': 0.9299892557964924, 'gamma': 6, 'epochs': 79}


 20%|██        | 2/10 [02:49<10:38, 79.81s/it]

{'K': 10, 'lr': 0.04015033453193613, 'beta': 0.9194450312942336, 'gamma': 13, 'epochs': 59}


 30%|███       | 3/10 [03:54<08:48, 75.52s/it]

{'K': 13, 'lr': 0.061889367038880476, 'beta': 1.0124316673343328, 'gamma': 7, 'epochs': 57}


 40%|████      | 4/10 [05:05<07:24, 74.12s/it]

{'K': 18, 'lr': 0.09124107449615401, 'beta': 0.9154476785848042, 'gamma': 5, 'epochs': 64}


 50%|█████     | 5/10 [06:39<06:40, 80.02s/it]

{'K': 12, 'lr': 0.09134714584730058, 'beta': 0.8747537042415194, 'gamma': 5, 'epochs': 50}


 60%|██████    | 6/10 [07:38<04:54, 73.69s/it]

{'K': 15, 'lr': 0.022424125657719913, 'beta': 0.895327792394344, 'gamma': 14, 'epochs': 58}


 70%|███████   | 7/10 [08:56<03:44, 74.99s/it]

{'K': 12, 'lr': 0.039828263821539006, 'beta': 0.8090874407948825, 'gamma': 11, 'epochs': 66}


 80%|████████  | 8/10 [10:13<02:31, 75.51s/it]

{'K': 15, 'lr': 0.04029155477339579, 'beta': 0.8854276678463482, 'gamma': 14, 'epochs': 77}


 90%|█████████ | 9/10 [11:55<01:23, 83.50s/it]

{'K': 18, 'lr': 0.01829605681505877, 'beta': 0.8838056958032741, 'gamma': 11, 'epochs': 76}


100%|██████████| 10/10 [13:48<00:00, 82.86s/it]


In [62]:
hp

Unnamed: 0,K,beta,epochs,gamma,lr,train_mse,val_mse
6,15.0,0.895328,58.0,14.0,0.022424,1.95591,1.955168
8,15.0,0.885428,77.0,14.0,0.040292,2.04069,2.043973
2,10.0,0.919445,59.0,13.0,0.04015,2.360247,2.369465
7,12.0,0.809087,66.0,11.0,0.039828,3.47685,3.468857
9,18.0,0.883806,76.0,11.0,0.018296,6.176782,6.153494
3,13.0,1.012432,57.0,7.0,0.061889,10.005748,9.978112
1,14.0,0.929989,79.0,6.0,0.059984,11.811594,11.785822
5,12.0,0.874754,50.0,5.0,0.091347,12.618779,12.587218
0,16.0,0.875194,53.0,5.0,0.069117,13.479378,13.445336
4,18.0,0.915448,64.0,5.0,0.091241,13.801077,13.768661


In [65]:
testList = [
    [1,1,4],
    [1,4,5],
    [4,7,1],
    [7,2,4],
    [7,1,2],
    [9,9,4]
]

vec = [np.random.randint(1,10) for _ in range(10)]

testListdf = pd.DataFrame(testList, columns = ['uid', 'iid', 'rating'])
testListdf

Unnamed: 0,uid,iid,rating
0,1,1,4
1,1,4,5
2,4,7,1
3,7,2,4
4,7,1,2
5,9,9,4


In [67]:
vec

[4, 4, 5, 7, 2, 9, 1, 7, 9, 6]

In [68]:
testSparse = sparse.csc_matrix((testListdf['rating'].values,(testListdf['uid'].values, testListdf['iid'].values)),shape=(10,10))

In [69]:
testSparse.dot(vec)

array([ 0, 26,  0,  0,  7,  0,  0, 28,  0, 24])

## My own

In [3]:
num_items = len(train.iid.unique())
num_users = len(train.uid.unique())

print(f"The training set has {num_items} books and {num_users}  users.")

The training set has 10000 books and 53424  users.


### Step 1

First I want to create the matrices with which I can perform gradient descent. I'll need 
   1. Feature embeddings $\bf{A}$ and $\bf{B}$, initialized randomly.
   2. A sparse user-item interaction matrix $\bf{Y}$ (also known as the utility matrix).

In [36]:
# for now let's just have 10 features (K=10)
K=10 # this is a hyperparameter that should be tuned
alpha=11 # this is a hyperparameter that should be tuned

user_features = np.random.uniform(0,alpha/K,(num_users,K))
item_features = np.random.uniform(0,alpha/K,(num_items,K))

utility = sparse.csc_matrix((train.rating.values, (train.uid.values, train.iid.values)), shape=(num_users, num_items))

### Step 2

Now we need a cost function and it's gradient. Let's go with mean square error (MSE) for now. For us MSE looks like

$$ \textbf{MSE} = \frac{1}{N}\sum_{i,j}^{N} (y_{ij} - a_{ik}b_{kj})^2 $$ 

where $N$ is the number of non-null entries in the utility matrix. Note that the lower case names of the utility and embedded matrices represent the indexed entries of each matrix. Capital and bolded letters indicate a matrix representation of the objects. Repeated indices imply summation. 

If we include regularization in this loss function, we get:

$$ \textbf{L} = \textbf{MSE} + \lambda_a a_{\ell m}a_{m \ell} +  \lambda_b b_{\ell m}b_{m \ell} $$. 

Where $\lambda_a$ and $\lambda_b$ are just regularization parameters. As it turns out, the matrix $\bf{A} \bf{B}^{T}$ is a large matrix. We can avoid computing it by just computing each prediction row individually and adding it to the training DataFrame as a new column, then we can create a sparse matrix from that! Just as in [here](https://towardsdatascience.com/recommender-systems-matrix-factorization-using-pytorch-bd52f46aa199). In fact, our `predict` method will be more or less the same. The gradient of this function is fairly simple to compute:

$$ \vec{\nabla} \textbf{L} = -2 $$

In [68]:
def predict(df, user_features, item_features):
    """ This function computes df["prediction"] without doing (U*V^T).
    
    Computes df["prediction"] by using elementwise multiplication of the corresponding embeddings and then 
    sum to get the prediction u_i*v_j. This avoids creating the dense matrix U*V^T.
    """
    df['prediction'] = np.sum(np.multiply(item_features[df['iid']],user_features[df['uid']]), axis=1)
    return df


def meanSquareError(df, utility, user_features, item_features):
    '''
    This function computes the MSE for a given set of feature matrices. Remember we never hold the 
    prediction utility matrix. We add a prediction column to the pandas dataframe then create a sparse
    matrix of predictions when we need it.
    '''
    temp = predict(train, user_features=user_features, item_features=item_features)
    prediction = sparse.csc_matrix((temp.prediction.values, (temp.uid.values, temp.iid.values)),
                                       shape=(user_features.shape[0], item_features.shape[0]))
    error = utility-prediction
    return (1/len(df))*np.sum(error.power(2))

def gradient_reg(df, utility, user_features, item_features, lmbda_a, lmbda_b):
    '''
    This function computes the regularized gradient.
    '''
    temp = predict(train, user_features=user_features, item_features=item_features)
    prediction = sparse.csc_matrix((temp.prediction.values, (temp.uid.values, temp.iid.values)),
                                       shape=(user_features.shape[0], item_features.shape[0]))
    error = utility-prediction
    grad_user = (-2/df.shape[0])*(error*item_features) + 2*lmbda_a*user_features
    grad_item = (-2/df.shape[0])*((error.T)*user_features) + 2*lmbda_b*item_features
    return grad_user, grad_item

In [62]:
test = MSE(df = train, utility = utility, user_features = user_features, item_features = item_features)
test

2.5527136723429513

In [63]:
test = gradient_reg(df = train, utility = utility, user_features = user_features, item_features = item_features,
                   lmbda_a = 0.0002, lmbda_b=0.0002)

(53424, 10000) (53424, 10) (10000, 10)


In [66]:
def gradient_descent(df, 
                    utility, 
                    user_features, 
                    item_features, 
                    lmbda_a=0.002, 
                    lmbda_b=0.002,
                    utility_val=None, 
                    iterations=10, 
                    learning_rate=0.05, 
                    beta=0.9, 
                    updates=True):

    grad_user, grad_item = gradient_reg(df=df, 
                                        utility=utility, 
                                        user_features=user_features, 
                                        item_features=item_features, 
                                        lmbda_a=lmbda_a,
                                        lmbda_b=lmbda_b)
    v_user = grad_user
    v_item = grad_item
    for i in range(iterations):
        grad_user, grad_item = gradient_reg(df=df, 
                                            utility=utility, 
                                            user_features=user_features, 
                                            item_features=item_features, 
                                            lmbda_a=lmbda_a,
                                            lmbda_b=lmbda_b)
        v_user = beta*v_user + (1-beta)*grad_user
        v_item = beta*v_item + (1-beta)*grad_item
        user_features = user_features - learning_rate*v_user
        item_features = item_features - learning_rate*v_item
        if(not (i+1) % 50) and (updates):
            print("\niteration", i+1, ":")
            print("train mse:",  meanSquareError(df, utility, user_features, item_features))
            if utility_val is not None:
                print("validation mse:",  meanSquareError(df, utility_val, user_features, item_features))

    if utility_val:
        return user_features, item_features, meanSquareError(df, utility, user_features, item_features), meanSquareError(utility_val, user_features, item_features)
    else:
        return user_features, item_features, meanSquareError(df, utility, user_features, item_features)

In [69]:
gradient_descent(df = train, utility = utility, user_features = user_features, item_features = item_features)

TypeError: meanSquareError() missing 1 required positional argument: 'item_features'

## My own from file

### defs

In [16]:
import pandas as pd
import numpy as np
from scipy import sparse
from tqdm import tqdm

def create_sparse_matrix(df, rows, cols, column_name="rating"):
    ''' 
    Creates a scipy sparse matrix
    Parameters
    ----------
    df : pandas DataFrame
        The data that will be made a sparse matrix
    rows : int
        number of rows in the matrix
    columns : int
        number of columns in the matrix
    column_name : 

    Returns
    -------

    '''
    return sparse.csc_matrix((df[column_name].values, (df['uid'].values, df['iid'].values)), shape=(rows, cols))


def create_embeddings(n, K, gamma=7):
    ''' 

    Parameters
    ----------
    df : pandas DataFrame

    Returns
    -------

    '''
    return gamma*np.random.rand(n, K) / K

def predict(df, user_features, item_features):
    ''' 
    This function performs the element wise prediction of each item for each user. It avoids building the 
    approximated utility matrix in order to save space

    Parameters
    ----------
    df : pandas DataFrame
        This is the pandas dataframe of the data predictions are to be made on.
    user_features : numpy array
        The user feature embeddings.
    item_features : numpy Array
        The item feature embeddings.

    Returns
    -------
    df : pandas DataFrame
        The same dataframe as inputted but with a new/updated predictions column. 

    '''
    df['prediction'] = np.sum(np.multiply(
        item_features[df['iid']], user_features[df['uid']]), axis=1)
    return df


def meanSquareError(df, user_features, item_features):
    ''' 
    Computes the mean square error on the predictions. 

    Parameters
    ----------
    df : pandas DataFrame
        This is the pandas dataframe of the data predictions are to be made on.
    user_features : numpy array
        The user feature embeddings.
    item_features : numpy Array
        The item feature embeddings.

    Returns
    -------
    mse : float
        The mean square error for the given embedding matrices. 

    '''
    # we need to actually make predictions then convert those into a sparse matrix
    utility = create_sparse_matrix(df, user_features.shape[0], item_features.shape[0])
    temp = predict(df=df, user_features=user_features,
                   item_features=item_features)
    prediction = sparse.csc_matrix((temp.prediction.values, (temp.uid.values, temp.iid.values)),
                                   shape=(user_features.shape[0], item_features.shape[0]))

    # now let's get an error matrix then return the MSE.
    error = utility-prediction
    mse = (1/len(df))*np.sum(error.power(2))
    return mse


def gradient_reg(df, utility, user_features, item_features, lmbda_a, lmbda_b):
    ''' 
    Computes the regularized gradient of the mean square error. Returns the gradient
    in the 'directions' of both embedded matrices.

    Parameters
    ----------
    df : pandas DataFrame
        This is the pandas dataframe of the data predictions are to be made on.
    utility : scipy sparse matrix
        The sparse utility matrix of all of the ratings.
    user_features : numpy array
        The user feature embeddings.
    item_features : numpy Array
        The item feature embeddings.
    lmbda_a, lmbda_b : float
        These parameters are the regularization coefficients. 

    Returns
    -------
    grad_user : numpy array
        gradient of the MSE, partial derivative w.r.t. the user 
    grad_item : numpy array
        gradient of the MSE, partial derivative w.r.t. the item 

    '''
    # we need to actually make predictions then convert those into a sparse matrix
    temp = predict(df=df, user_features=user_features,
                   item_features=item_features)
    prediction = sparse.csc_matrix((temp.prediction.values, (temp.uid.values, temp.iid.values)),
                                   shape=(user_features.shape[0], item_features.shape[0]))
    # now let's get an error matrix
    error = utility-prediction

    # we can now compute the gradient 
    # we will compute each 'direction' separately and return them separately
    grad_user = (-2/df.shape[0]) * (error*item_features) + 2*lmbda_a*user_features
    grad_item = (-2/df.shape[0])*((error.T) * user_features) + 2*lmbda_b*item_features
    return grad_user, grad_item


def gradient_descent(df,
                     utility,
                     user_features,
                     item_features,
                     val=None,
                     lmbda_a=0.002,
                     lmbda_b=0.002,
                     epochs=200,
                     learning_rate=0.05,
                     beta=0.9,
                     updates=True):
    ''' 
    Performs gradient descent to find the optimal embedded matrices. A momentum term
    is added to arrive at the minimum sooner. This function will iterate a number of times
    specified by the user. It will update the user every 50 epochs on how the cost function 
    looks. Finally it will return the new embedded matrices and the final cost values.

    Parameters
    ----------
    df : pandas DataFrame
        This is the pandas dataframe of the data predictions are to be made on.
    utility : scipy sparse matrix
        The sparse utility matrix of all of the ratings.
    user_features : numpy array
        The user feature embeddings.
    item_features : numpy Array
        The item feature embeddings.
    val : pandas DataFrame DEFAULT=None
        The validation set to check the algorithm against.
    lmbda_a, lmbda_b : float, DEFAULT=0.002 for both
        These parameters are the regularization coefficients. 
    epochs : int, DEFAULT=200
        The number of iterations on which to perform GD
    learning_rate : float, DEFAULT=0.05
        The learning rate for GD.
    beta : float, DEFAULT=0.9
        The momentum coefficient.
    updates: bool, DEFAULT=True
        The option to print periodic updates of the MSE as the algorithm runs.
        Updates will print every epoch with the MSE of the set. It will give
        the MSE of the validation set if provided.

    Returns
    -------
    user_features : numpy array
        The optimized user feature embeddings.
    item_features : numpy Array
        The optimized item feature embeddings.
    mse_train : float
        The final MSE of the training set
    mse_val : float, OPTIONAL
        the final MSE of the validation set


    '''

    # get the initial gradient term so we can perform the first
    # round of GD. Needed for momentum terms
    grad_user, grad_item = gradient_reg(df=df,
                                        utility=utility,
                                        user_features=user_features,
                                        item_features=item_features,
                                        lmbda_a=lmbda_a,
                                        lmbda_b=lmbda_b)
    v_user = grad_user
    v_item = grad_item
    for i in range(epochs):
        # update the gradient based on new feature matrices
        grad_user, grad_item = gradient_reg(df=df,
                                            utility=utility,
                                            user_features=user_features,
                                            item_features=item_features,
                                            lmbda_a=lmbda_a,
                                            lmbda_b=lmbda_b)

        # compute our update matrices
        v_user = beta*v_user + (1-beta)*grad_user
        v_item = beta*v_item + (1-beta)*grad_item

        # update the embedded matrices
#         user_features = user_features - learning_rate*v_user
#         item_features = item_features - learning_rate*v_item
        
        user_features = user_features - learning_rate*grad_user
        item_features = item_features - learning_rate*grad_item

        # just print out values every so often to see what is happening 
        # with the algo.
        if(not (i+1) % 50) and (updates):
            print("\niteration", i+1, ":")
            print("train mse:",  meanSquareError(
                df, user_features, item_features))
            if val is not None:
                print("validation mse:",  meanSquareError(
                    val, user_features, item_features))

    # compute the final MSE
    mse_train = meanSquareError(df, user_features, item_features)

    # here we just check if the validation set is passed in so we can return the final cost of that as well if needed.
    if val:
        mse_val = meanSquareError(val, user_features, item_features)
        return user_features, item_features, mse_train, mse_val
    else:
        return user_features, item_features, mse_train


def sample_hyperparameters():
    ''' 
    This function returns a random value for each hyperparameter for MSE gradient descent. 
    '''
    return {
        "K": np.random.randint(5, 20),
        "lr": np.random.normal(0.1, 0.05),
        "beta": np.random.normal(0.9, 0.05),
        "gamma": np.random.randint(5, 15),
        "epochs": np.random.randint(50, 200)
    }


class MSErec():
    '''
    This class will perform all ML processes to predict books for users of our app. It will create 
    user/item matrices, perform gradient descent (with momentum), and output predictions!
    '''

    def __init__(self, df, test=None, validation=None):
        ''' 
        Parameters
        ----------
        df : pandas DataFrame

        Returns
        -------

        '''
        # let's create a class dataframe object first
        self.df=df
        
        num_uid = len(df.uid.unique())
        num_iid = len(df.iid.unique())

        # create sparse matrices
        self.utility = create_sparse_matrix(df, num_uid, num_iid)
        # only create matrices for test and val if passed
        if test:
            self.test = create_sparse_matrix(test, num_uid, num_iid)
        else:
            self.test = None
        if validation:
            self.validation = create_sparse_matrix(validation, num_uid, num_iid)
        else:
            self.validation = None
    

    def trainModel(self, K=15, beta=0.90, epochs=60, gamma=14, lr=0.025):
        ''' 
        optimal : K=15, beta=0.90, epochs=60, gamma=14, lr=0.025
        Parameters
        ----------
        df : pandas DataFrame

        Returns
        -------

        '''
        # this initializes some embedding matrices
        num_uid = self.utility.shape[0]
        num_iid = self.utility.shape[1]
        self.user_features = create_embeddings(num_uid, K=K, gamma=gamma)
        self.item_features = create_embeddings(num_iid, K=K, gamma=gamma)
        
        # now perform GD, check if we passed a validation set as well.
        if self.validation is not None:
            self.emb_user, self.emb_item, cost_train, cost_val = gradient_descent(df = self.df,
                                                                              utility = self.utility,
                                                                              user_features = self.user_features,
                                                                              item_features = self.item_features,
                                                                              epochs=epochs,
                                                                              val = self.validation,
                                                                              updates=False)
            return (cost_train, cost_val)
    
        else:
            self.emb_user, self.emb_item, cost_train = gradient_descent(df = self.df,
                                                                      utility = self.utility,
                                                                      user_features = self.user_features,
                                                                      item_features = self.item_features,
                                                                      epochs=epochs,
                                                                      updates=False)
            return (cost_train,)

    def paramSearch(self, num_samples=5):
        ''' 
        Parameters
        ----------
        df : pandas DataFrame

        Returns
        -------

        '''
        hyperparams = pd.DataFrame()
        print('Searching for optimal parameters...')
        for i in tqdm(range(num_samples)):
            # get a random sample of hyperparameters
            params = sample_hyperparameters()
            cost = self.trainModel(K=params["K"],
                                   beta=params["beta"], 
                                   epochs=params["epochs"], 
                                   gamma=params["gamma"], 
                                   lr=params["lr"])
            
            params['train_mse'] = cost[0]   
            if len(cost)==2:
                params['val_mse'] = cost[1]
            hyperparams = hyperparams.append(params, ignore_index=True)
            
        return hyperparams.sort_values(by='train_mse')
    
    def getPredictions(self):
        self.df = predict(df = self.df, 
                          user_features = self.user_features, 
                          item_features = self.item_features)


### testing ground

In [17]:
model = MSErec(df = train)

In [18]:
model.paramSearch(num_samples=10)

  0%|          | 0/10 [00:00<?, ?it/s]

Searching for optimal parameters...


100%|██████████| 10/10 [21:27<00:00, 128.71s/it]


Unnamed: 0,K,beta,epochs,gamma,lr,train_mse
8,18.0,0.8788,156.0,14.0,0.083615,3.046685
6,10.0,0.911529,130.0,10.0,0.069907,3.760387
4,19.0,0.916846,123.0,11.0,0.19764,6.740152
7,7.0,0.906586,147.0,13.0,0.140935,6.988069
1,8.0,0.897428,198.0,7.0,0.068724,7.246549
3,5.0,0.834174,96.0,5.0,0.074484,8.470575
0,11.0,0.824203,171.0,7.0,0.037461,9.210968
9,16.0,0.839235,118.0,7.0,0.118355,11.077195
2,18.0,0.855389,115.0,7.0,0.156866,11.606652
5,17.0,0.81898,114.0,6.0,0.05051,12.562163


In [38]:
predict(train, model.user_features, model.item_features)

Unnamed: 0,iid,uid,rating,prediction
0,58,14930,5,1.511545
1,624,1747,4,0.805605
2,1,14930,3,1.210596
3,0,1747,5,1.207255
4,16,1747,4,0.994679
...,...,...,...,...
4780237,780,10703,4,0.905142
4780238,6500,11465,3,1.200742
4780239,3408,11465,1,1.385787
4780240,6778,11465,3,1.196681


In [10]:
model.df.shape

(53424, 10000)

In [19]:
len((1,))

1

In [20]:
a = (1,)

In [21]:
a[0]

1

In [39]:
alist = [
    [1,2,3],
    [2,2,4],
    [3,2,5],
    [1,1,5],
    [2,1,1],
    [1,3,7],
    [2,3,6],
    [3,4,1]
]
a = pd.DataFrame(alist, columns = ['iid', 'uid', 'rating'])

In [40]:
a

Unnamed: 0,iid,uid,rating
0,1,2,3
1,2,2,4
2,3,2,5
3,1,1,5
4,2,1,1
5,1,3,7
6,2,3,6
7,3,4,1


In [41]:
a[a.uid.isin([1,2])]

Unnamed: 0,iid,uid,rating
0,1,2,3
1,2,2,4
2,3,2,5
3,1,1,5
4,2,1,1


In [6]:
import msePipeline as mp


pipeline = mp.MSEPipeline()
pipeline.preprocess()

model = mp.MSErec(df = pipeline.archived_ratings)
model.trainModel()

KeyboardInterrupt: 

In [2]:
test = model.getPredictions(pipeline.user_predictions)
test


GROUP1 0       1
8865    1
6806    1
4759    1
8857    1
       ..
3379    1
9526    1
5432    1
7481    1
2047    1
Name: iid, Length: 10000, dtype: int64
GROUP2         uid   iid  prediction
375   53424   880    4.344810
3331  53424  1478    4.227951
2107  53424  1273    4.199548
6466  53424  9997    4.189937
9327  53424  9225    4.183980
...     ...   ...         ...
2281  53424   628    1.399003
7355  53424  8124    1.374153
2185  53424  7970    1.246541
3928  53424  8370    1.208228
4310  53424  3584    1.129501

[10000 rows x 3 columns]


Unnamed: 0,uid,iid,prediction
375,53424,880,4.34481
3331,53424,1478,4.227951
2107,53424,1273,4.199548
6466,53424,9997,4.189937
9327,53424,9225,4.18398
8274,53424,6753,4.168283
6572,53424,6583,4.149586
7441,53424,6023,4.112508
9567,53424,8012,4.076821
9743,53424,6158,4.074473
