In [1]:
import tensorflow as tf

from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, Reshape, Conv2DTranspose, Activation
from tensorflow.keras import Model, backend as K
from tensorflow.keras.regularizers import l2

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

%matplotlib inline

In [2]:

ratings = pd.read_csv('../Data/MovieLens/ml-latest-small/ratings.csv')
movies = pd.read_csv('../Data/MovieLens/ml-latest-small/movies.csv', index_col='movieId')

In [65]:
def create_rating_matrix(ratings):
    nr_users = int(max(ratings['userId']))
    nr_movies = int(max(ratings['movieId']))
    rating_matrix = np.zeros((nr_users, nr_movies))
    rating_mask = np.zeros((nr_users, nr_movies))
    for index, row in ratings.iterrows():
      if row['movieId'] < nr_movies and row['userId'] < nr_movies:
        rating_matrix[int(row['userId']) - 1, int(row['movieId']) - 1] = row['rating']
        rating_mask[int(row['userId']) - 1, int(row['movieId']) - 1] = 1.0
    return rating_matrix, rating_mask

def convert_to_implicit(rating_matrix):
    return rating_matrix - 3.0 >= 0

def augment_unobserved(rating_mask, rate = 0.2):
    # augment each users observations with unobserved
    
    def activate_ratings(single_row):
        unobserved = single_row == 0.
        nr_observed = len(single_row.nonzero()[0])
        new_observations = round(nr_observed * rate)
        
        single_row[np.random.choice(unobserved.nonzero()[0], new_observations)] = 1.0
        return single_row
    
    return np.apply_along_axis(activate_ratings, 1, rating_mask)


array([[1., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [66]:
rating_matrix, rating_mask = create_rating_matrix(ratings)
binary_ratings = convert_to_implicit(rating_matrix)
rating_mask = augment_unobserved(rating_mask)

100835.0

In [70]:
rating_mask.sum()

120991.0