In [1]:
import os

# Set the native-log filter BEFORE TensorFlow is imported: the variable is
# consulted by TensorFlow's C++ logging, and setting it after the import can
# leave start-up messages unfiltered. '3' asks for errors to be hidden as well.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import math
from time import time

import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
def map_to_existing(pq, filter):
    """Mask ``pq`` element-wise with ``filter``.

    Args:
        pq: tensor to mask.
        filter: tensor multiplied element-wise with ``pq``; entries that are 0
            zero out the matching entry of ``pq``. (The name shadows the
            ``filter`` builtin but is kept so keyword callers keep working.)

    Returns:
        The element-wise product ``pq * filter``.
    """
    # Compute the product once; the previous version evaluated it twice and
    # threw the first result away.
    return tf.math.multiply(pq, filter)

In [3]:
@tf.function
def GradientDescent_optimized(A,filter_matrix,dim=10,alpha=.1,iters=1):
    """Factorise ``A`` into ``p @ q`` with entry-by-entry gradient updates.

    Every pass walks the matrix column by column and, for each entry whose
    ``filter_matrix`` value is non-zero, nudges row ``i`` of ``p`` and column
    ``j`` of ``q`` along the prediction error of that single entry.

    Args:
        A: 2-D ratings matrix; read through ``A.shape`` and ``A[i][j]``.
        filter_matrix: matrix indexed like ``A``; a 0 at ``[i][j]`` means the
            entry is skipped.
        dim: inner dimension of the factorisation (columns of ``p``, rows of ``q``).
        alpha: step size that scales the error before it is applied.
        iters: number of full passes over the matrix.

    Returns:
        ``(p, q, error)``: the two factor variables and the RMSE computed
        after the last pass (the RMSE of the initial random factors when
        ``iters`` is 0 or negative).

    NOTE(review): the body creates ``tf.Variable`` objects and relies on
    ``RMSE`` returning a Python float, so it appears to depend on eager
    execution (the driver cell calls ``tf.config.run_functions_eagerly(True)``).
    """
    # Ascertain what dimensions we are in
    rows    = tf.Variable(A.shape[0],dtype=tf.dtypes.int32)
    cols    = tf.Variable(A.shape[1],dtype=tf.dtypes.int32)
    # Create the P and Q matrices, initialised uniformly in [1, 2)
    p       = tf.Variable(tf.random.uniform(shape = [rows,dim],minval=1, maxval=2,dtype=tf.dtypes.float32))
    q       = tf.Variable(tf.random.uniform(shape = [dim,cols],minval=1, maxval=2,dtype=tf.dtypes.float32))

    # Loop counters: created once and reset with assign(0) where needed
    iteration = tf.Variable(0,dtype=tf.dtypes.int32)
    i = tf.Variable(0,dtype=tf.dtypes.int32)
    j = tf.Variable(0,dtype=tf.dtypes.int32)

    # Bind the result up front so the return below is valid even if the
    # training loop never runs (iters <= 0).
    error = RMSE(A,filter_matrix,p,q)

    # Run 'iters' times
    while tf.less(iteration,iters):
        iteration.assign_add(1)
        j.assign(0)
        # iter through cols
        while tf.less(j, cols):
            # Restart the row scan for every column. Previously the row
            # counter was only reset once per pass, so every column after the
            # first saw i == rows and was skipped entirely.
            i.assign(0)

            # iter through rows
            while tf.less(i, rows):

                # skip if 0
                if tf.equal(0,filter_matrix[i][j]):
                    i.assign_add(1)
                    continue

                # get q col and p row (values as they are before this update)
                q_j = q[:,j]
                p_i = p[i]

                # find the err present
                err = tf.math.subtract(A[i][j],tf.tensordot(q_j,p_i,axes=1))

                # find where to nudge down the gradient
                nudge = tf.math.multiply(err,alpha)

                # update p and q vals; both updates use the pre-update p_i / q_j
                p[i].assign(tf.math.add(p_i, tf.math.multiply(q_j,nudge)))
                q[:,j].assign(tf.math.add(q_j, tf.math.multiply(p_i,nudge)))

                i.assign_add(1)
            j.assign_add(1)
        error = RMSE(A,filter_matrix,p,q)
        # The previous call also passed `TAN`, a name that is not defined
        # anywhere in the notebook and raised NameError after the first pass.
        print(f"rmse now: {error}")
    return p,q, error

In [4]:

# A is assumed to be the Sparse Matrix of Ratings with many holes
def RMSE(A, filter,p,q):
    """Root-mean-square error of ``p @ q`` against ``A`` over the observed entries.

    Args:
        A: ratings matrix; entries that are not observed are expected to be 0
            so that they cancel against the masked prediction.
        filter: matrix shaped like ``A`` whose non-zero entries mark the
            observed ratings.
        p: left factor, multiplied as ``p @ q``.
        q: right factor.

    Returns:
        ``sqrt(sum((A - mask(p @ q))**2) / T)`` as a Python float, where ``T``
        is the number of non-zero entries of ``filter``; 0.0 when ``filter``
        has no non-zero entry. ``math.sqrt`` is kept so the return type stays
        a plain float, which needs a tensor convertible to float (eager mode).
    """
    # T: how many ratings actually exist
    observed_count = tf.math.count_nonzero(filter,dtype=tf.dtypes.float32)

    # build pq with only elements that exist in A
    pq = map_to_existing(tf.matmul(p,q),filter)

    # Sum of squared distances from A
    squared_error_sum = tf.reduce_sum(tf.square(tf.math.subtract(A,pq)))

    # Mean over the observed entries. The previous code multiplied by T
    # instead of dividing by it; divide_no_nan yields 0 when T == 0.
    mean_squared_error = tf.math.divide_no_nan(squared_error_sum, observed_count)

    # Return root of the mean squared distance
    return math.sqrt(mean_squared_error)


In [None]:
# Hide every CUDA device from TensorFlow and make tf.function-decorated
# functions run eagerly (as ordinary Python) instead of as traced graphs.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
tf.config.run_functions_eagerly(True)

# Short aliases for the TensorFlow dtypes used in this notebook
_f32            = tf.dtypes.float32
_i32            = tf.dtypes.int32
# Dataset locations, relative to the working directory
_RATINGS_SMALL  = os.path.join("ml-latest-small","ratings.csv")
_MOVIE_ID_SMALL = os.path.join("ml-latest-small","movies.csv")
# Movie titles; not referenced by any code visible in this notebook.
# NOTE(review): "Ebbings" looks like a typo for "Ebbing" - confirm before
# matching these strings against real titles.
_HEADERS        = ["Black Panther",
                   "Pitch Perfect",
                   "Star Wars: The Last Jedi",
                   "It",
                   "The Big Sick",
                   "Lady Bird",
                   "Pirates of the Caribbean",
                   "Despicable Me",
                   "Coco",
                   "John Wick",
                   "Mamma Mia",
                   "Crazy Rich Asians",
                   "Three Billboards Outside Ebbings, Missouri",
                   "The Incredibles"]

# Lookup tables filled in by read_data():
#   _MOVIE_ID_MAP : row position -> movieId
#   _ROW_MAP      : movieId      -> row position
_MOVIE_ID_MAP      = {}
_ROW_MAP           = {}


def fill(df,method):
    """Replace the missing values of every column of ``df`` in place.

    Args:
        df: DataFrame to modify; its columns are overwritten with filled
            copies, so the caller's object is updated.
        method: ``'mean'`` fills each column with that column's own mean,
            ``'zero'`` fills with 0.

    Returns:
        None. ``df`` itself is modified.

    Raises:
        ValueError: if ``method`` is neither ``'mean'`` nor ``'zero'``.
    """
    # Reject unknown methods explicitly; previously `replacement` was simply
    # left unbound and the failure surfaced as an unrelated NameError.
    if method not in ('mean', 'zero'):
        raise ValueError(f"unknown fill method: {method!r} (expected 'mean' or 'zero')")

    for col in df:
        replacement = df[col].mean() if method == 'mean' else 0
        # Assign the filled column back instead of calling
        # df[col].fillna(..., inplace=True): that form writes to a temporary
        # column object and is not guaranteed to reach `df` (pandas warns
        # about it and ignores it under copy-on-write).
        df[col] = df[col].fillna(replacement)


def read_data():
    """Load the ratings CSV and turn it into a dense tensor plus a 0/1 mask.

    Side effects:
        Prints progress and timing, and fills the module-level ``_MOVIE_ID_MAP``
        (row position -> movieId) and ``_ROW_MAP`` (movieId -> row position)
        from the order of the rows in the movies CSV.

    Returns:
        ``(ratings, mask)``: ``ratings`` is the movieId-by-userId pivot of the
        ratings file as a float32 tensor with missing ratings set to 0;
        ``mask`` is a float32 tensor of the same shape holding 1 wherever
        ``ratings`` is non-zero and 0 elsewhere.
    """
    print("\tReading data")
    started = time()

    # Every column of ratings.csv is parsed as float32
    rating_dtypes = dict.fromkeys(('userId', 'movieId', 'rating', 'timestamp'), np.float32)
    ratings_frame = pd.read_csv(_RATINGS_SMALL, dtype=rating_dtypes)
    movies_frame = pd.read_csv(_MOVIE_ID_SMALL, dtype={'movieId': np.float32})

    # Reshape to one row per movieId and one column per userId
    ratings_by_movie = ratings_frame.pivot(index='movieId', columns='userId', values='rating')

    # Record movieId <-> row position in both directions.
    # NOTE(review): positions come from movies.csv, while the matrix rows come
    # from the pivot of ratings.csv; the two only line up if both files list
    # the same movies in the same order - confirm against the data.
    for position, movie_id in enumerate(movies_frame['movieId']):
        _ROW_MAP[movie_id] = position
        _MOVIE_ID_MAP[position] = movie_id

    # Missing ratings become 0
    fill(ratings_by_movie, 'zero')

    # DataFrame -> dense float32 tensor
    dense_ratings = tf.convert_to_tensor(ratings_by_movie, dtype=_f32)

    # Mask: keep only the non-zero entries, replace each by 1, densify again
    sparse_ratings = tf.sparse.from_dense(dense_ratings)
    sparse_ones = tf.sparse.map_values(tf.ones_like, sparse_ratings)
    observed_mask = tf.cast(tf.sparse.to_dense(sparse_ones), dtype=_f32)

    elapsed = time() - started
    print(f"\tRead data in {elapsed:.3f} seconds")
    return dense_ratings, observed_mask

# Load the dense ratings tensor and the matching 0/1 mask of observed entries
ratings, filter_matrix = read_data()
# Ascertain what dimensions we are in
# (rows / cols are not used by any later statement in this cell)
rows    = tf.Variable(ratings.shape[0],dtype=tf.dtypes.int32)
cols    = tf.Variable(ratings.shape[1],dtype=tf.dtypes.int32)

# Candidate hyper-parameter values; the single call at the bottom of this cell
# does not read them and passes its own literals instead.
alphas = [.001,.0001]
iter_val = [1,3,5]
dimensions = [40,50]


# Empty result table: runs[alpha][dim] -> {'time': [], 'rmse': []}
runs =      {a : { d : {'time' : [], 'rmse' : []} for d in dimensions} for a in alphas}
# One single-letter code per step size (presumably matplotlib colour codes
# for plotting - TODO confirm; no plotting code is visible here)
colors =    {.00001 : 'k', .5 : 'g', .000001 : 'r', .05 : 'm', .01 : 'c', .001 : 'b', .0001 : 'y'}

# One training run. Despite its plural name, `errors` receives the single
# RMSE value that the function returns as its third element.
p,q,errors = GradientDescent_optimized(ratings,filter_matrix,dim=20,iters=10,alpha = .001)



	Reading data
	Read data in 0.341 seconds
