In [None]:
%matplotlib inline
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy
import scipy.io
import scipy.sparse as sp
from scipy.sparse.linalg import spsolve

from math import sqrt

%load_ext autoreload
%autoreload 2

In [None]:
import sys
sys.path.insert(1, '../utilities/')
from helpers import *
from plots import *

# Load of training data

Each ID identifies both a user and an item: users are labelled "r" + index (from 1 to 10000) and items "c" + index (from 1 to 1000). An ID therefore has the form `r<user index>_c<item index>`.
The ratings are given as integers from 1 to 5.

We store the ratings in a matrix with items as rows and users as columns. Missing ratings are stored as 0; these are the entries that must be predicted as valid ratings.

In [None]:
# Location of the training ratings CSV, relative to the notebook's working directory.
DATA_TRAIN_PATH = '../Datasets/data_train.csv'
# load_data is provided by the star-import from helpers.
# NOTE(review): presumably returns a scipy sparse (items x users) matrix -- confirm in helpers.
ratings = load_data(DATA_TRAIN_PATH)

In [None]:
# Dimensions of the loaded rating matrix (shown as the cell output).
ratings.shape

# Data analysis

### Plot the number of ratings per movie and user

In [None]:
# plot_raw_data comes from the star-imports at the top of the notebook; its two return
# values are reused by split_data to filter out sparsely rated users and items.
num_items_per_user, num_users_per_item = plot_raw_data(ratings)

# Smallest number of ratings found for any user and for any item.
print("min # of items per user = {}, min # of users per item = {}.".format(
        min(num_items_per_user), min(num_users_per_item)))

### Data preprocessing - Split the data into a train and test set

In [None]:
def split_data(ratings, num_items_per_user, num_users_per_item,
               min_num_ratings, p_test=0.1, seed=988):
    """Filter sparsely rated users/items, then randomly split the ratings.

    Args:
        ratings: sparse rating matrix with items as rows and users as columns.
        num_items_per_user: array holding, for every user (column), the number
            of ratings that user gave.
        num_users_per_item: array holding, for every item (row), the number of
            ratings that item received.
        min_num_ratings: users and items are kept only if they have at least
            this many ratings.
        p_test: fraction of the kept ratings that goes into the test set.
        seed: value passed to ``np.random.seed`` before drawing the test
            indices. Defaults to 988, the value that used to be hard-coded,
            so existing calls produce the same split as before.

    Returns:
        ``(valid_ratings, train, test)``: the filtered matrix and two COO
        matrices of the same shape that partition its stored entries.
    """
    # Seeding the global NumPy generator makes the split reproducible.
    np.random.seed(seed)

    # Keep only users and items with at least `min_num_ratings` ratings.
    valid_users = np.where(num_items_per_user >= min_num_ratings)[0]
    valid_items = np.where(num_users_per_item >= min_num_ratings)[0]
    valid_ratings = ratings[valid_items, :][:, valid_users]

    # Draw the test entries without replacement; every other stored entry is
    # training data.
    num_ratings = valid_ratings.nnz
    ind_test = np.random.choice(num_ratings, int(num_ratings * p_test), replace=False)
    ind_train = np.delete(np.arange(num_ratings), ind_test)

    # COO exposes parallel (data, row, col) arrays that can be indexed directly.
    valid_ratings_coo = valid_ratings.tocoo()
    data = valid_ratings_coo.data
    row = valid_ratings_coo.row
    col = valid_ratings_coo.col

    test = sp.coo_matrix((data[ind_test], (row[ind_test], col[ind_test])),
                         shape=valid_ratings.shape)
    train = sp.coo_matrix((data[ind_train], (row[ind_train], col[ind_train])),
                          shape=valid_ratings.shape)

    print("Total number of nonzero elements in origial data:{v}".format(v=ratings.nnz))
    print("Total number of nonzero elements in train data:{v}".format(v=train.nnz))
    print("Total number of nonzero elements in test data:{v}".format(v=test.nnz))
    return valid_ratings, train, test

In [None]:
# Keep users/items with at least 10 ratings and hold out 10% of the remaining ratings for testing.
valid_ratings, train, test = split_data(
    ratings, num_items_per_user, num_users_per_item, min_num_ratings=10, p_test=0.1)
# plot_train_test_data comes from the star-imports at the top of the notebook.
plot_train_test_data(train, test)

# Standard implementations

## Implementing Baselines 

### Use the global mean to do the prediction

In [None]:
def baseline_global_mean(train, test):
    """Baseline: predict every test rating with the mean of all training ratings.

    Returns the square root of ``calculate_mse(...)`` divided by the number of
    stored test ratings.
    NOTE(review): this is an RMSE only if ``calculate_mse`` returns the summed
    squared error -- confirm in helpers.
    """
    mean_rating = np.mean(train.data)
    error = calculate_mse(test.data, mean_rating)
    return sqrt(error / test.nnz)

# Score of the global-mean baseline on the held-out split (shown as the cell output).
baseline_global_mean(train, test)

### Use the user means as the prediction

In [None]:
def baseline_user_mean(train, test):
    """Baseline: predict each test rating with the mean training rating of its user.

    Args:
        train: sparse COO training matrix (items as rows, users as columns).
        test: sparse COO test matrix of the same shape.

    Returns:
        Square root of ``calculate_mse(...)`` divided by the number of stored
        test ratings.

    Users without any training rating are predicted with the global training
    mean. Dividing their zero sum by a zero count would give NaN and make the
    whole score NaN.
    """
    # Columns are users, so reducing over axis 0 gives one total per user.
    # ravel() keeps the result 1-D even when there is a single user.
    rating_sums = np.asarray(train.sum(axis=0)).ravel()
    # Number of stored entries in every column, read from the CSC layout.
    rating_counts = np.diff(train.tocsc().indptr)

    fallback = np.mean(train.data) if train.nnz > 0 else np.nan
    user_means = np.full(rating_counts.shape, fallback, dtype=float)
    has_ratings = rating_counts > 0
    user_means[has_ratings] = rating_sums[has_ratings] / rating_counts[has_ratings]

    # test.col holds the user index of every stored test rating.
    return sqrt(calculate_mse(test.data, user_means[test.col]) / test.nnz)

# Score of the per-user-mean baseline on the held-out split (shown as the cell output).
baseline_user_mean(train, test)

### Use the item means as prediction

In [None]:
def baseline_item_mean(train, test):
    """Baseline: predict each test rating with the mean training rating of its item.

    Args:
        train: sparse COO training matrix (items as rows, users as columns).
        test: sparse COO test matrix of the same shape.

    Returns:
        Square root of ``calculate_mse(...)`` divided by the number of stored
        test ratings.

    Items without any training rating are predicted with the global training
    mean. Dividing their zero sum by a zero count would give NaN and make the
    whole score NaN.
    """
    # Rows are items, so reducing over axis 1 gives one total per item.
    # ravel() keeps the result 1-D even when there is a single item.
    rating_sums = np.asarray(train.sum(axis=1)).ravel()
    # Number of stored entries in every row, read from the CSR layout.
    rating_counts = np.diff(train.tocsr().indptr)

    fallback = np.mean(train.data) if train.nnz > 0 else np.nan
    item_means = np.full(rating_counts.shape, fallback, dtype=float)
    has_ratings = rating_counts > 0
    item_means[has_ratings] = rating_sums[has_ratings] / rating_counts[has_ratings]

    # test.row holds the item index of every stored test rating.
    return sqrt(calculate_mse(test.data, item_means[test.row]) / test.nnz)
    
# Score of the per-item-mean baseline on the held-out split (shown as the cell output).
baseline_item_mean(train, test)

### Use the item/user means as prediction

In [None]:
from scipy.sparse.linalg import spsolve

def baseline_item_user(train, test):
    """Baseline: fit the additive model y_dn = w_0 + w_item[d] + w_user[n] and score it.

    The D + N + 1 parameters are obtained by solving one sparse square linear
    system built from the per-item and per-user rating counts and sums of the
    training matrix (items as rows, users as columns).

    Side effect: draws a scatter plot of predicted vs. actual test ratings on
    the current matplotlib figure.

    Returns the square root of calculate_mse(...) divided by the number of
    stored test ratings.
    """
    
    global_mean = np.mean(train.data)
    
    # Per-user aggregates. Despite the "_movie" suffix these are reductions over
    # axis 0 (the item rows), i.e. one value per column/user.
    sum_ratings_movie = np.squeeze(np.asarray(train.sum(0)))    # sum of the stored ratings in each column (user)
    count_ratings_movie = np.diff(train.tocsc().indptr)         # number of stored ratings in each column (user)
    
    # Per-item aggregates. Despite the "_user" suffix these are reductions over
    # axis 1 (the user columns), i.e. one value per row/item.
    sum_ratings_user = np.squeeze(np.asarray(train.sum(1)))    # sum of the stored ratings in each row (item)
    count_ratings_user = np.diff(train.tocsr().indptr)         # number of stored ratings in each row (item)
    
    num_items, num_users = train.shape
    
    # Constructing linear system defining the model's optimal parameters in form of a matrix.
    # Unknowns are ordered [w_item (D), w_user (N), w_0].
    
    # Matrix of the same shape as ratings, 1 if rating present, 0 otherwise
    mask_train = sp.coo_matrix((np.ones(train.nnz), (train.row, train.col)), shape=train.shape) 
    
    # Item rows: [diag(per-item counts) | mask]
    A = sp.hstack((sp.diags(count_ratings_user), mask_train))
    # User rows: [mask^T | diag(per-user counts)]
    A = sp.vstack((A, sp.hstack((mask_train.T, sp.diags(count_ratings_movie)))))
    # Extra column multiplying w_0.
    # NOTE(review): this column is concatenated as (per-user counts, per-item counts),
    # whereas the rows above and the right-hand side b are ordered items first, then
    # users. Verify this ordering is intended before changing it: the aligned ordering
    # may make the system singular.
    A = sp.hstack((A, sp.coo_matrix(np.concatenate((count_ratings_movie,count_ratings_user))).T))
    # Last row: the sum of all parameters is constrained to equal the global mean.
    A = sp.vstack((A, sp.coo_matrix(np.ones(num_items+num_users+1))))
    
    # Right-hand side: per-item sums, per-user sums, then the global mean for the last row.
    b = np.append(np.concatenate((sum_ratings_user, sum_ratings_movie)),global_mean)
    
    # Solving the system
    x = spsolve(A.tocsc(),b)
    
    # Extracting the parameters w_0, w_item[d] and w_user[n] 
    w_item, w_user, w_0 = np.split(x,np.array([num_items,num_items+num_users]))
    
    # Plot prediction versus test values
    plt.title("predicted vs actual ratings")
    plt.xlabel("predicted ratings")
    plt.ylabel("actual ratings")
    plt.scatter(w_item[test.row] + w_user[test.col] + w_0, test.data,s=0.1)
    
    return sqrt(calculate_mse(test.data, w_item[test.row] + w_user[test.col] + w_0)/(test.nnz))
    
# Score of the additive item + user baseline; the call also draws its scatter plot.
baseline_item_user(train, test)

### Learn the Matrix Factorization using SGD

#### Initialize matrix factorization

In [None]:
def init_MF(train, num_features):
    """Draw random non-negative starting factors for the matrix factorization.

    Args:
        train: sparse training matrix (items as rows, users as columns).
        num_features: number of latent features K.

    Returns:
        ``(user_features, item_features)`` with shapes
        ``(num_features, num_users)`` and ``(num_features, num_items)``.
        Entries are uniform in ``[0, scale)`` with
        ``scale = 2 * sqrt(mean_rating / num_features)``, so the expected
        initial prediction of any rating equals the mean training rating.
    """
    num_items, num_users = train.shape
    scale = 2 * sqrt(np.mean(train.data) / num_features)

    # The user factors are drawn first; the draw order fixes the values
    # obtained for a given seed.
    user_features = scale * np.random.rand(num_features, num_users)
    item_features = scale * np.random.rand(num_features, num_items)
    return user_features, item_features

#### Compute errors

In [None]:
def compute_error(data, user_features, item_features, nz):
    """Score the factorization on the rated entries only.

    Args:
        data: observed ratings, aligned with the index pairs in ``nz``.
        user_features: array of shape (num_features, num_users).
        item_features: array of shape (num_features, num_items).
        nz: ``(row_indices, col_indices)`` of the rated entries.

    Returns:
        Square root of ``calculate_mse(...)`` divided by ``len(data)``.
    """
    # Dense (items x users) matrix of predicted ratings.
    predictions = item_features.T.dot(user_features)
    error = calculate_mse(data, predictions[nz])
    return sqrt(error / len(data))

In [None]:
# NOTE(review): this cell defines compute_error a second time; the cell directly
# above already contains an equivalent definition, so one of the two can be deleted.
def compute_error(data, user_features, item_features, nz):
    """Score the factorization on the rated entries only.

    Args:
        data: observed ratings, aligned with the index pairs in ``nz``.
        user_features: array of shape (num_features, num_users).
        item_features: array of shape (num_features, num_items).
        nz: ``(row_indices, col_indices)`` of the rated entries.

    Returns:
        Square root of ``calculate_mse(...)`` divided by ``len(data)``.
    """
    # Dense (items x users) matrix of predicted ratings.
    predictions = item_features.T.dot(user_features)
    error = calculate_mse(data, predictions[nz])
    return sqrt(error / len(data))

In [None]:
def clip_pred(user_features, item_features):
    """Return the dense prediction matrix with values snapped to valid ratings.

    Predictions above 4.9 become 5 and predictions below 1.1 become 1.
    Predictions strictly within 0.1 of 2, 3 or 4 are set to that integer.
    All other predictions are left unchanged.
    """
    predictions = item_features.T.dot(user_features)

    # Saturate both ends of the rating scale first.
    predictions[predictions > 4.9] = 5
    predictions[predictions < 1.1] = 1

    # Snap values lying in a narrow open band around the interior ratings.
    for lower, upper, rating in ((1.9, 2.1, 2), (2.9, 3.1, 3), (3.9, 4.1, 4)):
        in_band = (predictions > lower) & (predictions < upper)
        predictions[in_band] = rating

    return predictions

In [None]:
def compute_clipped_error(data, user_features, item_features, nz):
    """Score the clipped predictions (see ``clip_pred``) on the rated entries.

    Same arguments and normalisation as ``compute_error``; only the prediction
    matrix differs.
    """
    clipped = clip_pred(user_features, item_features)
    error = calculate_mse(data, clipped[nz])
    return sqrt(error / len(data))

#### Matrix factorization SGD basic

In [None]:
def matrix_factorization_SGD(train, test):
    """Fit an unregularised matrix factorization with SGD and print its progress.

    Uses fixed hyper-parameters (25 features, 30 epochs, initial step size
    0.05 divided by 1.2 at the start of every epoch) and seed 988. After every
    epoch the training RMSE and the clipped test RMSE are printed. Nothing is
    returned.
    """
    num_features = 25   # K in the lecture notes
    num_epochs = 30     # number of full passes through the train set
    step_size = 0.05

    np.random.seed(988)
    user_features, item_features = init_MF(train, num_features)

    # Index pairs of the rated entries, reused for every evaluation.
    train_nz = train.nonzero()
    test_nz = test.nonzero()
    samples = list(zip(train_nz[0], train_nz[1], train.data))

    print("learn the matrix factorization using SGD...")
    print("initial RMSE on training set: {}, RMSE on testing set: {}.".format(
        compute_error(train.data, user_features, item_features, train_nz),
        compute_clipped_error(test.data, user_features, item_features, test_nz)))

    for epoch in range(num_epochs):
        # Visit the ratings in a fresh random order with a smaller step.
        np.random.shuffle(samples)
        step_size /= 1.2

        for d, n, x_dn in samples:
            # Column views: the in-place updates below write into the matrices.
            item_vec = item_features[:, d]
            user_vec = user_features[:, n]
            # The item vector is updated first; the user update then uses the
            # already-updated item vector.
            item_vec += step_size * (x_dn - np.inner(item_vec, user_vec)) * user_vec
            user_vec += step_size * (x_dn - np.inner(item_vec, user_vec)) * item_vec

        train_rmse = compute_error(train.data, user_features, item_features, train_nz)
        test_rmse = compute_clipped_error(test.data, user_features, item_features, test_nz)
        print("iter: {}, RMSE on training set: {}, RMSE on testing set: {}.".format(
            epoch, train_rmse, test_rmse))

    # Final evaluation on the held-out ratings.
    final_rmse = compute_clipped_error(test.data, user_features, item_features, test_nz)
    print("RMSE on test data: {}.".format(final_rmse))

# Run the basic SGD factorization; progress is printed epoch by epoch.
matrix_factorization_SGD(train, test)

#### Matrix factorization SGD regularized

In [None]:
def matrix_factorization_SGD_regularized(train, test, num_features, lambda_user, lambda_item, gamma, gamma_dec_step_size, num_epochs, seed, stop_criterion):
    """Fit an L2-regularised matrix factorization with SGD.

    Args:
        train, test: sparse COO rating matrices (items as rows, users as columns).
        num_features: number of latent features K.
        lambda_user, lambda_item: regularisation weights of the user / item factors.
        gamma: initial step size; it is divided by ``gamma_dec_step_size`` at
            the start of every epoch.
        num_epochs: maximum number of passes over the training ratings.
        seed: seed for the global NumPy generator.
        stop_criterion: passed as the relative tolerance of ``np.isclose`` when
            comparing the training RMSE of consecutive epochs.

    Returns:
        The smallest clipped test RMSE seen over all evaluations, including
        the one made before training.
    """
    np.random.seed(seed)
    user_features, item_features = init_MF(train, num_features)

    # (item, user, rating) triples visited by SGD, plus the index pairs used
    # by every evaluation.
    samples = list(zip(train.row, train.col, train.data))
    train_nz = train.nonzero()
    test_nz = test.nonzero()

    print("learn the matrix factorization using SGD...")
    rmse_tr = [compute_error(train.data, user_features, item_features, train_nz)]
    rmse_te = [compute_clipped_error(test.data, user_features, item_features, test_nz)]
    print("initial RMSE on training set: {}, RMSE on testing set: {}.".format(rmse_tr[0], rmse_te[0]))

    for epoch in range(num_epochs):
        np.random.shuffle(samples)
        gamma /= gamma_dec_step_size

        for d, n, x_dn in samples:
            # Column views: the in-place updates below write into the matrices.
            item_vec = item_features[:, d]
            user_vec = user_features[:, n]
            # The item vector is updated first; the user update then uses the
            # already-updated item vector.
            item_vec += gamma * ((x_dn - np.inner(item_vec, user_vec)) * user_vec - lambda_item * item_vec)
            user_vec += gamma * ((x_dn - np.inner(item_vec, user_vec)) * item_vec - lambda_user * user_vec)

        rmse_tr.append(compute_error(train.data, user_features, item_features, train_nz))
        rmse_te.append(compute_clipped_error(test.data, user_features, item_features, test_nz))
        print("iter: {}, RMSE on training set: {}, RMSE on testing set: {}.".format(epoch, rmse_tr[-1], rmse_te[-1]))

        # Stop once the training error has stalled or has risen above its
        # starting value.
        stalled = np.isclose(rmse_tr[-1], rmse_tr[-2], stop_criterion)
        if stalled or rmse_tr[-1] > rmse_tr[0]:
            break

    min_rmse_te = min(rmse_te)
    print("RMSE on test data: {}.".format(min_rmse_te))
    return min_rmse_te


In [None]:
# Hyper-parameters for a single run of the regularised SGD factorization.
num_features = 40   # K in the lecture notes

# L2 regularisation weights of the user and item factors.
lambda_user = 0.08
lambda_item = 0.08
    
# Initial step size and the factor it is divided by at the start of every epoch.
gamma = 0.05
gamma_dec_step_size = 1.2
num_epochs = 30     # number of full passes through the train set
# Passed to np.isclose as the relative tolerance on the change of the training RMSE.
stop_criterion = 1e-4
    
seed = 988

matrix_factorization_SGD_regularized(train, test, num_features, lambda_user, lambda_item, gamma, gamma_dec_step_size, num_epochs, seed, stop_criterion)


##### Tuning Hyperparameters

In [None]:
def plot_simple_heatmap(data, title, xlabel, xticklabels, ylabel, yticklabels):
    """Draw ``data`` as a single heatmap with labelled ticks and a colorbar.

    One tick is placed per entry of ``xticklabels`` / ``yticklabels``, at
    positions 0, 1, 2, ... along the respective axis. The figure is created
    here and is not returned.
    """
    fig, ax = plt.subplots()
    ax.set_title(title)

    ax.set_xlabel(xlabel)
    ax.set_xticks(range(len(xticklabels)))
    ax.set_xticklabels(xticklabels)

    ax.set_ylabel(ylabel)
    ax.set_yticks(range(len(yticklabels)))
    ax.set_yticklabels(yticklabels)

    image = ax.imshow(data)
    fig.colorbar(image, ax=ax)
    
# Smoke test of plot_simple_heatmap on random data (5 rows -> y ticks, 7 columns -> x ticks).
data = np.random.rand(5,7)
plot_simple_heatmap(data, "title", "xlabel",np.arange(7), "ylabel",np.arange(5))

In [None]:
def plot_simple_heatmaps(data_1, data_2, fig_title, subtitle_1, subtitle_2, xlabel_shared, ylabel_shared):
    """Draw two heatmaps stacked vertically, each with its own colorbar.

    Both panels get the same axis labels. ``subtitle_1`` / ``subtitle_2``
    title the upper / lower panel and ``fig_title`` titles the whole figure.
    The figure is created here and is not returned.
    """
    fig, axes = plt.subplots(2, 1)

    images = []
    panels = zip(axes, (data_1, data_2), (subtitle_1, subtitle_2))
    for ax, grid, subtitle in panels:
        ax.set_xlabel(xlabel_shared)
        ax.set_ylabel(ylabel_shared)
        ax.set_title(subtitle)
        images.append(ax.imshow(grid))

    # The layout is tightened before the colorbars are attached.
    plt.tight_layout()

    for image, ax in zip(images, axes):
        fig.colorbar(image, ax=ax)

    fig.suptitle(fig_title)
    
# Smoke test of plot_simple_heatmaps on two random 200 x 300 grids.
data_1 = np.random.rand(200,300)
data_2 = np.random.rand(200,300)
plot_simple_heatmaps(data_1, data_2, 'fig_title', 'subtitle_1', 'subtitle_2', 'xlabel_shared', 'ylabel_shared')

Finding the best initial gamma and the best decrement step size to compare SGD with ALS.

In [None]:
# The grid search over the initial step size (gamma) and its per-epoch decrement is
# kept inside a string literal so that it does not run with the notebook. The code
# in the string saves its result to RMSE_test_tuning_gammas.npy, which the next cell loads.
"""
# define parameters 
num_features = 25   # K in the lecture notes

lambda_user = 0.08
lambda_item = 0.08
    
gammas = np.logspace(-2,-1,num = 5)
gamma_dec_step_sizes = np.linspace(1.1,1.3,5)


num_epochs = 30     # number of full passes through the train set
stop_criterion = 1e-4
    
seeds = np.array([988,1000])
#seeds = np.array([988])

rmse_te = np.zeros((len(seeds),len(gammas),len(gamma_dec_step_sizes)))

for ind_seed, seed in enumerate(seeds):
    for ind_gamma, gamma in enumerate(gammas):
        for ind_gamma_dec_step_size, gamma_dec_step_size in enumerate(gamma_dec_step_sizes):
            print("seed ({}/{}) = {}".format(ind_seed+1, len(seeds), seed))
            print("gamma ({}/{}) = {}".format(ind_gamma+1, len(gammas), gamma))
            print("stepsize decrement ({}/{}) = {}".format(ind_gamma_dec_step_size+1, len(gamma_dec_step_sizes), gamma_dec_step_size))
            
            rmse_te[ind_seed,ind_gamma,ind_gamma_dec_step_size] = matrix_factorization_SGD_regularized(train, test, num_features, lambda_user, lambda_item, gamma, gamma_dec_step_size, num_epochs, seed, stop_criterion)

np.save('../results_of_lengthy_computations/RMSE_test_tuning_gammas',rmse_te)
"""

In [None]:
# Load the cached result of the gamma / step-size-decrement grid search.
# NOTE(review): the variable name says "lambdas" although it holds the gamma grid, and the
# same name is assigned again by the lambda heatmap cell further down -- consider renaming.
RMSE_test_tuning_lambdas = np.load('../results_of_lengthy_computations/RMSE_test_tuning_gammas.npy')
# Same grids as in the tuning code; used here only to label the heatmap ticks.
gammas = np.logspace(-2,-1,num = 5)
gamma_dec_step_sizes = np.linspace(1.1,1.3,5)
# Axis 0 is reduced with min before plotting; in the tuning code that axis indexes the seeds.
plot_simple_heatmap(np.min(RMSE_test_tuning_lambdas, axis = 0), "RMSE", "decrement",np.around(gamma_dec_step_sizes,2), "gamma",np.around(gammas,2))
plt.savefig('../plots/heatmap_tuning_gammas.png')

### Learn the Matrix Factorization using Alternating Least Squares

In [None]:
def update_user_feature(
        train, item_features, lambda_user,
        nnz_items_per_user, nz_user_itemindices):
    """Recompute every user's feature vector with the item features held fixed.

    For each user n the ridge-regression normal equations

        (Z Z^T + lambda_user * nnz_items_per_user[n] * I) w = Z x

    are solved, where Z holds the feature vectors of the items rated by the
    user and x holds the corresponding ratings. The regularisation weight
    therefore grows with the number of ratings of the user.

    Args:
        train: sparse training matrix (items as rows, users as columns).
        item_features: array of shape (num_features, num_items).
        lambda_user: base regularisation weight.
        nnz_items_per_user: number of ratings of every user.
        nz_user_itemindices: for every user, the indices of the rated items.

    Returns:
        Array of shape (num_features, num_users). A user without any rating
        keeps an all-zero vector instead of raising a singular-matrix error.
    """
    num_features = item_features.shape[0]
    num_users = train.shape[1]
    user_features = np.zeros((num_features, num_users))
    identity = np.identity(num_features)

    # CSC stores each column (user) contiguously, so the ratings of a user are
    # read by slicing instead of densifying one column per iteration.
    train_csc = train.tocsc()

    for n in range(num_users):
        rated_items = nz_user_itemindices[n]
        start, stop = train_csc.indptr[n], train_csc.indptr[n + 1]

        if len(rated_items) == 0 and start == stop:
            # Nothing is known about this user, so both sides of the system
            # are zero; keep the zero vector.
            continue

        # Only the rated items contribute to the Gram matrix, so restrict the
        # product to their columns rather than multiplying zero-padded copies.
        rated_features = item_features[:, rated_items]
        gram = rated_features.dot(rated_features.T)
        gram = gram + lambda_user * nnz_items_per_user[n] * identity

        stored_items = train_csc.indices[start:stop]
        rhs = item_features[:, stored_items].dot(train_csc.data[start:stop])

        user_features[:, n] = np.linalg.solve(gram, rhs)

    return user_features

def update_item_feature(
        train, user_features, lambda_item,
        nnz_users_per_item, nz_item_userindices):
    """Recompute every item's feature vector with the user features held fixed.

    For each item d the ridge-regression normal equations

        (W W^T + lambda_item * nnz_users_per_item[d] * I) z = W x

    are solved, where W holds the feature vectors of the users who rated the
    item and x holds the corresponding ratings. The regularisation weight
    therefore grows with the number of ratings of the item.

    Args:
        train: sparse training matrix (items as rows, users as columns).
        user_features: array of shape (num_features, num_users).
        lambda_item: base regularisation weight.
        nnz_users_per_item: number of ratings of every item.
        nz_item_userindices: for every item, the indices of the rating users.

    Returns:
        Array of shape (num_features, num_items). An item without any rating
        keeps an all-zero vector instead of raising a singular-matrix error.
    """
    num_features = user_features.shape[0]
    num_items = train.shape[0]
    item_features = np.zeros((num_features, num_items))
    identity = np.identity(num_features)

    # CSR stores each row (item) contiguously, so the ratings of an item are
    # read by slicing instead of densifying one row per iteration.
    train_csr = train.tocsr()

    for d in range(num_items):
        rating_users = nz_item_userindices[d]
        start, stop = train_csr.indptr[d], train_csr.indptr[d + 1]

        if len(rating_users) == 0 and start == stop:
            # Nothing is known about this item, so both sides of the system
            # are zero; keep the zero vector.
            continue

        # Only the rating users contribute to the Gram matrix, so restrict the
        # product to their columns rather than multiplying zero-padded copies.
        rater_features = user_features[:, rating_users]
        gram = rater_features.dot(rater_features.T)
        gram = gram + lambda_item * nnz_users_per_item[d] * identity

        stored_users = train_csr.indices[start:stop]
        rhs = user_features[:, stored_users].dot(train_csr.data[start:stop])

        item_features[:, d] = np.linalg.solve(gram, rhs)

    return item_features

In [None]:
def ALS(train, test):
    """Run Alternating Least Squares with fixed hyper-parameters and print its progress.

    Uses 25 features, a regularisation weight of 0.08 for users and items, at
    most 30 iterations and seed 988. Iteration stops early once two
    consecutive training RMSE values are close according to ``np.isclose``
    with relative tolerance 1e-4. Nothing is returned.
    """
    num_features = 25   # K in the lecture notes
    lambda_user = 0.08
    lambda_item = 0.08
    stop_criterion = 1e-4
    max_iter = 30

    np.random.seed(988)
    user_features, item_features = init_MF(train, num_features)

    # Index pairs of the rated entries, reused for every evaluation.
    train_nz = train.nonzero()
    test_nz = test.nonzero()
    nz_row, nz_col = train_nz
    num_items, num_users = train.shape

    # For every user the rated items, and for every item the rating users.
    nz_user_itemindices = [nz_row[nz_col == n] for n in range(num_users)]
    nnz_items_per_user = np.array([len(items) for items in nz_user_itemindices])
    nz_item_userindices = [nz_col[nz_row == d] for d in range(num_items)]
    nnz_users_per_item = np.array([len(users) for users in nz_item_userindices])

    rmse_tr = compute_error(train.data, user_features, item_features, train_nz)
    rmse_te = compute_error(test.data, user_features, item_features, test_nz)
    print("initial: RMSE on training set: {}, RMSE on testing set: {}.".format(rmse_tr, rmse_te))

    # Training errors of the two most recent evaluations. The older one starts
    # at 0 so that the first convergence check cannot succeed by accident.
    older_tr, newer_tr = 0, rmse_tr
    for it in range(1, max_iter + 1):
        if np.isclose(older_tr, newer_tr, stop_criterion):
            break

        user_features = update_user_feature(
            train, item_features, lambda_user, nnz_items_per_user, nz_user_itemindices)
        item_features = update_item_feature(
            train, user_features, lambda_item, nnz_users_per_item, nz_item_userindices)

        rmse_tr = compute_error(train.data, user_features, item_features, train_nz)
        rmse_te = compute_error(test.data, user_features, item_features, test_nz)
        print("iter: {}, RMSE on training set: {}, RMSE on testing set: {}.".format(it, rmse_tr, rmse_te))

        older_tr, newer_tr = newer_tr, rmse_tr

    rmse = compute_error(test.data, user_features, item_features, test_nz)
    print("RMSE on test data: {}.".format(rmse))
       

# Run ALS with its built-in hyper-parameters; progress is printed per iteration.
ALS(train, test)

Finding the best ridge parameters using ALS

In [None]:
def ALS(train, test, num_features, lambda_user, lambda_item, max_iter, seed,
        stop_criterion=1e-4):
    """Alternating Least Squares with configurable hyper-parameters.

    Args:
        train, test: sparse rating matrices (items as rows, users as columns).
        num_features: number of latent features K.
        lambda_user, lambda_item: base regularisation weights handed to
            ``update_user_feature`` / ``update_item_feature``.
        max_iter: maximum number of alternating updates.
        seed: seed for the global NumPy generator. It used to be ignored in
            favour of a hard-coded 988.
        stop_criterion: relative tolerance handed to ``np.isclose`` when
            comparing consecutive training RMSE values. It used to be read
            from a notebook-level global; the default matches the value the
            notebook assigns to that global.

    Returns:
        The smallest clipped test RMSE seen over all evaluations, including
        the one made before the first update.
    """
    # Honour the caller's seed so that different seeds give different starts.
    np.random.seed(seed)

    user_features, item_features = init_MF(train, num_features)

    nz_row, nz_col = train.nonzero()

    # For every user the rated items, and for every item the rating users.
    nz_user_itemindices = [nz_row[nz_col == n] for n in range(train.shape[1])]
    nnz_items_per_user = np.array([len(items) for items in nz_user_itemindices])
    nz_item_userindices = [nz_col[nz_row == d] for d in range(train.shape[0])]
    nnz_users_per_item = np.array([len(users) for users in nz_item_userindices])

    rmse_tr = [compute_error(train.data, user_features, item_features, train.nonzero())]
    rmse_te = [compute_clipped_error(test.data, user_features, item_features, test.nonzero())]
    print("initial: RMSE on training set: {}, RMSE on testing set: {}.".format(rmse_tr[0], rmse_te[0]))

    for it in range(max_iter):
        user_features = update_user_feature(
            train, item_features, lambda_user, nnz_items_per_user, nz_user_itemindices)
        item_features = update_item_feature(
            train, user_features, lambda_item, nnz_users_per_item, nz_item_userindices)

        rmse_tr.append(compute_error(train.data, user_features, item_features, train.nonzero()))
        rmse_te.append(compute_clipped_error(test.data, user_features, item_features, test.nonzero()))
        print("iter: {}, RMSE on training set: {}, RMSE on testing set: {}.".format(it, rmse_tr[-1], rmse_te[-1]))

        # Stop once the training error has stalled or has risen above its
        # starting value.
        if np.isclose(rmse_tr[-1], rmse_tr[-2], stop_criterion) or rmse_tr[-1] > rmse_tr[0]:
            break

    min_rmse_te = min(rmse_te)
    print("RMSE on test data: {}.".format(min_rmse_te))

    return min_rmse_te


In [None]:
# The grid search over the user / item regularisation weights is kept inside a string
# literal so that it does not run with the notebook. The code in the string saves its
# result to RMSE_test_tuning_lambdas.npy, which the next cell loads.
"""
# define parameters 
num_features = 25   # K in the lecture notes
lambda_users = np.logspace(-2,0,num = 5)
lambda_items = np.logspace(-2,0,num = 5)
stop_criterion = 1e-4
max_iter = 15
    
seeds = np.array([988])

rmse_te = np.zeros((len(seeds),len(lambda_users),len(lambda_items)))

for ind_seed, seed in enumerate(seeds):
    for ind_lambda_user, lambda_user in enumerate(lambda_users):
        for ind_lambda_item, lambda_item in enumerate(lambda_items):
            print("seed ({}/{}) = {}".format(ind_seed+1, len(seeds), seed))
            print("lambda_user ({}/{}) = {}".format(ind_lambda_user+1, len(lambda_users), lambda_user))
            print("lambda_item ({}/{}) = {}".format(ind_lambda_item+1, len(lambda_items), lambda_item))
            
            rmse_te[ind_seed,ind_lambda_user,ind_lambda_item] = ALS(train, test, num_features, lambda_user, lambda_item, max_iter, seed)

np.save('../results_of_lengthy_computations/RMSE_test_tuning_lambdas',rmse_te)
"""

In [None]:
# Load the cached result of the lambda_user / lambda_item grid search.
RMSE_test_tuning_lambdas = np.load('../results_of_lengthy_computations/RMSE_test_tuning_lambdas.npy')
# Same grids as in the tuning code; used here only to label the heatmap ticks.
lambda_users = np.logspace(-2,0,num = 5)
lambda_items = np.logspace(-2,0,num = 5)

# The tuning code stores one (lambda_user x lambda_item) grid per seed, i.e. a 3-D
# array, while the heatmap needs a 2-D grid. Keep the best value over the seeds,
# as the gamma heatmap does. A file that is already 2-D is plotted unchanged.
rmse_lambda_grid = RMSE_test_tuning_lambdas
if rmse_lambda_grid.ndim == 3:
    rmse_lambda_grid = np.min(rmse_lambda_grid, axis=0)

plot_simple_heatmap(rmse_lambda_grid, "RMSE", "lambda item",np.around(lambda_items,2), "lambda user",np.around(lambda_users,2))
plt.savefig('../plots/heatmap_tuning_lambdas.png')

- test and train

- k-means (surely not the most efficient method)
- matrix factorizations
- neural network

# Output

In [None]:
OUTPUT_PATH = '../Datasets/sample_submission.csv'

# TODO: assign the predicted ratings of the submission entries here. The original
# cell left this assignment empty, which made the cell a syntax error.
pred_submission = None

if pred_submission is None:
    # Fail with the intended, explicit error instead of writing an empty submission.
    raise NotImplementedError

# NOTE(review): ids_test is not defined anywhere in the visible notebook -- it must be
# loaded before this call can succeed.
create_csv_submission(ids_test, pred_submission, OUTPUT_PATH)