In [1]:
%%write_and_run CreateMatrix.py

import pandas as pd
import numpy as np
from IPython.display import display
import random
from userclass import User
from sklearn.model_selection import LeaveOneOut
from Utils import recepten_bij_ingredient, ingredienten_bij_recept, recepten_bij_tag, tags_bij_recept, fillInMatrix, customUserDecoder
import json

# Ingredient table read once when the module is executed; the first CSV column
# becomes the index. The 'recipe' and 'ingredient' columns are used further down.
# The path needs no f-string: it contains no placeholders.
ingredientsdf = pd.read_csv("/data/foodboost/ingredients.csv", index_col=0)

#TRAIN_Matrix
def generateTrainMatrix(Favorieten_Recepten, Columns, randomRecepten):
    """Build the leave-one-out training matrix for one list of favourite recipes.

    Every leave-one-out split of ``Favorieten_Recepten`` contributes two
    consecutive rows (``index_counter`` and ``index_counter + 1``):

    * left half (columns ``Columns``): filled through ``fillInMatrix`` with the
      unique ingredients of the recipes that remain in the split;
    * right half (the same columns prefixed with ``"2-"``): the first row gets
      the ingredients of the held-out recipe, the second row the ingredients of
      a recipe drawn with ``random.choice`` from ``randomRecepten``.

    The boolean flags handed to ``fillInMatrix`` are passed exactly as before;
    their meaning is defined in ``Utils.fillInMatrix``.

    Parameters
    ----------
    Favorieten_Recepten : sequence of recipe names
        Favourite recipes of one user. ``LeaveOneOut`` requires at least two.
    Columns : iterable of str
        Ingredient names used as column labels of the left half.
    randomRecepten : sequence of recipe names
        Pool from which the random recipe of every second row is drawn.

    Returns
    -------
    pandas.DataFrame
        Left and right half concatenated column-wise, two rows per favourite.
        The right half additionally carries the 'Randomgerecht' and 'one out'
        columns plus whatever ``fillInMatrix`` writes for 'y'.
    """
    matrix = pd.DataFrame(columns = Columns)

    # Two rows per favourite. Take len() first and double afterwards:
    # len(Favorieten_Recepten*2) only doubles the count for plain lists, because
    # `*` is applied element-wise to arrays and Series.
    for _ in range(len(Favorieten_Recepten) * 2):
        matrix.loc[matrix.shape[0]] = 0

    columnPrefix = "2-"
    matrix2 = matrix.copy()
    matrix2.columns = [columnPrefix + columnName for columnName in matrix2.columns]

    X = np.array(Favorieten_Recepten)
    loo = LeaveOneOut()
    index_counter = 0

    for train_index, test_index in loo.split(X):
        X_train, X_test = X[train_index], X[test_index]

        X_train_ingredient_unique = np.unique(np.concatenate([ingredienten_bij_recept(x) for x in X_train]))
        X_test_ingredient_unique = np.unique(np.concatenate([ingredienten_bij_recept(x) for x in X_test]))

        randomRecept = random.choice(randomRecepten)

        # Left half: fill with the ingredients of the train split.
        fillInMatrix(matrix, X_train_ingredient_unique, index_counter, True, False)

        # Row 1 of the pair: ingredients of the held-out favourite.
        fillInMatrix(matrix2, X_test_ingredient_unique, index_counter, False, False, columnPrefix = columnPrefix)

        # Row 2 of the pair: ingredients of the randomly drawn recipe.
        fillInMatrix(matrix2, ingredienten_bij_recept(randomRecept), index_counter+1, False, False, columnPrefix = columnPrefix)

        # Label the pair through the 'y' column (original note: the random row
        # gets y = 0 because it should be false).
        fillInMatrix(matrix2, 'y', index_counter, True, True)

        # Record which random recipe was drawn and which favourite was left out.
        # LeaveOneOut yields exactly one test sample, so store its scalar name
        # rather than the one-element array.
        held_out = X_test[0]
        matrix2.loc[index_counter+1, 'Randomgerecht'] = randomRecept
        matrix2.loc[index_counter, 'one out'] = held_out
        matrix2.loc[index_counter+1, 'one out'] = held_out

        index_counter += 2
    return pd.concat([matrix, matrix2], axis=1)


#TEST_Matrix
def generateTestMatrix(Train_Favorieten, Test_Favorieten, Columns, randomRecepten):
    """Build the validation/test matrix for one user.

    With ``n = len(Test_Favorieten)`` the result has ``2 * n`` rows:

    * rows ``0 .. n-1``: the left half (columns ``Columns``) is filled with the
      unique ingredients of all ``Train_Favorieten``; the right half (columns
      prefixed with ``"2-"``) with the ingredients of the matching test
      favourite; 'y' is set as true;
    * rows ``n .. 2n-1``: same left half; the right half holds the ingredients
      of a recipe drawn with ``random.choice`` from ``randomRecepten``; 'y' is
      written with ``fillInZero = True`` (false).

    Parameters
    ----------
    Train_Favorieten : sequence of recipe names
        Favourites that describe the user (left half).
    Test_Favorieten : sequence of recipe names
        Held-back favourites, one row each in the first block.
    Columns : iterable of str
        Ingredient names used as column labels of the left half.
    randomRecepten : sequence of recipe names
        Pool from which the recipes of the second block are drawn.

    Returns
    -------
    pandas.DataFrame
        Left and right half concatenated column-wise.
    """
    matrix1 = pd.DataFrame(columns = Columns)
    Recepten = np.array(Train_Favorieten)
    Test_Recepten = np.array(Test_Favorieten)
    n_test = len(Test_Recepten)

    for _ in range(n_test * 2):
        matrix1.loc[matrix1.shape[0]] = 0

    columnPrefix = "2-"
    matrix2 = matrix1.copy()
    matrix2.columns = [columnPrefix + columnName for columnName in matrix2.columns]

    if n_test == 0:
        # No test favourites: nothing to fill, and the train ingredients must
        # not be looked up (this matches the behaviour of the old in-loop lookup).
        return pd.concat([matrix1, matrix2], axis=1)

    # The train ingredients are identical for every row, so compute them once
    # instead of once per test recipe.
    Gerecht_Tags = np.unique(np.concatenate([ingredienten_bij_recept(x) for x in Recepten]))

    for index_counter in range(n_test):
        Test_Gerecht_Tags = ingredienten_bij_recept(Test_Recepten[index_counter])

        # Left half: ingredients of the user's train favourites (80%).
        fillInMatrix(matrix1, Gerecht_Tags, index_counter, False, False)

        # Right half: ingredients of this test favourite (20%).
        fillInMatrix(matrix2, Test_Gerecht_Tags, index_counter, False, False, columnPrefix = columnPrefix)

        # Put the y value as true
        fillInMatrix(matrix2, 'y', index_counter, False, False)

    for p in range(n_test, n_test * 2):
        # Left half: the same train-favourite ingredients as above.
        fillInMatrix(matrix1, Gerecht_Tags, p, False, False)

        # Right half: ingredients of a randomly drawn recipe.
        fillInMatrix(matrix2, ingredienten_bij_recept(random.choice(randomRecepten)), p, False, False, columnPrefix = columnPrefix)

        # Put the y value as false
        fillInMatrix(matrix2, 'y', p, False, False, fillInZero = True)
    return pd.concat([matrix1, matrix2], axis=1)

def getRandomRecipes(Tags, diner_recepten, compareToTags, UserTag = ""):
    """Select candidate dinner recipes by tag.

    Parameters
    ----------
    Tags : iterable of tag names
        Only read when ``compareToTags`` is truthy.
    diner_recepten : iterable of recipe names
        The dinner recipes to select from.
    compareToTags : bool
        Truthy: keep the dinner recipes that ``recepten_bij_tag`` lists for at
        least one of ``Tags``. Falsy: keep the dinner recipes that
        ``recepten_bij_tag`` does not list for ``UserTag``.
    UserTag : str, optional
        Tag whose recipes are excluded in the falsy case.

    Returns
    -------
    list
        The selected recipe names, in set (i.e. unspecified) order.
    """
    if not compareToTags:
        # Every dinner recipe except those filed under the user's own tag.
        dinner_pool = set(diner_recepten)
        return list(dinner_pool.difference(recepten_bij_tag(UserTag)))

    # Append the recipes of each requested tag to one running array.
    tagged_recipes = []
    for tag_name in Tags:
        tagged_recipes = np.concatenate((tagged_recipes, recepten_bij_tag(tag_name)))

    # Only tagged recipes that are also dinner recipes qualify.
    return list(set(tagged_recipes) & set(diner_recepten))

def createTrainMatrix(TrainUsers, Tags, diner_recepten, compareToTags = True):
    """Stack the leave-one-out training matrices of all train users.

    Parameters
    ----------
    TrainUsers : iterable of user objects
        Each user must expose ``Favorieten_Recepten`` (its element ``[0]`` is
        passed on as the list of favourites) and, when ``compareToTags`` is
        falsy, ``UserTag``.
    Tags : iterable of tag names
        Forwarded to ``getRandomRecipes``.
    diner_recepten : iterable of recipe names
        Dinner recipes; their ingredients (from ``ingredientsdf``) define the
        matrix columns.
    compareToTags : bool, default True
        Truthy: one random-recipe pool, derived from ``Tags``, is shared by all
        users. Falsy: the pool is rebuilt per user from that user's ``UserTag``.

    Returns
    -------
    pandas.DataFrame
        All per-user matrices concatenated row-wise with a fresh index and
        missing values replaced by 0; an empty frame when there are no users.
    """
    Columns = ingredientsdf[ingredientsdf['recipe'].isin(diner_recepten)].ingredient.unique()

    # The tag-based pool does not depend on the user, so it is built only once.
    sharedRecepten = getRandomRecipes(Tags, diner_recepten, compareToTags) if compareToTags else None

    # Collect the per-user matrices and concatenate once at the end; repeated
    # pd.concat inside the loop copies the growing frame on every iteration.
    userMatrices = []
    for user in TrainUsers:
        if compareToTags:
            randomRecepten = sharedRecepten
        else:
            randomRecepten = getRandomRecipes(Tags, diner_recepten, compareToTags = False, UserTag = user.UserTag)
        userMatrices.append(
            generateTrainMatrix(
                user.Favorieten_Recepten[0],
                Columns,
                randomRecepten))

    if not userMatrices:
        # pd.concat rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()

    return pd.concat(userMatrices, axis=0, ignore_index=True).fillna(0)



def createValidateOrTestMatrix(ValidateUsers, Tags, diner_recepten, compareToTags = True, trainFraction = 0.8):
    """Stack the validation/test matrices of all given users.

    For every user the favourites list is cut by position: the first
    ``int(len(favourites) * trainFraction)`` recipes describe the user, the
    remaining ones are the held-back test favourites passed to
    ``generateTestMatrix``.

    Parameters
    ----------
    ValidateUsers : iterable of user objects
        Each user must expose ``Favorieten_Recepten`` (its element ``[0]`` is
        the list of favourites) and, when ``compareToTags`` is falsy,
        ``UserTag``.
    Tags : iterable of tag names
        Forwarded to ``getRandomRecipes``.
    diner_recepten : iterable of recipe names
        Dinner recipes; their ingredients (from ``ingredientsdf``) define the
        matrix columns.
    compareToTags : bool, default True
        Truthy: one random-recipe pool, derived from ``Tags``, is shared by all
        users. Falsy: the pool is rebuilt per user from that user's ``UserTag``.
    trainFraction : float, default 0.8
        Share of each user's favourites used as the known (train) part.

    Returns
    -------
    pandas.DataFrame
        All per-user matrices concatenated row-wise with a fresh index and
        missing values replaced by 0; an empty frame when there are no users.
    """
    Columns = ingredientsdf[ingredientsdf['recipe'].isin(diner_recepten)].ingredient.unique()

    # The tag-based pool does not depend on the user, so it is built only once.
    sharedRecepten = getRandomRecipes(Tags, diner_recepten, compareToTags) if compareToTags else None

    # Collect the per-user matrices and concatenate once at the end; repeated
    # pd.concat inside the loop copies the growing frame on every iteration.
    userMatrices = []
    for user in ValidateUsers:
        favorieten = user.Favorieten_Recepten[0]
        split = int(len(favorieten) * trainFraction)
        if compareToTags:
            randomRecepten = sharedRecepten
        else:
            randomRecepten = getRandomRecipes(Tags, diner_recepten, compareToTags = False, UserTag = user.UserTag)
        userMatrices.append(
            generateTestMatrix(
                favorieten[:split],
                favorieten[split:],
                Columns,
                randomRecepten))

    if not userMatrices:
        # pd.concat rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()

    return pd.concat(userMatrices, axis=0, ignore_index=True).fillna(0)

writing file CreateMatrix.py


In [2]:
# Reload the variables persisted earlier with %store
# (presumably this is where diner_recepten_bonno comes from — confirm).
%store -r
# With compareToTags=False the Tags argument (['hollands']) is not read:
# the call lists the dinner recipes that recepten_bij_tag does not return for "italiaans".
getRandomRecipes(['hollands'], diner_recepten_bonno, False, UserTag = "italiaans")

['Quiche met broccoli en gerookte zalm',
 'Thaise roerbak met komkommer, cherrytomaten, ui, ananas en tofu',
 'Indiase kippenpoten',
 'Kruidencouscous en kipballetjes in saus',
 'Gevulde kalkoen',
 'Zoetzuur varkensvlees met bosui',
 'Traybake met kip en mosterd',
 'Gado gado noedelsalade ',
 'Kip-pie met prei en Emmentaler',
 'Mediterrane bonensalade met gerookte kip',
 'Courgettegratin uit de oven met verse zalm en spinazie',
 'Tuinbonenburger',
 "Canard à l'orange",
 'Ovendrumsticks met rodekoolcouscous',
 'Tom kha kai',
 'Oosterse groentesoep',
 'Taco’s met kibbeling, rodekool &amp; alliolidressing',
 'Volkoren mac and cheese met pompoen',
 "Picadillo-tostada's met guacamole",
 'Kleurrijke groentepasta met zoete aardappel',
 'Thaise kipcurry met venkel',
 'Traybake kip tandoori met kikkererwten en tomaat',
 'Notenquiche',
 'Kipchili',
 'Groentetajine met couscous',
 'Maaltijdsalade met avocadomayo',
 'Gehakthachee met puree',
 'Rendang Padang van Vanja van der Leeden',
 'Aardappelo