In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sklearn
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import jaccard_similarity_score
# from composes.transformation.scaling.ppmi_weighting import PpmiWeighting

%matplotlib inline

In [2]:
# Load data.
X_train = pd.read_csv('train_dataset.csv', sep = '\t', index_col = 0).drop('cuisine', axis = 1)
cuisines = pd.read_csv('train_dataset.csv', sep = '\t', usecols = ['cuisine'])
cuisines.set_index(X_train.index, inplace = True)

In [3]:
# Calculate cosine similarities.
sims = cosine_similarity(X_train.T)
np.fill_diagonal(sims, 0)

In [4]:
def recommend_ingredients(partial_recipes, user_item_matrix, k = 10, similarity_measure = "cosine", 
                          similarity_matrix = None, n_recommendations = 10):
    """Recommend ingredients to (partial) recipes based on the similarity between ingredients.
    
    Inputs:
        - partial_recipes: pandas dataframe of recipes that ingredient recommendations are produced for. Should be
                           of the shape recipes x ingredients.
        - user_item_matrix: pandas dataframe of training recipes. Should be of the shape recipes x ingredients.
        - k: number of neighbours (ingredients) used when calculating the ingredient recommendation scores.
        - similarity_measure: the measure used for calculating the similarity between ingredients. One of
                              'cosine', 'asymmetric_cosine', 'jaccard', 'pmi'.
        - similarity_matrix: the precomputed matrix of ingredient similarities. If not given, this will be
                             computed by the function.
        - n_recommendations: the desired number of recommended ingredients per recipe.
        
    Outputs a matrix of the recommended ingredients (columns) for the given partial recipes (rows).
        
    """
    
    # Calculate the similarity matrix if none was given as input.
    if similarity_matrix == None:
        if similarity_measure == "cosine":
            similarity_matrix = cosine_similarity(user_item_matrix.T)
        elif similarity_measure == "asymmetric_cosine":
            pass
        elif similarity_measure == "jaccard":
            pass
        elif similarity_measure == "pmi":
            pass
        else: 
            raise ValueError("The similarity measure must be one of: 'cosine', 'asymmetric_cosine', 'jaccard', 'pmi'.")
    
    # Set similarity to self to zero.
    np.fill_diagonal(similarity_matrix, 0) 
    
    # For each ingredient, find the ingredients that are not among the k most similar and set similarity to zero.
    for i in range(np.shape(similarity_matrix)[0]):
        not_kNN = similarity_matrix[i, ] < sims[i, np.argpartition(sims[i, ], -k)[-k]]
        similarity_matrix[i, not_kNN] = 0

    # Calculate the ingredient scores.
    ingredient_scores = np.matmul(similarity_matrix, partial_recipes.T) / np.sum(abs(similarity_matrix), axis = 1)[:, None]
    ingredient_scores = ingredient_scores.T
    
    # Set ingredient scores of already present ingredients to zero.
    ingredient_scores[partial_recipes == 1] = 0
    
    # For each recipe, get the indices of the *n_recommendations* highest-scoring ingredients.
    recommendations_idx = np.argpartition(ingredient_scores, -n_recommendations).T[-n_recommendations:].T
    
    # Convert recommendation indices to ingredient names.
    recommendations = user_item_matrix.columns[recommendations_idx]
    
    return recommendations

In [5]:
# Apply CF with cosine similarity to original recipes.
recommendations = recommend_ingredients(X_train, X_train)

In [6]:
# Print some recipe + recommendation samples.
recipe1 = 7
recipe2 = 4
recipe3 = 6

print('Recipe {} ({}): \n{}\n'.format(X_train.index[recipe1], 
                                      cuisines.iloc[recipe1, 0],
                                      X_train.columns[np.where(X_train.iloc[recipe1,] != 0)]))
print('Recommended ingredients: \n{}\n\n'.format(recommendations[recipe1,]))

print('Recipe {} ({}): \n{}\n'.format(X_train.index[recipe2], 
                                      cuisines.iloc[recipe2, 0],
                                      X_train.columns[np.where(X_train.iloc[recipe2,] != 0)]))
print('Recommended ingredients: \n{}\n\n'.format(recommendations[recipe2,]))

print('Recipe {} ({}): \n{}\n'.format(X_train.index[recipe3],
                                      cuisines.iloc[recipe3, 0],
                                      X_train.columns[np.where(X_train.iloc[recipe3,] != 0)]))
print('Recommended ingredients: \n{}\n\n'.format(recommendations[recipe3,]))

Recipe 3735 (italian): 
Index(['bakingpowder', 'eggs', 'flour', 'oliveoil', 'sugar', 'vanillaextract'], dtype='object')

Recommended ingredients: 
Index(['buttermilk', 'purposeflour', 'unsaltedbutter', 'largeeggs', 'butter',
       'bakingsoda', 'granulatedsugar', 'whitesugar', 'milk',
       'groundcinnamon'],
      dtype='object')


Recipe 13162 (indian): 
Index(['blackpepper', 'butter', 'cayennepepper', 'chicken', 'chilipowder',
       'flour', 'garammasala', 'garlic', 'groundcumin', 'lemonjuice', 'milk',
       'oil', 'onions', 'salt', 'shallots', 'water'],
      dtype='object')

Recommended ingredients: 
Index(['cumin', 'carrots', 'vegetableoil', 'garlicpowder', 'groundturmeric',
       'chickenbroth', 'bayleaves', 'paprika', 'pepper', 'potatoes'],
      dtype='object')


Recipe 42779 (spanish): 
Index(['cilantro', 'flatleafparsley', 'garlic', 'jalapenochilies', 'oliveoil',
       'pepper', 'salt', 'seasalt', 'shrimp'],
      dtype='object')

Recommended ingredients: 
Index(['shal