In [1]:
from warnings import simplefilter

from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LogisticRegression

from cuisine.cookbook import get_ingredient_list, get_cookbook_train, get_cookbook_valid_question, get_cookbook_valid_answer
from cuisine.embedding import import_embedding, create_random_embedding
from cuisine.chef import make_embedding_data
from cuisine.utils import move_to_top_directory

In [2]:
# Presumably switches the kernel's working directory to the project root so the
# data/embedding loaders below resolve their paths — TODO confirm in cuisine.utils.
move_to_top_directory()
# Display the current working directory to verify where the notebook is running.
%pwd

'/home/felix/cuisine'

In [3]:
# Load the training recipes, the two halves of the validation set (the
# "question" and "answer" parts are fetched separately), and the ingredient list.
cookbook_train = get_cookbook_train()
cookbook_valid_question = get_cookbook_valid_question()
cookbook_valid_answer = get_cookbook_valid_answer()
ingredient_list = get_ingredient_list()

# Peek at one training record: a dict with 'recipe_id', 'ingredients'
# (a list of integer ids), 'kitchen_name' and 'kitchen_id'.
cookbook_train[15]

{'recipe_id': 15,
 'ingredients': [2866, 4243, 4362, 5377, 5408, 6187, 6352, 6568],
 'kitchen_name': 'italian',
 'kitchen_id': 5}

In [4]:
# Load the ingredient embeddings that will be compared below. The first argument
# of import_embedding is the stored embedding's identifier; the numeric suffixes
# presumably encode walk parameters / SVD dimension — TODO confirm in cuisine.embedding.
embedding_random_walk_2 = import_embedding("Embp1q2", ingredient_list)
embedding_random_walk_10 = import_embedding("Embp1q10", ingredient_list)
# Random baseline; 128 is presumably the embedding dimension — TODO confirm.
# NOTE(review): no seed is passed here, so if this draws fresh random values the
# "Uniform Random" scores may differ between runs — confirm and seed if needed.
embedding_uniform_rand = create_random_embedding(ingredient_list, 128)
embedding_svd_32 = import_embedding("SVD32", ingredient_list)
embedding_svd_64 = import_embedding("SVD64", ingredient_list)
embedding_svd_128 = import_embedding("SVD128", ingredient_list)

# Inspect one entry of the SVD-32 embedding (a 32-component array for key 2813).
embedding_svd_32[2813]

array([34.63750259, 20.35864365, -1.20763975, -0.97736046,  0.23479278,
        5.40115298,  8.51684704, -0.53078058, -0.53245975, -0.8002612 ,
       -3.08002422,  0.61509874,  1.03727886, -1.68163813,  4.22448238,
        4.41397363, -2.43951389,  1.38136843, -0.5487753 , -0.09986042,
       -1.99060814, -0.10028059,  0.17231477, -0.5892525 , -0.08022406,
       -2.57672924,  4.58388978, -2.31123317, -0.80378753,  1.565003  ,
        0.76383124, -3.8348879 ])

In [8]:
def do_logistic_regression(embedding, name):
    """Fit a logistic regression on one embedding and print its scores.

    Builds the feature/label data from the notebook-level cookbooks
    (``cookbook_train``, ``cookbook_valid_question``, ``cookbook_valid_answer``)
    via ``make_embedding_data``, trains an L2-penalised ``LogisticRegression``
    capped at 100 iterations, and prints one summary line with the training
    and validation scores as returned by ``clf.score``.

    Parameters
    ----------
    embedding
        Ingredient embedding forwarded to ``make_embedding_data``.
    name : str
        Label used for this embedding in the printed line (padded to 14 chars).

    Returns
    -------
    None
        The scores are printed, not returned.
    """
    # Training features and labels both come from the training cookbook.
    train_features, train_labels = make_embedding_data(cookbook_train, embedding)

    # Validation features are taken from the "question" cookbook, while the
    # labels are taken from the "answer" cookbook; the other half of each
    # returned pair is discarded.
    valid_features, _ignored_labels = make_embedding_data(cookbook_valid_question, embedding)
    _ignored_features, valid_labels = make_embedding_data(cookbook_valid_answer, embedding)

    model = LogisticRegression(penalty='l2', max_iter=100)
    model.fit(train_features, train_labels)

    train_score = model.score(train_features, train_labels)
    valid_score = model.score(valid_features, valid_labels)
    print(f"Embedding {name:14} has training score {train_score:.3f} and validation score {valid_score:.3f}")

In [9]:
# do_logistic_regression caps each fit at max_iter=100, so the solver may stop
# before converging; silence those warnings to keep the score table readable.
# (simplefilter and ConvergenceWarning are imported in the top import cell.)
simplefilter("ignore", category=ConvergenceWarning)

# (embedding, display label) pairs, in the order they should appear in the output.
embeddings_to_score = [
    (embedding_svd_32, "SVD-32"),
    (embedding_svd_64, "SVD-64"),
    (embedding_svd_128, "SVD-128"),
    (embedding_random_walk_2, "Random Walk 2"),
    (embedding_random_walk_10, "Random Walk 10"),
    (embedding_uniform_rand, "Uniform Random"),
]

# Fit and report one logistic regression per embedding.
for scored_embedding, display_name in embeddings_to_score:
    do_logistic_regression(scored_embedding, display_name)

Embedding SVD-32         has training score 0.665 and validation score 0.669
Embedding SVD-64         has training score 0.689 and validation score 0.692
Embedding SVD-128        has training score 0.710 and validation score 0.706
Embedding Random Walk 2  has training score 0.651 and validation score 0.657
Embedding Random Walk 10 has training score 0.646 and validation score 0.655
Embedding Uniform Random has training score 0.524 and validation score 0.504
