In [1]:
import json
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.models import Word2Vec, WordEmbeddingSimilarityIndex, KeyedVectors, TfidfModel
from gensim.similarities import SoftCosineSimilarity, SparseTermSimilarityMatrix
from gensim.corpora import Dictionary
import pandas as pd
import warnings
from pprint import pprint
import numpy as np
import networkx as nx
from itertools import cycle
from collections import defaultdict
warnings.filterwarnings('ignore')


# Pairing data; the code below walks it as four nested dict levels ending in
# lists of ingredients (wine style -> variety -> food category -> group).
with open('wine_food_pairing.json') as f:
    food = json.load(f)

# Recipe data; each recipe record carries an "ingredients" list.
with open('Recipes.json', 'r') as f:
    recipes = json.load(f)

# One ingredient list per recipe, then every distinct ingredient across all of them.
recipe_list = [recipe["ingredients"] for recipe in recipes]
ingredient_set = list(
    {ingredient for ingredients in recipe_list for ingredient in ingredients}
)

# Graph of recipe ingredients; used further down for Jaccard-coefficient similarity.
G = nx.read_graphml('recipe_network.gz')

# Flatten the nested pairing data into every ingredient it mentions
# (duplicates included at this point).
all_food_set = [
    ingredient
    for varieties in food.values()
    for categories in varieties.values()
    for groups in categories.values()
    for ingredients in groups.values()
    for ingredient in ingredients
]

# De-duplicate, then keep only the pairing ingredients that also appear in the recipes.
all_food_set = list(set(all_food_set))
available_words = list(set(all_food_set).intersection(set(ingredient_set)))

# Stepping stone: one (food category, ingredient) pair for every ingredient
# listed under any wine style / variety.
food_cat1 = [
    (category, ingredient)
    for varieties in food.values()
    for categories in varieties.values()
    for category, groups in categories.items()
    for ingredients in groups.values()
    for ingredient in ingredients
]

# All pairing ingredients, grouped by food category (duplicates are kept).
# The default factory must be `list` itself so that an unseen category starts
# out as an empty list; a factory that *returns* `list` would hand back the
# list class and break membership tests such as `x in food_cat[category]`.
food_cat = defaultdict(list)
for category, ingredient in food_cat1:
    food_cat[category].append(ingredient)
        
        
# Used for term similarity: for each food category, only the pairing
# ingredients that also occur in the recipe ingredient list.
food_categories = {
    category: list(set(paired).intersection(set(ingredient_set)))
    for category, paired in food_cat.items()
}
    
# Pairing data with the top-level wine-style keys removed: each second-level
# key maps straight to its pairings. A key that occurs under several styles
# keeps the value from the last style visited.
food_pairs = {
    name: pairings
    for style_entries in food.values()
    for name, pairings in style_entries.items()
}

In [4]:
def _parse_descriptors(raw_text):
    """Split a comma-separated string into trimmed, non-empty descriptor tokens."""
    return [token.strip() for token in raw_text.split(',') if token.strip()]


def _closest_paired_ingredient(input_word, candidates):
    """Return the candidate with the highest Jaccard coefficient to ``input_word`` in ``G``.

    Returns ``None`` when ``input_word`` is not a node of ``G`` or when none of
    the candidates is, so the caller can ask for a different ingredient.
    """
    if input_word not in G:
        return None
    pairs = [
        (input_word, str(candidate))
        for candidate in candidates
        if str(candidate) in G
    ]
    if not pairs:
        return None
    # max() returns the first of several tied scores, which is the same pick a
    # stable descending sort followed by [0] would make.
    _, best_match, _ = max(nx.jaccard_coefficient(G, pairs), key=lambda scored: scored[2])
    return best_match


def getInputs():
    """Interactively collect a food category, an ingredient and wine descriptors.

    The category prompt repeats until one of the six listed categories is
    entered (case-insensitive). The ingredient prompt repeats until the entry
    is either a pairing ingredient of that category or a recipe ingredient
    that can be mapped, via the Jaccard coefficient on ``G``, to a pairing
    ingredient of that category.

    Returns:
        tuple: ``(food_category, ingredient, description_lst)`` where
        ``ingredient`` is always a pairing ingredient and ``description_lst``
        is the list of descriptors entered, split on commas and stripped of
        surrounding whitespace.
    """
    category_list = ['Meat', 'Dairy', 'Vegetables', 'Herbs Spices', 'Starch', 'Sweet']
    while True:
        food_category = input('Please select one of the food categories: Meat, Dairy, Vegetables, Herbs Spices, Starch, Sweet:\n').strip().title()
        if food_category in category_list:
            break
        print('Not a category listed above\n')

    # .get() keeps the lookups read-only: no key is inserted into food_cat and
    # a category without data simply has no candidates.
    paired_ingredients = food_cat.get(food_category, [])
    similarity_candidates = food_categories.get(food_category, [])

    while True:
        input_word = input(f'Please input a food from the {food_category} category:\n').strip().lower()
        if input_word in paired_ingredients:
            ingredient = input_word
            break
        if input_word in ingredient_set:
            # Known recipe ingredient without a direct pairing: substitute the
            # most similar pairing ingredient from the same category.
            ingredient = _closest_paired_ingredient(input_word, similarity_candidates)
            if ingredient is not None:
                break
        print(f'This ingredient is not in our ingredient, please input another ingredient in the {food_category} category\n')

    description_lst = _parse_descriptors(
        input('Please input a list of words describing the qualities you enjoy in your wine:\n')
    )

    return food_category, ingredient, description_lst
    

def getWineTypes(food_category, ingredient, description_lst):
    """Return the top-level keys of ``food`` (wine styles) that list ``ingredient``.

    Every variety, food category and group under a style is searched, so the
    match is not restricted to ``food_category``.

    Args:
        food_category: Unused; kept so existing callers keep working.
        ingredient: Ingredient to look for in the pairing lists.
        description_lst: Unused; kept so existing callers keep working.

    Returns:
        list: Each matching style once, in the iteration order of ``food``, so
        the result is the same on every run.
    """
    return [
        style
        for style, varieties in food.items()
        # any() stops scanning a style as soon as one pairing list matches.
        if any(
            ingredient in ingredients
            for categories in varieties.values()
            for groups in categories.values()
            for ingredients in groups.values()
        )
    ]


def getTopWines(wine_type_set, description_lst=None):
    """Rank every wine of the given styles by similarity to the descriptors.

    For each style, the saved ``{style}_dct.model`` dictionary and
    ``{style}_sim_index.model`` similarity index are loaded and queried with
    the bag-of-words form of ``description_lst``.

    Args:
        wine_type_set: Iterable of wine-style names.
        description_lst: List of descriptor tokens. When omitted, a
            module-level ``description_lst`` is used if one exists, which is
            how callers passing a single argument used to work.

    Returns:
        list: ``(style, row_index, similarity)`` tuples sorted by similarity,
        highest first.

    Raises:
        ValueError: If no descriptors are passed and no module-level
            ``description_lst`` exists.
    """
    if description_lst is None:
        description_lst = globals().get('description_lst')
    if description_lst is None:
        raise ValueError('description_lst is required to rank wines')

    top_wines = []
    for style in wine_type_set:
        try:
            dct = Dictionary.load(f'{style}_dct.model')
            doc_sim_index = SoftCosineSimilarity.load(f'{style}_sim_index.model')
        except OSError:
            # Best effort: a style whose model files are missing or unreadable
            # is left out of the ranking. Any other failure is a real error
            # and is allowed to surface.
            continue
        sims = doc_sim_index[dct.doc2bow(description_lst)]
        top_wines.extend((style, row, score) for row, score in enumerate(sims))

    top_wines.sort(key=lambda entry: entry[2], reverse=True)
    return top_wines


def getTopThreeWines(top_wines, food_category):
    """Build display records for the first three entries of ``top_wines``.

    Args:
        top_wines: ``(style, row_index, similarity)`` tuples, best first.
        food_category: Food category whose other pairings are listed.

    Returns:
        dict: ``'Suggestion N'`` -> dict with the wine's variety, title,
        description, similarity rounded to 3 decimals, and the pairing
        ingredients stored in ``food`` for that style, variety and category.
    """
    wine_output = {}
    frames = {}  # each style's CSV is read once, even if it fills several slots
    for position, (style, index, score) in enumerate(top_wines[:3], start=1):
        if style not in frames:
            frames[style] = pd.read_csv(f'{style}_df.csv')
        wine_df = frames[style]

        # NOTE(review): assumes row order in the CSV matches the order of the
        # documents in the similarity index - confirm where the index is built.
        variety = wine_df['variety'].iloc[index]
        pairing_groups = food[style][variety][food_category].values()
        wine_output[f'Suggestion {position}'] = {
            'Variety': variety,
            'Title': wine_df['title'].iloc[index],
            'Description': wine_df['description'].iloc[index],
            'Similarity': round(score, 3),
            f'Additional {food_category} Pairings': ', '.join(
                ing for group in pairing_groups for ing in group
            ),
        }

    return wine_output


def formatted_output(wine_output):
    """Print each suggestion as a heading followed by its ``field : value`` lines."""
    print()
    for heading, details in wine_output.items():
        print(heading)
        print()
        for field, field_value in details.items():
            print(field, ':', field_value)
        print('________________________')
        print()


def getPairings():
    """Run the whole flow: prompt for inputs, rank wines, print up to three suggestions."""
    food_category, ingredient, description_lst = getInputs()
    wine_type_set = getWineTypes(food_category, ingredient, description_lst)
    # The descriptors are passed explicitly so the ranking does not depend on
    # a variable left over in the notebook namespace.
    top_wines = getTopWines(wine_type_set, description_lst)
    wine_output = getTopThreeWines(top_wines, food_category)
    formatted_output(wine_output)


In [6]:
# Start the interactive flow: prompts for a food category, an ingredient and
# wine descriptors, then prints up to three wine suggestions.
getPairings()

Please select one of the food categories: Meat, Dairy, Vegetables, Herbs Spices, Starch, Sweet:
meat
Please input a food from the Meat category:
fish
Please input a list of words describing the qualities you enjoy in your wine:
spicy, fruity, bold, crazy wild, okay

Suggestion 1

Variety : Zinfanadel
Title : Ottimino Vineyards 2013 Zinfinity Zinfandel (Sonoma County)
Description : From a selection of its best vineyards across the county, this producer showcases a majority of the variety with 16% Petite Sirah, all aged in French oak, 25% of it new. Black olive, spicy leather and cinnamon make for a bold, dry and boisterous wine that's entirely enjoyable, integrated in terms of both tannin and oak.
Similarity : 0.943
Additional Meat Pairings : beef, lamb, venison, salami, proscuitto, bresaola, bacon, roast, tenderloin, pork chop, chicken, duck, turkey
________________________

Suggestion 2

Variety : Syrah / Shiraz
Title : Adelsheim 2009 Calkins Lane Vineyard Syrah (Chehalem Mountains)
D