# Get a flavor pairing from the flavor generator, find the nearest cluster, and pull the remaining ingredients from that cluster

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise
import random

In [2]:
# Flavor matrices used by the rest of the notebook.
# `shares` is the matrix handed to flavor_generator below; `recs` is loaded
# alongside it (presumably the recipe co-occurrence counterpart - TODO confirm).
recs = pd.read_csv("Recipe_Components_Dense.csv")
shares = pd.read_csv("Shared_Compounds_Dense.csv")

# Cluster centroids, read without the target column (Cluster) so that only the
# remaining columns take part in the similarity comparison.
cl_gb = pd.read_csv('Clusters/cluster_lookup.csv').drop(columns='Cluster')

## Make a novel flavor combination

In [3]:
def flavor_generator(first_flavor, num_flavors, flavor_space, max_failures=10000):
    """Build a chain of distinct flavors by a weighted random walk.

    Starting from ``first_flavor``, the next flavor is drawn at random from the
    rows of ``flavor_space``, weighted by the values in the column named after
    the current flavor (reported in the printout as the number of shared flavor
    compounds). A draw that repeats a flavor already in the chain counts as a
    failure and restarts the walk from ``first_flavor``.

    Parameters
    ----------
    first_flavor : str
        Seed flavor; must be a column of ``flavor_space``.
    num_flavors : int
        Desired length of the returned chain, seed included.
    flavor_space : pandas.DataFrame
        First column holds the flavor name of each row; the remaining columns
        are looked up by flavor name and used as sampling weights.
    max_failures : int, optional
        Number of repeated draws tolerated before giving up (default 10000).

    Returns
    -------
    list
        The chain of flavors. It is shorter than ``num_flavors`` when the
        failure limit was exceeded.

    Raises
    ------
    KeyError
        If the current flavor has no column in ``flavor_space``.
    """
    flavors = [first_flavor]
    remaining = num_flavors - 1
    current_flavor = first_flavor
    fail = 0
    # Rows are sampled by position (not by index label) so that the positional
    # lookups below stay correct whatever index the frame carries.
    row_positions = range(len(flavor_space))

    while remaining > 0:
        weights = flavor_space[current_flavor].tolist()
        row = random.choices(row_positions, weights=weights, k=1)[0]
        # Explicit (row, column) position: avoids the deprecated integer
        # fallback of Series[0] on a label-indexed row.
        new_flavor = flavor_space.iloc[row, 0]

        if new_flavor in flavors:
            fail += 1
            if fail > max_failures:
                print("Too much failure. Can't continue.")
                break
            # Repeated flavor: restart the walk from the seed.
            current_flavor = first_flavor
            continue

        flavors.append(new_flavor)
        print('Went from ', current_flavor, ' to ', new_flavor, '. ',
              weights[row], ' shared flavor compounds.')
        current_flavor = new_flavor
        remaining -= 1

    return flavors

In [107]:
# Walk the shared-compound matrix from 'bacon', asking for a chain of five flavors
gen_1 = flavor_generator(first_flavor='bacon', num_flavors=5, flavor_space=shares)
gen_1

Went from  bacon  to  japanese_mint .  7  shared flavor compounds.
Went from  japanese_mint  to  roasted_pecan .  9  shared flavor compounds.
Went from  roasted_pecan  to  french_bean .  26  shared flavor compounds.
Went from  french_bean  to  shrimp .  28  shared flavor compounds.


['bacon', 'japanese_mint', 'roasted_pecan', 'french_bean', 'shrimp']

## Find a cluster for the flavor combination

In [9]:
def cluster_choice(flavor_set):
    """Find the cluster centroid most similar to a set of flavors.

    Every flavor name is split on underscores (so 'roasted_pecan' contributes
    'roasted' and 'pecan'). Each resulting token that matches a column of the
    module-level ``cl_gb`` table is counted, and the count vector is compared
    with every row of ``cl_gb`` by cosine similarity.

    Parameters
    ----------
    flavor_set : iterable of str
        Flavor names, with multi-word names joined by underscores.

    Returns
    -------
    tuple
        ``(flavor_set, best_row, similarities)`` where ``similarities`` is a
        1-D array with one cosine similarity per row of ``cl_gb`` and
        ``best_row`` is the position of its maximum.
        NOTE(review): ``best_row`` is a row position in ``cl_gb``; it equals the
        cluster id only if the lookup file is ordered by cluster - confirm.
    """
    tokens = []
    for flavor in flavor_set:
        tokens.extend(flavor.split('_'))

    # Plain int16 zeros: nothing is ever masked, and the capitalised 'Int16'
    # alias is not accepted as a dtype by current NumPy releases.
    gen_ing = pd.Series(np.zeros(len(cl_gb.columns), dtype=np.int16),
                        index=cl_gb.columns)

    for token in tokens:
        # Tokens without a centroid column are skipped deliberately. An explicit
        # membership test replaces `try/finally: continue`, which discarded
        # every exception (KeyboardInterrupt included).
        if token in gen_ing.index:
            gen_ing.loc[token] += 1

    cosinesim = pairwise.cosine_similarity(gen_ing.to_numpy().reshape(1, -1), cl_gb)[0]
    clusterchoix = np.argmax(cosinesim)
    return flavor_set, clusterchoix, cosinesim

In [108]:
# Score the generated flavors against every cluster centroid
cl_map = cluster_choice(flavor_set=gen_1)
cl_map

(['bacon', 'japanese_mint', 'roasted_pecan', 'french_bean', 'shrimp'],
 0,
 array([0.11408472, 0.03379565, 0.01137763, 0.07297048, 0.03653373,
        0.03569394, 0.0319575 , 0.03715399, 0.03950668, 0.06630271,
        0.04892031, 0.04984783, 0.04726951, 0.04163775]))

In [129]:
# The chosen cluster can be overridden here; the cosine similarities in
# cl_map[2] show how close each cluster is to the generated flavors.
cl_map_revised = list(cl_map)
cl_map_revised[1] = 3

## Bring in additional flavors from that cluster

In [131]:
def cluster_explorer(cluster_map, num_ingredients, flavor_space=None, max_failures=500):
    """Draw additional ingredients from one cluster's recipe matrix.

    The flavor names in ``cluster_map[0]`` are split on underscores to form a
    pool of seed words. One seed is picked at random and ingredients are then
    sampled from the rows of the cluster matrix, weighted by ``log(count + 1)``
    of the seed's column. Whenever the seed has no column, has no positive
    weight, or the draw repeats an ingredient, a new seed is picked and the
    attempt counts as a failure.

    Parameters
    ----------
    cluster_map : sequence
        ``cluster_map[0]`` is the list of flavor names and ``cluster_map[1]``
        the cluster id used to build the CSV file name.
    num_ingredients : int
        Number of distinct ingredients to collect.
    flavor_space : pandas.DataFrame, optional
        Matrix to sample from; first column holds the ingredient name of each
        row. When omitted it is read from
        ``Clusters/cluster_<id>_recipe_matrix.csv``.
    max_failures : int, optional
        Failed attempts tolerated before giving up (default 500).

    Returns
    -------
    tuple
        ``(cluster_map[0], set_of_ingredients)``. The set is smaller than
        ``num_ingredients`` when the failure limit was exceeded.
    """
    base_flavors, cluster_id = cluster_map[0], cluster_map[1]
    if flavor_space is None:
        flavor_space = pd.read_csv('Clusters/cluster_' + str(cluster_id) + '_recipe_matrix.csv')

    seeds = []
    for flavor in base_flavors:
        seeds.extend(flavor.split('_'))

    liftoff = random.choice(seeds)
    # Remembered only for the closing message: this is the first seed drawn,
    # even if it turned out to be unusable and was replaced.
    jumpoff = liftoff
    failures = 0
    ingredients = []
    # Rows are sampled by position (not by index label) so that the positional
    # lookups below stay correct whatever index the frame carries.
    row_positions = range(len(flavor_space))

    while len(set(ingredients)) < num_ingredients:
        if failures > max_failures:
            print('Failed to find ingredients in cluster ', str(cluster_id), '!')
            break

        try:
            counts = flavor_space[liftoff]
        except KeyError:
            # The seed word has no column in this cluster's matrix.
            counts = None

        weights = None if counts is None else np.log(counts + 1).tolist()
        if weights is None or not sum(weights) > 0:
            # Unusable seed (missing column, or nothing to sample from, which
            # would make random.choices raise): try another one.
            liftoff = random.choice(seeds)
            failures += 1
            continue

        row = random.choices(row_positions, weights=weights, k=1)[0]
        # Explicit (row, column) position: avoids the deprecated integer
        # fallback of Series[0] on a label-indexed row.
        new_flavor = flavor_space.iloc[row, 0]

        if new_flavor in ingredients:
            liftoff = random.choice(seeds)
            failures += 1
            continue

        ingredients.append(new_flavor)
        print(liftoff, ' and ', new_flavor, ' appear together in ',
              counts.iloc[row], ' recipes in cluster ', cluster_id)

    print('Started with', jumpoff, '...')
    return base_flavors, set(ingredients)

In [166]:
# Pull eight more ingredients from the manually selected cluster
cluster_explorer(cluster_map=cl_map_revised, num_ingredients=8)

bacon  and  cornflakes  appear together in  1  recipes in cluster  3
bacon  and  oregano  appear together in  12  recipes in cluster  3
bacon  and  onions  appear together in  169  recipes in cluster  3
bacon  and  tarragon  appear together in  10  recipes in cluster  3
bacon  and  kernels  appear together in  28  recipes in cluster  3
bacon  and  broccoli  appear together in  31  recipes in cluster  3
bacon  and  steak  appear together in  12  recipes in cluster  3
bacon  and  ginger  appear together in  4  recipes in cluster  3
Started with roasted ...


(['bacon', 'japanese_mint', 'roasted_pecan', 'french_bean', 'shrimp'],
 {'broccoli',
  'cornflakes',
  'ginger',
  'kernels',
  'onions',
  'oregano',
  'steak',
  'tarragon'})