In [8]:
import pickle
import pandas as pd
from nltk.stem import WordNetLemmatizer
# Single shared lemmatizer; later cells use it both to build the lookup keys and to normalise queries.
lemmatizer = WordNetLemmatizer()

# Import data from Cook's Thesaurus

In [9]:
# Load the pickled Cook's Thesaurus results into a DataFrame.
# NOTE: unpickling can execute arbitrary code, so only read pickle files from a trusted source.
df = pd.read_pickle('cooks_thesaurus_results_Oct12.pkl')
# Quick sanity check: (rows, columns)
df.shape

(2181, 9)

# Process data

In [11]:
# Build the processed frame from a fresh copy of `df`, so re-running this cell
# always starts again from the loaded data.
df_new = (
    df.copy()
    .reset_index(drop=True)
    .assign(
        # The first entry of each `item` list is used as the item's name.
        item_name=lambda frame: frame['item'].map(lambda names: names[0]),
        # The lemmatized item name is the unique key associated with each ingredient.
        item_lem=lambda frame: frame['item_name'].apply(lemmatizer.lemmatize),
        subcat_lem=lambda frame: frame['subcat'].apply(lemmatizer.lemmatize),
        cat_lem=lambda frame: frame['cat'].apply(lemmatizer.lemmatize),
    )
    # Keep only the first row per lemmatized name so that the key is unique.
    .drop_duplicates(subset=['item_lem'])
)

In [12]:
# Lookup tables keyed by the lemmatized item name (`item_lem`), the unique key of each ingredient.
get_subs = dict(zip(df_new['item_lem'], df_new['item substitutions']))  # name key -> substitutions
get_common_name = dict(zip(df_new['item_lem'], df_new['item_name']))  # name key -> common name

# Lemmatized synonym -> name key.
# Built in a single pass over the synonym lists instead of `.apply(pd.Series).stack()`,
# which materialises a wide, NaN-padded frame just to flatten the lists.
get_name_key = {}
for name_key, synonyms in zip(df_new['item_lem'], df_new['item']):
    for synonym in synonyms:
        if pd.isna(synonym):
            # Missing entries are skipped, as they were when `stack()` dropped them.
            continue
        # For now, when two items share a lemmatized synonym, the first item seen wins
        # (equivalent to dropping duplicates and keeping the first occurrence).
        get_name_key.setdefault(lemmatizer.lemmatize(synonym), name_key)

In [19]:
# Inspect the common names stored in the lookup (this displays every value, so the output is long).
get_common_name.values()

dict_values(['beet', 'black radish', 'burdock', 'carrot', 'celeriac', 'daikon', 'horseradish', 'lotus root', 'parsley root', 'parsnip', 'radish', 'rutabaga', 'salsify', 'turnip', 'arrowroot', 'cassava', 'jerusalem artichoke', 'jicama', 'malanga', 'potatoes', 'sweet potatoes', 'taro', 'water chestnut', 'bintje potato', 'fingerling potato', 'huckleberry potato', 'long white potato', 'new potatoes', 'oca potato', 'purple potato', 'red-skinned potato', 'russet potato', 'white round potato', 'yellow finn potato', 'yukon gold potato', 'boniato', 'japanese yam', 'okinawan purple potato', 'sweet potato', 'tropical yam', 'yam', 'yamaimo', 'asparagus', 'bamboo shoots', 'cardoon', 'celery', 'chinese celery', 'fennel', 'fiddlehead fern', 'hearts of palm', 'rhubarb', 'white asparagus', 'wild asparagus', 'green onion', 'leek', 'ramps', 'spring onion', 'bermuda onion', 'boiling onion', 'cippolini onion', 'onion flakes', 'onion powder', 'pearl onion', 'red onion', 'shallot', 'sweet onion', 'white onio

In [9]:
# Bundle the three lookup dictionaries under short keys and persist them to disk.
cooks_thesaurus = dict(key=get_name_key, subs=get_subs, common=get_common_name)
with open('cooks_thesaurus_dict.pkl', 'wb') as pickle_out:
    pickle.dump(cooks_thesaurus, pickle_out)

In [3]:
# Reload the bundled dictionaries from the pickle file written by the cell above.
# NOTE: pickle.load can execute arbitrary code; only load files you created or otherwise trust.
with open('cooks_thesaurus_dict.pkl', 'rb') as file:
     cooks_thesaurus = pickle.load( file )

## How to use dictionaries
### 1. Get the name key for an ingredient

In [5]:
# Lemmatize the query first: the 'key' dictionary is indexed by lemmatized synonyms.
cooks_thesaurus['key'][ lemmatizer.lemmatize('gobo root') ]

'burdock'

### 2. Get substitutions

In [6]:
# Two-step lookup: lemmatized synonym -> name key -> substitutions for that item.
cooks_thesaurus['subs'][ cooks_thesaurus['key'][ lemmatizer.lemmatize('gobo root') ] ]

['salsify', 'asparagus', 'artichoke hearts']

### 3. Get common name

In [7]:
# Two-step lookup: lemmatized synonym -> name key -> common (display) name of the item.
cooks_thesaurus['common'][ cooks_thesaurus['key'][ lemmatizer.lemmatize('gobo root') ] ]

'burdock'