In [1]:
from pymongo import MongoClient
import datetime
import numpy as np
import pandas as pd
import getpass
import json

import nltk
from nltk.stem import WordNetLemmatizer

Make sure you are connected to the **EPFL network** (on campus or via VPN).

Note on authentication:
* Your username-password pair has read-only credentials
* Use the admin user to insert, modify, or create indexes

In [2]:
# Name of the MongoDB database holding the recipes; the credentials are asked interactively
# so that they are never stored in the notebook source.
database = 'ada-project'
user = input('MongoDB name: ')
# getpass reads the password without echoing it.
password = getpass.getpass('MongoDB password: ')

MongoDB name: davidrivollet
MongoDB password: ········


In [3]:
# Mongo client and authentication.
# The credentials are handed to MongoClient directly because Database.authenticate()
# was removed in PyMongo 4. authSource keeps authenticating against the project
# database, which is what db.authenticate(user, password) did.
client = MongoClient('www.cocotte-minute.ovh', 27017,
                     username=user, password=password, authSource=database)
db = client[database]
collection = db['recipes']

# Construct ingredient DataFrame

## Find ingredient labels

In [4]:
def tokenize_and_lemmatize(text):
    """Split ``text`` into normalized, lemmatized word tokens.

    Steps: tokenize on runs of word characters, lower-case every token, drop the
    tokens made only of digits, then lemmatize what is left with WordNet.

    Parameters
    ----------
    text : str
        Raw ingredient display string.

    Returns
    -------
    list of str
        Lemmatized tokens, in their order of appearance (duplicates are kept).
    """
    # Raw string: '\w' inside a plain literal is an invalid escape sequence.
    tokens_raw = nltk.regexp_tokenize(text, pattern=r'\w+')

    wnl = WordNetLemmatizer()
    lowered = (t.lower() for t in tokens_raw)
    # Purely numeric tokens are discarded before lemmatization.
    return [wnl.lemmatize(t) for t in lowered if not t.isdigit()]

In [5]:
def count_occurences(text, tokenizer=None):
    """Count how often each token appears across a collection of strings.

    Parameters
    ----------
    text : iterable of str
        Strings to analyse (e.g. all display values of one ingredient).
    tokenizer : callable, optional
        Function turning one string into a list of tokens. Defaults to
        ``tokenize_and_lemmatize``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct token (index = token, in order of first appearance)
        with a single float column ``'occurence'`` holding the total count.
        Empty when ``text`` is empty.
    """
    if tokenizer is None:
        tokenizer = tokenize_and_lemmatize

    # A plain dict keeps insertion order and counts every token exactly once per
    # appearance. The previous reference-list scan counted a token several times
    # when it was repeated inside the first string, and relied on
    # DataFrame.get_value/set_value, which no longer exist in pandas >= 1.0.
    counts = {}
    for sentence in text:
        for token in tokenizer(sentence):
            counts[token] = counts.get(token, 0) + 1

    return pd.DataFrame({'occurence': list(counts.values())},
                        index=list(counts.keys()),
                        dtype=float)

In [6]:
def define_ingredient_label(text):
    """Build a label for an ingredient from the tokens common to its display strings.

    The token counts come from ``count_occurences``. The reference count is the
    highest token count, capped at the number of strings; every token whose count
    is strictly above 90% of that reference is kept, and the kept tokens are
    joined with single spaces in their index order.

    Parameters
    ----------
    text : sequence of str
        Display strings recorded for one ingredient id.

    Returns
    -------
    str
        The space-separated label.
    """
    occurrences = count_occurences(text)

    top_count = occurrences.max(axis=0).values[0]
    n_strings = len(text)
    reference = top_count if top_count < n_strings else n_strings
    cutoff = reference - (reference/100*10)

    is_frequent = occurrences['occurence'] > cutoff
    name_array = occurrences[is_frequent].index.values

    # Indexing the first element explicitly keeps the IndexError on an empty selection.
    return " ".join([name_array[0], *name_array[1:]])

In [7]:
# Group the recipe ingredients by ingredientID: count how many times each id occurs and
# collect the distinct display strings it appears under. Most frequent ids come first.
ingredients = collection.aggregate([
    {"$unwind": "$ingredients" },
    {"$group":
        {"_id": "$ingredients.ingredientID",
         "doc_count": { "$sum": 1 },
         "doc_name": {"$addToSet": "$ingredients.displayValue"}
        }
    },
    {"$sort": { "doc_count":-1}}
    ])

df_ingredients = pd.DataFrame(columns=['id', 'name', 'occurence'])

# NOTE(review): rows are still inserted one at a time. The displayed output shows id and
# occurence as floats, which presumably results from this insertion style, and these ids
# are later sent back to MongoDB. Confirm the dtypes before switching to a bulk constructor.
for ingredient in ingredients:
    if ingredient["_id"] == 0:
        continue  # entries grouped under id 0 are left out
    name = define_ingredient_label(ingredient["doc_name"])
    df_ingredients.loc[len(df_ingredients)] = (ingredient["_id"], name, ingredient["doc_count"])

# Discard ingredients seen fewer than 10 times (rebinding instead of an in-place drop,
# so re-running the cell never depends on a half-mutated frame).
df_ingredients = df_ingredients.drop(df_ingredients[df_ingredients['occurence'] < 10].index)

In [8]:
# Preview the first ten ingredients (rows were inserted in descending doc_count order).
df_ingredients.head(10)

Unnamed: 0,id,name,occurence
0,16421.0,salt,8730.0
1,4342.0,garlic,5246.0
2,4397.0,onion,4947.0
3,2496.0,water,4583.0
4,1526.0,sugar,4371.0
5,16157.0,butter,4036.0
6,16317.0,egg,3449.0
7,1684.0,all purpose flour,3215.0
8,16406.0,pepper,3194.0
9,6307.0,olive oil,3127.0


In [9]:
# Number of ingredients left after removing those with fewer than 10 occurrences.
len(df_ingredients)

1046

In [10]:
# Count, for every generated label, how many ingredient rows share it;
# the length is the number of distinct labels.
df_ingredients_duplicates = df_ingredients.groupby('name').count()
len(df_ingredients_duplicates)

861

In [11]:
# Keep only the labels shared by more than two ingredient rows.
df_ingredients_duplicates = df_ingredients_duplicates[df_ingredients_duplicates['occurence'] > 2]
df_ingredients_duplicates.head(10)

Unnamed: 0_level_0,id,occurence
name,Unnamed: 1_level_1,Unnamed: 2_level_1
cheese,5,5
chicken,12,12
chipotle,3,3
coconut,3,3
corn,4,4
crabmeat,3,3
cucumber,3,3
cup rice,6,6
flour tortilla,3,3
ginger,3,3


In [12]:
# Number of labels shared by more than two ingredient rows.
len(df_ingredients_duplicates)

37

## Find ingredient type

In [13]:
# Every ingredient starts untyped; the placeholder is the string 'None', not the None object.
df_ingredients['type'] = 'None'

In [14]:
def find_ingredient_ids(df, name):
    """Return the ids of the ingredients whose label contains ``name``.

    The match is a plain, case-sensitive substring test (no regular expression),
    so ``'oil'`` matches both ``'oil'`` and ``'olive oil'``. Rows with a missing
    label never match.

    Parameters
    ----------
    df : pandas.DataFrame
        Ingredient table with at least the columns ``'name'`` and ``'id'``.
    name : str
        Fragment to look for inside the labels.

    Returns
    -------
    list
        Matching ids in row order, as native Python scalars.
    """
    contains_name = df['name'].str.contains(name, regex=False, na=False)
    # tolist() converts numpy scalars to built-in numbers; numpy integers in
    # particular cannot be encoded by pymongo when the ids are used in a query.
    return df.loc[contains_name, 'id'].tolist()

In [15]:
def fill_ingredient_type(df, type_name, ingredient_names):
    """Tag with ``type_name`` every ingredient whose label matches one of the names.

    The ids are collected with ``find_ingredient_ids`` for each entry of
    ``ingredient_names``; the ``'type'`` column of the rows carrying those ids is
    then overwritten in place, whatever type they had before.

    Parameters
    ----------
    df : pandas.DataFrame
        Ingredient table with the columns ``'id'``, ``'name'`` and ``'type'``.
    type_name : str
        Value written into the ``'type'`` column.
    ingredient_names : iterable of str
        Name fragments identifying the ingredients of this type.

    Returns
    -------
    None
    """
    matched_ids = []
    for fragment in ingredient_names:
        matched_ids.extend(find_ingredient_ids(df, fragment))

    has_matched_id = df['id'].isin(matched_ids)
    df.loc[has_matched_id, 'type'] = type_name

#### Meat

In [16]:
# Name fragments tagged as 'meat'. find_ingredient_ids matches them as substrings of the
# ingredient label, so a short fragment such as 'ham' tags every label containing "ham".
meat_names = ['chicken', 'turkey', 'beef', 'pork', 'veal', 'lamb', 'bacon', 'sausage', 'ham',
             'prosciutto', 'steak', 'dog', 'chorizo', 'lard', 'salami', 'pancetta']
fill_ingredient_type(df_ingredients, 'meat', meat_names)

#### Fish

In [17]:
# Name fragments tagged as 'fish and seafood' (substring match on the ingredient label).
# fill_ingredient_type overwrites the type, so a label tagged by an earlier cell is retagged here.
fish_names = ['fish', 'cod', 'salmon', 'swordfish', 'shrimp', 'sea', 'crabmeat', 'anchovy', 
              'clam', 'tuna', 'prawn']
fill_ingredient_type(df_ingredients, 'fish and seafood', fish_names)

#### Egg

In [18]:
# Single fragment for the 'egg' type; any label containing "egg" is tagged.
egg_names = ['egg']
fill_ingredient_type(df_ingredients, 'egg', egg_names)

#### Dairy

In [19]:
# Name fragments tagged as 'dairy' (substring match on the ingredient label).
dairy_names = ['milk', 'cream', 'cheese', 'yogurt', 'butter', 'margarine', 'mozzarella', 'queso']
fill_ingredient_type(df_ingredients, 'dairy', dairy_names)

#### Vegetables

In [20]:
# Name fragments tagged as 'vegetables' (substring match on the ingredient label).
# Labels already typed by an earlier cell are overwritten when they match one of these.
vegetables_names = ['garlic', 'onion', 'carrot', 'tomato', 'celery', 'mushroom', 'zucchini',
                    'olive', 'cabbage', 'spinach', 'cucumber', 'lettuce', 'broccoli', 'vegetable',
                    'pepper', 'pea', 'caper', 'asparagus', 'cauliflower', 'chipotle', 'chestnut',
                    'artichoke', 'shallot', 'leek', 'pickle', 'chile', 'choy', 'kale', 'pumpkin',
                    'squash', 'radish', 'salad']
fill_ingredient_type(df_ingredients, 'vegetables', vegetables_names)

#### Fruit

In [21]:
# Name fragments tagged as 'fruit' (substring match on the ingredient label).
fruit_names = ['lemon', 'lime', 'avocado', 'raisin', 'orange', 'apple', 'banana', 'mango',
               'coconut', 'sherry', 'strawberry', 'pineapple', 'blueberry', 'papaya', 'fig',
               'cherry', 'date', 'cranberry', 'raspberry', 'prune', 'apricot', 'blackberry',
               'guacamole']
fill_ingredient_type(df_ingredients, 'fruit', fruit_names)

#### Starchy

In [22]:
# Name fragments tagged as 'starchy' (substring match on the ingredient label).
# Generic fragments such as 'nut', 'seed' or 'paste' tag every label that contains them.
starchy_names = ['potato', 'rice', 'bread', 'cornstarch', 'corn', 'walnut', 'almond', 'bean',
                 'flour', 'sesame', 'pecan', 'polenta', 'noodle', 'lentil', 'spaghetti', 'pasta',
                 'macaroni', 'tortilla', 'tofu', 'linguine', 'oat', 'paste', 'seed', 'cashew',
                 'ravioli', 'gnocchi', 'crust', 'nut', 'baguette', 'quinoa', 'couscous']
fill_ingredient_type(df_ingredients, 'starchy', starchy_names)

#### Condiment

In [23]:
# Name fragments tagged as 'condiment'. This cell runs after the vegetable cell and
# fill_ingredient_type overwrites the type, so a label matching both 'olive' and 'oil'
# ends up as 'condiment'.
condiment_names = ['salt', 'oil', 'soy sauce', 'vinegar', 'mayonnaise', 'ketchup', 'mustard',
                   'sauce', 'salsa', 'seasoning', 'mix']
fill_ingredient_type(df_ingredients, 'condiment', condiment_names)

#### Spices

In [24]:
# Name fragments tagged with the type value 'spicies' (the spelling is reused as a column
# name further down, so it must stay consistent). Substring match on the ingredient label.
spicies_names = ['curry', 'cinnamon', 'cumin', 'ginger', 'vanilla', 'paprika', 'powder', 'nutmeg',
                'cocoa', 'allspice', 'masala', 'clove', 'cardamom', 'turmeric', 'saffron']
fill_ingredient_type(df_ingredients, 'spicies', spicies_names)

#### Herbs

In [25]:
# Name fragments tagged as 'herbs' (substring match on the ingredient label).
# Labels typed by an earlier cell are overwritten when they contain one of these fragments.
herbs_names = ['cilantro', 'basil','parsley', 'oregano', 'mint', 'thyme', 'coriander', 'rosemary',
              'leaf', 'chive', 'sage']
fill_ingredient_type(df_ingredients, 'herbs', herbs_names)

#### Sugared Ingredients

In [26]:
# Name fragments tagged as 'sugared' (substring match on the ingredient label).
sugared_names = ['sugar', 'honey', 'maple syrup', 'chocolate']
fill_ingredient_type(df_ingredients, 'sugared', sugared_names)

#### Alcohol

In [27]:
# Name fragments tagged as 'alcohol'. This is the last typing cell, so its matches
# override every type assigned before. Short fragments such as 'rum' match as substrings.
alcohol_names = ['wine', 'liqueur', 'pisco', 'beer', 'tequila', 'rum', 'brandy', 'triple sec',
                 'vodka', 'sake', 'whiskey', 'bourbon']
fill_ingredient_type(df_ingredients, 'alcohol', alcohol_names)

In [28]:
# Number of ingredients still carrying the 'None' placeholder, i.e. matched by no keyword list.
len(df_ingredients[df_ingredients['type'] == 'None'])

199

In [29]:
# Total number of ingredients, for comparison with the untyped count above.
len(df_ingredients)

1046

In [30]:
# Preview the first ten ingredients together with their assigned type.
df_ingredients.head(10)

Unnamed: 0,id,name,occurence,type
0,16421.0,salt,8730.0,condiment
1,4342.0,garlic,5246.0,vegetables
2,4397.0,onion,4947.0,vegetables
3,2496.0,water,4583.0,
4,1526.0,sugar,4371.0,sugared
5,16157.0,butter,4036.0,dairy
6,16317.0,egg,3449.0,egg
7,1684.0,all purpose flour,3215.0,starchy
8,16406.0,pepper,3194.0,vegetables
9,6307.0,olive oil,3127.0,condiment


# Counting number of recipes per country

In [31]:
def initialize_dataFrame(json_name, json_field):
    """Load one list of geographic entries from an aggregated JSON file.

    Parameters
    ----------
    json_name : str
        Path of the JSON file.
    json_field : str
        Top-level key holding the list of entries (e.g. ``"per_country"``). Each
        entry must provide ``geo_identifier``, ``geo_name``, ``database_name``
        and ``nbRecipes``; other keys are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per entry with those four columns, sorted by ``geo_identifier``.
        The index keeps the position of each entry in the file.

    Raises
    ------
    KeyError
        If ``json_field`` or one of the four keys is missing.
    """
    columns = ['geo_identifier', 'geo_name', 'database_name', 'nbRecipes']

    # JSON text is UTF-8; without an explicit encoding the platform default is used,
    # which can corrupt or reject non-ASCII names.
    with open(json_name, encoding='utf-8') as data_file:
        agg_data = json.load(data_file)

    df_countries = pd.DataFrame(columns=columns)

    # NOTE(review): rows are inserted one by one as before, because the resulting column
    # dtypes (nbRecipes is displayed as float) are relied on downstream. Confirm before
    # replacing this with a bulk constructor.
    for entry in agg_data[json_field]:
        df_countries.loc[len(df_countries)] = pd.Series([entry[key] for key in columns],
                                                        index=columns)

    return df_countries.sort_values(by='geo_identifier', ascending=True)

In [32]:
df_countries = initialize_dataFrame('fullAggregatedData.json', "per_country")

# Leave out the 'chili' database entry. The copy makes the label assignments below act
# on an independent frame instead of a filtered view.
df_countries = df_countries[df_countries['database_name'] != 'chili'].copy()

# An entry whose geo_identifier is 'invalid' is relabelled as Israel (first such row only).
invalid_rows = df_countries.index[df_countries["geo_identifier"] == 'invalid']
if(len(invalid_rows) != 0):
    ISR_index = invalid_rows[0]
    # .loc replaces DataFrame.set_value, which was removed in pandas 1.0.
    df_countries.loc[ISR_index, 'geo_identifier'] = 'ISR'
    df_countries.loc[ISR_index, 'geo_name'] = 'Israel'
    # sort_values returns a new frame; the result was previously discarded, which left
    # the relabelled row where 'invalid' had been sorted.
    df_countries = df_countries.sort_values(by='geo_identifier', ascending=True)
df_countries

Unnamed: 0,geo_identifier,geo_name,database_name,nbRecipes
0,ARG,Argentina,argentinian,23.0
6,AUT,Austria,austrian,30.0
18,BEL,Belgium,belgian,15.0
20,BGD,Bangladesh,bangladeshi,14.0
29,BRA,Brazil,brazilian,88.0
25,CAN,Canada,canadian,1167.0
22,CHE,Switzerland,swiss,29.0
12,CHL,Chile,chilean,32.0
23,CHN,China,chinese,247.0
7,COL,Colombia,colombian,13.0


# Fill Countries DataFrames

In [33]:
def request_recipes_by_ingredient(ingredient_ids):
    """Query the recipes that use at least one of the given ingredients.

    Parameters
    ----------
    ingredient_ids : list
        Ingredient ids; a recipe matches when one element of its ``ingredients``
        array has an ``ingredientID`` contained in this list.

    Returns
    -------
    The result of ``collection.find`` for that filter (the module-level
    ``collection`` is used).
    """
    id_filter = {"ingredientID": {"$in": ingredient_ids}}
    query = {"ingredients": {"$elemMatch": id_filter}}
    return collection.find(query)

In [34]:
def fill_dataFrame_ingredient_by_ids(df, ingredient_ids, column_label, recipe_field, recipes=None):
    """Add to ``df`` the share of recipes of each place that use the given ingredients.

    Every recipe contributes one count per entry of ``recipe[recipe_field]`` that
    names a row of ``df``: the entry is compared with ``database_name`` first and
    with ``geo_name`` second, and the first matching row wins. Entries matching no
    row are ignored. The counts are finally divided by ``nbRecipes``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``'database_name'``, ``'geo_name'`` and
        ``'nbRecipes'``. It is modified in place: ``column_label`` is created or
        overwritten.
    ingredient_ids : list
        Ingredient ids used to query the recipes.
    column_label : str
        Name of the column receiving the ratios.
    recipe_field : str
        Recipe key holding the iterable of place names (e.g. ``"ada-country"``).
    recipes : iterable of dict, optional
        Recipes to count. When omitted they are fetched with
        ``request_recipes_by_ingredient(ingredient_ids)``.

    Returns
    -------
    None
    """
    if recipes is None:
        recipes = request_recipes_by_ingredient(ingredient_ids)

    # Build both name -> row-label lookups once instead of filtering the whole frame
    # for every country of every recipe. setdefault keeps the first row per name.
    row_by_database_name = {}
    row_by_geo_name = {}
    for row_label, database_name, geo_name in zip(df.index, df['database_name'], df['geo_name']):
        row_by_database_name.setdefault(database_name, row_label)
        row_by_geo_name.setdefault(geo_name, row_label)

    hits = dict.fromkeys(df.index, 0)
    for recipe in recipes:
        for country in recipe[recipe_field]:
            if country in row_by_database_name:
                hits[row_by_database_name[country]] += 1
            elif country in row_by_geo_name:
                hits[row_by_geo_name[country]] += 1

    # Plain column assignment replaces DataFrame.get_value/set_value (removed in pandas 1.0).
    df[column_label] = [hits[row_label] for row_label in df.index]
    df[column_label] = df[column_label]/df['nbRecipes']
    return

## Countries and food types

In [35]:
def find_type_ids(df, type_name):
    """List the ids of the ingredients whose ``'type'`` equals ``type_name``.

    Parameters
    ----------
    df : pandas.DataFrame
        Ingredient table with the columns ``'type'`` and ``'id'``.
    type_name : str
        Food type to select.

    Returns
    -------
    list
        Ids of the matching rows, in row order (empty when nothing matches).
    """
    has_type = df['type'] == type_name
    return list(df[has_type]['id'])

In [36]:
def fill_dataFrame_ingredient_type(df, df_ingredients, type_name, recipe_field):
    """Add to ``df`` a column named ``type_name`` with the share of recipes using that food type.

    Parameters
    ----------
    df : pandas.DataFrame
        Country or state table, modified in place.
    df_ingredients : pandas.DataFrame
        Ingredient table whose ``'type'`` column selects the ingredient ids.
    type_name : str
        Food type to look up; also the name of the created column.
    recipe_field : str
        Recipe key holding the place names (e.g. ``"ada-country"``).

    Returns
    -------
    None
        A message reports whether the column was added. No column is created when
        no ingredient has the requested type.
    """
    type_ids = find_type_ids(df_ingredients, type_name)
    if len(type_ids) == 0:
        # The success message used to be printed in this case too, although df was untouched.
        print(type_name + ' food type matches no ingredient; no column was added to the DataFrame')
        return

    fill_dataFrame_ingredient_by_ids(df, type_ids, type_name, recipe_field)
    print(type_name + ' food type has been added to the DataFrame')
    return

In [37]:
# Work on a copy: the fill functions add columns in place and df_countries is reused later.
df_countries_and_types = df_countries.copy()

In [38]:
# Add one ratio column per food type to the country table, in a fixed order.
for food_type in ('meat', 'fish and seafood', 'egg', 'dairy', 'vegetables', 'fruit',
                  'starchy', 'condiment', 'spicies', 'herbs', 'sugared', 'alcohol'):
    fill_dataFrame_ingredient_type(df_countries_and_types, df_ingredients, food_type, "ada-country")

meat food type has been added to the DataFrame
fish and seafood food type has been added to the DataFrame
egg food type has been added to the DataFrame
dairy food type has been added to the DataFrame
vegetables food type has been added to the DataFrame
fruit food type has been added to the DataFrame
starchy food type has been added to the DataFrame
condiment food type has been added to the DataFrame
spicies food type has been added to the DataFrame
herbs food type has been added to the DataFrame
sugared food type has been added to the DataFrame
alcohol food type has been added to the DataFrame


In [39]:
# Preview the per-country ratios (recipe counts divided by nbRecipes) for each food type.
df_countries_and_types.head(10)

Unnamed: 0,geo_identifier,geo_name,database_name,nbRecipes,meat,fish and seafood,egg,dairy,vegetables,fruit,starchy,condiment,spicies,herbs,sugared,alcohol
0,ARG,Argentina,argentinian,23.0,0.173913,0.0,0.391304,0.391304,0.608696,0.347826,0.434783,0.826087,0.608696,0.434783,0.173913,0.304348
6,AUT,Austria,austrian,30.0,0.066667,0.0,0.7,0.8,0.1,0.433333,0.9,0.7,0.6,0.066667,0.766667,0.4
18,BEL,Belgium,belgian,15.0,0.466667,0.0,0.4,0.733333,0.466667,0.333333,0.933333,0.866667,0.466667,0.4,0.4,0.266667
20,BGD,Bangladesh,bangladeshi,14.0,0.357143,0.357143,0.142857,0.142857,1.0,0.071429,0.857143,1.0,1.0,0.785714,0.214286,0.0
29,BRA,Brazil,brazilian,88.0,0.261364,0.045455,0.329545,0.534091,0.465909,0.318182,0.534091,0.693182,0.295455,0.329545,0.386364,0.102273
25,CAN,Canada,canadian,1167.0,0.296487,0.040274,0.289632,0.579263,0.568123,0.32048,0.630677,0.75407,0.482434,0.231362,0.481577,0.133676
22,CHE,Switzerland,swiss,29.0,0.241379,0.0,0.310345,0.689655,0.586207,0.241379,0.931034,0.793103,0.37931,0.137931,0.344828,0.275862
12,CHL,Chile,chilean,32.0,0.4375,0.09375,0.40625,0.5625,0.5,0.4375,0.5625,0.625,0.53125,0.28125,0.28125,0.375
23,CHN,China,chinese,247.0,0.643725,0.11336,0.315789,0.093117,0.813765,0.210526,0.740891,0.898785,0.502024,0.105263,0.546559,0.182186
7,COL,Colombia,colombian,13.0,0.615385,0.0,0.076923,0.307692,0.846154,0.230769,0.615385,1.0,0.615385,0.461538,0.076923,0.076923


## Countries and Ingredients

In [40]:
def fill_dataFrame_ingredient(df, df_ingredients, ingredient_name, recipe_field):
    """Add to ``df`` a column named ``ingredient_name`` with the share of recipes using it.

    Parameters
    ----------
    df : pandas.DataFrame
        Country or state table, modified in place.
    df_ingredients : pandas.DataFrame
        Ingredient table searched with ``find_ingredient_ids``.
    ingredient_name : str
        Name fragment identifying the ingredient; also the name of the created column.
    recipe_field : str
        Recipe key holding the place names (e.g. ``"ada-country"``).

    Returns
    -------
    None
        A message reports whether the column was added. No column is created when
        no ingredient label matches.
    """
    ingredient_ids = find_ingredient_ids(df_ingredients, ingredient_name)
    if len(ingredient_ids) == 0:
        # The success message used to be printed in this case too, although df was untouched.
        print(ingredient_name + ' matches no ingredient; no column was added to the DataFrame')
        return

    fill_dataFrame_ingredient_by_ids(df, ingredient_ids, ingredient_name, recipe_field)
    print(ingredient_name + ' recipes have been added to the DataFrame')
    return

In [41]:
# Separate copy of the country table for the per-ingredient ratio columns.
df_countries_and_ingredients = df_countries.copy()

In [42]:
# Add one ratio column per selected ingredient to the country table, in a fixed order.
for ingredient_name in ('chicken', 'beef', 'pork', 'rice', 'soy sauce',
                        'mozzarella', 'garlic', 'butter', 'oil', 'shrimp'):
    fill_dataFrame_ingredient(df_countries_and_ingredients, df_ingredients, ingredient_name, "ada-country")

chicken recipes have been added to the DataFrame
beef recipes have been added to the DataFrame
pork recipes have been added to the DataFrame
rice recipes have been added to the DataFrame
soy sauce recipes have been added to the DataFrame
mozzarella recipes have been added to the DataFrame
garlic recipes have been added to the DataFrame
butter recipes have been added to the DataFrame
oil recipes have been added to the DataFrame
shrimp recipes have been added to the DataFrame


In [43]:
# Preview the per-country ratios (recipe counts divided by nbRecipes) for each ingredient.
df_countries_and_ingredients.head(10)

Unnamed: 0,geo_identifier,geo_name,database_name,nbRecipes,chicken,beef,pork,rice,soy sauce,mozzarella,garlic,butter,oil,shrimp
0,ARG,Argentina,argentinian,23.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.434783,0.26087,0.521739,0.0
6,AUT,Austria,austrian,30.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.7,0.1,0.0
18,BEL,Belgium,belgian,15.0,0.133333,0.066667,0.066667,0.0,0.0,0.0,0.133333,0.6,0.2,0.0
20,BGD,Bangladesh,bangladeshi,14.0,0.071429,0.214286,0.0,0.071429,0.0,0.0,0.785714,0.142857,0.857143,0.071429
29,BRA,Brazil,brazilian,88.0,0.125,0.045455,0.045455,0.079545,0.022727,0.034091,0.329545,0.227273,0.477273,0.011364
25,CAN,Canada,canadian,1167.0,0.119966,0.057412,0.037704,0.042845,0.06341,0.023136,0.277635,0.351328,0.292202,0.012853
22,CHE,Switzerland,swiss,29.0,0.034483,0.172414,0.0,0.0,0.0,0.0,0.206897,0.448276,0.172414,0.0
12,CHL,Chile,chilean,32.0,0.125,0.15625,0.0625,0.03125,0.0,0.0,0.28125,0.3125,0.3125,0.0625
23,CHN,China,chinese,247.0,0.417004,0.020243,0.242915,0.279352,0.651822,0.0,0.473684,0.044534,0.724696,0.089069
7,COL,Colombia,colombian,13.0,0.461538,0.230769,0.076923,0.0,0.0,0.076923,0.538462,0.076923,0.769231,0.0


# Counting number of recipes per US state

In [44]:
# Load the "per_region" entries of the aggregated file; the frame comes back sorted by geo_identifier.
df_states = initialize_dataFrame('fullAggregatedData.json', "per_region")
df_states

Unnamed: 0,geo_identifier,geo_name,database_name,nbRecipes
21,01,Alabama,alabama,21.0
46,02,Alaska,alaska,36.0
10,04,Arizona,arizona,31.0
27,05,Arkansas,arkansas,42.0
7,08,Colorado,colorado,60.0
34,09,Connecticut,connecticut,58.0
31,10,Delaware,delaware,37.0
0,12,Florida,florida,92.0
36,15,Hawaii,hawaii,205.0
18,16,Idaho,idaho,72.0


## US States and food types

In [45]:
# Work on a copy: the fill functions add columns in place and df_states is reused later.
df_states_and_types = df_states.copy()

In [46]:
# Add one ratio column per food type to the state table, in a fixed order.
for food_type in ('meat', 'fish and seafood', 'egg', 'dairy', 'vegetables', 'fruit',
                  'starchy', 'condiment', 'spicies', 'herbs', 'sugared', 'alcohol'):
    fill_dataFrame_ingredient_type(df_states_and_types, df_ingredients, food_type, "ada-region")
print('FINISH')

meat food type has been added to the DataFrame
fish and seafood food type has been added to the DataFrame
egg food type has been added to the DataFrame
dairy food type has been added to the DataFrame
vegetables food type has been added to the DataFrame
fruit food type has been added to the DataFrame
starchy food type has been added to the DataFrame
condiment food type has been added to the DataFrame
spicies food type has been added to the DataFrame
herbs food type has been added to the DataFrame
sugared food type has been added to the DataFrame
alcohol food type has been added to the DataFrame
FINISH


## US States and Ingredients

In [47]:
# Separate copy of the state table for the per-ingredient ratio columns.
df_states_and_ingredients = df_states.copy()

In [48]:
# Add one ratio column per selected ingredient to the state table, in a fixed order.
for ingredient_name in ('chicken', 'beef', 'pork', 'rice', 'soy sauce',
                        'mozzarella', 'garlic', 'butter', 'oil', 'shrimp'):
    fill_dataFrame_ingredient(df_states_and_ingredients, df_ingredients, ingredient_name, "ada-region")
print('FINISH')

chicken recipes have been added to the DataFrame
beef recipes have been added to the DataFrame
pork recipes have been added to the DataFrame
rice recipes have been added to the DataFrame
soy sauce recipes have been added to the DataFrame
mozzarella recipes have been added to the DataFrame
garlic recipes have been added to the DataFrame
butter recipes have been added to the DataFrame
oil recipes have been added to the DataFrame
shrimp recipes have been added to the DataFrame
FINISH


# JSONs generation

In [49]:
def create_json_from_dataFrames(df_countries, df_states):
    """Bundle a country table and a state table into one JSON-ready dictionary.

    Parameters
    ----------
    df_countries : pandas.DataFrame
        Table exported under the key ``"per_country"``.
    df_states : pandas.DataFrame
        Table exported under the key ``"per_region"``.

    Returns
    -------
    dict
        ``{"per_country": [...], "per_region": [...]}`` where each list holds one
        ``{column: value}`` dictionary per row, in row order.
    """
    def rows_as_dicts(frame):
        # One dictionary per positional row of the frame.
        return [frame.iloc[position].to_dict() for position in range(len(frame))]

    return {"per_country": rows_as_dicts(df_countries),
            "per_region": rows_as_dicts(df_states)}

In [50]:
# Build the two export payloads: ratios per food type and ratios per ingredient,
# each holding the country rows and the state rows.
food_type_json = create_json_from_dataFrames(df_countries_and_types, df_states_and_types)
ingredients_json = create_json_from_dataFrames(df_countries_and_ingredients, df_states_and_ingredients)

In [51]:
# ensure_ascii=False writes non-ASCII characters verbatim, so the file is opened explicitly
# as UTF-8 instead of relying on the platform default encoding.
with open('foodTypeRecipePerc.json', 'w', encoding='utf-8') as file:
    json.dump(food_type_json, file, indent=4, sort_keys=True, separators=(',', ': '), ensure_ascii=False)

In [52]:
# ensure_ascii=False writes non-ASCII characters verbatim, so the file is opened explicitly
# as UTF-8 instead of relying on the platform default encoding.
with open('IngredientRecipePerc.json', 'w', encoding='utf-8') as file:
    json.dump(ingredients_json, file, indent=4, sort_keys=True, separators=(',', ': '), ensure_ascii=False)