In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ast
import random

In [26]:
dish_ds = pd.read_csv('3A2M_EXTENDED.csv')

In [27]:
dish_ds.head()

Unnamed: 0,title,NER,Extended_NER,genre,label,directions
0,\t Arugula Pomegranate Salad,"[""baby spinach"", ""baby arugula"", ""pomegranate ...","['alfalfa sprouts', 'baby spinach', 'baby arug...",vegetables,4,"[""Toss together spinach and arugula, then plac..."
1,\t Black Bean And Turkey Chili,"[""olive oil"", ""yellow onion"", ""garlic"", ""groun...","['one', 'yellow onion', 'tomato paste', 'about...",sides,8,"[""Dice the onion and mince the garlic. Add the..."
2,\t Finger Lickin' Tofu Nuggets,"[""extra firm"", ""almond flour"", ""nutritional ye...","['extra firm', '2', 'coconut oil', 'almond flo...",nonveg,3,"[""Wrap the tofu in a clean tea towel and press..."
3,\t Jerk Beef Stew With Carrots And Tomatoes,"[""olive oil"", ""boneless beef chuck"", ""onion"", ...","['boneless beef chuck', '2', 'Saute', 'onion',...",vegetables,4,"[""Preheat oven to 350 degrees F."", ""Heat the o..."
4,\t Pomegranate Couscous Salad,"[""pomegranate arils"", ""whole wheat couscous"", ...","['whole wheat couscous', '10 minutes', 'lemon ...",vegetables,4,"[""Place couscous in a bowl with 11/2 cups of h..."


In [28]:
def preprocess_ingredients(row):
    ingredients = ast.literal_eval(row['Extended_NER'])
    second_ingredients = ast.literal_eval(row['NER'])
    return str(ingredients + second_ingredients)

In [29]:
dish_ds['ingredients'] = dish_ds.apply(preprocess_ingredients, axis=1)
dish_ds['genre'] = dish_ds['genre'].apply(lambda x: x.lower())

In [30]:
dish_ds_ner_combined = dish_ds.drop(['NER', 'Extended_NER'], axis=1)

In [31]:
dish_ds_ner_combined.head()

Unnamed: 0,title,genre,label,directions,ingredients
0,\t Arugula Pomegranate Salad,vegetables,4,"[""Toss together spinach and arugula, then plac...","['alfalfa sprouts', 'baby spinach', 'baby arug..."
1,\t Black Bean And Turkey Chili,sides,8,"[""Dice the onion and mince the garlic. Add the...","['one', 'yellow onion', 'tomato paste', 'about..."
2,\t Finger Lickin' Tofu Nuggets,nonveg,3,"[""Wrap the tofu in a clean tea towel and press...","['extra firm', '2', 'coconut oil', 'almond flo..."
3,\t Jerk Beef Stew With Carrots And Tomatoes,vegetables,4,"[""Preheat oven to 350 degrees F."", ""Heat the o...","['boneless beef chuck', '2', 'Saute', 'onion',..."
4,\t Pomegranate Couscous Salad,vegetables,4,"[""Place couscous in a bowl with 11/2 cups of h...","['whole wheat couscous', '10 minutes', 'lemon ..."


In [32]:
available_ingredients = [
    "pig",
    "garlic",
    "calamansi",
    "tomato",
    "bell pepper",
    "carrot",
    "ginger",
    "chicken",
    "potato",
    "cucumber",
    "onion",
    "chili",
    "long chili",
]

def filter_ingredients(row):
    ingredients = ast.literal_eval(row['ingredients'])
    ingredients = set([ingredient.lower().strip() for ingredient in ingredients])
    return not ingredients.isdisjoint(available_ingredients)

In [33]:
mask = dish_ds_ner_combined.apply(filter_ingredients, axis=1)

In [34]:
mask.value_counts()

False    1457924
True      773219
Name: count, dtype: int64

In [35]:
dish_ds_filtered = dish_ds_ner_combined[mask]

In [36]:
dish_ds_filtered.head()

Unnamed: 0,title,genre,label,directions,ingredients
1,\t Black Bean And Turkey Chili,sides,8,"[""Dice the onion and mince the garlic. Add the...","['one', 'yellow onion', 'tomato paste', 'about..."
2,\t Finger Lickin' Tofu Nuggets,nonveg,3,"[""Wrap the tofu in a clean tea towel and press...","['extra firm', '2', 'coconut oil', 'almond flo..."
3,\t Jerk Beef Stew With Carrots And Tomatoes,vegetables,4,"[""Preheat oven to 350 degrees F."", ""Heat the o...","['boneless beef chuck', '2', 'Saute', 'onion',..."
15,Sparkrecipes Is Powered By Sparkpeople.Com T...,cereal,6,"[""Portion the turkey as soon as you get home f...","['Place', 'egg whites', 'onion', 'chedder', 'g..."
16,(Cheesy Mashed Potatoes) Recipe,vegetables,4,"[""In a large saucepan, cover potatoes and 1 wh...","['Bring', 'mixed Alpine cheeses', 'about 20 mi..."


In [37]:
def transform_list_to_string(row):
    return ';'.join(ast.literal_eval(row['ingredients']))

In [38]:
dish_ds_filtered['ingredients'] = dish_ds_filtered.apply(transform_list_to_string, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dish_ds_filtered['ingredients'] = dish_ds_filtered.apply(transform_list_to_string, axis=1)


In [39]:
dish_ds_filtered.head()

Unnamed: 0,title,genre,label,directions,ingredients
1,\t Black Bean And Turkey Chili,sides,8,"[""Dice the onion and mince the garlic. Add the...",one;yellow onion;tomato paste;about 10 minutes...
2,\t Finger Lickin' Tofu Nuggets,nonveg,3,"[""Wrap the tofu in a clean tea towel and press...",extra firm;2;coconut oil;almond flour;nutritio...
3,\t Jerk Beef Stew With Carrots And Tomatoes,vegetables,4,"[""Preheat oven to 350 degrees F."", ""Heat the o...",boneless beef chuck;2;Saute;onion;1 hour to 1 ...
15,Sparkrecipes Is Powered By Sparkpeople.Com T...,cereal,6,"[""Portion the turkey as soon as you get home f...",Place;egg whites;onion;chedder;ground turkey;B...
16,(Cheesy Mashed Potatoes) Recipe,vegetables,4,"[""In a large saucepan, cover potatoes and 1 wh...",Bring;mixed Alpine cheeses;about 20 minutes;th...


In [40]:
genres = dish_ds_filtered['genre'].unique()
def sample_genre(genre_df):
    return genre_df.sample(n=1000, random_state=9)
genres

array(['sides', 'nonveg', 'vegetables', 'cereal', 'drinks', 'fusion',
       'fastfood', 'bakery', 'meal'], dtype=object)

In [41]:
# Take 1000 of each genre and put it into a new df
dish_ds_filtered_sampled = dish_ds_filtered.groupby('genre').apply(sample_genre).reset_index(drop=True)
dish_ds_filtered_sampled.head()

  dish_ds_filtered_sampled = dish_ds_filtered.groupby('genre').apply(sample_genre).reset_index(drop=True)


Unnamed: 0,title,genre,label,directions,ingredients
0,Fusilli With Artichoke Hearts And Parmesan Cream,bakery,1,"[""In a medium saucepan, melt the butter over m...",salt;Marinated Artichoke;Marinated Artichoke H...
1,Susan'S Pumpkin Cookies,bakery,1,Mix pumpkin sugar and spices together and set ...,brown sugar;molasses;Seal;soda;flour;rolled oa...
2,Jambalaya,bakery,1,"[""Heat the oil in a saucepan over medium heat....",chicken;shrimp;basil;salt;Tabasco sauce;garlic...
3,Quiche Lorraine,bakery,1,"[""You may use ham or sausage or all 3 meats.""]",onion;heavy cream;another 10 minutes;Serve;325...
4,Balsamic Delight,bakery,1,"[""Cut up all vegetable ingredients and toss. P...",Broccoli Flowerettes;Celery;salt;Carrots;white...


In [42]:
dish_ds_filtered_sampled['genre'].value_counts()

genre
bakery        1000
cereal        1000
drinks        1000
fastfood      1000
fusion        1000
meal          1000
nonveg        1000
sides         1000
vegetables    1000
Name: count, dtype: int64

In [43]:
def add_image_to_df(row):
    genre_to_url_map = {
        "bakery" : [
            "https://images.pexels.com/photos/128865/pexels-photo-128865.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/9228619/pexels-photo-9228619.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/2837005/pexels-photo-2837005.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/1495534/pexels-photo-1495534.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/8108119/pexels-photo-8108119.jpeg?auto=compress&cs=tinysrgb&w=600",
            ],
        "cereal" : [
            "https://images.pexels.com/photos/128865/pexels-photo-128865.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/9228619/pexels-photo-9228619.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/2837005/pexels-photo-2837005.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/1495534/pexels-photo-1495534.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/8108119/pexels-photo-8108119.jpeg?auto=compress&cs=tinysrgb&w=600", 
            ],
        "drinks" : [
            "https://images.pexels.com/photos/338713/pexels-photo-338713.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/1200348/pexels-photo-1200348.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/1089930/pexels-photo-1089930.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
            "https://images.pexels.com/photos/312418/pexels-photo-312418.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1",
            "https://images.pexels.com/photos/230490/pexels-photo-230490.jpeg?auto=compress&cs=tinysrgb&w=600", 
            ],
        "fastfood" : [
            "https://images.pexels.com/photos/70497/pexels-photo-70497.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
            "https://images.pexels.com/photos/1199957/pexels-photo-1199957.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
            "https://images.pexels.com/photos/1123249/pexels-photo-1123249.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
            "https://images.pexels.com/photos/315755/pexels-photo-315755.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
            "https://images.pexels.com/photos/6697273/pexels-photo-6697273.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2" 

            ],
        "fusion" : [
            "https://images.pexels.com/photos/12706240/pexels-photo-12706240.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
            "https://images.pexels.com/photos/4110430/pexels-photo-4110430.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
            "https://images.pexels.com/photos/2792186/pexels-photo-2792186.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
            "https://images.pexels.com/photos/25524075/pexels-photo-25524075/free-photo-of-dessert-with-fruits.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
            "https://images.pexels.com/photos/20571445/pexels-photo-20571445/free-photo-of-bowl-of-food.jpeg?auto=compress&cs=tinysrgb&w=800",
            ],
        "meal" : [
            "https://images.pexels.com/photos/8827916/pexels-photo-8827916.jpeg?auto=compress&cs=tinysrgb&w=800",
            "https://images.pexels.com/photos/8954526/pexels-photo-8954526.jpeg?auto=compress&cs=tinysrgb&w=800",
            "https://images.pexels.com/photos/6170473/pexels-photo-6170473.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
            "https://images.pexels.com/photos/6064810/pexels-photo-6064810.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2",
            "https://images.pexels.com/photos/15832878/pexels-photo-15832878/free-photo-of-mixed-salad-on-a-tray.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2" 
            ],
        "nonveg" : [
            "https://images.pexels.com/photos/20644799/pexels-photo-20644799/free-photo-of-choice-of-meat-snacks.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/1482803/pexels-photo-1482803.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/1251208/pexels-photo-1251208.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/588776/pexels-photo-588776.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/410648/pexels-photo-410648.jpeg?auto=compress&cs=tinysrgb&w=600", 
            ],
        "sides" : [
            "https://images.pexels.com/photos/10520474/pexels-photo-10520474.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/6740528/pexels-photo-6740528.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/10420372/pexels-photo-10420372.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/1893555/pexels-photo-1893555.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/5840089/pexels-photo-5840089.jpeg?auto=compress&cs=tinysrgb&w=600",
            ],
        "vegetables" : [
            "https://images.pexels.com/photos/20640320/pexels-photo-20640320/free-photo-of-tomatoes-on-a-food-market.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/842571/pexels-photo-842571.jpeg?auto=compress&cs=tinysrgb&w=600",
            "https://images.pexels.com/photos/3872373/pexels-photo-3872373.jpeg?auto=compress&cs=tinysrgb&w=60", 
            "https://images.pexels.com/photos/2862154/pexels-photo-2862154.jpeg?auto=compress&cs=tinysrgb&w=600", 
            "https://images.pexels.com/photos/3026808/pexels-photo-3026808.jpeg?auto=compress&cs=tinysrgb&w=600",
            ],
    }
    genre_urls = genre_to_url_map.get(row['genre'], [])
    if genre_urls:
        link = random.choice(genre_urls)
    else:
        link = None
    return link

In [44]:
dish_ds_filtered_sampled['image_url'] = dish_ds_filtered_sampled.apply(add_image_to_df, axis=1)

In [45]:
dish_ds_filtered_sampled.to_csv('3A2M_EXTENDED_FILTERED_SAMPLED.csv', index=False)
dish_ds_filtered_sampled.to_json('3A2M_EXTENDED_FILTERED_SAMPLED.json', orient='records')