In [1]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import requests

In [2]:
# Load API credentials from a local JSON file so the key is not written into
# the notebook; later cells read credentials['API_KEY'].
with open('credentials.json') as f:
    credentials = json.load(f)

In [3]:
# Search Spoonacular for up to 100 vegetarian main courses.
# The query goes through `params` so requests performs the URL encoding (the
# space in "main course", any special characters in the API key), and the
# timeout keeps the cell from hanging indefinitely if the API does not answer.
search_request = requests.get(
    "https://api.spoonacular.com/recipes/complexSearch",
    params={
        "diet": "vegetarian",
        "type": "main course",
        "number": 100,
        "limitLicense": "True",
        "apiKey": credentials["API_KEY"],
    },
    timeout=30,
)
# Fail fast on an HTTP error instead of letting later cells trip over an
# error payload.
search_request.raise_for_status()

In [4]:
# Sanity check: 200 means the search request succeeded.
search_request.status_code

200

In [5]:
# Decode the JSON body of the search response; the recipe hits are read from
# its "results" key in the next cell.
results = search_request.json()

In [6]:
# Collect every hit's id as a string so the ids can be comma-joined afterwards.
recipe_ids = [str(hit["id"]) for hit in results["results"]]

In [7]:
# The bulk details request sends all ids as one comma-separated value.
recipe_id_string = ",".join(recipe_ids)

In [8]:
# Number of recipe ids collected from the search (the query asked for number=100).
len(recipe_ids)

100

In [9]:
# Fetch the full details of every recipe found by the search in one bulk call.
# `params` lets requests handle URL encoding of the key and the id list, and
# the timeout keeps the cell from hanging indefinitely.
recipe_details_request = requests.get(
    "https://api.spoonacular.com/recipes/informationBulk",
    params={
        "apiKey": credentials["API_KEY"],
        "ids": recipe_id_string,
    },
    timeout=30,
)
# Fail fast on an HTTP error instead of parsing an error payload later.
recipe_details_request.raise_for_status()

In [10]:
# Sanity check: 200 means the bulk details request succeeded.
recipe_details_request.status_code

200

In [11]:
# Decode the JSON body of the bulk response; later cells take its len() and
# iterate over it, one item per recipe.
recipe_details_results = recipe_details_request.json()

In [12]:
# Confirm how many recipe detail records came back.
len(recipe_details_results)

100

In [13]:
def ingredients_to_cols(recipe_json):
    """Group a recipe's ingredient names by supermarket aisle.

    Parameters
    ----------
    recipe_json : dict
        One recipe record. Its "extendedIngredients" entry is read as a list
        of dicts, each with a "name" and an "aisle" field.

    Returns
    -------
    dict
        Maps aisle -> comma-joined ingredient names, with keys in sorted
        order. Only the part of an aisle before the first ";" is used, and a
        missing or ``None`` aisle is filed under the literal key "None".
        Ingredients whose name contains "salt" are left out.

    Notes
    -----
    A recipe with no ingredients (or no "extendedIngredients" entry) yields
    an empty dict instead of raising, and ingredients without a usable string
    name are skipped.
    """
    names_by_aisle = {}
    for ingredient in recipe_json.get("extendedIngredients") or []:
        name = ingredient.get("name")
        # Same substring filter as before: drops "salt", "sea salt", etc.
        if not isinstance(name, str) or "salt" in name:
            continue
        aisle = ingredient.get("aisle")
        # Multi-aisle values look like "A;B"; keep only the first aisle.
        aisle = aisle.split(";")[0] if isinstance(aisle, str) else "None"
        names_by_aisle.setdefault(aisle, []).append(name)
    # Sorted keys keep the resulting column order deterministic.
    return {
        aisle: ",".join(names)
        for aisle, names in sorted(names_by_aisle.items())
    }

In [14]:
# Build one row per recipe: the fixed metadata fields first, followed by one
# column per ingredient aisle as returned by ingredients_to_cols.
records = []
for recipe in recipe_details_results:
    row = {
        "title": recipe["title"],
        "source": recipe["creditsText"],
        "link": recipe["sourceUrl"],
        "minutes_to_prepare": recipe["readyInMinutes"],
        # Only the first listed cuisine is kept; blank when none is given.
        "cuisine": recipe["cuisines"][0] if len(recipe["cuisines"]) != 0 else "",
    }
    row.update(ingredients_to_cols(recipe))
    records.append(row)

df = pd.DataFrame(records)

In [15]:
# Count missing values per column; an aisle column is NaN for every recipe
# whose ingredients_to_cols() result had no entry for that aisle.
df.isna().sum()

title                            0
source                           1
link                             0
minutes_to_prepare               0
cuisine                          0
Baking                          75
None                            95
Oil, Vinegar, Salad Dressing    39
Produce                          6
Spices and Seasonings           41
Canned and Jarred               63
Condiments                      91
Pasta and Rice                  48
Beverages                       71
Ethnic Foods                    70
Nuts                            83
Cheese                          64
Milk, Eggs, Other Dairy         65
Bakery/Bread                    89
?                               96
Health Foods                    92
Nut butters, Jams, and Honey    93
Frozen                          97
Dried Fruits                    96
Online                          99
Refrigerated                    93
Savory Snacks                   95
Cereal                          93
Alcoholic Beverages 

In [16]:
# Replace NaN with empty strings so the aisle columns can be handled as plain
# strings by the column-merging cells that follow.
df = df.fillna("")

In [17]:
def combine_str_cols(col_list, frame=None):
    """Merge several comma-separated string columns into one column.

    For each row, the values of the columns in ``col_list`` are split on
    commas, empty entries are discarded, duplicates are removed while keeping
    first-seen order, and the remaining entries are re-joined with commas.
    Runs of two or more spaces are collapsed to a single space.

    Parameters
    ----------
    col_list : list of str
        Names of the columns to merge.
    frame : pandas.DataFrame, optional
        Frame to read the columns from. Defaults to the notebook-level ``df``.

    Returns
    -------
    pandas.Series
        One merged string per row, carrying the same index as ``frame`` so it
        aligns correctly when assigned back as a column.
    """
    frame = df if frame is None else frame

    def _merge_row(values):
        tokens = []
        for value in values:
            tokens.extend(value.split(","))
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # output is reproducible across runs (a set is not).
        return ",".join(dict.fromkeys(token for token in tokens if token))

    rows = frame[list(col_list)].astype(str).itertuples(index=False, name=None)
    merged = pd.Series(
        [_merge_row(row) for row in rows], index=frame.index, dtype=object
    )
    return (
        merged
        # Literal cleanup carried over from the original implementation.
        .str.replace(",_", ",", regex=False)
        # regex=True is required: the pattern is a regular expression, and
        # recent pandas versions treat the pattern as a literal by default.
        .str.replace(r" {2,}", " ", regex=True)
    )

In [18]:
# Collapse the many sparse shelf-stable aisle columns into a single "Pantry"
# column, which is much easier to work with.
pantry_columns = [
    "Baking",
    "Oil, Vinegar, Salad Dressing",
    "Canned and Jarred",
    "Nuts",
    "Nut butters, Jams, and Honey",
    "Dried Fruits",
    "Savory Snacks",
    "Cereal",
    "Online",
    "Health Foods",
]

df = df.assign(Pantry=combine_str_cols(pantry_columns))

In [19]:
# Inspect what landed in the "None" column (ingredients whose aisle was None).
df["None"].value_counts()

            95
potatoes     4
potato       1
Name: None, dtype: int64

In [20]:
# Inspect the ingredients filed under the "Gluten Free" aisle.
df["Gluten Free"].value_counts()

                    96
almond meal          2
buckwheat flour      1
brown rice flour     1
Name: Gluten Free, dtype: int64

In [21]:
# Fold the "Gluten Free" ingredients (alternative flours and meals) into
# Pantry. Writing them into "Baking" would lose them: "Baking" is listed in
# pantry_columns, so it has already been merged into Pantry and is dropped
# together with the other pantry source columns further down.
df["Pantry"] = combine_str_cols(["Pantry", "Gluten Free"])

In [22]:
# Merge the two dairy-related aisle columns into a single "Dairy" column.
df = df.assign(Dairy=combine_str_cols(["Milk, Eggs, Other Dairy", "Cheese"]))

In [23]:
# Rows with a non-empty "None" column hold ingredients that had no aisle; the
# value counts above show these are all potatoes, so record them under Produce.
# lstrip(",") avoids a leading comma when the recipe had no other produce.
df["Produce"] = np.where(
    df["None"] != "",
    (df["Produce"] + ",potatoes").str.lstrip(","),
    df["Produce"],
)

In [24]:
# Remove the per-aisle source columns handled by the cells above, along with
# the unlabeled "?" column. "Health Foods" and "Online" are already part of
# pantry_columns, so they are not listed a second time.
df = df.drop(
    columns=pantry_columns
    + ["None", "?", "Gluten Free", "Milk, Eggs, Other Dairy", "Cheese"],
)

In [29]:
# Prefix every ingredient column (everything except the recipe metadata
# columns) with "Ingredients: ".
ingredient_col_rename_map = {}
for column in df.columns:
    if column in ['title', 'source', 'link', 'minutes_to_prepare', 'cuisine']:
        continue
    ingredient_col_rename_map[column] = "Ingredients: " + column

In [32]:
# The title column gets a friendlier header in the exported file.
ingredient_col_rename_map["title"] = "Recipe Name"

In [34]:
# Apply the column renames built above.
df = df.rename(columns=ingredient_col_rename_map)

In [35]:
# Write the final table to disk. The output directory is created first so the
# export also works on a fresh checkout where data/ does not exist yet.
output_path = Path("data/sample.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)