In [45]:
# Import Dependencies
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [46]:
# Load the filtered recipe table from disk and preview its first five rows
df = pd.read_csv(filepath_or_buffer='filtered_recipes.csv')
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Title,Rating,Description,Ingredients,Recipe Facts,Directions,Nutrition Facts,Number of Servings,Calories,...,Vitamin C,Calcium,Iron,Potassium,Tags,Course Keywords,Diet Keywords,Cuisine Keywords,Recipe URLs,Image Link
0,0,Brown Butter Rosemary Roasted Sweet Potatoes R...,5,"Browned, rosemary-infused butter is enriched w...","3 pounds (1.4kg) sweet potatoes, peeled and cu...",Active: 20 mins Total: 2 hrs 15 mins Serves:...,Place sweet potatoes in a large saucepan and c...,303 Calories 17g Fat 35g Carbs 4g Protein,6 to 8,303,...,34mg,71mg,1mg,820mg,"Sweet Potatoes, Gluten-free Sides, Roasted Pot...",['side'],['gluten-free'],[],https://www.seriouseats.com/brown-butter-rosem...,https://www.seriouseats.com/thmb/FXfXSVxQKFZxQ...
1,1,Grilled Sriracha Hot Wings,5,Rich and spicy thanks to a liberal dose of gar...,"4 pounds chicken wings, cut into drumettes and...",Active: 15 mins Total: 9 hrs Serves: 4 to 6...,Pat chicken wings dry with paper towels. Place...,475 Calories 29g Fat 12g Carbs 39g Protein,4 to 6,475,...,9mg,166mg,3mg,436mg,"Hors D'Oeuvres, Grilled Chicken, Chicken Wings",[],[],[],https://www.seriouseats.com/sriracha-hot-wings...,https://www.seriouseats.com/thmb/_cT9rVKj3CDSh...
2,2,Grilled Honey Chipotle Wings,5,"Crisp, sweet wings with a smoky finish.","3 pounds chicken wings (18 wings), cut up 1 ta...",Prep: 40 mins Cook: 45 mins Chilling Time: ...,Pat the chicken wings dry with paper towels. P...,187 Calories 3g Fat 43g Carbs 3g Protein,3 to 4,187,...,5mg,230mg,2mg,254mg,"Hors D'Oeuvres, Grilled Chicken, Chicken Wings",[],[],[],https://www.seriouseats.com/grilling-honey-chi...,https://www.seriouseats.com/thmb/r8gVcyUFq_IzK...
3,3,Grilled Turkish-Style Chicken Wings,5,An improved setup for skewers brings these gri...,1 cup (260g) Turkish hot pepper paste (see not...,Active: 30 mins Total: 90 mins Serves: 4 to...,"In a medium bowl, stir together pepper paste, ...",999 Calories 74g Fat 45g Carbs 40g Protein,4 to 6,999,...,19mg,71mg,4mg,492mg,,[],[],[],https://www.seriouseats.com/grilled-turkish-wi...,https://www.seriouseats.com/thmb/KhpkXCH6Rmavi...
4,4,Grilled Hoisin-Glazed Chicken Wings,5,Sweet and salty wings with a lightly charred c...,"3 pounds chicken wings, cut into drumettes and...",Active: 45 mins Total: 9 hrs Serves: 4 serv...,Pat chicken wings dry with paper towels. Place...,582 Calories 29g Fat 29g Carbs 50g Protein,4,582,...,3mg,274mg,5mg,553mg,"Hors D'Oeuvres, Grilled Chicken, Chicken Wings...",[],[],[],https://www.seriouseats.com/hoisin-glazed-chic...,https://www.seriouseats.com/thmb/C57mMT6MXuqB4...


In [47]:
# Discard the 'Unnamed: 0' and 'Nutrition Facts' columns, then list the columns that remain
df = df.drop(['Unnamed: 0', 'Nutrition Facts'], axis='columns')
df.columns

Index(['Title', 'Rating', 'Description', 'Ingredients', 'Recipe Facts',
       'Directions', 'Number of Servings', 'Calories', 'Total Fat',
       'Saturated Fat', 'Cholesterol', 'Sodium', 'Total Carbohydrate',
       'Dietary Fiber', 'Total Sugars', 'Protein', 'Vitamin C', 'Calcium',
       'Iron', 'Potassium', 'Tags', 'Course Keywords', 'Diet Keywords',
       'Cuisine Keywords', 'Recipe URLs', 'Image Link'],
      dtype='object')

In [48]:
# Cut everything from the 'Rate & Comment' marker onwards out of each Recipe Facts entry.
# The marker is located case-insensitively; trailing whitespace is trimmed in every case.
removed_rc = []
for desc in df['Recipe Facts']:
    # str.find returns -1 when the marker is absent, so one lookup covers both branches
    endpoint = desc.lower().find('rate & comment')
    kept = desc if endpoint == -1 else desc[:endpoint]
    removed_rc.append(kept.rstrip())
df['Recipe Facts'] = removed_rc

In [49]:
# Add a line break to each descriptor except the first one.
#
# Each Recipe Facts string is a run of "Label: value" descriptors separated by spaces.
# For every known label that appears *after a space*, a '\n' is inserted directly in
# front of the label (the preceding space is kept, so the result reads "... \nLabel: ...").
# A label sitting at the very start of the string has no space in front of it and is
# therefore left alone -- presumably that is the "first one"; confirm against the data.
recipe_facts_linebreaks = []
# Labels seen in the scraped data. The spelling variants ('TIme', trailing ':') are
# matched literally, so they are kept exactly as they are. The list order is significant:
# once a label has received its break it is preceded by '\n' instead of ' ', so a later
# keyword that is a prefix of it no longer matches at that position.
annoying_keywords = ['Active', 'Cook', 'Prep', 'Total', 'Serves',
                     'Chilling Time', 'Rising Time', 'Makes', 'Marinating Time',
                     'Cooling Time', 'Dry-Brining Time', 'Resting Time', 'Infusing Time',
                     'Curing Time', 'Soaking Time', 'Maceration Time', 'Fermentation Time', 'Chilling TIme',
                     'Fermenting Time', 'Brining Time', 'Churning/Freezing TIme', 'Proofing Time', 'Cool', 'Churning Time',
                     'Rehydrating Time', 'Marinate', 'Carbonation Time', 'Salting Time', 'Churning/Freezing Time', 'Assembly Time',
                     'Proof/Cool', 'Chilling', 'Marination Time', 'Drying Time', 'Freeze', 'Brine', 'Freezing Time', 'Rest',
                     'Purging Time', '-TOTAL', 'Proof', 'Churning/ Freezing Time:', 'Chill', 'Soak', 'Rest/Chill', 'Total:', 'Marinating',
                     'Chill/Rest', 'Cure/Rest', 'Proof/Rest']
add_string = '\n'
for desc in df['Recipe Facts']:
    temp = desc
    for keyword in annoying_keywords:
        spaced_keyword = ' ' + keyword
        if spaced_keyword in temp:
            # Locate the same space-prefixed occurrence that the test above matched.
            # Searching for the bare keyword could hit an earlier occurrence (at the
            # start of the string or inside a compound label such as 'Proof/Cool')
            # and put the line break in the wrong place. The +1 steps over the space
            # so the break goes immediately before the label.
            break_at = temp.index(spaced_keyword) + 1
            temp = temp[:break_at] + add_string + temp[break_at:]
    recipe_facts_linebreaks.append(temp)


In [50]:
# Store the line-broken facts back in the 'Recipe Facts' column and preview the first five rows
df['Recipe Facts'] = recipe_facts_linebreaks
df.head(n=5)

Unnamed: 0,Title,Rating,Description,Ingredients,Recipe Facts,Directions,Number of Servings,Calories,Total Fat,Saturated Fat,...,Vitamin C,Calcium,Iron,Potassium,Tags,Course Keywords,Diet Keywords,Cuisine Keywords,Recipe URLs,Image Link
0,Brown Butter Rosemary Roasted Sweet Potatoes R...,5,"Browned, rosemary-infused butter is enriched w...","3 pounds (1.4kg) sweet potatoes, peeled and cu...",Active: 20 mins \nTotal: 2 hrs 15 mins \nSer...,Place sweet potatoes in a large saucepan and c...,6 to 8,303,17g,8g,...,34mg,71mg,1mg,820mg,"Sweet Potatoes, Gluten-free Sides, Roasted Pot...",['side'],['gluten-free'],[],https://www.seriouseats.com/brown-butter-rosem...,https://www.seriouseats.com/thmb/FXfXSVxQKFZxQ...
1,Grilled Sriracha Hot Wings,5,Rich and spicy thanks to a liberal dose of gar...,"4 pounds chicken wings, cut into drumettes and...",Active: 15 mins \nTotal: 9 hrs \nServes: 4 ...,Pat chicken wings dry with paper towels. Place...,4 to 6,475,29g,11g,...,9mg,166mg,3mg,436mg,"Hors D'Oeuvres, Grilled Chicken, Chicken Wings",[],[],[],https://www.seriouseats.com/sriracha-hot-wings...,https://www.seriouseats.com/thmb/_cT9rVKj3CDSh...
2,Grilled Honey Chipotle Wings,5,"Crisp, sweet wings with a smoky finish.","3 pounds chicken wings (18 wings), cut up 1 ta...",Prep: 40 mins \nCook: 45 mins \nChilling Tim...,Pat the chicken wings dry with paper towels. P...,3 to 4,187,3g,1g,...,5mg,230mg,2mg,254mg,"Hors D'Oeuvres, Grilled Chicken, Chicken Wings",[],[],[],https://www.seriouseats.com/grilling-honey-chi...,https://www.seriouseats.com/thmb/r8gVcyUFq_IzK...
3,Grilled Turkish-Style Chicken Wings,5,An improved setup for skewers brings these gri...,1 cup (260g) Turkish hot pepper paste (see not...,Active: 30 mins \nTotal: 90 mins \nServes: ...,"In a medium bowl, stir together pepper paste, ...",4 to 6,999,74g,22g,...,19mg,71mg,4mg,492mg,,[],[],[],https://www.seriouseats.com/grilled-turkish-wi...,https://www.seriouseats.com/thmb/KhpkXCH6Rmavi...
4,Grilled Hoisin-Glazed Chicken Wings,5,Sweet and salty wings with a lightly charred c...,"3 pounds chicken wings, cut into drumettes and...",Active: 45 mins \nTotal: 9 hrs \nServes: 4 ...,Pat chicken wings dry with paper towels. Place...,4,582,29g,8g,...,3mg,274mg,5mg,553mg,"Hors D'Oeuvres, Grilled Chicken, Chicken Wings...",[],[],[],https://www.seriouseats.com/hoisin-glazed-chic...,https://www.seriouseats.com/thmb/C57mMT6MXuqB4...


In [51]:
# Rescrape Ingredients and Directions columns with bullet points in front of each ingredient.
#
# For every recipe URL the page is fetched and parsed, and one string is appended to
# each of the two lists below, so both lists end up in the same order as the rows of df.
# The strings are still raw here: the ingredient text keeps whatever leading/trailing
# characters the page markup produced, and each direction keeps its own trailing text.
ingredient_series = []
direction_series = []
for url in df['Recipe URLs']:
    # The timeout (seconds) keeps a single stalled connection from hanging the whole run;
    # raise_for_status surfaces HTTP errors here instead of as a confusing parse failure.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Ingredients: concatenate the text of every <li>, then turn each blank-line gap
    # between items into a newline followed by a bullet.
    ingredients_block = soup.find('ul', class_='structured-ingredients__list text-passage')
    if ingredients_block is None:
        # Fail loudly and name the page, rather than raising an anonymous AttributeError
        raise ValueError('No ingredient list found at ' + url)
    temp = ''.join(ingredient.text for ingredient in ingredients_block.find_all('li'))
    ingredient_series.append(temp.replace('\n\n', '\n\u2022 '))

    # Directions: number each step that actually has a text paragraph.
    directions_block = soup.find('ol', class_='comp mntl-sc-block-group--OL mntl-sc-block mntl-sc-block-startgroup')
    if directions_block is None:
        raise ValueError('No directions list found at ' + url)
    direction_temp = ''
    count = 1
    for direction in directions_block.find_all('li'):
        paragraph = direction.find('p', class_='comp mntl-sc-block mntl-sc-block-html')
        if paragraph is None:
            # Skip list items without a step paragraph *before* writing the number,
            # so no dangling "N. " prefix is left behind and numbering stays contiguous.
            continue
        direction_temp += str(count) + '. ' + paragraph.text
        count += 1
    direction_series.append(direction_temp)

In [52]:
# Bring the scraped strings into their final form and store them on the frame.
# Ingredients: drop the first and last character, put a bullet in front, and
# replace non-breaking spaces with ordinary ones.
ingredient_series = [
    ('\u2022 ' + raw[1:-1]).replace('\xa0', ' ')
    for raw in ingredient_series
]
# Directions: collapse every ' \n' into a single space, then drop the final character.
direction_series = [
    raw.replace(' \n', ' ')[:-1]
    for raw in direction_series
]
df['Ingredients'] = ingredient_series
df['Directions'] = direction_series

In [53]:
# Save the cleaned frame as CSV, leaving the row index out of the file
df.to_csv(path_or_buf='final_recipes.csv', index=False)

# Save the same frame as JSON, with one entry per row keyed by its index label
df.to_json(path_or_buf="recipes.json", orient="index")