In [None]:
# Import all necessary libraries, including the RecipeScraper itself
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import re
import time
from RecipeScraper import RecipeScraper

In [None]:
# Load the list of recipe links into a pandas DataFrame; the scraping loop
# further down reads its 'URL' column.
df = pd.read_csv(filepath_or_buffer='Recipe_Links_Filtered.csv')

In [None]:
# Column names of the recipe database, in the order they appear in the output.
# NOTE(review): 'Protien' is misspelled, but it is the key the fill loop writes
# to and the header that ends up in the saved CSV, so it is kept as-is here.
cols = [
    # identification
    'URL', 'Title', 'Author', 'Date', 'Rating',
    # summary details
    'Prep Time', 'Cook Time', 'Total Time', 'Servings',
    # nutrition
    'Calories', 'Carbs', 'Fat', 'Protien',
    # free text
    'Description', 'Ingredients', 'Instructions',
]

# Empty dataframe with exactly these columns; rows are added by the scraping loop.
df_full = pd.DataFrame(data=None, columns=cols)

In [None]:
# Smoke test: run the scraper against a single known recipe page and print the
# author it extracts, before committing to the full multi-recipe run below.
scraper = RecipeScraper('https://www.allrecipes.com/recipe/275372/air-fryer-turkey-breast/')
sample_author = scraper.get_recipe_author()
print(sample_author)

In [None]:
# Fill the database: scrape each recipe URL and store one row per recipe.
MAX_RECIPES = 300  # upper bound on how many URLs from the CSV are scraped in one run

# head() caps the run at MAX_RECIPES without reading past the end of the URL
# list, so a CSV with fewer rows no longer fails with a KeyError part-way through.
# It is also positional, so it does not depend on the dataframe's index labels.
urls = df['URL'].head(MAX_RECIPES).tolist()

# Rows are collected as plain dicts and turned into a dataframe once, instead
# of growing the dataframe one cell at a time.
records = []
try:
  for position, url in enumerate(urls):
    # Pass the URL into the RecipeScraper constructor
    ex = RecipeScraper(url)

    # Call every get function and store the result under its column name
    row = {
        'URL': url,
        'Title': ex.get_recipe_title(),
        'Author': ex.get_recipe_author(),
        'Date': ex.get_recipe_date(),
        'Rating': ex.get_recipe_rating(),
        'Description': ex.get_recipe_description(),
        'Ingredients': ex.get_recipe_ingredients(),
        'Instructions': ex.get_recipe_instructions(),
    }

    # Summary details are returned as an array that must be read at
    # specific indexes: prep time, cook time, total time, servings
    details = ex.get_recipe_summary()
    row['Prep Time'] = details[0]
    row['Cook Time'] = details[1]
    row['Total Time'] = details[2]
    row['Servings'] = details[3]

    # Same case for nutrients: calories, carbs, fat, protein
    nutrients = ex.get_recipe_nutrition()
    row['Calories'] = nutrients[0]
    row['Carbs'] = nutrients[1]
    row['Fat'] = nutrients[2]
    row['Protien'] = nutrients[3]  # key spelling matches the column name in `cols`

    records.append(row)

    # VERY IMPORTANT: wait a random 1-5 seconds before making another request
    # to avoid overwhelming their servers. Nothing follows the last URL, so
    # there is no need to wait after it.
    if position + 1 < len(urls):
      sleep_time = np.random.uniform(1,5)
      time.sleep(sleep_time)
finally:
  # Built in a `finally` so that the recipes scraped before a failure are
  # still available in df_full; the error itself is not suppressed.
  # dtype=object keeps the scraped values exactly as returned.
  df_full = pd.DataFrame(records, columns=cols, dtype=object)

In [None]:
# Check whether data was written successfully to the rows.
# info() prints the number of rows and the non-null count of every column,
# which shows at a glance whether any attribute failed to populate.
df_full.info()

# The first few rows are rendered as a table (last expression of the cell)
# for a spot check of the actual values.
df_full.head()

In [None]:
# Finally, write the whole database to disk as a CSV.
# index=True is the pandas default, spelled out here: the row index is written
# as the first (unnamed) column of the file.
df_full.to_csv('recipes_full.csv', index=True)