In [1]:
# Import libraries
import pandas as pd
import requests
import html
from bs4 import BeautifulSoup

### Step 1: Create a soup object from the home page

In [2]:
# Fetch the home page that lists every restaurant. The explicit timeout keeps
# the cell from hanging indefinitely if the server never responds.
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
res = requests.get(url, timeout=30)
# Cell output: the HTTP status code of the response (200 means success).
res.status_code

200

In [3]:
# Parse the raw bytes of the home page response with the lxml parser.
soup = BeautifulSoup(markup=res.content, features='lxml')

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and slug. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'slug': 'aw-restaurants'}, 
    {'name': "Applebee's", 'slug': 'applebees'},
    ...
]
```

In [5]:
# Placeholder mirroring the example structure from the instructions above;
# the trailing Ellipsis stands in for the remaining entries.
restaurants = []
restaurants.append({'name': 'A&W Restaurants', 'slug': 'aw-restaurants'})
restaurants.append({'name': "Applebee's", 'slug': 'applebees'})
restaurants.append(...)

In [6]:
# Build one {'name', 'slug'} dictionary per restaurant. Every table cell that
# contains a link contributes an entry: the link text is the name, and the
# second '/'-separated piece of its href is stored as the slug.
restaurants = []
for cell in soup.find_all('td'):
    link = cell.find('a')
    if not link:
        # Cells without a link carry no restaurant; skip them.
        continue
    restaurants.append({
        'name': link.text,
        'slug': link.attrs['href'].split('/')[1],
    })

In [7]:
restaurants[0:5]

[{'name': 'A&W Restaurants', 'slug': '1.html'},
 {'name': "Applebee's", 'slug': '2.html'},
 {'name': "Arby's", 'slug': '3.html'},
 {'name': 'Atlanta Bread Company', 'slug': '4.html'},
 {'name': "Bojangle's Famous Chicken 'n Biscuits", 'slug': '5.html'}]

### Step 3: Using the slug, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

In [8]:
# Placeholder mirroring the example structure from the instructions above:
# two identical sample entries followed by an Ellipsis standing in for the rest.
sample_food = {
    'calories': '0',
    'carbs': '0',
    'category': 'Drinks',
    'fat': '0',
    'name': 'A&W® Diet Root Beer',
    'restaurant': 'A&W Restaurants',
}
foods = [sample_food.copy(), sample_food.copy(), ...]

Note: Remove extra white space from each category

In [9]:
# Root URL under which the restaurant pages live; a slug is appended to it.
base_url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/restaurants/'
# Slugs in the same order as `restaurants`, so positions line up between the two.
slug_list = [entry['slug'] for entry in restaurants]

In [10]:
# Scrape every restaurant page and build one dictionary per menu item.
#
# Results are stored in two places:
#   * restaurant['foods'] -- the items belonging to that restaurant
#   * foods               -- a single flat list with the items of all restaurants
#
# The page response and soup use their own names (page_res / page_soup) so the
# home-page `res` and `soup` objects are not overwritten by this loop.
foods = []
for restaurant, slug in zip(restaurants, slug_list):
    # Time out rather than hang, and stop on an HTTP error instead of
    # silently recording an empty menu for that restaurant.
    page_res = requests.get(base_url + slug, timeout=30)
    page_res.raise_for_status()
    page_soup = BeautifulSoup(page_res.content, 'lxml')

    restaurant_foods = []
    for row in page_soup.find_all('tr'):
        td_elements = row.find_all('td')
        # Rows with fewer than five <td> cells (e.g. header rows built from
        # <th>) do not describe a food item.
        if len(td_elements) < 5:
            continue
        # .strip() removes the extra white space around each cell's text.
        restaurant_foods.append({
            'restaurant': restaurant['name'],
            'name': td_elements[0].text.strip(),
            'category': td_elements[1].text.strip(),
            'calories': td_elements[2].text.strip(),
            'fat': td_elements[3].text.strip(),
            'carbs': td_elements[4].text.strip(),
        })

    restaurant['foods'] = restaurant_foods
    foods.extend(restaurant_foods)

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 4,977 rows

In [11]:
# Flatten the per-restaurant 'foods' lists into one list of food dictionaries,
# keeping restaurant order and, within each restaurant, item order.
# A comprehension avoids rebinding the builtin name `dict` at notebook scope,
# which would break any later call to dict(...).
list_of_food_dicts = [
    food
    for restaurant in restaurants
    for food in restaurant['foods']
]

In [12]:
# One row per food dictionary; the dictionary keys become the columns.
df = pd.DataFrame(data=list_of_food_dicts)

In [13]:
# Preview the first five rows as the cell output.
df.head(n=5)

Unnamed: 0,restaurant,name,category,calories,fat,carbs
0,A&W Restaurants,Original Bacon Double Cheeseburger,Burgers,760,45,45
1,A&W Restaurants,Coney (Chili) Dog,Entrees,340,20,26
2,A&W Restaurants,Chili Fries,French Fries,370,15,49
3,A&W Restaurants,Strawberry Milkshake (small),Shakes,670,29,90
4,A&W Restaurants,A&W® Root Beer Freeze (large),Shakes,820,18,150


### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [14]:
# Write the table to a comma-separated file, leaving out the index column.
df.to_csv(path_or_buf='export.csv', sep=',', index=False)