In [2]:
# Import libraries
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

### Step 1: Create a soup object from the home page

In [3]:
# Home page of the site to scrape; the restaurant links found on it are
# relative paths that get resolved against this URL later on
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
# requests has no default timeout, so set one to keep this cell from
# hanging indefinitely if the server never answers
res = requests.get(url, timeout=10)

In [4]:
# Display the HTTP status code of the home-page response to confirm the
# request succeeded (200 means OK)
res.status_code

200

In [5]:
# Parse the raw bytes of the home-page response into a BeautifulSoup object
# using the lxml parser
soup = BeautifulSoup(res.content, 'lxml')

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and path from the `href`. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}, 
    {'name': "Applebee's", 'href': 'restaurants/2.html'},
    ...
]
```

In [18]:
# Locate the <table> element whose id is "restaurants" on the home page
rest_table = soup.find('table', {'id': 'restaurants'})

# Each <a> tag inside that table is one restaurant: the link text is the
# restaurant's name and the href attribute is the path to its page
restaurants = [
    {'name': link.text, 'href': link.attrs['href']}
    for link in rest_table.find_all('a')
]

restaurants

[{'name': 'A&W Restaurants', 'href': 'restaurants/1.html'},
 {'name': "Applebee's", 'href': 'restaurants/2.html'},
 {'name': "Arby's", 'href': 'restaurants/3.html'},
 {'name': 'Atlanta Bread Company', 'href': 'restaurants/4.html'},
 {'name': "Bojangle's Famous Chicken 'n Biscuits",
  'href': 'restaurants/5.html'},
 {'name': 'Buffalo Wild Wings', 'href': 'restaurants/6.html'},
 {'name': 'Burger King', 'href': 'restaurants/7.html'},
 {'name': "Captain D's", 'href': 'restaurants/8.html'},
 {'name': "Carl's Jr.", 'href': 'restaurants/9.html'},
 {'name': "Charley's Grilled Subs", 'href': 'restaurants/10.html'},
 {'name': 'Chick-fil-A', 'href': 'restaurants/11.html'},
 {'name': "Chili's", 'href': 'restaurants/12.html'},
 {'name': 'Chipotle Mexican Grill', 'href': 'restaurants/13.html'},
 {'name': "Church's", 'href': 'restaurants/14.html'},
 {'name': 'Corner Bakery Cafe', 'href': 'restaurants/15.html'},
 {'name': 'Dairy Queen', 'href': 'restaurants/16.html'},
 {'name': "Denny's", 'href': 'res

### Step 3: Using the `href`, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

In [65]:
foods = []  # one dictionary per food item, across all restaurants

REQUEST_TIMEOUT = 10  # seconds to wait on each request before giving up

# A single Session reuses the underlying connection for every restaurant page
# instead of opening a new one per request
with requests.Session() as session:
    for restaurant in restaurants:
        # each href is a path relative to the home page, so resolve it against `url`
        rest_url = urljoin(url, restaurant['href'])

        rest_res = session.get(rest_url, timeout=REQUEST_TIMEOUT)
        if rest_res.status_code != 200:
            # warn and skip this restaurant: an error page has no food table,
            # so trying to parse it would crash on the missing <tbody>
            print(f"{rest_res.status_code} error for {restaurant['name']}")
            continue

        rest_soup = BeautifulSoup(rest_res.content, 'lxml')

        # find() returns None when the page has no <tbody>; skip rather than crash
        table_body = rest_soup.find('tbody')
        if table_body is None:
            print(f"no food table found for {restaurant['name']}")
            continue

        # every <tr> in the table body is one food item
        for row in table_body.find_all('tr'):
            cells = row.find_all('td')
            # cells are read in the order displayed on the site:
            # name, category, calories, fat, carbs
            foods.append({
                'restaurant': restaurant['name'],
                'name': cells[0].text,
                'category': cells[1].text.strip(),  # category text has surrounding whitespace
                'calories': cells[2].text,
                'fat': cells[3].text,
                'carbs': cells[4].text,
            })

foods[:5]

[{'restaurant': 'A&W Restaurants',
  'name': 'Original Bacon Double Cheeseburger',
  'category': 'Burgers',
  'calories': '760',
  'fat': '45',
  'carbs': '45'},
 {'restaurant': 'A&W Restaurants',
  'name': 'Coney (Chili) Dog',
  'category': 'Entrees',
  'calories': '340',
  'fat': '20',
  'carbs': '26'},
 {'restaurant': 'A&W Restaurants',
  'name': 'Chili Fries',
  'category': 'French Fries',
  'calories': '370',
  'fat': '15',
  'carbs': '49'},
 {'restaurant': 'A&W Restaurants',
  'name': 'Strawberry Milkshake (small)',
  'category': 'Shakes',
  'calories': '670',
  'fat': '29',
  'carbs': '90'},
 {'restaurant': 'A&W Restaurants',
  'name': 'A&W® Root Beer Freeze (large)',
  'category': 'Shakes',
  'calories': '820',
  'fat': '18',
  'carbs': '150'}]

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 5,131 rows

In [67]:
# Build the DataFrame from the list of food dictionaries (one row per food)
df = pd.DataFrame(foods)
# Display (rows, columns) so the row count can be checked against the expected total
df.shape

(5131, 6)

In [68]:
# Preview the first 10 rows of the DataFrame
df.head(10)

Unnamed: 0,restaurant,name,category,calories,fat,carbs
0,A&W Restaurants,Original Bacon Double Cheeseburger,Burgers,760,45,45
1,A&W Restaurants,Coney (Chili) Dog,Entrees,340,20,26
2,A&W Restaurants,Chili Fries,French Fries,370,15,49
3,A&W Restaurants,Strawberry Milkshake (small),Shakes,670,29,90
4,A&W Restaurants,A&W® Root Beer Freeze (large),Shakes,820,18,150
5,A&W Restaurants,Caramel Sundae,Desserts,340,9,57
6,A&W Restaurants,Strawberry Banana Smoothee,Shakes,420,6,86
7,A&W Restaurants,Chocolate Fudge Blendrrr,Desserts,1010,59,152
8,A&W Restaurants,Strawberry Limeade,Drinks,420,0,105
9,A&W Restaurants,Watermelon Slushee,Drinks,270,0,73


### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [69]:
# Write the DataFrame to food.csv; index = False leaves the index column out of the file
df.to_csv('food.csv', index = False) # to not add additional index