### Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd 
import urllib.parse 

### HTTP Request

#### store website in variable

In [2]:
website = 'https://www.yellowpages.com/search?search_terms=restaurant&geo_location_terms=New+York%2C+NY'

#### Get Request

In [3]:
response = requests.get(website)

#### Status Code

In [4]:
response.status_code

200

### Soup Object 

In [5]:
soup = BeautifulSoup(response.content, 'html.parser')

In [None]:
soup 

### Results

In [7]:
result_container = soup.find_all('div', {'class':'result'})

In [None]:
result_container

In [9]:
len(result_container)

30

### Concatenate 2 URL Parts to get absolute URL

#### URL Part 1

In [10]:
# we combine url part no.1 with url part no.2 in order to get the absolute url

url_part_1 = 'https://www.yellowpages.com/'

#### Create List for URL Part 2

In [11]:
url_part_2 = []

# loop through results
for item in result_container:
    # loop through links
    for link in item.find_all('a', {'class': 'business-name'}):
        url_part_2.append(link.get('href'))

In [12]:
url_part_2

['/new-york-ny/mip/mr-ks-407372?lid=1001117676060',
 '/new-york-ny/mip/sevens-turkish-grill-460080517?lid=1000439506725',
 '/new-york-ny/mip/todaro-brothers-5642814?lid=1001105931389',
 '/new-york-ny/mip/babbo-285698',
 '/new-york-ny/mip/sparks-steak-house-15362553',
 '/new-york-ny/mip/bianca-6255318',
 '/new-york-ny/mip/blind-pig-8526127',
 '/new-york-ny/mip/old-homestead-steakhouse-1275150',
 '/new-york-ny/mip/esca-3306782',
 '/new-york-ny/mip/dave-busters-8889637',
 '/new-york-ny/mip/paul-jimmys-restaurant-2388948',
 '/new-york-ny/mip/pig-whistle-pub-restaurant-3605566',
 '/new-york-ny/mip/festival-restaurant-4598334',
 '/new-york-ny/mip/bobby-vans-steakhouse-54th-street-11139387',
 '/new-york-ny/mip/waldys-wood-fired-pizza-penne-16097243',
 '/new-york-ny/mip/pita-grill-6265201',
 '/new-york-ny/mip/johnny-rock-10705734',
 '/new-york-ny/mip/smac-459052000',
 '/new-york-ny/mip/firefly-11739917',
 '/new-york-ny/mip/menchanko-tei-restaurant-2659878',
 '/new-york-ny/mip/el-presidente-res

#### Join Url 1 and Url 2

In [13]:
url_joined = []

for link_2 in url_part_2:
    url_joined.append(urllib.parse.urljoin(url_part_1, link_2))

In [14]:
url_joined

['https://www.yellowpages.com/new-york-ny/mip/mr-ks-407372?lid=1001117676060',
 'https://www.yellowpages.com/new-york-ny/mip/sevens-turkish-grill-460080517?lid=1000439506725',
 'https://www.yellowpages.com/new-york-ny/mip/todaro-brothers-5642814?lid=1001105931389',
 'https://www.yellowpages.com/new-york-ny/mip/babbo-285698',
 'https://www.yellowpages.com/new-york-ny/mip/sparks-steak-house-15362553',
 'https://www.yellowpages.com/new-york-ny/mip/bianca-6255318',
 'https://www.yellowpages.com/new-york-ny/mip/blind-pig-8526127',
 'https://www.yellowpages.com/new-york-ny/mip/old-homestead-steakhouse-1275150',
 'https://www.yellowpages.com/new-york-ny/mip/esca-3306782',
 'https://www.yellowpages.com/new-york-ny/mip/dave-busters-8889637',
 'https://www.yellowpages.com/new-york-ny/mip/paul-jimmys-restaurant-2388948',
 'https://www.yellowpages.com/new-york-ny/mip/pig-whistle-pub-restaurant-3605566',
 'https://www.yellowpages.com/new-york-ny/mip/festival-restaurant-4598334',
 'https://www.yello

### Get Data from First Link 

In [15]:
# Name
# Address
# Phone
# Email
# Website
# General Info

#### Store first link in variable


In [16]:
first_link = url_joined[0]

#### Get Request & Soup Object

In [17]:
response = requests.get(first_link)

In [18]:
soup = BeautifulSoup(response.content, 'html.parser')

In [None]:
soup 

#### Name

In [20]:
soup.find('h1').get_text()

"Mr. K's"

#### Address

In [21]:
soup.find('h2', {'class': 'address'}).get_text()

AttributeError: 'NoneType' object has no attribute 'get_text'

#### Phone

In [None]:
soup.find('p', {'class':'phone'}).get_text()

'(212) 583-1668'

#### Email

In [None]:
# most important part

In [None]:
soup.find('a', {'class': 'email-business'}).get('href').split('mailto:')[1]

'info@movingmaninc.com'

#### Website

In [None]:
soup.find('a', {'class': 'website-link'}).get('href')

'http://www.mrksny.com'

#### General Info

In [None]:
soup.find('dd', {'class': 'general-info'}).get_text()

### Put all together and loop through all pages

In [None]:
# empty list - we need it so we can append all results - hand it over to pandas dataframe
results = []

# loop through all joined links
for link in url_joined:
    response = requests.get(link)
    
    # create soup object
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # name
    try:
        name = soup.find('h1').get_text()
    except:
        name = 'n/a'
        
    # address 
    try:
        address = soup.find('h2', {'class': 'address'}).get_text()
    except:
        address = 'n/a'
        
    # phone
    try:
        phone = soup.find('p', {'class':'phone'}).get_text()
    except:
        phone = 'n/a'
        
    # email
    try:
        email = soup.find('a', {'class': 'email-business'}).get('href').split('mailto:')[1]
    except:
        email = 'n/a'
        
    # website
    try:
        website = soup.find('a', {'class': 'website-link'}).get('href')
    except:
        website = 'n/a'
        
    # general info
    try:
        info = soup.find('dd', {'class': 'general-info'}).get_text()
    except:
        info = 'n/a'
        
    
    # create dictionary with results
    output = {'Restaurant Name': name, 'Address': address, 'Phone': phone, 'Email': email, 'Homepage':website,
             'Info':info}
    
    # append results in empty list
    results.append(output) 

### Create Pandas Dataframe

In [None]:
df = pd.DataFrame(results)

In [None]:
df 

Unnamed: 0,Restaurant Name,Address,Phone,Email,Homepage,Info
0,Mr. K's,"570 Lexington AveNew York, NY 10022",(212) 583-1668,info@movingmaninc.com,http://www.mrksny.com,We offer exclusive seating to you and your par...
1,Potjanee Thai Restaurant of New York,"48 Carmine StNew York, NY 10014",(212) 558-9071,mthitna2011@gmail.com,http://potjanee.net,If you're looking for authentic Thai food in N...
2,Spoonfed New York Country,"331 W 51st StNew York, NY 10019",(718) 789-3048,randy@spoonfednyc.com,http://www.spoonfed.nyc/contact-us.html,
3,Da Noi Midtown Manhattan,"214 E 49th StNew York, NY 10017",(347) 955-0067,info@danoinyc.com,http://danoinyc.com/midtown-manhattan.html,"High quality dining, elegant private parties a..."
4,Seven's Turkish Grill,"158 W 72nd StNew York, NY 10023",(212) 724-4700,,,Here at Seven's Mediterranean Turkish Grill we...
5,Todaro Brothers,"555 2nd AveNew York, NY 10016",(212) 532-0633,eat@todarobros.com,http://www.todarobros.com,NYC's Neighborhood Market since 1917. Todaro ...
6,Punch,"913 BroadwayNew York, NY 10010",(212) 673-6333,,http://www.punchrestaurant.com,Punch Restaurant and Bar is located in the Fla...
7,Alpha Fusion,"365 W 34th StNew York, NY 10001",(212) 279-8887,evazhng@yahoo.com,http://www.alpha34.com,
8,Babbo,"110 Waverly Pl Frnt ANew York, NY 10011",(212) 777-0303,hospitality@babbonyc.com,https://www.babbonyc.com,Babbo Ristorante e Enoteca is an exuberant cel...
9,Sparks Steak House,"210 E 46th StNew York, NY 10017",(212) 687-4855,office@sparkssteakhouse.com,http://www.sparkssteakhouse.com,"Established in 1966, Sparks Steak House featur..."


### Store in Excel

In [None]:
df.to_excel('result_single_page.xlsx', index=False)