In [1]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd

In [2]:
# article page listing Lagos cafes; this is the URL handed to get_cafes_data further down
url = "https://awelagos.com.ng/best-cafes-to-work-from-in-lagos/"

In [3]:
# one entry per page of the finelib.com Lagos restaurant directory that should be scraped
url_list = [
    f"https://www.finelib.com/cities/lagos/business/food/restaurants/page-{page_no}"
    for page_no in (1, 2)
]

In [4]:
# function to get restaurants (plus two small None-safe parsing helpers)


def _restaurant_div(parent, css_class):
    """Return the first div with class ``css_class`` under ``parent``, or None if either is missing."""
    if parent is None:
        return None
    return parent.find(name="div", class_=css_class)


def _restaurant_text(node):
    """Return ``node.text`` without surrounding whitespace, or None when ``node`` is None."""
    if node is None:
        return None
    return node.text.strip()


def get_restaurant_data(*, URL: list, timeout: float = 30) -> pd.DataFrame:
    """
    Scrape restaurant listings from one or more directory pages into a single DataFrame.

    => param:
        URL = list of page urls to scrape (a single url string is also accepted)
        timeout = seconds to wait for each HTTP response before giving up

    => return:
        DataFrame with one row per listing and the columns
        place_type, place_name, place_address, place_phone_no.
        place_type is always "restaurant". A field whose markup is absent from a
        listing is stored as a missing value instead of aborting the whole scrape.

    => raises:
        requests.HTTPError if a page answers with an error status
        ValueError if a page has no "left-column" container (layout changed / wrong page)
    """
    columns = ["place_type", "place_name", "place_address", "place_phone_no"]

    # a bare string would otherwise be iterated character by character
    page_urls = [URL] if isinstance(URL, str) else list(URL)

    # one dict per listing keeps the four fields of a row together, so the
    # columns can never drift out of alignment
    records = []

    for page_url in page_urls:
        # without a timeout a stalled server would hang the notebook indefinitely
        response = requests.get(page_url, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "lxml")

        left_column = soup.find(name="div", class_="left-column")
        if left_column is None:
            raise ValueError(f"no 'left-column' container found at {page_url}")

        boxes = left_column.find_all(name="div", class_="box-682")

        # the first "box-682" is skipped, exactly as the original loop did
        # NOTE(review): presumably it is a header block rather than a listing - confirm on the page
        for box in boxes[1:]:
            heading = _restaurant_div(box, "box-headings")
            listing_info = _restaurant_div(box, "listing-info-img")
            phone_block = _restaurant_div(listing_info, "tel-no-div")

            records.append(
                {
                    "place_type": "restaurant",
                    # the name is the text of the link inside the heading div
                    "place_name": _restaurant_text(heading.a if heading is not None else None),
                    # the address is the first "cmpny-lstng-1" div of the listing info
                    "place_address": _restaurant_text(_restaurant_div(listing_info, "cmpny-lstng-1")),
                    # the phone number is the "cmpny-lstng-1" div nested in "tel-no-div"
                    "place_phone_no": _restaurant_text(_restaurant_div(phone_block, "cmpny-lstng-1")),
                }
            )

    # passing columns= keeps the schema stable even when nothing was scraped
    return pd.DataFrame(records, columns=columns)

In [5]:
# scrape every page in url_list into one restaurant DataFrame
# (get_restaurant_data issues a live HTTP request per page)
df = get_restaurant_data(URL=url_list)

In [6]:
# preview the scraped restaurant table
df

Unnamed: 0,place_type,place_name,place_address,place_phone_no
0,restaurant,Bernadines Cloud Kitchen,"35, Sholanke Akoka, Yaba, Lagos",0903 651 7676
1,restaurant,Abibiz Restaurant,"Murtala Mohammed International Airport, Ikeja,...","0803 302 4280, 01 773 1431"
2,restaurant,Afi's Restaurant,"5 Olufemi Street, Surulere, Lagos, Nigeria",01 470 3044
3,restaurant,Aldente,"S11 Lagos City Mall, Onikan, Lagos, Nigeria","01 444 3944, 01 791 2942"
4,restaurant,All Seasons Restaurants,"Plot 867A, Bishop Aboyade Cole Street, Victori...",01 262 3135
...,...,...,...,...
64,restaurant,Swallowit by Premium Posh,"Plot 8&9, Block CXLiiiA,, Agungi Lekki Phase 2...","0810 744 6322, 0815 905 7235"
65,restaurant,Sweetspot Eatery And African Dishes,"20, Oladoyinbo Street, Ikeja, Lagos, Nigeria","0703 384 2618, 0807 595 3779"
66,restaurant,"The Kitchen, Lekki","21, Oladimeji Alo Street, Off Freedom Way, Lek...",0906 999 9996
67,restaurant,Topever Restaurant,"39, Market Street, Shomolu, Shomolu, Lagos, Ni...","0806 209 8455, 0803 488 6706"


In [7]:
# (rows, columns) of the restaurant table: one row per listing, four scraped fields
df.shape

(69, 4)

In [8]:
# function to get cafes


def get_cafes_data(*, URL: str, max_entries: int = 21, timeout: float = 30) -> pd.DataFrame:
    """
    Scrape cafe names and addresses from a list-style article page into a DataFrame.

    => param:
        URL = the url of the page to scrape
        max_entries = keep only the first max_entries list entries of the page;
                      None keeps them all. The default of 21 is the cut-off the
                      notebook has always used.
        timeout = seconds to wait for the HTTP response before giving up

    => return:
        DataFrame with one row per kept entry and the columns
        place_type, place_name, place_address. place_type is always "cafes".
        An entry without the expected heading / address markup gets a missing
        value in that field, so the columns always stay aligned.

    => raises:
        requests.HTTPError if the page answers with an error status
        ValueError if the page has no "zeen__list--2" container
    """
    columns = ["place_type", "place_name", "place_address"]

    # without a timeout a stalled server would hang the notebook indefinitely
    response = requests.get(URL, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    content = soup.find(name="div", class_="zeen__list--2")
    if content is None:
        raise ValueError(f"no 'zeen__list--2' container found at {URL}")

    entries = content.find_all(name="div", class_="zeen__list__entry")
    # NOTE(review): presumably entries past the default cut-off are not cafes - confirm on the page
    if max_entries is not None:
        entries = entries[:max_entries]

    # one dict per entry: a missing address can no longer leave the address
    # column shorter than the name column
    records = []
    for entry in entries:
        heading = entry.h3
        name = heading.text.strip() if heading is not None else None

        # the address is read from the third child node of the entry's second paragraph
        address = None
        paragraphs = entry.find_all("p")
        if len(paragraphs) > 1:
            children = list(paragraphs[1])  # iterating a tag yields its direct children
            if len(children) > 2:
                address = children[2].text.strip()

        records.append({"place_type": "cafes", "place_name": name, "place_address": address})

    # passing columns= keeps the schema stable even when nothing was scraped
    return pd.DataFrame(records, columns=columns)

In [9]:
# scrape the cafes article into its own DataFrame (live HTTP request);
# unlike the restaurant table it has no place_phone_no column
df2 = get_cafes_data(URL=url)

In [10]:
# preview the scraped cafes table
df2

Unnamed: 0,place_type,place_name,place_address
0,cafes,Eric Kayer,
1,cafes,Pause | Café & Espresso Bar,"1 Bisway St, Lekki, Lagos, Nigeria"
2,cafes,Vestar Coffee,"26 Oju Olobun Cl, Victoria Island, Lagos, Nigeria"
3,cafes,Moonshine Cafe,"7A Admiralty Rd, Lekki Phase I, Lagos, Nigeria"
4,cafes,MyYa’s Cafe,"Plot 1, Block, 4 Admiralty Rd, Lekki Phase I, ..."
5,cafes,Godaïf Village,"Casa Asmarina, 26B Turnbull Rd, Ikoyi, Lagos, ..."
6,cafes,Art Cafe,"282 Akin Olugbade St, Victoria Island, Lagos,..."
7,cafes,MY COFFEE,
8,cafes,SEE Lagos,"36b Wole Ariyo St, Lekki Phase I, Lagos, Nigeria"
9,cafes,Ziya Delicacy Boutique,"26B Oju Olobun Cl, Lagos, Nigeria"


In [19]:
# stack restaurants on top of cafes with a fresh 0..n-1 index, then surface that
# index as a place_id column; cafes carry no phone number, so place_phone_no is
# left missing on their rows
combined = (
    pd.concat([df, df2], ignore_index=True)
    .reset_index()
    .rename(columns={"index": "place_id"})
)
combined

Unnamed: 0,place_id,place_type,place_name,place_address,place_phone_no
0,0,restaurant,Bernadines Cloud Kitchen,"35, Sholanke Akoka, Yaba, Lagos",0903 651 7676
1,1,restaurant,Abibiz Restaurant,"Murtala Mohammed International Airport, Ikeja,...","0803 302 4280, 01 773 1431"
2,2,restaurant,Afi's Restaurant,"5 Olufemi Street, Surulere, Lagos, Nigeria",01 470 3044
3,3,restaurant,Aldente,"S11 Lagos City Mall, Onikan, Lagos, Nigeria","01 444 3944, 01 791 2942"
4,4,restaurant,All Seasons Restaurants,"Plot 867A, Bishop Aboyade Cole Street, Victori...",01 262 3135
...,...,...,...,...,...
85,85,cafes,Cafe One Yaba,"E-Center, 1-11 Commercial Ave, Sabo, Yaba, Lag...",
86,86,cafes,Hot Crust Cafe,"Alpha Mall, 117 Ogudu Rd, Ogudu, Lagos, Nigeria",
87,87,cafes,Ouida Cafe,"3c Oba Dosumu St, Ikeja GRA, Lagos, Nigeria",
88,88,cafes,Krispy Kreme,"389C Herbert Macaulay Way, Yaba, Lagos, Nigeria",


In [12]:
# rows per place type: a quick check that both sources made it into the combined table
combined["place_type"].value_counts()

place_type
restaurant    69
cafes         21
Name: count, dtype: int64

In [13]:
# (rows, columns) of the combined table
# NOTE(review): the saved output below appears to predate the place_id column added when
# building `combined`; a fresh Restart & Run All should report 5 columns - confirm
combined.shape

(90, 4)