In [1]:
# Built-in library
import itertools
import re
from typing import Any, Optional, Sequence, Union
import warnings

# Standard imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Silence all warnings for the rest of the session
warnings.filterwarnings(action="ignore")


# pandas display limits: show up to 1,000 rows / 1,000 columns and
# up to 2,000 characters per cell before truncating
pd.set_option("display.max_rows", 1_000)
pd.set_option("display.max_columns", 1_000)
pd.set_option("display.max_colwidth", 2_000)

# Black code formatter (optional): loads the nb_black IPython extension
%load_ext nb_black
# Auto-reload imports: load the autoreload extension, then enable mode 2
%load_ext autoreload
%autoreload 2

<IPython.core.display.Javascript object>

In [2]:
from bs4 import BeautifulSoup
import requests

<IPython.core.display.Javascript object>

In [4]:
# Listing pages to scrape: pages 1 and 2 of the Lagos restaurant directory
url_list = [
    f"https://www.finelib.com/cities/lagos/business/food/restaurants/page-{page}"
    for page in (1, 2)
]

<IPython.core.display.Javascript object>

In [5]:
# Simplify the process by writing a function


def _child_text(parent: Any, css_class: str) -> Optional[str]:
    """Return the text of the first ``<div class=css_class>`` under ``parent``.

    Returns None when ``parent`` is None or no matching div exists, so a
    listing with a missing field yields an empty cell instead of an exception.
    """
    if parent is None:
        return None
    node = parent.find(name="div", class_=css_class)
    return None if node is None else node.text


def get_restaurant_data(*, URL: list, timeout: float = 30.0) -> pd.DataFrame:
    """
    Scrape restaurant listings from each page in ``URL`` and return them as a
    single dataframe with one row per restaurant.

    => param:
        URL = list of url for the pages to scrape
        timeout = seconds to wait for each HTTP request before giving up
                  (default 30.0)

    => return:
        dataframe with the columns ``restaurant_name``, ``restaurant_address``
        and ``restaurant_phone_no``. A field that is missing from a listing is
        stored as None. An empty ``URL`` list gives an empty dataframe that
        still has the three columns.

    => raises:
        requests.HTTPError if a page answers with a 4xx/5xx status, and the
        usual requests exceptions on connection failure or timeout.
    """
    columns = ["restaurant_name", "restaurant_address", "restaurant_phone_no"]

    # one dict per restaurant; building whole records keeps the columns aligned
    records = []

    for page_url in URL:
        # without a timeout requests can block forever on a stalled server
        response = requests.get(page_url, timeout=timeout)
        # fail loudly on error pages instead of parsing them as listings
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "lxml")

        # container that holds the listing boxes
        left_column = soup.find(name="div", class_="left-column")
        if left_column is None:
            # page has no listing container: nothing to collect from it
            continue

        listings = left_column.find_all(name="div", class_="box-682")

        # the first "box-682" div is skipped, as in the original `idx > 0` check
        # NOTE(review): presumably it is a header/intro box -- confirm on the site
        for listing in listings[1:]:
            heading = listing.find(name="div", class_="box-headings")
            link = heading.a if heading is not None else None

            info = listing.find(name="div", class_="listing-info-img")
            phone_box = (
                info.find(name="div", class_="tel-no-div") if info is not None else None
            )

            records.append(
                {
                    "restaurant_name": link.text if link is not None else None,
                    # the address is the first "cmpny-lstng-1" div of the info block
                    "restaurant_address": _child_text(info, "cmpny-lstng-1"),
                    # the phone number is the "cmpny-lstng-1" div inside "tel-no-div"
                    "restaurant_phone_no": _child_text(phone_box, "cmpny-lstng-1"),
                }
            )

    # `columns=` keeps the three columns (and their order) even with no records
    return pd.DataFrame(records, columns=columns)

<IPython.core.display.Javascript object>

In [6]:
# Apply the function: scrape every page in url_list into a single dataframe
df = get_restaurant_data(URL=url_list)

<IPython.core.display.Javascript object>

In [7]:
# Preview the first rows only; with display.max_rows raised to 1,000 a bare
# `df` would render the whole table
df.head(10)

Unnamed: 0,restaurant_name,restaurant_address,restaurant_phone_no
0,Bernadines Cloud Kitchen,"35, Sholanke Akoka, Yaba, Lagos",0903 651 7676
1,Abibiz Restaurant,"Murtala Mohammed International Airport, Ikeja, Lagos, Nigeria","0803 302 4280, 01 773 1431"
2,Afi's Restaurant,"5 Olufemi Street, Surulere, Lagos, Nigeria",01 470 3044
3,Aldente,"S11 Lagos City Mall, Onikan, Lagos, Nigeria","01 444 3944, 01 791 2942"
4,All Seasons Restaurants,"Plot 867A, Bishop Aboyade Cole Street, Victoria Island, Lagos, Nigeria",01 262 3135
5,Anis Restaurant,"11 Simpson Street, Lagos Island, Lagos, Nigeria",01 263 7556
6,Appetizers Restaurant,"66 Ijaiye Road, Suite 33, Assembly Spot Complex, Ikeja, Lagos, Nigeria",0803 727 7483
7,Atlantic Bar & Restaurant,"14B Adeola Hopewell Street, Victoria Island, Lagos, Nigeria","0802 327 1564, 01 261 0584"
8,Ayus Restaurant And Bar,"2 Ibiyinka Olorunimbe Close, Victoria Island, Lagos, Nigeria","0703 381 4662, 0803 316 2543"
9,Bangkok Restaurant,"Plot 244A, Muri Okunntola Street, Victoria Island, Lagos, Nigeria","0803 307 7666, 0706 217 3888"


<IPython.core.display.Javascript object>

In [8]:
# Save the data to a CSV file; the dataframe index is written out as the
# "restaurant_id" column
df.to_csv("restaurant.csv", index_label="restaurant_id")

<IPython.core.display.Javascript object>

In [9]:
# (number of rows, number of columns) of the scraped dataframe
df.shape

(69, 3)

<IPython.core.display.Javascript object>