In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def web_scraping_jumia_microwaves(url, baseurl):

    descriptions = []
    prices = []
    older_prices = []
    reviews_list = []


    r = requests.get(url)
    print(f"Fetching data from {url} Status {r}")

    if r.status_code == 200:
        soup = BeautifulSoup(r.text, "html.parser")

        # Extract product prices
        price_elements = soup.find_all("div", {"class": "prc"})
        for price_element in price_elements:
            prices.append(price_element.text.strip())

        # Extract old prices
        old_price_elements = soup.find_all("div", class_="old")
        for old_price_element in old_price_elements:
            older_prices.append(old_price_element.text.strip())

        # Extract product descriptions
        desc_elements = soup.find_all("h3", class_="name")
        for desc_element in desc_elements:
            descriptions.append(desc_element.text.strip())

        # Extract reviews
        reviews = soup.find_all("div", class_="stars _s")
        for rev in reviews:
            reviews_list.append(rev.text.strip())
    else:
        print(f"Failed to fetch data from {url}")
    
    for i in range(2, 51):
        page_url = f"{baseurl}{i}#catalog-listing"
        r = requests.get(page_url)
        print(f"Fetching data from: {page_url} - Status: {r.status_code}")
    
        if r.status_code == 200:
            soup = BeautifulSoup(r.text, "html.parser")

        # Extract product prices
        price_elements = soup.find_all("div", {"class": "prc"})
        for price_element in price_elements:
            prices.append(price_element.text.strip())

        # Extract old prices
        old_price_elements = soup.find_all("div", class_="old")
        for old_price_element in old_price_elements:
            older_prices.append(old_price_element.text.strip())

        # Extract product descriptions
        desc_elements = soup.find_all("h3", class_="name")
        for desc_element in desc_elements:
            descriptions.append(desc_element.text.strip())

        # Extract reviews
        reviews = soup.find_all("div", class_="stars _s")
        for rev in reviews:
            reviews_list.append(rev.text.strip())
    else:
        print(f"Failed to fetch data from {page_url}")

    rows = list(zip(descriptions, prices, older_prices, reviews_list))
    return rows

In [2]:
def collect_data_microwaves():
    data = web_scraping_jumia_microwaves(url, baseurl)

    df = pd.DataFrame(data, columns = ['descriptions', 'price', 'old_price', 'ratings'])

    df.to_csv("jumia_microwaves.csv", index = False)

In [3]:
url = "https://www.jumia.co.ke/small-appliances-microwave/"
# baseurl = "https://www.jumia.co.ke/catalog/?q=microwaves&amp;page="
baseurl = "https://www.jumia.co.ke/catalog/?q=microwaves&page="

In [4]:
web_scraping_jumia_microwaves(url, baseurl)

Fetching data from https://www.jumia.co.ke/small-appliances-microwave/ Status <Response [200]>
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=2#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=3#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=4#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=5#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=6#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=7#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=8#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=9#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=10#catalog

[('Ramtons RM/458 - Digital Glass Microwave, 700W - 20L -Black & Silver',
  'KSh 21,000',
  'KSh 11,800',
  '4.6 out of 5'),
 ('Nunix Digital Microwave Oven 20L WITH GRILL',
  'KSh 10,395',
  'KSh 11,080',
  '4.3 out of 5'),
 ('Ramtons RM/684-25 LITERS MICROWAVE+GRILL (1YR WRTY)',
  'KSh 20,000',
  'KSh 12,000',
  '4.5 out of 5'),
 ('Nunix  3 IN 1 BREAKFAST MAKER, TOASTER,OVEN, COFFEE MAKER',
  'KSh 8,288',
  'KSh 9,999',
  '2 out of 5'),
 ('Nunix 20L Electric Oven With Grill (3 Knobs)',
  'KSh 8,499',
  'KSh 19,999',
  '4.2 out of 5'),
 ('Mika MMWMSKH2012B - Microwave, 20L, Manual - Black',
  'KSh 12,000',
  'KSh 20,000',
  '4.8 out of 5'),
 ('Nunix Digital Microwave with Grill, 20L',
  'KSh 7,799',
  'KSh 13,999',
  '3.5 out of 5'),
 ('Mika MMWMSKH2011W - Microwave, 20L, Manual - White',
  'KSh 14,650',
  'KSh 956',
  '3.5 out of 5'),
 ('Hisense 20L Digital Microwave oven',
  'KSh 10,900',
  'KSh 18,999',
  '4.3 out of 5'),
 ('Hisense  20L Digital Microwave oven',
  'KSh 18,130',
  '

In [5]:
collect_data_microwaves()

Fetching data from https://www.jumia.co.ke/small-appliances-microwave/ Status <Response [200]>
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=2#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=3#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=4#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=5#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=6#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=7#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=8#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=9#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=10#catalog

In [6]:
data = pd.read_csv("jumia_microwaves.csv")
data.head(14)

Unnamed: 0,descriptions,price,old_price,ratings
0,"Ramtons RM/458 - Digital Glass Microwave, 700W...","KSh 21,000","KSh 11,800",4.6 out of 5
1,Nunix Digital Microwave Oven 20L WITH GRILL,"KSh 10,395","KSh 11,080",4.3 out of 5
2,Ramtons RM/684-25 LITERS MICROWAVE+GRILL (1YR ...,"KSh 20,000","KSh 12,000",4.5 out of 5
3,"Nunix 3 IN 1 BREAKFAST MAKER, TOASTER,OVEN, C...","KSh 8,288","KSh 9,999",2 out of 5
4,Nunix 20L Electric Oven With Grill (3 Knobs),"KSh 8,499","KSh 19,999",4.2 out of 5
5,"Mika MMWMSKH2012B - Microwave, 20L, Manual - B...","KSh 12,000","KSh 20,000",4.8 out of 5
6,"Nunix Digital Microwave with Grill, 20L","KSh 7,799","KSh 13,999",3.5 out of 5
7,Hisense 20L Digital Microwave oven,"KSh 14,650","KSh 16,000",3.5 out of 5
8,"Mika MMWMSKH2011W - Microwave, 20L, Manual - W...","KSh 10,900","KSh 18,999",4.3 out of 5
9,Hisense 20L Digital Microwave oven,"KSh 18,130","KSh 20,000",1 out of 5


In [23]:
def microwaves_cleaning(csv_path):    
    data = pd.read_csv(csv_path)
    data["Price"] = data["price"].str.replace("KSh", "").str.replace(",","")
    data["Reviews"] = data["ratings"].str.replace(" out of 5", "")
    data["Old_price"] = data["old_price"].str.replace("KSh ", "").str.replace(",", "")
    data = data.drop(columns = ["price", "ratings", "old_price"])
    pattern = r"^[a-zA-Z]+"
    data["brand"] = data["descriptions"].str.extract(f"({pattern})")
    pattern_cap = r'(\d+)\s*(?=litres|l|L)'
    result = data["descriptions"].str.extract(f"({pattern_cap})")
    data["capacity"] = result[0]
    columns = ["descriptions", "brand", "Price", "Old_price", "capacity", "Reviews"]
    data = data[columns]
    data.to_csv("cleaned_microwaves_jumia", index = False)
    data 
    return data

In [24]:
microwaves_cleaning("jumia_microwaves.csv")

Unnamed: 0,descriptions,brand,Price,Old_price,capacity,Reviews
0,"Ramtons RM/458 - Digital Glass Microwave, 700W...",Ramtons,21000,11800,20,4.6
1,Nunix Digital Microwave Oven 20L WITH GRILL,Nunix,10395,11080,20,4.3
2,Ramtons RM/684-25 LITERS MICROWAVE+GRILL (1YR ...,Ramtons,20000,12000,25,4.5
3,"Nunix 3 IN 1 BREAKFAST MAKER, TOASTER,OVEN, C...",Nunix,8288,9999,,2
4,Nunix 20L Electric Oven With Grill (3 Knobs),Nunix,8499,19999,20,4.2
...,...,...,...,...,...,...
126,VON 20L DIGITAL MICROWAVE,VON,12499,2140,20,5
127,Nunix Digital Microwave Oven 20L. C20PG1,Nunix,6299,11999,20,2.7
128,Nunix Digital microwave 20 liters,Nunix,9974,18000,20,1
129,"Rotating Cake Turntable Cake Stand, Revolving ...",Rotating,15995,887,,5
