In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def web_scraping_jumia_microwaves(url, baseurl):

    descriptions = []
    prices = []
    older_prices = []
    reviews_list = []
    urls = []

    r = requests.get(url)
    print(f"Fetching data from {url} Status {r}")

    if r.status_code == 200:
        soup = BeautifulSoup(r.text, "html.parser")

        # Extract product prices
        price_elements = soup.find_all("div", {"class": "prc"})
        for price_element in price_elements:
            prices.append(price_element.text.strip())

        # Extract old prices
        old_price_elements = soup.find_all("div", class_="old")
        for old_price_element in old_price_elements:
            older_prices.append(old_price_element.text.strip())

        # Extract product descriptions
        desc_elements = soup.find_all("h3", class_="name")
        for desc_element in desc_elements:
            descriptions.append(desc_element.text.strip())

        # Extract reviews
        reviews = soup.find_all("div", class_="stars _s")
        for rev in reviews:
            reviews_list.append(rev.text.strip())

        products = soup.find_all('a', class_='core')

        for product in products:
                # Extract product link
                link = product['href'] if 'href' in product.attrs else None
                # Complete the URL if the link is relative
                link = f"https://www.jumia.co.ke{link}" if link and link.startswith('/') else link
                urls.append(link)
    else:
        print(f"Failed to fetch data from {url}")
    
    for i in range(2, 51):
        page_url = f"{baseurl}{i}#catalog-listing"
        r = requests.get(page_url)
        print(f"Fetching data from: {page_url} - Status: {r.status_code}")
    
        if r.status_code == 200:
            soup = BeautifulSoup(r.text, "html.parser")

        # Extract product prices
        price_elements = soup.find_all("div", {"class": "prc"})
        for price_element in price_elements:
            prices.append(price_element.text.strip())

        # Extract old prices
        old_price_elements = soup.find_all("div", class_="old")
        for old_price_element in old_price_elements:
            older_prices.append(old_price_element.text.strip())

        # Extract product descriptions
        desc_elements = soup.find_all("h3", class_="name")
        for desc_element in desc_elements:
            descriptions.append(desc_element.text.strip())

        # Extract reviews
        reviews = soup.find_all("div", class_="stars _s")
        for rev in reviews:
            reviews_list.append(rev.text.strip())
        

        
        products = soup.find_all('a', class_='core')

        for product in products:
                # Extract product link
                link = product['href'] if 'href' in product.attrs else None
                # Complete the URL if the link is relative
                link = f"https://www.jumia.co.ke{link}" if link and link.startswith('/') else link
                urls.append(link)
                
    else:
        print(f"Failed to fetch data from {page_url}")

    rows = list(zip(descriptions, prices, older_prices, reviews_list, urls))
    return rows

In [16]:
def collect_data_microwaves():
    data = web_scraping_jumia_microwaves(url, baseurl)

    df = pd.DataFrame(data, columns = ['descriptions', 'price', 'old_price', 'ratings', 'urls'])

    df.to_csv(r"..\data\scraped_data\jumia_scraped_microwaves.csv", index = False)

In [17]:
url = "https://www.jumia.co.ke/small-appliances-microwave/"
# baseurl = "https://www.jumia.co.ke/catalog/?q=microwaves&amp;page="
baseurl = "https://www.jumia.co.ke/catalog/?q=microwaves&page="

In [18]:
web_scraping_jumia_microwaves(url, baseurl)

Fetching data from https://www.jumia.co.ke/small-appliances-microwave/ Status <Response [200]>
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=2#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=3#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=4#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=5#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=6#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=7#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=8#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=9#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=10#catalog

[('Ramtons RM/458 - Digital Glass Microwave, 700W - 20L -Black & Silver',
  'KSh 19,999',
  'KSh 11,800',
  '4.6 out of 5',
  'https://www.jumia.co.ke/hisense-30l-microwave-2yrs-wrty-129537441.html'),
 ('Nunix Digital Microwave Oven 20L WITH GRILL',
  'KSh 9,999',
  'KSh 11,080',
  '4.3 out of 5',
  'https://www.jumia.co.ke/haier-hmw20dbm-digital-microwave-oven-700w-20l-black-95922981.html'),
 ('Nunix 20L Electric Oven With Grill (3 Knobs)',
  'KSh 15,200',
  'KSh 9,999',
  '4.2 out of 5',
  'https://www.jumia.co.ke/hisense-h25moms7hg-25l-grill-microwave-2yrs-wrty.-73548924.html'),
 ('Nunix  3 IN 1 BREAKFAST MAKER, TOASTER,OVEN, COFFEE MAKER',
  'KSh 7,799',
  'KSh 12,000',
  '2 out of 5',
  'https://www.jumia.co.ke/smart-pro-20l-digital-microwave-oven-smv-20l-v42-black-1yr-wrty-280221109.html'),
 ('Ramtons RM/684-25 LITERS MICROWAVE+GRILL (1YR WRTY)',
  'KSh 8,199',
  'KSh 19,999',
  '4.5 out of 5',
  'https://www.jumia.co.ke/haier-hmw20mb-analog-microwave-oven-700w-20l-black.-9592298

In [19]:
collect_data_microwaves()

Fetching data from https://www.jumia.co.ke/small-appliances-microwave/ Status <Response [200]>
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=2#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=3#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=4#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=5#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=6#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=7#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=8#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=9#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke/catalog/?q=microwaves&page=10#catalog

In [20]:
data = pd.read_csv(r"..\data\scraped_data\jumia_scraped_microwaves.csv")
data.head(14)

Unnamed: 0,descriptions,price,old_price,ratings,urls
0,"Ramtons RM/458 - Digital Glass Microwave, 700W...","KSh 19,999","KSh 11,800",4.6 out of 5,https://www.jumia.co.ke/hisense-30l-microwave-...
1,Nunix Digital Microwave Oven 20L WITH GRILL,"KSh 9,999","KSh 11,080",4.3 out of 5,https://www.jumia.co.ke/haier-hmw20dbm-digital...
2,Nunix 20L Electric Oven With Grill (3 Knobs),"KSh 15,200","KSh 9,999",4.2 out of 5,https://www.jumia.co.ke/hisense-h25moms7hg-25l...
3,"Nunix 3 IN 1 BREAKFAST MAKER, TOASTER,OVEN, C...","KSh 7,799","KSh 12,000",2 out of 5,https://www.jumia.co.ke/smart-pro-20l-digital-...
4,Ramtons RM/684-25 LITERS MICROWAVE+GRILL (1YR ...,"KSh 8,199","KSh 19,999",4.5 out of 5,https://www.jumia.co.ke/haier-hmw20mb-analog-m...
5,"Mika MMWMSKH2012B - Microwave, 20L, Manual - B...","KSh 8,999","KSh 15,999",4.8 out of 5,https://www.jumia.co.ke/hisense-h20moms11-20-l...
6,"Nunix Digital Microwave with Grill, 20L","KSh 7,428","KSh 20,000",3.5 out of 5,https://www.jumia.co.ke/roch-rmw-20px7p-b-b-mi...
7,Hisense 20L Digital Microwave oven,"KSh 14,650","KSh 13,999",3.5 out of 5,https://www.jumia.co.ke/ramtons-rm589-microwav...
8,"Mika MMWMSKH2011W - Microwave, 20L, Manual - W...","KSh 10,599","KSh 16,000",4.3 out of 5,https://www.jumia.co.ke/ramtons-rm458-digital-...
9,Hisense 20L Digital Microwave oven,"KSh 18,130","KSh 18,999",1 out of 5,https://www.jumia.co.ke/ramtons-rm326-25l-micr...


In [21]:
def jumia_microwaves_cleaning(csv_path):
    data = pd.read_csv(csv_path)
    data.head()
    data["Price"] = data["price"].str.replace("KSh", "").str.replace(",","")
    data["Reviews"] = data["ratings"].str.replace(" out of 5", "")
    data["Old_price"] = data["old_price"].str.replace("KSh ", "").str.replace(",", "")
    data["Price"] = data["Price"].astype(int)
    data["Reviews"] = data["Reviews"].astype(float)
    data["Old_price"] = data["Old_price"].astype(int)
    pattern = r"^[a-zA-Z]+"
    data["brand"] = data["descriptions"].str.extract(f"({pattern})")
    pattern_cap = r'(\d+)\s*(?=litres|l|L)'
    result = data["descriptions"].str.extract(f"({pattern_cap})")
    data["capacity"] = result[0]
    data["capacity"] = data["capacity"].str.strip().astype(float)
    id = [i for i in range(1, len(data) + 1)]
    data["id"] = id
    data["id"] - data["id"].astype(int)
    data["source"] = ["Jumia"] * len(data)
    data = data.drop(columns = ["price", "ratings", "old_price"])
    columns = ["id", "descriptions", "brand", "Price", "Old_price", "capacity", "Reviews", "source", "urls"]
    data = data[columns]
    data.rename(columns = {"descriptions" : "description", "Price" : "price", "Old_price" : "old_price", "Reviews" :  "reviews"})
    data = data[columns]
    data.to_csv(r"..\data\clean_data\jumia_clean_microwaves.csv", index=False)

    return data 

In [22]:
jumia_microwaves_cleaning(r"..\data\scraped_data\jumia_scraped_microwaves.csv")

Unnamed: 0,id,descriptions,brand,Price,Old_price,capacity,Reviews,source,urls
0,1,"Ramtons RM/458 - Digital Glass Microwave, 700W...",Ramtons,19999,11800,20.0,4.6,Jumia,https://www.jumia.co.ke/hisense-30l-microwave-...
1,2,Nunix Digital Microwave Oven 20L WITH GRILL,Nunix,9999,11080,20.0,4.3,Jumia,https://www.jumia.co.ke/haier-hmw20dbm-digital...
2,3,Nunix 20L Electric Oven With Grill (3 Knobs),Nunix,15200,9999,20.0,4.2,Jumia,https://www.jumia.co.ke/hisense-h25moms7hg-25l...
3,4,"Nunix 3 IN 1 BREAKFAST MAKER, TOASTER,OVEN, C...",Nunix,7799,12000,,2.0,Jumia,https://www.jumia.co.ke/smart-pro-20l-digital-...
4,5,Ramtons RM/684-25 LITERS MICROWAVE+GRILL (1YR ...,Ramtons,8199,19999,25.0,4.5,Jumia,https://www.jumia.co.ke/haier-hmw20mb-analog-m...
...,...,...,...,...,...,...,...,...,...
191,192,Nunix 20l Microwave with Grill,Nunix,900,12000,20.0,5.0,Jumia,https://www.jumia.co.ke/generic-microwave-dust...
192,193,"Roch Manual Microwave Oven 700W, 20L – RMW-20L...",Roch,7999,16999,20.0,5.0,Jumia,https://www.jumia.co.ke/synix-5.5l-air-fryer-2...
193,194,Synix Syinix 20l microwave oven,Synix,1250,3000,20.0,5.0,Jumia,https://www.jumia.co.ke/generic-1pc-non-slip-k...
194,195,Nunix 20L Electric Microwave Oven,Nunix,1080,12500,20.0,4.0,Jumia,https://www.jumia.co.ke/generic-cake-turntable...
