In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import csv

def web_scraping(base_url, first_page_url):
    descriptions = []
    prices = []
    older_prices = []
    reviews_list = []

    # Scrape the first page
    r = requests.get(first_page_url)
    print(f"Fetching data from: {first_page_url} - Status: {r.status_code}")

    if r.status_code == 200:
        soup = BeautifulSoup(r.text, 'html.parser')

        # Extract product prices
        price_elements = soup.find_all("div", {"class": "prc"})
        for price_element in price_elements:
            prices.append(price_element.text.strip())

        # Extract old prices
        old_price_elements = soup.find_all("div", class_="old")
        for old_price_element in old_price_elements:
            older_prices.append(old_price_element.text.strip())

        # Extract product descriptions
        desc_elements = soup.find_all("h3", class_="name")
        for desc_element in desc_elements:
            descriptions.append(desc_element.text.strip())

        # Extract reviews
        reviews = soup.find_all("div", class_="stars _s")
        for rev in reviews:
            reviews_list.append(rev.text.strip())
    else:
        print(f"Failed to fetch data from {first_page_url}")

    # Scrape pages 2 to 50
    for i in range(2, 51):
        page_url = f"{base_url}{i}#catalog-listing"
        r = requests.get(page_url)
        print(f"Fetching data from: {page_url} - Status: {r.status_code}")

        if r.status_code != 200:
            print(f"Failed to fetch data from {page_url}")
            continue

        soup = BeautifulSoup(r.text, 'html.parser')

        # Extract product prices
        price_elements = soup.find_all("div", {"class": "prc"})
        for price_element in price_elements:
            prices.append(price_element.text.strip())

        # Extract old prices
        old_price_elements = soup.find_all("div", class_="old")
        for old_price_element in old_price_elements:
            older_prices.append(old_price_element.text.strip())

        # Extract product descriptions
        desc_elements = soup.find_all("h3", class_="name")
        for desc_element in desc_elements:
            descriptions.append(desc_element.text.strip())

        # Extract reviews
        reviews = soup.find_all("div", class_="stars _s")
        for rev in reviews:
            reviews_list.append(rev.text.strip())

    # Return collected data
    rows = list(zip(descriptions, prices, older_prices, reviews_list))
    return rows



In [10]:
def collect_data_phones():

    data = web_scraping(base_url, first_page_url)

    # Convert to DataFrame for easier handling
    df = pd.DataFrame(data, columns=['Description', 'Price', 'Old Price', 'Reviews'])

    # Save to CSV
    df.to_csv('jumia_mobile_phones.csv', index=False)

In [11]:
 #Base URL for pages 2 to 50
base_url = "https://www.jumia.co.ke//mobile-phones/?page="

# URL for the first page
first_page_url = "https://www.jumia.co.ke/mobile-phones/"

collect_data_phones()

Fetching data from: https://www.jumia.co.ke/mobile-phones/ - Status: 200
Fetching data from: https://www.jumia.co.ke//mobile-phones/?page=2#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke//mobile-phones/?page=3#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke//mobile-phones/?page=4#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke//mobile-phones/?page=5#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke//mobile-phones/?page=6#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke//mobile-phones/?page=7#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke//mobile-phones/?page=8#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke//mobile-phones/?page=9#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke//mobile-phones/?page=10#catalog-listing - Status: 200
Fetching data from: https://www.jumia.co.ke//mobile-p

In [17]:
dataset = pd.read_csv("jumia_mobile_phones.csv")
dataset.head(10)

Unnamed: 0,Description,Price,Old Price,Reviews
0,"XIAOMI Redmi A3, 6.71"", 3GB RAM + 64GB (Dual S...","KSh 12,200","KSh 11,000",4.4 out of 5
1,"XIAOMI Redmi 14C, 6.88"" (4GB RAM+128GB Storage...","KSh 9,200","KSh 15,000",4.4 out of 5
2,"Itel S23+ 6.78"", 128GB + 4GB RAM, 50MP Camera,...","KSh 12,999","KSh 23,000",4.5 out of 5
3,"Tecno Spark 30c, 6.67'' HD+, UP to 128GB ROM+ ...","KSh 13,000","KSh 14,999",4.1 out of 5
4,"XIAOMI Redmi 13 6.79'' 8GB+256GB Dual SIM, 4G,...","KSh 13,580","KSh 23,000",3.8 out of 5
5,"VILLAON V101 177"" Kabambe, Wireless FM, Camera...","KSh 7,000","KSh 1,200",4.1 out of 5
6,"XIAOMI Redmi A3x, 6.71"", 64GB + 3GB RAM (Dual ...","KSh 12,999","KSh 10,500",4.4 out of 5
7,"XIAOMI Redmi 14C, 6.88"" (4GB RAM+128GB Storage...","KSh 21,060","KSh 15,000",4.4 out of 5
8,"Samsung Galaxy A05, 6.7'' 4GB RAM + 64GB ROM (...","KSh 15,000","KSh 14,999",3.7 out of 5
9,"Tecno Spark 20 Pro, 6.78"", 256GB+8GB (8GB RAM ...","KSh 13,599","KSh 26,999",4.4 out of 5
