# Imports

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Define constants (headers & base URL)

In [2]:
# Browser-like User-Agent header sent with every HTTP request in this notebook.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

# Listing-page URL template; `{}` is filled in with the page number.
base_url = "https://www.jumia.com.ng/laptops/?page={}"

# Accumulator for scraped rows: one dict per product.
products = []

# Function to get the rating and review count for a given product

In [3]:
def get_reviews(product_url, timeout=10):
    """Fetch a product page and extract its rating summary text.

    Parameters
    ----------
    product_url : str
        URL of the product page to download.
    timeout : float, optional
        Timeout in seconds handed to ``requests.get`` (default 10, the value
        that was previously hard-coded).

    Returns
    -------
    tuple
        ``(rating, num_reviews)`` where ``rating`` is the stripped text of the
        first text node containing "out of 5" and ``num_reviews`` is the
        stripped text of the first text node containing "verified ratings".
        Each element is ``None`` when no such text is found. ``(None, None)``
        is returned when the response status is not 200 or when any
        ``Exception`` is raised while fetching or parsing.

    Notes
    -----
    This is a best-effort helper: exceptions are printed and swallowed so that
    one bad product page does not abort a long scraping run.
    """
    try:
        resp = requests.get(product_url, headers=headers, timeout=timeout)
        if resp.status_code != 200:
            return None, None

        soup = BeautifulSoup(resp.text, 'html.parser')

        def first_text_containing(needle):
            # `find` stops at the first matching text node, which is all we need.
            match = soup.find(string=lambda t: t and needle in t)
            return match.strip() if match else None

        rating = first_text_containing("out of 5")
        num_reviews = first_text_containing("verified ratings")
        return rating, num_reviews

    except Exception as e:
        # Deliberately broad: callers rely on this function never raising.
        print(f"Error fetching reviews for {product_url}: {e}")
        return None, None


## Scraping multiple pages with live progress updates

In [4]:
import requests
from requests.exceptions import ReadTimeout, ConnectionError
import time

number_of_pages = 5

# Empty the accumulator first so that re-running this cell does not append
# a second copy of every product.
products.clear()

for page in range(1, number_of_pages + 1):
    print(f"Scraping page {page}...")

    try:
        resp = requests.get(base_url.format(page), headers=headers, timeout=20)  # longer timeout
        resp.raise_for_status()
    except (ReadTimeout, ConnectionError) as e:
        print(f"⚠️ Timeout/Connection error on page {page}, skipping... ({e})")
        continue
    except requests.RequestException as e:
        # raise_for_status() raises HTTPError on 4xx/5xx responses; skip the
        # page instead of aborting the whole run.
        print(f"⚠️ Request failed on page {page}, skipping... ({e})")
        continue

    soup = BeautifulSoup(resp.text, 'html.parser')
    items = soup.select('a.core')
    if not items:
        print(f"⚠️ No items found on page {page}")
        continue

    for item in items:
        # Cards without a non-empty title are skipped.
        title_tag = item.select_one('h3.name')
        title = title_tag.text.strip() if title_tag else ""
        if not title:
            continue

        price_tag = item.select_one('.prc')
        price = price_tag.text.strip() if price_tag else None
        brand = item.get('data-gtm-brand', None)

        img_tag = item.select_one('img.img')
        # Use `data-src` when it is set, otherwise fall back to `src`.
        image_url = (img_tag.get('data-src') or img_tag.get('src')) if img_tag else None

        href = item.get('href')
        if href:
            product_url = "https://www.jumia.com.ng" + href
            # get_reviews handles its own errors and returns (None, None) on failure.
            rating, num_reviews = get_reviews(product_url)
        else:
            # No link on the card: keep the listing data, but there is no
            # product page to fetch a rating from.
            product_url = rating = num_reviews = None

        products.append({
            'title': title,
            'price': price,
            'brand': brand,
            'rating': rating,
            'num_reviews': num_reviews,
            'product_url': product_url,
            'image_url': image_url
        })

        if href:
            time.sleep(1)  # Delay between product requests

    print(f"Finished page {page}")
    time.sleep(2)  # Delay between pages

print("Done scraping")


Scraping page 1...
Finished page 1
Scraping page 2...
Finished page 2
Scraping page 3...
Finished page 3
Scraping page 4...
Finished page 4
Scraping page 5...
Finished page 5
Done scraping


# Save results to CSV

In [5]:
# Build the results table from the accumulated product dicts and write it to CSV.
df = pd.DataFrame(products)
df.to_csv("jumia_laptops.csv", index=False)
# Curly braces are needed for f-string interpolation; parentheses print the name literally.
print(f"Collected {len(products)} products from {number_of_pages} pages.")


Collected 200 products from (number_of_pages) pages.


In [6]:
# Display the scraped products table as the cell's output.
df

Unnamed: 0,title,price,brand,rating,num_reviews,product_url,image_url
0,Ace Elec 14.1'' Intel(R)Pentium(R) CPU N3700 1...,"₦ 220,442",Ace Elec,3.7 out of 5,(83 verified ratings),https://www.jumia.com.ng/ace-elec-14.1-intelrp...,https://ng.jumia.is/unsafe/fit-in/300x300/filt...
1,Macbook PRO Laptop A1278 13.3 Inch Core I5 2.5...,"₦ 191,680",Renewed,3.6 out of 5,(28 verified ratings),https://www.jumia.com.ng/renewed-macbook-pro-l...,https://ng.jumia.is/unsafe/fit-in/300x300/filt...
2,"Blueing 15.6"" Laptop J4125 8GB+256GB SSD Stude...","₦ 232,580",Blueing,4 out of 5,(56 verified ratings),https://www.jumia.com.ng/blueing-15.6-laptop-j...,https://ng.jumia.is/unsafe/fit-in/300x300/filt...
3,Hp EliteBook 840 G7 10th Gen Intel Core I5 Tou...,"₦ 538,000",Hp,4.1 out of 5,(45 verified ratings),https://www.jumia.com.ng/elitebook-840-g6-inte...,https://ng.jumia.is/unsafe/fit-in/300x300/filt...
4,Hp Stream 11 Pro Intel Celeron 4GB RAM- 64GB H...,"₦ 145,000",Hp,3.8 out of 5,(13 verified ratings),https://www.jumia.com.ng/hp-stream-11-pro-inte...,https://ng.jumia.is/unsafe/fit-in/300x300/filt...
...,...,...,...,...,...,...,...
195,"DELL Latitude 7480 Intel Core I5, Keyboard Ba...","₦ 450,000",DELL,0 out of 5,,https://www.jumia.com.ng/dell-latitude-7480-in...,https://ng.jumia.is/unsafe/fit-in/300x300/filt...
196,DELL Latitude 5300 TOUCHSCREEN Core i5 -512G...,"₦ 440,000",DELL,4.5 out of 5,(11 verified ratings),https://www.jumia.com.ng/dell-latitude-5300-to...,https://ng.jumia.is/unsafe/fit-in/300x300/filt...
197,Hp ProBook 11 X360- TOUCH Intel Celeron 256GB ...,"₦ 205,000",Hp,0 out of 5,,https://www.jumia.com.ng/hp-probook-11-x360-to...,https://ng.jumia.is/unsafe/fit-in/300x300/filt...
198,Hp EliteBook 840 G5 TOUCHSCREEN Core I7-32GB ...,"₦ 630,000",Hp,0 out of 5,,https://www.jumia.com.ng/elitebook-840-g5-touc...,https://ng.jumia.is/unsafe/fit-in/300x300/filt...
