In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [15]:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}
def scrape_jumia_monitors(page_num):
    url = f"https://www.jumia.com.eg/televisions/?page={page_num}"
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, "html.parser")

    products = soup.find_all("article", {"class": "prd _fb col c-prd"})

    product_data = []

    for product in products:
        try:
            name = product.find("h3", {"class": "name"}).text.strip()
        except:
            name = None

        try:
            price = product.find("div", {"class": "prc"}).text.strip()
        except:
            price = None

        try:
            rating = product.find("div", {"class": "rev"}).text.strip("()").strip()
        except:
            rating = None

        try:
            reviews = product.find("div", {"class": "rev"}).text.strip("()")
        except:
            reviews = None

        try:
            product_url = "https://www.jumia.com.eg" + product.find("a")["href"]
        except:
            product_url = None

        try:
            image_url = product.find("img")["data-src"]
        except:
            image_url = None

        brand = name.split()[0] if name else None

        product_data.append({
            "Product Name": name,
            "Price": price,
            "Rating": rating,
            "Number of Reviews": reviews,
            "Product URL": product_url,
            "Image URL": image_url,
            "Brand": brand
        })

    return product_data
    

In [16]:
all_products = []

for page in range(1, 4):  # scrape 3 pages
    print(f"Scraping page {page}...")
    data = scrape_jumia_monitors(page)
    all_products.extend(data)
    time.sleep(2)  # be polite to the server

print(f"Total products scraped: {len(all_products)}")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Total products scraped: 120


In [17]:
df = pd.DataFrame(all_products)

In [18]:
df.drop_duplicates()
df["Price"] = df["Price"].str.replace("EGP", "").str.replace(",", "").astype(float)
df

Unnamed: 0,Product Name,Price,Rating,Number of Reviews,Product URL,Image URL,Brand
0,LG 43LM6370PVA.AFU Series Full HD HDR Smart LE...,13299.0,4.9 out of 5(15,4.9 out of 5(15,https://www.jumia.com.eg/customer/account/logi...,https://eg.jumia.is/unsafe/fit-in/300x300/filt...,LG
1,Samsung ‎‎50 Inch Crystal UHD DU7000 4K Tizen ...,17299.0,4.2 out of 5(73,4.2 out of 5(73,https://www.jumia.com.eg/customer/account/logi...,https://eg.jumia.is/unsafe/fit-in/300x300/filt...,Samsung
2,Samsung ‎‎55 Inch Crystal UHD DU7000 4K Tizen ...,19299.0,4.3 out of 5(80,4.3 out of 5(80,https://www.jumia.com.eg/customer/account/logi...,https://eg.jumia.is/unsafe/fit-in/300x300/filt...,Samsung
3,Skyline 32-22A - 32-inch HD LED TV - Limited,3899.0,3.9 out of 5(317,3.9 out of 5(317,https://www.jumia.com.eg/customer/account/logi...,https://eg.jumia.is/unsafe/fit-in/300x300/filt...,Skyline
4,ATA 32HON - 32-inch HD TV,4099.0,4 out of 5(269,4 out of 5(269,https://www.jumia.com.eg/customer/account/logi...,https://eg.jumia.is/unsafe/fit-in/300x300/filt...,ATA
...,...,...,...,...,...,...,...
115,LG Smart LED TV With Magic Remote Nanocell 55 ...,25499.0,,,https://www.jumia.com.eg/customer/account/logi...,https://eg.jumia.is/unsafe/fit-in/300x300/filt...,LG
116,Itel S32101BE Icast 32 Inch HD- Built-in Recei...,4700.0,3.5 out of 5(4,3.5 out of 5(4,https://www.jumia.com.eg/customer/account/logi...,https://eg.jumia.is/unsafe/fit-in/300x300/filt...,Itel
117,LG 50NANO776RA 50-Inch Screen,28637.0,,,https://www.jumia.com.eg/customer/account/logi...,https://eg.jumia.is/unsafe/fit-in/300x300/filt...,LG
118,"Samsung HD Smart TV 32"" Inch T5300",10500.0,,,https://www.jumia.com.eg/customer/account/logi...,https://eg.jumia.is/unsafe/fit-in/300x300/filt...,Samsung


In [20]:
df.to_csv("jumia_monitors.csv", index=False)