In [2]:
import pandas as pd
import numpy as np

In [22]:
import asyncio
import pandas as pd
from itertools import zip_longest
from playwright.async_api import async_playwright

async def scrape_jumia():
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)  # Run in headless mode
        page = await browser.new_page()

        # Go to Jumia gaming laptops category
        await page.goto("https://www.jumia.com.ng/gaming-laptops/")

        # Wait for products to load
        await page.wait_for_selector(".name")

        # Extract product names
        product_names = await page.locator(".name").all_inner_texts()

        # Extract prices
        product_prices = await page.locator(".prc").all_inner_texts()

        # Extract ratings
        product_rating = await page.locator(".rev").all_inner_texts()

        # Extract image URLs
        product_images = await page.locator(".img").evaluate_all("elements => elements.map(e => e.getAttribute('data-src'))")

        # Debug: Print lengths of all lists
        print(f"Names: {len(product_names)}, Prices: {len(product_prices)}, Ratings: {len(product_rating)}, Images: {len(product_images)}")

        # Ensure all lists have the same length by filling missing values with "N/A"
        aligned_data = list(zip_longest(product_names, product_prices, product_rating, product_images, fillvalue="N/A"))

        # Print extracted data
        for name, price, rating, img in aligned_data:
            print(f"Product: {name} | Price: {price} | Rating: {rating} | Image: {img}")

        # Save to CSV
        df = pd.DataFrame(aligned_data, columns=["Product Name", "Price", "Rating", "Image URL"])
        df.to_csv("jumia_products.csv", index=False)
        print("Data saved to jumia_products.csv")

        await browser.close()

# Run the scraper
await scrape_jumia()  # Use await instead of asyncio.run() in Jupyter


Names: 40, Prices: 40, Ratings: 8, Images: 40
Product: Lenovo Legion Pro 5 16IRX9 Gaming | Price: ₦ 4,000,000 | Rating: 4.4 out of 5
(9) | Image: https://ng.jumia.is/unsafe/fit-in/300x300/filters:fill(white)/product/65/7253293/1.jpg?2813
Product: Hp 15 VICTUS GAMING, 12TH GEN INTEL CORE I5, 16GB RAM, 512GB SSD NVIDIA RTX 3050 (4GB) WINS 11 | Price: ₦ 1,340,000 | Rating: 3.8 out of 5
(5) | Image: https://ng.jumia.is/unsafe/fit-in/300x300/filters:fill(white)/product/87/9747762/1.jpg?4591
Product: Anti Blue Ray Light Blocking Glasses For Computers / Phones | Price: ₦ 4,199 | Rating: 5 out of 5
(2) | Image: https://ng.jumia.is/unsafe/fit-in/300x300/filters:fill(white)/product/37/0368273/1.jpg?4891
Product: Lenovo Legion 7i - 16.0" WQXGA Display - 32GB RAM / 1TB SSD - Nvidia GeForce RTX 4070 (8GB) - 14th Gen Core I9-14900HX - Windows 11 - Glacier White. | Price: ₦ 4,000,000 | Rating: 3.3 out of 5
(4) | Image: https://ng.jumia.is/unsafe/fit-in/300x300/filters:fill(white)/product/06/0633993/1

In [26]:
import asyncio
import pandas as pd
from itertools import zip_longest
from playwright.async_api import async_playwright

async def scrape_jumia():
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)  # Run in headless mode
        page = await browser.new_page()

        all_products = []  # Store all products

        for page_number in range(1,5):  # Scrape pages 1 to 10
            url = f"https://www.jumia.com.ng/gaming-laptops/?page={page_number}"
            print(f"Scraping: {url}")
            await page.goto(url)

            # Wait for products to load
            await page.wait_for_selector(".name", timeout=60000) 

            # Extract product names
            product_names = await page.locator(".name").all_inner_texts()

            # Extract prices
            product_prices = await page.locator(".prc").all_inner_texts()

            # Extract ratings
            product_rating = await page.locator(".rev").all_inner_texts()

            # Extract image URLs
            product_images = await page.locator(".img").evaluate_all("elements => elements.map(e => e.getAttribute('data-src'))")

            # Ensure all lists have the same length
            aligned_data = list(zip_longest(product_names, product_prices, product_rating, product_images, fillvalue="N/A"))

            # Add products from this page to the list
            all_products.extend(aligned_data)

        # Save to CSV
        df = pd.DataFrame(all_products, columns=["Product Name", "Price", "Rating", "Image URL"])
        df.to_csv("jumia_products.csv", index=False)
        print("Data saved to jumia_products.csv")

        await browser.close()

# Run the scraper
await scrape_jumia()

Scraping: https://www.jumia.com.ng/gaming-laptops/?page=1
Scraping: https://www.jumia.com.ng/gaming-laptops/?page=2
Scraping: https://www.jumia.com.ng/gaming-laptops/?page=3
Scraping: https://www.jumia.com.ng/gaming-laptops/?page=4
Data saved to jumia_products.csv


In [31]:
product = pd.read_csv("jumia_products.csv")
product.head(20)

Unnamed: 0,Product Name,Price,Rating,Image URL
0,Lenovo Legion Pro 5 16IRX9 Gaming,"₦ 4,000,000",4.4 out of 5\n(9),https://ng.jumia.is/unsafe/fit-in/300x300/filt...
1,"Hp 15 VICTUS GAMING, 12TH GEN INTEL CORE I5, 1...","₦ 1,340,000",3.8 out of 5\n(5),https://ng.jumia.is/unsafe/fit-in/300x300/filt...
2,Anti Blue Ray Light Blocking Glasses For Compu...,"₦ 4,199",5 out of 5\n(2),https://ng.jumia.is/unsafe/fit-in/300x300/filt...
3,"Lenovo Legion 7i - 16.0"" WQXGA Display - 32GB ...","₦ 4,000,000",3.3 out of 5\n(4),https://ng.jumia.is/unsafe/fit-in/300x300/filt...
4,Hp OMEN TRANSCEND 14 14TH GEN INTEL CORE ULTRA...,"₦ 3,999,999",5 out of 5\n(1),https://ng.jumia.is/unsafe/fit-in/300x300/filt...
5,Hp Omen 16 Gaming 13th Gen Intel Core I7 16GB ...,"₦ 2,500,000",5 out of 5\n(1),https://ng.jumia.is/unsafe/fit-in/300x300/filt...
6,Hp TUFF DASH GAMING 15 AMD RYZEN 7-7435HS 16GB...,"₦ 1,840,000",4.3 out of 5\n(7),https://ng.jumia.is/unsafe/fit-in/300x300/filt...
7,"DELL Alienware M16 R2 - 16.0"" QHD IPS Display ...","₦ 3,899,000",5 out of 5\n(2),https://ng.jumia.is/unsafe/fit-in/300x300/filt...
8,Asus VIVOBOOK PRO 16X GAMING INTEL CORE I7 16G...,"₦ 1,650,000",,https://ng.jumia.is/unsafe/fit-in/300x300/filt...
9,Alienware x16 GAMING 13TH GEN INTEL CORE I7 UP...,"₦ 4,000,000",,https://ng.jumia.is/unsafe/fit-in/300x300/filt...
