### Data Scraping

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the Jumia Kenya mobile-phones catalogue page by page, collect the
# name, prices, discount, rating, review count and on-page position of every
# product card, and write the result to "jumia_phones.csv".

# Landing URL for the mobile-phones catalogue
base_url = 'https://www.jumia.co.ke/mobile-phones/#catalog-listing'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8"
}

# Seconds to wait for a response; without a timeout a stalled connection
# would block the scraper forever.
request_timeout = 30

# Create an empty list to store the products
all_products = []

# Upper bound on the number of pages to request; the loop stops earlier as
# soon as a page fails to load or contains no product cards.
max_pages = 300


def _text_or_none(tag):
    """Return the stripped text of a parsed tag, or None when the tag is missing."""
    return tag.get_text(strip=True) if tag is not None else None


# Iterate through pages
for page in range(1, max_pages + 1):
    # The query string must come BEFORE the '#fragment'. Anything after '#'
    # is a client-side fragment that is never sent to the server, so
    # ".../#catalog-listing?page=N" would request the first page every time.
    next_url = f"https://www.jumia.co.ke/mobile-phones/?page={page}#catalog-listing"

    # Send a GET request; on a network error stop paging but keep what has
    # already been collected so it can still be saved below.
    try:
        response = requests.get(next_url, headers=headers, timeout=request_timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve page {page}: {exc}")
        break

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve page {page}: {response.status_code}")
        break  # Exit if a page cannot be found

    soup = BeautifulSoup(response.text, "html.parser")

    # Find all product containers
    products = soup.find_all("div", class_="info")

    # Check if products were found
    if not products:
        print(f"No products found on page {page}.")
        break  # Exit if no products are found

    # Extract product details (rank is 1-based within the page)
    for rank, product in enumerate(products, start=1):
        # Every field falls back to None when its tag is absent, so a single
        # malformed card cannot abort the whole run.
        product_info = {
            "Name": _text_or_none(product.find("h3", class_="name")),
            "Price": _text_or_none(product.find("div", class_="prc")),
            "Old Price": _text_or_none(product.find("div", class_="old")),
            "Discount": _text_or_none(product.find("div", class_="bdg _dsct _sm")),
            "Rating": _text_or_none(product.find("div", class_="stars _s")),
            "Number of Reviews": _text_or_none(product.find("div", class_="rev")),
            # Search ranking (based on order on page and page number)
            "Search Ranking": f"Page {page}, Rank {rank}",
        }

        # Append the product info to the list
        all_products.append(product_info)

# Create a DataFrame from the scraped data
df = pd.DataFrame(all_products)

# Save the DataFrame to a CSV file jumia phones
df.to_csv("jumia_phones.csv", index=False)

print("Data scraping complete. Total products scraped:", len(df))


Data scraping complete. Total products scraped: 12000
