**Importing libraries and dependencies**

In [1]:
import csv
import datetime
import os
import smtplib
import time
from urllib.parse import urljoin

import pandas as pd
import psycopg2
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

**Connecting to the website and pulling in the data**

In [None]:
# Scrape the Jumia Kenya laptops category: for every listing page, follow each
# product link and record the product's name, current price, old price and
# discount together with the scrape date.
base_url = "https://www.jumia.co.ke"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Accept-Encoding": "gzip, deflate",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "DNT": "1",
    "Connection": "close",
    "Upgrade-Insecure-Requests": "1"
}

# Column names of the resulting DataFrame / CSV, in the same order as each row in all_data.
CSV_COLUMNS = ['Name', 'Current Price', 'Old Price', 'Discount', 'Date']
OUTPUT_CSV = 'JumiaLaptopScrape.csv'

all_data = []
today = datetime.date.today()


def _text_or_na(tag):
    """Return the stripped text of a parsed tag, or 'N/A' when the tag was not found."""
    return tag.text.strip() if tag is not None else 'N/A'


# Walk listing pages 1..50 of the laptops category.
for page in range(1, 51):
    category_url = f"{base_url}/mlp-laptops/?page={page}"
    print(f"\n📄 Scraping Page {page}...")

    try:
        response = requests.get(category_url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to load category page {page}: {e}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')
    products = soup.find_all('article', class_='c-prd')

    for i, product in enumerate(products):
        a_tag = product.find('a', class_='core')
        # Skip cards without a usable link; an anchor with no href would
        # otherwise raise a TypeError and abort the whole scrape.
        relative_link = a_tag.get('href') if a_tag is not None else None
        if not relative_link:
            continue

        # urljoin handles both site-relative and already-absolute hrefs.
        product_url = urljoin(base_url, relative_link)
        print(f"⏳ Fetching product {i+1} → {product_url}")

        try:
            product_response = requests.get(product_url, headers=headers, timeout=10)
            product_response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Failed to load product page: {e}")
            continue

        product_soup = BeautifulSoup(product_response.text, 'html.parser')

        name = _text_or_na(product_soup.find('h1'))
        price = _text_or_na(product_soup.find('span', class_='-b -ubpt -tal -fs24 -prxs'))
        old_price = _text_or_na(product_soup.find('span', class_='-tal -gy5 -lthr -fs16 -pvxs -ubpt'))
        discount = _text_or_na(product_soup.find('span', class_='bdg _dsct _dyn -mls'))

        all_data.append([name, price, old_price, discount, today])
        print(f" {name} | Price: {price} | Old: {old_price} | Discount: {discount}")
        # Pause between product requests to avoid hammering the site.
        time.sleep(1)

    # 📝 Saving after each page so partial progress survives an interruption.
    df = pd.DataFrame(all_data, columns=CSV_COLUMNS)
    df.to_csv(OUTPUT_CSV, index=False)

# Build the frame once more after the loop so `df` is defined for later cells
# even when every category request failed.
df = pd.DataFrame(all_data, columns=CSV_COLUMNS)






📄 Scraping Page 1...
⏳ Fetching product 1 → https://www.jumia.co.ke/thinkpad-refurbished-thinkpad-yoga-11e-x360-celeron-touchscreen-11.6-4gb-ram-ssd-128gb-lenovo-mpg1337401.html
✅ Lenovo Thinkpad Refurbished ThinkPad Yoga 11e X360 Celeron -Touchscreen- 11.6"- 4GB RAM - SSD 128GB | Price: KSh 9,971 | Old: KSh 17,000 | Discount: 41%
⏳ Fetching product 2 → https://www.jumia.co.ke/hp-refurbished-probook-640-g1-core-i5-8gb-ram-500gb-hdd-windows-10-6-months-wrty-306414529.html
✅ HP Refurbished ProBook 640 G1 Core I5, 8GB RAM, 500GB HDD, Windows 10, 6 Months WRTY | Price: KSh 14,906 | Old: KSh 26,700 | Discount: 44%
⏳ Fetching product 3 → https://www.jumia.co.ke/nec-laptop-new-versapro-x360-4-gb-ram-64-gb-ssd-11-windows-11-black-309343917.html
✅ Nec Laptop New VersaPro X360, 4 GB RAM, 64 GB SSD, 11" Windows 11 , Black | Price: KSh 12,499 | Old: KSh 16,999 | Discount: 26%
⏳ Fetching product 4 → https://www.jumia.co.ke/apple-macbook-pro-13-core-i5-2.4ghz-8gb-ram-500gb-hdd-2012-silver-refurbish

In [10]:
# Sanity check: dimensions of the scraped DataFrame as (rows, columns).
df.shape

(1931, 5)

**Loading the data to my PostgreSQL Database**

In [None]:
# Read DB_PASSWORD (and any other settings) from a local .env file into the environment.
load_dotenv()

# Load the scraped rows into PostgreSQL once scraping is done.
# conn/cursor start as None so the cleanup in `finally` is safe even when
# psycopg2.connect() itself fails (and never touches handles left over from
# an earlier run of this cell).
conn = None
cursor = None

if not all_data:
    # Nothing was scraped: do not truncate, otherwise the table would be emptied.
    print("No scraped rows to load. Table left unchanged.")
else:
    try:
        # Connecting to the PostgreSQL database
        conn = psycopg2.connect(
            host="localhost",
            database="jumia_db",
            user="postgres",
            password=os.getenv("DB_PASSWORD")
        )
        cursor = conn.cursor()

        # Truncating the table to overwrite existing data
        cursor.execute("TRUNCATE TABLE bronze.jumia_raw_laptops;")

        # Inserting all rows; each row in all_data is
        # [name, price, old_price, discount, scrape_date].
        insert_query = """
            INSERT INTO bronze.jumia_raw_laptops (
                product_name, new_price, old_price, discount, scraped_on
            ) VALUES (%s, %s, %s, %s, %s)
        """
        cursor.executemany(insert_query, all_data)

        # Truncate and inserts are committed together; if anything above fails,
        # the connection is closed without a commit (see psycopg2 docs on
        # close() discarding pending changes).
        conn.commit()
        print("Bulk insert complete. Table overwritten with new data.")

    except Exception as e:
        print(f" Error inserting data: {e}")

    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()


✅ Bulk insert complete. Table overwritten with new data.
