In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

In [17]:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

BASE_URL = "https://jiji.ug/cars?page={}"

In [18]:
def clean_price(text):
    text = text.replace("USh", "").replace(",", "").strip()
    digits = re.sub(r"\D", "", text)
    return int(digits) if digits else None

In [19]:
def extract_year(title):
    match = re.search(r"(19|20)\d{2}", title)
    return int(match.group()) if match else None

In [20]:
def parse_card(card):
    """Extract data from a single listing based on provided HTML."""
    
    # URL
    link_tag = card.find("a", class_="qa-advert-list-item")
    url = link_tag["href"] if link_tag else None
    if url and url.startswith("/"):
        url = "https://jiji.ug" + url

    # Title
    title_tag = card.select_one("div.b-advert-title-inner.qa-advert-title")
    title = title_tag.text.strip() if title_tag else None

    # Price
    price_tag = card.select_one("div.qa-advert-price")
    price = clean_price(price_tag.text) if price_tag else None

    # Location
    loc_tag = card.select_one("span.b-list-advert__region__text")
    location = loc_tag.text.strip() if loc_tag else None

    # Attributes (Condition, Transmission)
    attrs = card.select("div.b-list-advert-base__item-attr")
    condition, transmission = None, None

    for attr in attrs:
        text = attr.text.strip().lower()
        if "used" in text:
            condition = text
        elif text in ("automatic", "manual"):
            transmission = text

    # Make + Model
    make, model = None, None
    if title:
        parts = title.split()
        if len(parts) >= 2:
            make = parts[0]
            model = parts[1]

    # Year
    year = extract_year(title or "")

    return {
        "title": title,
        "make": make,
        "model": model,
        "year": year,
        "condition": condition,
        "transmission": transmission,
        "location": location,
        "price": price,
        "url": url
    }

In [21]:
def scrape_jiji(pages=5, delay=2):
    listings = []

    for p in range(1, pages + 1):
        url = BASE_URL.format(p)
        print("Scraping:", url)

        resp = requests.get(url, headers=headers)
        if resp.status_code != 200:
            print("Failed:", resp.status_code)
            continue

        soup = BeautifulSoup(resp.text, "html.parser")

        # This is the CORRECT selector you sent
        cards = soup.select("div.b-list-advert__gallery__item.js-advert-list-item")

        print("Found", len(cards), "cars on page", p)

        for card in cards:
            car = parse_card(card)
            listings.append(car)

        time.sleep(delay)

    return pd.DataFrame(listings)

In [22]:
df = scrape_jiji(pages=12) #Jiji has a max of 12 pages
df.to_csv("raw_data/jiji_raw.csv", index=False)
df.head()

Scraping: https://jiji.ug/cars?page=1
Found 24 cars on page 1
Scraping: https://jiji.ug/cars?page=2
Found 24 cars on page 2
Scraping: https://jiji.ug/cars?page=3
Found 24 cars on page 3
Scraping: https://jiji.ug/cars?page=4
Found 24 cars on page 4
Scraping: https://jiji.ug/cars?page=5
Found 24 cars on page 5
Scraping: https://jiji.ug/cars?page=6
Found 24 cars on page 6
Scraping: https://jiji.ug/cars?page=7
Found 24 cars on page 7
Scraping: https://jiji.ug/cars?page=8
Found 24 cars on page 8
Scraping: https://jiji.ug/cars?page=9
Found 24 cars on page 9
Scraping: https://jiji.ug/cars?page=10
Found 24 cars on page 10
Scraping: https://jiji.ug/cars?page=11
Found 24 cars on page 11
Scraping: https://jiji.ug/cars?page=12
Found 24 cars on page 12


Unnamed: 0,title,make,model,year,condition,transmission,location,price,url
0,Toyota Harrier 2.4 2011 Black,Toyota,Harrier,2011,foreign used,automatic,"Kampala, Nakawa",58750000,https://jiji.ug/nakawa/cars/toyota-harrier-2-4...
1,Toyota C-HR XLE Prermium FWD (2.0L 4cyl 2AM) 2...,Toyota,C-HR,2018,foreign used,automatic,"Kampala, Nakawa",77000000,https://jiji.ug/nakawa/cars/toyota-c-hr-xle-pr...
2,Toyota Esquire 2016 Black,Toyota,Esquire,2016,foreign used,automatic,"Kampala, Nakawa",59000000,https://jiji.ug/nakawa/cars/toyota-esquire-201...
3,New Toyota Land Cruiser Prado 2.7 2024 Black,New,Toyota,2024,,automatic,"Kampala, Nakawa",390000000,https://jiji.ug/nakawa/cars/new-toyota-land-cr...
4,Toyota Wish 2007 White,Toyota,Wish,2007,local used,automatic,"Kampala, Central Division",18000000,https://jiji.ug/central-division/cars/toyota-w...
