In [10]:
import os
import re
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [11]:
# Stock-list URL template; the "{}" placeholder is filled with the page number
# via BASE_URL.format(page).
BASE_URL = "https://www.beforward.jp/stocklist?page={}"
# HTTP headers sent with every listing-page request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0)"
}

# Multiplier applied to USD prices to derive the UGX price columns.
# NOTE(review): hard-coded value — presumably an approximate exchange rate;
# confirm it is current before relying on the UGX figures.
USD_TO_UGX = 3600

In [12]:
def clean_price(value):
    """Convert a scraped price such as ``"$9,800"`` into a whole-number amount.

    Args:
        value: A string, a number, or a list/tuple of such values. For a
            list/tuple only the first item is used.

    Returns:
        The price as an ``int`` (any fractional part is truncated), or ``None``
        when the value is missing, empty, or contains no digits.
    """
    # Unwrap sequences *before* the missing-value check: pd.isna() on a list
    # returns an array, and using that array in a boolean context raises for
    # multi-item lists.
    if isinstance(value, (list, tuple)):
        if len(value) == 0:
            return None
        value = value[0]

    if value is None or pd.isna(value):
        return None

    # Drop currency symbols, thousands separators, and all whitespace.
    text = re.sub(r"[$,\s]", "", str(value))

    # Take the first number and keep only its integer part. Stripping every
    # non-digit instead would fold decimals into the amount
    # (e.g. 3973.0 -> 39730, "$9,800.50" -> 980050).
    match = re.search(r"\d+(?:\.\d+)?", text)
    if match is None:
        return None

    return int(match.group().split(".")[0])


In [13]:

def extract_year(title):
    """Find a standalone 4-digit year (19xx or 20xx) in a title like '2025 SUZUKI EVERY'.

    Args:
        title: The listing title. Non-string values (e.g. ``None``) are
            treated as having no year.

    Returns:
        The first matching year as an ``int``, or ``None`` if there is none.
    """
    if not isinstance(title, str):
        return None

    # The lookarounds reject 4-digit runs embedded in longer numbers, so a
    # reference like "120195" is not misread as the year 2019.
    m = re.search(r"(?<!\d)(?:19|20)\d{2}(?!\d)", title)
    return int(m.group()) if m else None

In [62]:
def _text_or_none(tag):
    """Return the stripped text of a BeautifulSoup tag, or None if the tag is missing."""
    return tag.text.strip() if tag else None


def parse_row(row):
    """Turn one stock-list table row into a flat dict describing the vehicle.

    Args:
        row: A BeautifulSoup tag for a listing row; it is expected to contain
            at least three ``<td>`` cells and a ``td.price-col`` cell.

    Returns:
        A dict with the keys ``ref_no``, ``title``, ``make``, ``model``,
        ``year``, ``mileage``, ``engine_cc``, ``transmission``, ``fuel``,
        ``seats``, ``location``, ``price_usd``, ``total_price_usd``,
        ``price_ugx``, ``total_price_ugx`` and ``url``; or ``None`` when the
        row has fewer than three cells or no vehicle price element.
    """
    cols = row.find_all("td")
    if len(cols) < 3:
        return None

    # --- COLUMN 1: Photo + URL + Ref ---
    first = cols[0]

    link_tag = first.find("a", class_="vehicle-url-link")
    url = link_tag["href"] if link_tag else None

    ref_tag = first.select_one("p.veh-stock-no span")
    ref_no = ref_tag.text.strip().replace("Ref No. ", "") if ref_tag else None

    # --- COLUMN 2: Description Column ---
    second = cols[1]

    # Title (includes year + make + model)
    title = _text_or_none(second.select_one("p.make-model a"))

    # Specs
    mileage = _text_or_none(second.select_one("td.mileage p.val"))

    # NOTE: the spec cell yields a string (e.g. "2006/12"), while the title
    # fallback yields an int, so the "year" value is not uniformly typed.
    year = _text_or_none(second.select_one("td.year p.val"))
    if year is None:
        year = extract_year(title or "")

    engine_cc = _text_or_none(second.select_one("td.engine p.val"))
    transmission = _text_or_none(second.select_one("td.trans p.val"))

    # NOTE(review): the sample output shows the literal text "Location" for
    # every row, so this selector may be matching a label rather than the
    # actual value — confirm against the page markup.
    location = _text_or_none(second.select_one("td.location span"))

    # Extra specs (Fuel, Seats): each label cell is followed by its value cell.
    # Pairing every cell with its successor means a label in the final cell
    # (no value after it) is skipped instead of raising IndexError.
    fuel = seats = None
    detailed_rows = second.select("table.table-detailed-spec td")
    for label_cell, value_cell in zip(detailed_rows, detailed_rows[1:]):
        label = label_cell.text.strip().lower()
        if label == "fuel":
            fuel = value_cell.text.strip()
        if label == "seats":
            seats = value_cell.text.strip()

    # --- COLUMN 3: Price Column ---
    price_td = row.select_one("td.price-col")
    if price_td is None:
        return None

    price_el = price_td.select_one("p.vehicle-price span.price")
    if price_el is None:
        # Rows without a vehicle price are skipped entirely.
        return None
    price_usd = clean_price(price_el.get_text(strip=True))

    # The total price is optional: a row that has a vehicle price but no
    # total-price span is kept, with the total left empty.
    total_price_el = price_td.select_one("p.total-price span:not(.currency-label)")
    if total_price_el is not None:
        total_price_usd = clean_price(total_price_el.get_text(strip=True))
    else:
        total_price_usd = None

    price_ugx = price_usd * USD_TO_UGX if price_usd else None
    total_price_ugx = total_price_usd * USD_TO_UGX if total_price_usd else None

    # The title is "<year> <make> <model> ...": make and model are the second
    # and third whitespace-separated tokens.
    make = model = None
    if title:
        parts = title.split()
        if len(parts) >= 2:
            make = parts[1]
        if len(parts) >= 3:
            model = parts[2]

    return {
        "ref_no": ref_no,
        "title": title,
        "make": make,
        "model": model,
        "year": year,
        "mileage": mileage,
        "engine_cc": engine_cc,
        "transmission": transmission,
        "fuel": fuel,
        "seats": seats,
        "location": location,
        "price_usd": price_usd,
        "total_price_usd": total_price_usd,
        "price_ugx": price_ugx,
        "total_price_ugx": total_price_ugx,
        "url": url
    }

In [63]:
# Scratch check: confirm that the CSS selector used for the price element
# matches a minimal hand-written snippet and yields the raw price text.
# (BeautifulSoup is re-imported here although the first cell already imports it.)
from bs4 import BeautifulSoup

html = """<p class="vehicle-price"><span class="price">$9,800</span></p>"""

soup = BeautifulSoup(html, "html.parser")
el = soup.select_one("p.vehicle-price span.price")
print(el.get_text(strip=True))


$9,800


In [64]:
def scrape_beforward(max_pages=20, delay=1.5, timeout=30):
    """Scrape the stock-list pages and collect one record per parsed row.

    Pages are fetched in order starting from 1. Scraping stops early when a
    request fails, a page returns a non-200 status, or a page has no listing
    rows; whatever was collected up to that point is still returned.

    Args:
        max_pages: Highest page number to request.
        delay: Seconds to wait between consecutive page requests.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A pandas DataFrame built from the dicts returned by ``parse_row``
        (rows for which ``parse_row`` returns a falsy value are skipped).
    """
    cars = []

    for page in range(1, max_pages + 1):
        page_url = BASE_URL.format(page)
        print(f"Scraping page {page} -> {page_url}")

        # Without a timeout a stalled connection would hang the run forever;
        # a network error stops the crawl but keeps the rows gathered so far.
        try:
            resp = requests.get(page_url, headers=HEADERS, timeout=timeout)
        except requests.RequestException as exc:
            print("Request failed:", exc)
            break

        if resp.status_code != 200:
            print("Failed:", resp.status_code)
            break

        soup = BeautifulSoup(resp.text, "html.parser")
        rows = soup.select("tr.stocklist-row")

        if not rows:
            print("No more rows found. Stopping.")
            break

        print(f"Found {len(rows)} cars on page {page}")

        for row in rows:
            car = parse_row(row)
            if car:
                cars.append(car)

        # Pause between pages only; there is nothing to wait for after the last one.
        if page < max_pages:
            time.sleep(delay)

    return pd.DataFrame(cars)

In [66]:
# Create the output directory up front: DataFrame.to_csv does not create
# missing parent directories, and failing only after the long scrape has
# finished would be wasteful.
os.makedirs("raw_data", exist_ok=True)

df_bef = scrape_beforward(max_pages=100)
df_bef.to_csv("raw_data/beforward_raw.csv", index=False)

df_bef.head()

Scraping page 1 -> https://www.beforward.jp/stocklist?page=1
Found 28 cars on page 1
Scraping page 2 -> https://www.beforward.jp/stocklist?page=2
Found 28 cars on page 2
Scraping page 3 -> https://www.beforward.jp/stocklist?page=3
Found 28 cars on page 3
Scraping page 4 -> https://www.beforward.jp/stocklist?page=4
Found 28 cars on page 4
Scraping page 5 -> https://www.beforward.jp/stocklist?page=5
Found 28 cars on page 5
Scraping page 6 -> https://www.beforward.jp/stocklist?page=6
Found 28 cars on page 6
Scraping page 7 -> https://www.beforward.jp/stocklist?page=7
Found 28 cars on page 7
Scraping page 8 -> https://www.beforward.jp/stocklist?page=8
Found 28 cars on page 8
Scraping page 9 -> https://www.beforward.jp/stocklist?page=9
Found 28 cars on page 9
Scraping page 10 -> https://www.beforward.jp/stocklist?page=10
Found 28 cars on page 10
Scraping page 11 -> https://www.beforward.jp/stocklist?page=11
Found 28 cars on page 11
Scraping page 12 -> https://www.beforward.jp/stocklist?page

Unnamed: 0,ref_no,title,make,model,year,mileage,engine_cc,transmission,fuel,seats,location,price_usd,total_price_usd,price_ugx,total_price_ugx,url
0,BY759022,2006 HONDA ACTY TRUCK,HONDA,ACTY,2006/12,"111,250 km",650cc,MT,Petrol,2,Location,2370,3973.0,8532000,14302800.0,/honda/acty-truck/by759022/id/11910020/
1,BY759024,2013 MITSUBISHI CANTER,MITSUBISHI,CANTER,2013/2,"312,401 km","2,990cc",Semi AT,Diesel,3,Location,3920,7747.0,14112000,27889200.0,/mitsubishi/canter/by759024/id/11910065/
2,CA462307,2014 TOYOTA HIACE VAN\n ...,TOYOTA,HIACE,2014/4,"161,352 km","2,980cc",AT,Diesel,3,Location,5960,8492.0,21456000,30571200.0,/toyota/hiace-van/ca462307/id/12565207/
3,CA740447,2011 TOYOTA WISH\n ...,TOYOTA,WISH,2011/9,"154,001 km","1,790cc",AT,Petrol,7,Location,2430,4544.0,8748000,16358400.0,/toyota/wish/ca740447/id/12831915/
4,CB026605,2017 HONDA FIT HYBRID\n ...,HONDA,FIT,2017/9,"111,072 km","1,490cc",AT,Hybrid(Petrol),5,Location,4790,6633.0,17244000,23878800.0,/honda/fit-hybrid/cb026605/id/13106276/
