In [1]:
#import libraries
import pandas as pd
import requests
import re

In [26]:
# Positional pattern for "<make> [<model>] [<year>]" at the start of a title.
# Each optional part carries its own leading whitespace, so a title that lacks
# a model or a year still yields the parts that are present.
_TITLE_PATTERN = re.compile(
    r"(?P<make>\w[\w\-]*)"                      # first token, hyphens allowed
    r"(?:\s+(?P<model>[A-Za-z0-9\-]+)(?!\S))?"  # second token, must be a whole token
    r"(?:\s+(?P<year>\d{4})(?!\d))?"            # third token, exactly four digits
)


def extract_make_model_year(title):
    """Split a listing title into its leading make, model and year tokens.

    The parse is purely positional: the first token is the make, the second
    (if it consists only of letters, digits and hyphens) is the model, and the
    third (if it starts with exactly four digits) is the year. Anything after
    that is ignored.

    Args:
        title: Raw listing title. Non-string values (e.g. ``None``) are
            accepted and treated as "nothing to parse".

    Returns:
        A ``(make, model, year)`` tuple of strings. Parts that are absent are
        ``None``; all three are ``None`` when the title has no leading word.
    """
    if not isinstance(title, str):
        return None, None, None
    match = _TITLE_PATTERN.match(title.strip())
    if match is None:
        return None, None, None
    return match.group("make"), match.group("model"), match.group("year")

In [27]:
def extract_condition(condition):
    """Normalise a free-text condition value to one of three labels.

    Args:
        condition: Raw condition text. May be ``None`` (or any non-string)
            when the listing carries no condition attribute.

    Returns:
        ``'local used'``, ``'foreign used'`` or ``'new'`` — the first of
        these found (case-insensitively) inside the text — or ``None`` when
        none is present or the input is not a string.
    """
    if not isinstance(condition, str):
        return None
    condition_lower = condition.lower()
    # Checked in this fixed order; the first label contained in the text wins.
    for label in ('local used', 'foreign used', 'new'):
        if label in condition_lower:
            return label
    return None

In [28]:
def extract_transmission(transmission):
    """Normalise a free-text transmission value to 'automatic' or 'manual'.

    Args:
        transmission: Raw transmission text. May be ``None`` (or any
            non-string) when the listing carries no transmission attribute.

    Returns:
        ``'automatic'`` or ``'manual'`` — the first of these found
        (case-insensitively) inside the text — or ``None`` when neither is
        present or the input is not a string.
    """
    if not isinstance(transmission, str):
        return None
    transmission_lower = transmission.lower()
    # 'automatic' is tested before 'manual', so it wins when both appear.
    for label in ('automatic', 'manual'):
        if label in transmission_lower:
            return label
    return None

In [29]:
def fetch_json_data(page, timeout=10):
    """Fetch one page of car adverts from the Jiji listing API.

    Args:
        page: Page number sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The list stored at ``adverts_list.adverts`` in the JSON response, or
        an empty list when the request fails, the body is not valid JSON, or
        the payload does not have the expected shape. Problems are reported
        with ``print`` rather than raised.
    """
    url = "https://jiji.ng/api_web/v1/listing"
    params = {
        'slug': 'cars',
        'page': page,
        'webp': True
    }
    headers = {
        "User-Agent": "Mozilla/5.0"
    }
    try:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"[page] {page}. Request error: {e}")
        return []

    # Decoded in its own try block: recent versions of requests raise a JSON
    # error that is also a RequestException, which would otherwise be
    # reported as a request error.
    try:
        data = response.json()
    except ValueError:
        print(f"[page] {page}. Failed to decode json")
        return []

    if not isinstance(data, dict):
        print(f"[page] {page}. Expected a JSON object but got {type(data)}")
        return []

    # A missing or null 'adverts_list' is treated as "no adverts on this page".
    adverts_list = data.get('adverts_list') or {}
    if not isinstance(adverts_list, dict):
        print(f"[page] {page}. Expected a JSON object but got {type(adverts_list)}")
        return []

    adverts = adverts_list.get("adverts", [])
    if not isinstance(adverts, list):
        print(f"[page] {page}. Expected a list but got {type(adverts)}")
        return []
    return adverts


    

In [30]:
#get attribute value
def get_attr_value(attrs, key_name):
    """Return the value of the first attribute whose name matches ``key_name``.

    Args:
        attrs: Iterable of attribute dicts with ``"name"`` and ``"value"``
            keys. ``None`` or an empty iterable means "no attributes";
            entries that are not dicts are skipped.
        key_name: Attribute name to look for, compared case-insensitively.

    Returns:
        The matching attribute's value converted to ``str`` and stripped of
        surrounding whitespace (``""`` when the attribute has no ``"value"``
        key), or ``None`` when no attribute matches or its value is null.
    """
    if not attrs:
        return None
    wanted = key_name.lower()
    for attr in attrs:
        if not isinstance(attr, dict):
            continue
        name = attr.get("name", "")
        if not isinstance(name, str) or name.lower() != wanted:
            continue
        value = attr.get("value", "")
        if value is None:
            return None
        # str() lets numeric values pass through instead of failing on .strip()
        return str(value).strip()
    return None
            

In [31]:
# helper: turn one raw advert into one CSV row
def _ad_to_record(ad):
    """Flatten a raw advert dict into a row dict, or return None to skip it.

    Adverts that are not dicts, or that have no (truthy) ``price_title``, are
    skipped.
    """
    if not isinstance(ad, dict):
        return None
    price = ad.get("price_title", "")
    if not price:
        return None

    # ``or`` also covers keys that are present but null in the payload.
    attrs = ad.get("attrs") or []
    title = ad.get("title") or ""
    condition_ = get_attr_value(attrs, "condition")
    transmission_ = get_attr_value(attrs, "transmission")
    make, model, year = extract_make_model_year(title)

    return {
        "title": title,
        # A missing attribute comes back as None; skip normalisation then.
        "condition": extract_condition(condition_) if condition_ else None,
        "transmission": extract_transmission(transmission_) if transmission_ else None,
        "make": make,
        "model": model,
        "year": year,
        "location": ad.get("region_name", ""),
        "price": price,
    }


# controller
def main(max_pages=100, output_path="data/jiji_car_evaluation.csv"):
    """Scrape listing pages and write the collected adverts to a CSV file.

    Args:
        max_pages: Number of pages to request, starting from page 1.
        output_path: Destination CSV path. Its parent directory is created
            if it does not exist, so a finished scrape is not lost to a
            missing folder.

    Pages that fail to load contribute no rows. Nothing is written when no
    advert with a price was collected.
    """
    # Imported here so the block does not depend on a new file-level import.
    from pathlib import Path

    all_ads = []
    for page in range(1, max_pages + 1):
        ads = fetch_json_data(page)
        print(f"page{page}:{len(ads)} found")

        for ad in ads:
            record = _ad_to_record(ad)
            if record is not None:
                all_ads.append(record)

    if not all_ads:
        print("No ads scraped")
        return

    output_file = Path(output_path)
    output_file.parent.mkdir(parents=True, exist_ok=True)
    pd.DataFrame(all_ads).to_csv(output_file, index=False)
    print("Jiji car evaluation scraped completed")

In [32]:
# Run the scraper only when this code is executed as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

page1:20 found
page2:20 found
page3:20 found
page4:20 found
page5:20 found
page6:20 found
page7:20 found
page8:20 found
page9:20 found
page10:20 found
page11:20 found
page12:20 found
page13:20 found
page14:20 found
page15:20 found
page16:20 found
page17:20 found
page18:20 found
page19:20 found
page20:20 found
page21:20 found
page22:20 found
page23:20 found
page24:20 found
page25:20 found
page26:20 found
page27:20 found
page28:20 found
page29:20 found
page30:20 found
page31:20 found
page32:20 found
page33:20 found
page34:20 found
page35:20 found
page36:20 found
page37:20 found
page38:20 found
page39:20 found
page40:20 found
page41:20 found
page42:20 found
page43:20 found
page44:20 found
page45:20 found
page46:20 found
page47:20 found
page48:20 found
page49:20 found
page50:20 found
page51:20 found
page52:20 found
page53:20 found
page54:20 found
page55:20 found
page56:20 found
page57:20 found
page58:20 found
page59:20 found
page60:20 found
page61:20 found
page62:20 found
page63:20 found
p