In [62]:
import time
from pathlib import Path

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

In [63]:
# Number of search-result pages we want to scrape
PAGES = 2
# The same browser-like user agent is sent with every request, so build it once
HEADERS = {'user-agent' : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36"}

# Plain Python list: appending is O(1), whereas np.append copies the whole array on every call
LINKS = []

# Iterate over the result pages and collect the href of every listing anchor
for PAGE in range(1, PAGES + 1):
    TARGET_URL = f'https://www.avito.ma/fr/maroc/voitures?o={PAGE}'
    try:
        # The timeout keeps a stalled connection from hanging the notebook indefinitely
        RESPONSE = requests.get(TARGET_URL, headers=HEADERS, timeout=10)
        RESPONSE.raise_for_status()
    except requests.RequestException as exc:
        # Report and move on so one bad page does not discard the links already collected
        print(f"result page {PAGE} skipped: {exc}")
        continue
    PAGE_CONTENT = bs(RESPONSE.content, "html.parser")
    LISTINGS = PAGE_CONTENT.find_all("a", class_="sc-1jge648-0")
    for LISTING in LISTINGS:
        LINK = LISTING.get("href")
        # Anchors without an href are skipped: a placeholder string is not a
        # fetchable URL and would only fail when requested later
        if LINK:
            LINKS.append(LINK)

In [64]:
# Drop placeholder entries and duplicate links.
# dtype is pinned so an empty LINKS still yields an object Series instead of an inferred dtype.
link_sr = pd.Series(LINKS, dtype="object")
# "N/A" is a placeholder for a missing href, not a URL; requesting it would fail
link_sr = link_sr[link_sr != "N/A"]
link_sr = link_sr.drop_duplicates().reset_index(drop=True)
# Array of unique links, in first-seen order
links = np.array(link_sr)

In [68]:
listings = []
headers = {"user-agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36")}
# Seconds to pause after every request so we don't get banned
REQUEST_DELAY_S = 1.5
# Tags at positions 1..6 map to these columns, in order
# (position 0 holds the combined "category, contrat" text)
TAG_FIELDS = ("annee", "transmission", "carburant", "kilometrage", "marque", "modele")

# Iterate over the links we collected; page_num is 1-based for the progress log
for page_num, link in enumerate(links, start=1):
    start = time.perf_counter()
    try:
        response = requests.get(link, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as exc:
        # A timeout, connection error, HTTP error status or malformed URL skips
        # this listing only; the rows scraped so far are kept and the loop goes on
        print(f"page {page_num} skipped ({link}): {exc}")
        time.sleep(REQUEST_DELAY_S)
        continue
    page = bs(response.content, "html.parser")

    # Link
    listing = {"lien": link}

    # Listing title
    title = page.find("h1", class_="sc-16573058-5 izVEJU")
    listing["titre annonce"] = title.text.strip() if title else None

    # Price
    price_elem = page.find("div", class_="sc-16573058-10 kRLGQQ")
    if price_elem:
        price = price_elem.text.strip()
        # Remove price-trend badges that share the element with the amount
        for phrase in ["Prix en hausse", "Prix à la baisse"]:
            price = price.replace(phrase, "")
        listing["prix"] = price.strip()
    else:
        listing["prix"] = None

    # Address & date: first span is "quartier, ville", second span is the date
    spans = page.find_all("span", class_="sc-16573058-17 gLkxLA")
    if len(spans) >= 2:
        address = spans[0].text.strip().split(",")
        listing["ville"] = address[1].strip() if len(address) > 1 else None
        listing["quartier"] = address[0].strip()
        listing["date"] = spans[1].text.strip()
    else:
        listing["ville"] = None
        listing["quartier"] = None
        listing["date"] = None

    # Owner
    owner = page.find("p", class_="sc-1x0vz2r-0 fUTtTl sc-1l0do2b-9 bJuYLD")
    listing["proprietere"] = owner.text.strip() if owner else None

    # Tags
    tags = [t.text.strip() for t in page.find_all("span", class_="sc-1x0vz2r-0 fjZBup")]

    if tags:
        # str.split always returns at least one element, so cat_parts[0] is safe
        cat_parts = tags[0].split(",")
        listing["category"] = cat_parts[0].strip()
        listing["contrat"] = cat_parts[1].strip() if len(cat_parts) > 1 else None
    else:
        listing["category"] = listing["contrat"] = None

    # Remaining tags are positional; any position the page does not provide is None
    for position, field in enumerate(TAG_FIELDS, start=1):
        listing[field] = tags[position] if len(tags) > position else None

    # Images
    listing["images"] = [
        img.get("src")
        for img in page.find_all("img", class_="sc-1gjavk-0 fpXQoT")
        if img.get("src")
    ]

    # Append listing to the listings list
    listings.append(listing)

    # Log the time spent, then wait a bit so we don't get banned
    elapsed = time.perf_counter() - start
    print(f"page {page_num} scraped in {elapsed:.2f}s")
    time.sleep(REQUEST_DELAY_S)

page 1 scraped in 0.93s
page 2 scraped in 1.57s
page 3 scraped in 0.92s
page 4 scraped in 1.52s
page 5 scraped in 1.50s
page 6 scraped in 1.03s
page 7 scraped in 1.37s
page 8 scraped in 1.09s
page 9 scraped in 0.84s
page 10 scraped in 1.16s
page 11 scraped in 1.10s
page 12 scraped in 1.09s
page 13 scraped in 0.90s
page 14 scraped in 0.94s
page 15 scraped in 0.83s
page 16 scraped in 0.88s
page 17 scraped in 2.14s
page 18 scraped in 1.35s
page 19 scraped in 1.15s
page 20 scraped in 1.43s
page 21 scraped in 3.49s
page 22 scraped in 1.03s
page 23 scraped in 1.48s
page 24 scraped in 1.23s
page 25 scraped in 1.51s
page 26 scraped in 0.96s
page 27 scraped in 1.34s
page 28 scraped in 1.55s
page 29 scraped in 0.97s
page 30 scraped in 2.34s
page 31 scraped in 2.82s
page 32 scraped in 1.50s
page 33 scraped in 1.58s
page 34 scraped in 1.68s
page 35 scraped in 1.16s
page 36 scraped in 1.24s
page 37 scraped in 0.99s
page 38 scraped in 1.56s
page 39 scraped in 2.01s
page 40 scraped in 3.49s
page 41 s

In [69]:
# One row per scraped listing; columns follow the key order of the listing dicts
df = pd.DataFrame(listings)
# to_csv does not create missing folders, so make sure the target directory exists
# before writing (otherwise the save fails only after the whole scrape has run)
output_path = Path("../data/avito_listings.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path)
df.head(10)

Unnamed: 0,lien,titre annonce,prix,ville,quartier,date,proprietere,category,contrat,annee,transmission,carburant,kilometrage,marque,modele,images
0,https://www.avito.ma/fr/maarif/voitures_d_occa...,Mercedes-Benz Classe A Diesel Automatique 2017,,Casablanca,Maarif,il y a 3 heures,Fajrine Auto,Voitures d'occasion,à vendre,2017.0,Automatique,Diesel,120000.0,Mercedes-Benz,Classe A,[https://content.avito.ma/classifieds/images/1...
1,https://www.avito.ma/fr/hay_el_qods/voitures_d...,"Nouvelle Golf 8,5 importée neuve",,Oujda,Hay El Qods,il y a 2 heures,C ZAM AUTO oujda,Voitures d'occasion,à vendre,2026.0,Automatique,Diesel,0.0,Volkswagen,Golf 8,[https://content.avito.ma/classifieds/images/1...
2,https://www.avito.ma/fr/nouaceur/voitures_de_l...,location de voiture aéroport mohamed 5,199 DH,Nouaceur,Toute la ville,il y a 4 heures,PATH CAR,Voitures de location,à louer,,,,,,,[https://content.avito.ma/classifieds/images/1...
3,https://www.avito.ma/fr/hivernage/voitures_d_o...,Jeep Wrangler Hybride Automatique 2024 à Marra...,,Marrakech,Hivernage,il y a 11 minutes,SELECT GARANTIE,Voitures d'occasion,à vendre,2024.0,Automatique,Hybride,26000.0,Jeep,Wrangler,[https://content.avito.ma/classifieds/images/1...
4,https://www.avito.ma/fr/palmier/voitures_d_occ...,RENAULT AUSTRAL Hybride Automatique 2025,325 000 DH,Casablanca,Palmier,il y a 13 minutes,SIARACASH.MA,Voitures d'occasion,à vendre,2025.0,Automatique,Hybride,5001.0,Renault,Austral,[https://content.avito.ma/classifieds/images/1...
5,https://www.avito.ma/fr/salé/voitures_de_locat...,Location de voiture aéroport Rabat Salé,199 DH,Salé,Toute la ville,il y a 13 minutes,PATH CAR,Voitures de location,à louer,,,,,,,[https://content.avito.ma/classifieds/images/1...
6,https://www.avito.ma/fr/2_mars/voitures_d_occa...,Clio intense diesel,95 000 DH,Casablanca,2 Mars,il y a 13 minutes,LAVAGE AUTO SAAD,Voitures d'occasion,à vendre,2013.0,Manuelle,Diesel,250000.0,Renault,Clio,[https://content.avito.ma/classifieds/images/1...
7,https://www.avito.ma/fr/hay_el_fath/voitures_d...,Renault clio 4,,Rabat,Hay el Fath,il y a 13 minutes,Chaabi Auto,Voitures d'occasion,à vendre,2016.0,Manuelle,Diesel,146.0,Renault,Clio,[https://content.avito.ma/classifieds/images/1...
8,https://www.avito.ma/fr/hay_massira/voitures_d...,Renault Mégane 2019,158 000 DH,Marrakech,Hay Massira,il y a 14 minutes,auto Abderrahim,Voitures d'occasion,à vendre,2019.0,Manuelle,Diesel,110000.0,Renault,Megane,[https://content.avito.ma/classifieds/images/1...
9,https://www.avito.ma/fr/californie/voitures_d_...,Cadillac Escalade Essence Automatique 2018,,Casablanca,Californie,il y a 13 minutes,NT MOTORS LUXURY,Voitures d'occasion,à vendre,2018.0,Automatique,Essence,56000.0,Cadillac,Escalade,[https://content.avito.ma/classifieds/images/1...
