<a href="https://colab.research.google.com/github/Rayyan-Portfolio/Gen_Ai/blob/main/Scraping_Data_from_pakwheels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from bs4 import BeautifulSoup
import json
import re
import time
import csv

# Entry page of the PakWheels used-car section; listing links are collected from it.
base_url = "https://www.pakwheels.com/used-cars/"

# Request headers sent with every HTTP call so the request looks like it
# comes from a desktop browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
}

# Function to extract car listing links
def get_car_links():
    """Collect the unique car-listing URLs linked from ``base_url``.

    An anchor counts as a listing when its ``href`` contains both
    ``/used-cars/`` and ``-for-sale-in-``.

    Returns:
        A sorted list of absolute listing URLs (sorted so repeated runs
        visit listings in a stable order).

    Raises:
        requests.RequestException: if the page cannot be fetched, times
            out, or answers with an HTTP error status.
    """
    from urllib.parse import urljoin

    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(base_url, headers=headers, timeout=30)
    # Fail loudly instead of parsing an error page into an empty link list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    car_links = set()  # a set de-duplicates links repeated on the page

    for link in soup.find_all("a", href=True):
        href = link["href"]
        if "/used-cars/" in href and "-for-sale-in-" in href:
            # urljoin resolves root-relative hrefs against the site and
            # leaves already-absolute hrefs untouched (no doubled domain).
            car_links.add(urljoin(base_url, href))

    return sorted(car_links)

# Function to extract car details from a listing
def _nested_field(value, key, default="N/A"):
    """Return ``value[key]`` for a dict, the text itself for a non-empty string, else ``default``."""
    if isinstance(value, dict):
        return value.get(key, default)
    if isinstance(value, str) and value:
        return value
    return default


def _extract_price(description):
    """Return ``(price_lacs, price_pkr)`` strings parsed from ``description``, or ``("N/A", "N/A")``."""
    from decimal import Decimal

    # Accept "PKR 13.0 lacs" as well as a crore-denominated amount
    # (assumption: some descriptions may quote crore; 1 crore = 100 lacs).
    match = re.search(
        r"PKR\s*(\d+(?:\.\d+)?)\s*(lacs?|crores?)", description, re.IGNORECASE
    )
    if not match:
        return "N/A", "N/A"

    number = match.group(1)
    amount_lacs = Decimal(number)
    if match.group(2).lower().startswith("crore"):
        amount_lacs *= 100
        price_lacs = format(amount_lacs.normalize(), "f")
    else:
        price_lacs = number

    # Decimal arithmetic avoids binary-float error before int() truncates.
    return price_lacs, str(int(amount_lacs * 100000))


def _extract_location(*texts):
    """Return the city that follows "for sale in" in the first matching text, else ``"N/A"``."""
    for text in texts:
        if not isinstance(text, str):
            continue
        # \s+ tolerates the irregular spacing seen in real descriptions
        # ("for sale  in"); the city ends at " for PKR" or at end of text.
        match = re.search(
            r"for\s+sale\s+in\s+(.+?)(?:\s+for\s+PKR\b|$)", text, re.IGNORECASE
        )
        if match:
            return match.group(1).strip()
    return "N/A"


def scrape_car_details(car_url):
    """Fetch one listing page and extract car details from its JSON-LD block.

    Args:
        car_url: Absolute URL of the listing page.

    Returns:
        A dict keyed by the CSV column names ("Brand", "Model", ...,
        "Listing URL"). Fields whose value is unavailable ("N/A") are
        omitted; the three price fields are kept as empty strings when no
        price could be parsed. Returns ``None`` when the page cannot be
        fetched or carries no usable ``application/ld+json`` data.
    """
    try:
        response = requests.get(car_url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        # One unreachable listing must not abort the whole crawl.
        print(f"Request failed for {car_url}: {exc}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    script_tag = soup.find("script", {"type": "application/ld+json"})
    if not script_tag:
        print(f"No structured JSON found for {car_url}")
        return None

    # .string is None when the tag has more than one child node; fall back
    # to the concatenated text so json.loads never receives None.
    raw_json = script_tag.string or script_tag.get_text()
    try:
        car_data = json.loads(raw_json)
    except json.JSONDecodeError:
        print(f"Error parsing JSON for {car_url}")
        return None

    # JSON-LD may be a list of objects; use the first object in it.
    if isinstance(car_data, list):
        car_data = next((item for item in car_data if isinstance(item, dict)), None)
    if not isinstance(car_data, dict):
        print(f"No structured JSON found for {car_url}")
        return None

    description = car_data.get("description", "N/A")
    if not isinstance(description, str):
        description = "N/A"

    price_lacs, price_pkr = _extract_price(description)
    has_price = price_lacs != "N/A"

    car_details = {
        "Brand": _nested_field(car_data.get("brand"), "name"),
        "Model": car_data.get("name", "N/A"),
        "Year": car_data.get("modelDate", "N/A"),
        "Fuel Type": car_data.get("fuelType", "N/A"),
        "Transmission": car_data.get("vehicleTransmission", "N/A"),
        "Engine Capacity": _nested_field(
            car_data.get("vehicleEngine"), "engineDisplacement"
        ),
        "Mileage": car_data.get("mileageFromOdometer", "N/A"),
        "Condition": car_data.get("itemCondition", "N/A"),
        "Price (Lacs)": price_lacs if has_price else "",
        "Price (PKR)": price_pkr if has_price else "",
        "Currency": "PKR" if has_price else "",
        # Fall back to the listing name, which also ends in "for sale in <city>".
        "Location": _extract_location(description, car_data.get("name")),
        "Image URL": car_data.get("image", "N/A"),
        "Listing URL": (
            car_data["offers"].get("url", car_url)
            if isinstance(car_data.get("offers"), dict)
            else car_url
        ),
    }

    # Remove N/A fields
    car_details = {k: v for k, v in car_details.items() if v != "N/A"}

    print("\nScraped Car Details:")
    for key, value in car_details.items():
        print(f"{key}: {value}")

    return car_details

# Discover every listing linked from the landing page.
car_links = get_car_links()

print(f"Total unique car listings found: {len(car_links)}")

# Output file and its column order.
csv_filename = "scraped_cars.csv"
csv_headers = [
    "Brand",
    "Model",
    "Year",
    "Fuel Type",
    "Transmission",
    "Engine Capacity",
    "Mileage",
    "Condition",
    "Price (Lacs)",
    "Price (PKR)",
    "Currency",
    "Location",
    "Image URL",
    "Listing URL",
]

# Rows are written as each listing is scraped.
with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=csv_headers)
    writer.writeheader()

    # Visit the listings one by one.
    for car_url in car_links:
        print(f"\nScraping: {car_url}")
        car_info = scrape_car_details(car_url)

        # Skip listings that produced no data (None or an empty dict).
        if car_info:
            writer.writerow(car_info)

        # Pause between requests, whether or not the listing yielded a row.
        time.sleep(2)

print(f"\n✅ Data saved to {csv_filename}")


Total unique car listings found: 24

Scraping: https://www.pakwheels.com/used-cars/united-bravo-2021-for-sale-in-rawalpindi-9784405

Scraped Car Details:
Model: United Bravo Base Grade  2021 for sale in Rawalpindi
Price (Lacs): 13.0
Price (PKR): 1300000
Currency: PKR
Location: United Bravo Base Grade  2021  Used for sale  in Rawalpindi for PKR 13.0 lacs <span></span>. Buy this 800 cc, White 35165 KM Driven, Manual Car. Contact Seller Now!
Listing URL: https://www.pakwheels.com/used-cars/united-bravo-2021-for-sale-in-rawalpindi-9784405

Scraping: https://www.pakwheels.com/used-cars/toyota-raize-2020-for-sale-in-gujranwala-9803834

Scraped Car Details:
Model: Toyota Raize XS 2020 for sale in Gujranwala
Price (Lacs): 49.0
Price (PKR): 4900000
Currency: PKR
Location: Toyota Raize XS 2020  Used for sale  in Gujranwala for PKR 49.0 lacs <span></span>. Buy this 1000 cc, Silver 98000 KM Driven, Automatic Car. Contact Seller Now!
Listing URL: https://www.pakwheels.com/used-cars/toyota-raize-202