In [4]:
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# --- 1. YOUR TASK: FILL IN THESE VALUES ---

# Full URL of a single ad (pick one that HAS an inspection report).
URL = "https://www.pakwheels.com/used-cars/toyota-corolla-2019-for-sale-in-karachi-10838003"

# Class names copied from the page's HTML,
# e.g. "price-value", "ad-description", "report-link-class".
PRICE_CLASS = "price-box"
# Unused placeholder: the description is located through the element with
# id="scroll_seller_comments" further down, not through a class name.
DESCRIPTION_CLASS = "YOUR_DESCRIPTION_CLASS_HERE"
# Space-separated class list of the report <a> tag.
REPORT_LINK_CLASS = "btn btn-light-blue fs16"

# Seconds to wait for the server. requests applies no timeout by default, so
# without this a stalled connection would block the cell indefinitely.
REQUEST_TIMEOUT_SECONDS = 15

# --- 2. THE SCRAPER CODE (No need to change) ---

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Reset on every run so that a later cell never picks up a stale link left
# over from a previous execution of this cell.
report_url = None

try:
    response = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()     # Raise for 4xx/5xx responses
    soup = BeautifulSoup(response.content, "html.parser")

    # --- 3. EXTRACTING THE DATA ---

    # Price: join the nested text fragments with a space; with no separator
    # the child strings are glued together (e.g. "42.5lacsFinancing ...").
    price_element = soup.find(class_=PRICE_CLASS)
    if price_element:
        print(f"Price: {price_element.get_text(' ', strip=True)}")
    else:
        print(f"Could not find price with class: {PRICE_CLASS}")

    print("Attempting to fetch description via Sibling Navigation...")

    # 1. Locate the seller-comments header by its ID.
    header_element = soup.find(id="scroll_seller_comments")

    if header_element:
        # 2. Take the first following sibling that is a <div>; any non-div
        #    siblings in between are skipped.
        description_div = header_element.find_next_sibling("div")

        if description_div:
            # One line per text fragment, each stripped of surrounding whitespace.
            clean_text = description_div.get_text(separator="\n", strip=True)
            print(f"SUCCESS! Description found:\n{clean_text}")
        else:
            print("Found the header, but there was no sibling div next to it.")
    else:
        print("Could not find the header with ID 'scroll_seller_comments'.")

    # Inspection report link.
    # A multi-class string passed to class_ only matches when the tag's class
    # attribute is exactly that string (same classes, same order).
    report_link_element = soup.find("a", class_=REPORT_LINK_CLASS)

    wanted_classes = set(REPORT_LINK_CLASS.split())
    if report_link_element is None and wanted_classes:
        # Fallback: accept any <a> that carries all the wanted classes,
        # regardless of their order or of additional classes on the tag.
        report_link_element = soup.find(
            lambda tag: tag.name == "a"
            and wanted_classes <= set(tag.get("class") or [])
        )

    if report_link_element and report_link_element.has_attr("href"):
        # urljoin leaves absolute links untouched and resolves root-relative,
        # page-relative and protocol-relative ("//host/...") links against
        # the ad URL.
        report_url = urljoin(URL, report_link_element["href"].strip())

        print(f"\nReport Link Found:\n{report_url}")
    else:
        print(f"\nCould not find report link with class: {REPORT_LINK_CLASS}")

except requests.exceptions.RequestException as e:
    print(f"Error fetching the URL: {e}")

Price: PKR 42.5lacsFinancing starts at PKR 105,535/MonthManaged by PakWheels
Attempting to fetch description via Sibling Navigation...
SUCCESS! Description found:
PakWheels inspected car
Inspection report attached
Number plates available
2nd Owner
Token Tax not Paid
Manufacture 2019
Registered 2019
Documents available
2 keys available
Mention PakWheels.com when calling Seller to get a good deal

Report Link Found:
https://www.pakwheels.com/carsure-reports/fc29c0979dd7ba6b0b7019856f397e35


In [5]:
import requests
from bs4 import BeautifulSoup
import re

# --- 1. FILL IN THE REPORT URL ---
# Reuse the report link produced by the ad-scraping cell. On a fresh kernel,
# or when that cell did not define the variable, the name does not exist, so
# fall back to None instead of failing with a NameError.
try:
    URL = report_url
except NameError:
    URL = None

# Seconds to wait for the server. requests applies no timeout by default, so
# without this a stalled connection would block the cell indefinitely.
REQUEST_TIMEOUT_SECONDS = 15

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Extracted score as a string (e.g. "8.0"); stays None when nothing is found.
overall_score = None

if not URL:
    print("No report URL available. Run the ad-scraping cell first and make sure it found a report link.")
else:
    try:
        print(f"Visiting report: {URL}...")
        response = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # --- SMART SEARCH: FIND THE SCORE PATTERN ---
        # Matches text such as "8.5 / 10" or "9.2/10":
        #   (\d+\.\d+)  the score itself, "number dot number"
        #   \s*/\s*     a slash with optional spaces around it
        #   10(?!\d)    "10" not followed by another digit, so that
        #               "85.5 / 100" is not mistaken for a score out of ten
        score_pattern = re.compile(r"(\d+\.\d+)\s*/\s*10(?!\d)")

        # First look for a single text node containing the whole pattern.
        # If the score and the "/ 10" sit in separate tags, no single node
        # matches, so fall back to the page text joined with spaces.
        score_node = soup.find(string=score_pattern)
        if score_node is not None:
            searchable_text = score_node
        else:
            searchable_text = soup.get_text(" ", strip=True)

        match = score_pattern.search(searchable_text)

        if match:
            overall_score = match.group(1)
            print(f"SUCCESS! Overall Score Found: {overall_score}")
        else:
            print("Could not find any text matching the pattern 'X.X / 10'")

            # Last resort: the class name below is a guess, not something
            # confirmed against the report page's markup.
            print("Attempting fallback search...")
            rating_box = soup.find(class_="rating-value")
            if rating_box:
                overall_score = rating_box.get_text(strip=True)
                print(f"Fallback Score Found: {overall_score}")
            else:
                print("Fallback search found no element with class 'rating-value'.")

    # Only network/HTTP failures are reported here; any other exception is a
    # programming error and should surface with its full traceback.
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")

Visiting report: https://www.pakwheels.com/carsure-reports/fc29c0979dd7ba6b0b7019856f397e35...
SUCCESS! Overall Score Found: 8.0
