Import Required Libraries

In [31]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

Fetch URL Response

In [32]:
# PakWheels new-cars listing page for BMW; the model pages are discovered from it.
url = "https://www.pakwheels.com/new-cars/bmw/"

# requests never times out by default, so an unresponsive server would hang
# the kernel indefinitely; bound the wait explicitly.
response = requests.get(url, timeout=30)
# Bare last expression so the notebook displays the HTTP status of the fetch.
response

<Response [200]>

Parse HTML and Find Car Data

In [33]:
# Build the parse tree for the listing page using Python's built-in HTML parser.
data = BeautifulSoup(markup=response.content, features="html.parser")
# Every element whose CSS class list contains "col-md-3".
cars = data.find_all(attrs={"class": "col-md-3"})

Extract Links for Cars

In [34]:
# Absolute URLs of the individual car pages linked from the listing.
links = []

# Only harvest links when the listing contains "col-md-3" elements
# (already collected in `cars`, so the query is not repeated here).
if cars:
    # href=True restricts the match to "show" elements that actually carry an
    # href attribute; a missing href would otherwise surface as None and make
    # the URL construction fail.
    for tag in data.find_all(class_="show", href=True):
        # urljoin resolves site-relative hrefs against the listing URL and
        # leaves already-absolute hrefs intact.
        links.append(urljoin(url, tag["href"]))

print(links)

['https://www.pakwheels.com/new-cars/bmw/x7/', 'https://www.pakwheels.com/new-cars/bmw/i4/', 'https://www.pakwheels.com/new-cars/bmw/x1-series/', 'https://www.pakwheels.com/new-cars/bmw/2-series/', 'https://www.pakwheels.com/new-cars/bmw/ix3/', 'https://www.pakwheels.com/new-cars/bmw/ix/', 'https://www.pakwheels.com/new-cars/bmw/5-series/', 'https://www.pakwheels.com/new-cars/bmw/x3-series/', 'https://www.pakwheels.com/new-cars/bmw/7-series/', 'https://www.pakwheels.com/new-cars/bmw/x5-series/', 'https://www.pakwheels.com/new-cars/bmw/x2/', 'https://www.pakwheels.com/new-cars/bmw/i5/', 'https://www.pakwheels.com/new-cars/bmw/1-series/', 'https://www.pakwheels.com/new-cars/bmw/6-series/', 'https://www.pakwheels.com/new-cars/bmw/x6-series/', 'https://www.pakwheels.com/new-cars/bmw/z3/', 'https://www.pakwheels.com/new-cars/bmw/z4/', 'https://www.pakwheels.com/new-cars/bmw/8-series/', 'https://www.pakwheels.com/new-cars/bmw/z8/']


Fetch the Web Page, Extract the Title, Extract the Table Data

In [40]:
# Long-format records: one {"Title", "Key", "Value"} dict per table row scraped.
data_list = []

for link in links:
    # Bound the wait so one unresponsive page cannot stall the whole run.
    response = requests.get(link, timeout=30)
    car_data = BeautifulSoup(response.content, "html.parser")

    # Use the page's first <h1> as the title, with a placeholder when absent.
    title = car_data.find("h1")
    title_text = title.text.strip() if title else "No Title"
    print(f"Title extracted: {title_text}")

    # Locate the details table by its CSS classes.
    table = car_data.find("table", class_="table bike-version-detailscont")
    if table:
        for row in table.find_all("tr"):
            table_data = row.find_all("td")
            # Rows with fewer than two cells carry no key/value pair; skip them.
            if len(table_data) < 2:
                continue
            key = table_data[0].text.strip()    # first column: attribute name
            value = table_data[1].text.strip()  # second column: attribute value
            data_list.append({"Title": title_text, "Key": key, "Value": value})
            # Log only rows that were actually stored. Logging outside this
            # guard would reference `key`/`value` before assignment on a short
            # first row and re-print stale values on later short rows.
            print(f"Added data: {key} -> {value}")
    else:
        # If no table is found, record a placeholder so the page is still listed.
        data_list.append({"Title": title_text, "Key": "No Data", "Value": "No Data"})
        print("No table data found for this link")

    

Title extracted: BMW X7 2025 Price in Pakistan, Images, Reviews & Specs
Added data: Price -> 2.9 crore
Added data: Body Type -> Crossover
Added data: Dimensions (Length x Width x Height) -> 5171 x 1999 x 1834 mm
Added data: Ground Clearance -> 221 mm
Added data: Displacement -> 2998 cc
Added data: Transmission -> Automatic
Added data: Horse Power -> 335 hp
Added data: Torque -> 447 Nm
Added data: Boot Space -> 300 L
Added data: Kerb Weight -> 2490 KG
Added data: Fuel Type -> Petrol
Added data: Mileage -> 10 - 12 KM/L
Added data: Fuel Tank Capacity -> 83 L
Added data: Seating Capacity -> 7 - Persons
Added data: Top Speed -> 250 KM/H
Added data: Tyre Size -> 285/45/R21
Title extracted: BMW i4 2025 Price in Pakistan, Pictures & Reviews
Added data: Price -> 3.0 - 4.2 crore
Added data: Body Type -> Sedan
Added data: Dimensions (Length x Width x Height) -> 4783 x 1852 x 1448 mm
Added data: Ground Clearance -> 125 mm
Added data: Displacement -> -
Added data: Transmission -> Automatic
Added da

Convert Data to a DataFrame

In [41]:
# Convert the list of dictionaries into a pandas DataFrame.
# Naming the columns keeps the Title/Key/Value schema stable even when
# nothing was scraped and data_list is empty.
df = pd.DataFrame(data_list, columns=["Title", "Key", "Value"])

print("Data converted to DataFrame!")
df.head()  # Display the first few rows of the DataFrame for verification

Data converted to DataFrame!


Unnamed: 0,Title,Key,Value
0,"BMW X7 2025 Price in Pakistan, Images, Reviews...",Price,2.9 crore
1,"BMW X7 2025 Price in Pakistan, Images, Reviews...",Body Type,Crossover
2,"BMW X7 2025 Price in Pakistan, Images, Reviews...",Dimensions (Length x Width x Height),5171 x 1999 x 1834 mm
3,"BMW X7 2025 Price in Pakistan, Images, Reviews...",Ground Clearance,221 mm
4,"BMW X7 2025 Price in Pakistan, Images, Reviews...",Displacement,2998 cc


Save Data to Excel

In [43]:
# Write the DataFrame to an Excel workbook in the current working directory.
output_file = "web_scraped_data.xlsx"
# index=False keeps the DataFrame's row index out of the sheet.
df.to_excel(excel_writer=output_file, index=False)

print(f"Data has been saved to {output_file}")

Data has been saved to web_scraped_data.xlsx


In [44]:
# Wide-format scrape: maps a row label (the page title) to that page's
# {attribute name: attribute value} dictionary.
data_dict = {}

for link in links:
    # Bound the wait so one unresponsive page cannot stall the whole run.
    response = requests.get(link, timeout=30)
    car_data = BeautifulSoup(response.content, "html.parser")

    # Extract title (first <h1>), with a placeholder when the page has none.
    title = car_data.find("h1")
    title_text = title.text.strip() if title else "No Title"

    # Attribute/value pairs for this page.
    car_details = {}

    # Extract table data
    table = car_data.find("table", class_="table bike-version-detailscont")
    if table:
        for row in table.find_all("tr"):
            table_data = row.find_all("td")
            if len(table_data) >= 2:  # Ensure there are at least two columns
                key = table_data[0].text.strip()
                value = table_data[1].text.strip()
                car_details[key] = value

    # Titles are not guaranteed to be unique (e.g. several pages falling back
    # to "No Title"). Disambiguate a repeated title with its link so an earlier
    # page's details are not silently overwritten.
    row_label = title_text if title_text not in data_dict else f"{title_text} ({link})"
    data_dict[row_label] = car_details

# Convert the dictionary into a DataFrame: one row per page, one column per
# attribute, with the row label exposed as a "Title" column.
df = (
    pd.DataFrame.from_dict(data_dict, orient="index")
    .reset_index()
    .rename(columns={"index": "Title"})
)

# Save the DataFrame to an Excel file
output_file = "web_scraped_data_restructured.xlsx"
df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")

Data has been saved to web_scraped_data_restructured.xlsx
