# Libraries import

In [116]:
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
import time
import random
import numpy as np
import re
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.cm as cm
import matplotlib.pyplot as plt

# Data Acquisition
Data Acquisition by Manufacturer and Merging
This code section focuses on acquiring data for different car manufacturers separately and then merging the data into a single file. The code follows a similar process for each manufacturer, such as BMW, Chevrolet, and more.
Manufacturer-Specific Data Acquisition: The code generates URLs for each manufacturer on the Autotrader website, specifying the location, search parameters, and desired number of records. It iterates through multiple pages and extracts the links to individual car pages for the specific manufacturer.
Data Storage: The extracted links for each manufacturer are stored in separate lists or arrays to keep track of the data.
Merging the Data: Once the data acquisition is complete for each manufacturer, the code merges the separate lists or arrays into a single consolidated dataset.
The second part of the code iterates through the links to the individual car pages.
It constructs the complete URL for each car page and sends a request to retrieve the HTML content.
The code extracts specific details such as the car name and price from the HTML using BeautifulSoup.
If additional details are available, they are also extracted and added to the data list.
If any information is missing or not found, 'N/A' is appended in its place to maintain a consistent data structure.
By executing this code, the data list will contain the extracted information for each car, allowing for further analysis, preprocessing, and modeling.

### BMW

In [2]:
# Collect the detail-page links for the BMW search results.
# The search is requested in pages of 100 records (offsets 0, 100, ..., 900);
# the first page is fetched without a `firstRecord` parameter, every later
# page passes its offset.
linksToPages = []

search_url = "https://www.autotrader.com/cars-for-sale/all-cars/Bmw/san-francisco-ca?searchRadius=75&zip=94102&marketExtension=include&isNewSearch=true&showAccelerateBanner=false&sortBy=relevance&numRecords=100"

for first_record in range(0, 1000, 100):
    url = search_url
    if first_record:
        url += f"&firstRecord={first_record}"

    # Without a timeout a stalled connection would block the crawl forever;
    # with one, requests raises instead of hanging.
    response = requests.get(url, timeout=30)
    soup = bs(response.text, 'html.parser')
    my_divs = soup.find_all('div', attrs={'class': "inventory-listing cursor-pointer panel panel-default"})

    for div in my_divs:
        # Take one link per listing card: the first anchor that actually has
        # an href (indexing an anchor without one would raise KeyError).
        anchor = div.find('a', href=True)
        if anchor is not None:
            linksToPages.append(anchor['href'])

    # Pause before requesting the next results page.
    time.sleep(2)

linksToPages

['/cars-for-sale/vehicledetails.xhtml?listingId=684315378&allListingType=all-cars&makeCodeList=BMW&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fbmw%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',
 '/cars-for-sale/vehicledetails.xhtml?listingId=679342003&allListingType=all-cars&makeCodeList=BMW&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fbmw%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',
 '/cars-for-sa

In [3]:

# Visit every collected BMW listing page and build one row per car:
# [name, price, detail, detail, ...]. Anything not found becomes 'N/A'.
data = []
for link in linksToPages:
    # Only site-relative links are followed; anything else is skipped.
    if not link.startswith('/'):
        continue

    url = 'https://www.autotrader.com' + link
    # Without a timeout a stalled listing page would block the crawl forever.
    res = requests.get(url, timeout=30)
    time.sleep(2)  # pause between listing requests
    soup = bs(res.content, 'html.parser')

    # Name: text of the first <h1> on the page.
    name = soup.find('h1')
    data_row = [name.text.strip() if name is not None else 'N/A']

    # Price: the 'first-price' span inside the pricing component.
    price = soup.find('div', attrs={'data-cmp': 'pricing'})
    price_value = None
    if price is not None:
        price_value = price.find('span', attrs={'class': 'first-price'})
    data_row.append(price_value.text.strip() if price_value is not None else 'N/A')

    # Remaining details: one entry per bordered list item, in page order.
    items = soup.find('ul', attrs={'class': 'list'})
    if items is not None:
        data_row.extend(
            item.text.strip()
            for item in items.find_all("li", attrs={'class': 'list-bordered'})
        )
    else:
        # No details list on the page: pad with six placeholders.
        data_row.extend(['N/A'] * 6)

    data.append(data_row)



# Save DataFrame as Excel file
#save_location = r"C:\Users\tal66\Desktop\פרויקט טל ושקד מדעי הנתונים\runs\BMW.xlsx"
#df.to_excel(save_location, index=False)

In [4]:
# Tabulate the scraped BMW rows: one row per car, columns numbered by position.
df = pd.DataFrame(data=data)
# Bare expression so the notebook renders the table as the cell output.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Used 2020 BMW X1 sDrive28i w/ Convenience Package,29888,"20,119 miles",2.0L 4-Cylinder Turbo Gas Engine,24 City / 33 Highway,Alpine White Exterior,Black Sensatec Interior,8-Speed Automatic Transmission,2 wheel drive - front,
1,Used 2018 BMW 320i Sedan,24997,"27,659 miles",2.0L 4-Cylinder Turbo Gas Engine,24 City / 35 Highway,Mineral White Metallic Exterior,Leather Black/Red Hghlgt/Stitc Leather Seats,8-Speed Automatic Transmission,2 wheel drive - rear,
2,Certified 2020 BMW 228i xDrive w/ Convenience ...,29998,"19,102 miles",2.0L 4-Cylinder Turbo Gas Engine,23 City / 33 Highway,Alpine White Exterior,Black Sensatec Interior,8-Speed Automatic Transmission,All wheel drive,
3,Certified 2020 BMW X1 sDrive28i w/ Convenience...,30999,"21,068 miles",2.0L 4-Cylinder Turbo Gas Engine,24 City / 33 Highway,Mineral White Metallic Exterior,Black Sensatec Interior,8-Speed Automatic Transmission,2 wheel drive - front,
4,Certified 2020 BMW X1 sDrive28i w/ Premium Pac...,31587,"18,865 miles",2.0L 4-Cylinder Turbo Gas Engine,24 City / 33 Highway,Mineral Grey Metallic Exterior,Black Sensatec Interior,8-Speed Automatic Transmission,2 wheel drive - front,
...,...,...,...,...,...,...,...,...,...,...
995,Used 2020 BMW 530e,28699,"79,505 miles",Plug-in Hybrid: Gas/Electric,Information Unavailable,21 EV Mile Range,Mediterranean Blue Metallic Exterior,Canberra Beige/Black Interior,8-Speed Automatic Transmission,2 wheel drive - rear
996,Used 2020 BMW X5 sDrive40i w/ Premium Package,43987,"29,231 miles",3.0L 6-Cylinder Turbo Gas Engine,21 City / 26 Highway,Arctic Gray Metallic Exterior,Ivory White Interior,8-Speed Automatic Transmission,2 wheel drive - rear,
997,Used 2022 BMW M5 w/ Competition Package,102987,"15,364 miles",4.4L 8-Cylinder Turbo Gas Engine,15 City / 21 Highway,Brands Hatch Grey Metallic Exterior,Silverstone Leather Seats,8-Speed Automatic Transmission,All wheel drive,
998,Used 2016 BMW 528i Sedan,19500,"65,657 miles",2.0L 4-Cylinder Turbo Gas Engine,23 City / 34 Highway,Alpine White Exterior,Black Interior,8-Speed Automatic Transmission,2 wheel drive - rear,


### Chevrolet

In [5]:
# Chevrolet: collect the detail-page links for the search results.
# The search is requested in pages of 100 records (offsets 0, 100, ..., 900);
# the first page is fetched without a `firstRecord` parameter, every later
# page passes its offset.
linksToPages = []

search_url = "https://www.autotrader.com/cars-for-sale/all-cars/chevrolet/san-francisco-ca?searchRadius=75&zip=94102&marketExtension=include&isNewSearch=true&showAccelerateBanner=false&sortBy=relevance&numRecords=100"

for first_record in range(0, 1000, 100):
    url = search_url
    if first_record:
        url += f"&firstRecord={first_record}"

    # Without a timeout a stalled connection would block the crawl forever;
    # with one, requests raises instead of hanging.
    response = requests.get(url, timeout=30)
    soup = bs(response.text, 'html.parser')
    my_divs = soup.find_all('div', attrs={'class': "inventory-listing cursor-pointer panel panel-default"})

    for div in my_divs:
        # Take one link per listing card: the first anchor that actually has
        # an href (indexing an anchor without one would raise KeyError).
        anchor = div.find('a', href=True)
        if anchor is not None:
            linksToPages.append(anchor['href'])

    # Pause before requesting the next results page.
    time.sleep(2)

In [6]:
 linksToPages

['/cars-for-sale/vehicledetails.xhtml?listingId=679664548&allListingType=all-cars&makeCodeList=CHEV&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fchevrolet%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',
 '/cars-for-sale/vehicledetails.xhtml?listingId=680187123&allListingType=all-cars&makeCodeList=CHEV&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fchevrolet%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',


In [7]:

# Visit every collected Chevrolet listing page and build one row per car:
# [name, price, detail, detail, ...]. Anything not found becomes 'N/A'.
data = []
for link in linksToPages:
    # Only site-relative links are followed; anything else is skipped.
    if not link.startswith('/'):
        continue

    url = 'https://www.autotrader.com' + link
    # Without a timeout a stalled listing page would block the crawl forever.
    res = requests.get(url, timeout=30)
    time.sleep(2)  # pause between listing requests
    soup = bs(res.content, 'html.parser')

    # Name: text of the first <h1> on the page.
    name = soup.find('h1')
    data_row = [name.text.strip() if name is not None else 'N/A']

    # Price: the 'first-price' span inside the pricing component.
    price = soup.find('div', attrs={'data-cmp': 'pricing'})
    price_value = None
    if price is not None:
        price_value = price.find('span', attrs={'class': 'first-price'})
    data_row.append(price_value.text.strip() if price_value is not None else 'N/A')

    # Remaining details: one entry per bordered list item, in page order.
    items = soup.find('ul', attrs={'class': 'list'})
    if items is not None:
        data_row.extend(
            item.text.strip()
            for item in items.find_all("li", attrs={'class': 'list-bordered'})
        )
    else:
        # No details list on the page: pad with six placeholders.
        data_row.extend(['N/A'] * 6)

    data.append(data_row)



# Save DataFrame as Excel file
#save_location = r"C:\Users\tal66\Desktop\פרויקט טל ושקד מדעי הנתונים\runs\chevrolet.xlsx"
#df.to_excel(save_location, index=False)


In [8]:
# Tabulate the scraped Chevrolet rows: one row per car, columns numbered by position.
df = pd.DataFrame(data=data)
# Bare expression so the notebook renders the table as the cell output.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Used 1999 Chevrolet Corvette Convertible w/ Me...,14750,"61,632 miles",5.7L 8-Cylinder Gas Engine,15 City / 23 Highway,Sebring Silver Metallic Exterior,Black Leather Seats,4-Speed Automatic Transmission,2 wheel drive - rear,
1,Used 2022 Chevrolet Bolt EUV Premier w/ Sun & ...,33495,"6,251 miles",Electric Engine,247 EV Mile Range,Gray Ghost Metallic Exterior,Dark Ash Gray Sky Gray Leather Seats,Single-Speed Transmission,2 wheel drive - front,
2,Used 2019 Chevrolet Corvette Stingray Coupe w/...,50900,"27,262 miles",6.2L 8-Cylinder Gas Engine,15 City / 25 Highway,Ceramic Matrix Gray Metallic Exterior,Jet Black Leather Seats,8-Speed Automatic Transmission,2 wheel drive - rear,
3,Used 2014 Chevrolet Camaro LT,22000,"32,688 miles",3.6L 6-Cylinder Gas Engine,Information Unavailable,Bright Yellow Exterior,Black Interior,6-Speed Automatic Transmission,2 wheel drive - rear,
4,Used 2019 Chevrolet Bolt LT,23987,"23,358 miles",Electric Engine,238 EV Mile Range,Summit White Exterior,Dark Galvanized/Sky Cool Gray Interior,Single-Speed Transmission,2 wheel drive - front,
...,...,...,...,...,...,...,...,...,...,...
995,Used 2015 Chevrolet Silverado 1500 LTZ w/ LTZ ...,29888,"96,671 miles",5.3L 8-Cylinder Gas Engine,16 City / 23 Highway,White Diamond Tricoat Exterior,Dark Ash With Jet Black Leather Seats,6-Speed Automatic Transmission,2 wheel drive - rear,"5'10"" Bed Length (Extra Short)"
996,Used 2017 Chevrolet Silverado 1500 LT w/ All S...,39454,"65,939 miles",5.3L 8-Cylinder Gas Engine,16 City / 22 Highway,Silver Ice Metallic Exterior,Black Interior,6-Speed Automatic Transmission,4 wheel drive,"5'10"" Bed Length (Extra Short)"
997,Used 2017 Chevrolet Volt Premier w/ Driver Con...,21500,"60,375 miles",Plug-in Hybrid: Gas/Electric,43 City / 42 Highway,53 EV Mile Range,Heather Gray Metallic Exterior,Black Interior,Continuously Variable Automatic Transmission,2 wheel drive - front
998,Certified 2019 Chevrolet Traverse LS,28988,"46,773 miles",3.6L 6-Cylinder Gas Engine,18 City / 27 Highway,Silver Ice Metallic Exterior,Jet Black Interior,9-Speed Automatic Transmission,2 wheel drive - front,


### Ford

In [9]:
# Ford: collect the detail-page links for the search results.
# The search is requested in pages of 100 records (offsets 0, 100, ..., 900);
# the first page is fetched without a `firstRecord` parameter, every later
# page passes its offset.
linksToPages = []

search_url = "https://www.autotrader.com/cars-for-sale/all-cars/Ford/san-francisco-ca?searchRadius=75&zip=94102&marketExtension=include&isNewSearch=true&showAccelerateBanner=false&sortBy=relevance&numRecords=100"

for first_record in range(0, 1000, 100):
    url = search_url
    if first_record:
        url += f"&firstRecord={first_record}"

    # Without a timeout a stalled connection would block the crawl forever;
    # with one, requests raises instead of hanging.
    response = requests.get(url, timeout=30)
    soup = bs(response.text, 'html.parser')
    my_divs = soup.find_all('div', attrs={'class': "inventory-listing cursor-pointer panel panel-default"})

    for div in my_divs:
        # Take one link per listing card: the first anchor that actually has
        # an href (indexing an anchor without one would raise KeyError).
        anchor = div.find('a', href=True)
        if anchor is not None:
            linksToPages.append(anchor['href'])

    # Pause before requesting the next results page.
    time.sleep(2)

In [10]:
# Show the collected Ford listing links as the cell output.
linksToPages

['/cars-for-sale/vehicledetails.xhtml?listingId=684275430&allListingType=all-cars&makeCodeList=FORD&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fford%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',
 '/cars-for-sale/vehicledetails.xhtml?listingId=681436062&allListingType=all-cars&makeCodeList=FORD&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fford%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',
 '/cars-fo

In [11]:

# Visit every collected Ford listing page and build one row per car:
# [name, price, detail, detail, ...]. Anything not found becomes 'N/A'.
data = []
for link in linksToPages:
    # Only site-relative links are followed; anything else is skipped.
    if not link.startswith('/'):
        continue

    url = 'https://www.autotrader.com' + link
    # Without a timeout a stalled listing page would block the crawl forever.
    res = requests.get(url, timeout=30)
    time.sleep(2)  # pause between listing requests
    soup = bs(res.content, 'html.parser')

    # Name: text of the first <h1> on the page.
    name = soup.find('h1')
    data_row = [name.text.strip() if name is not None else 'N/A']

    # Price: the 'first-price' span inside the pricing component.
    price = soup.find('div', attrs={'data-cmp': 'pricing'})
    price_value = None
    if price is not None:
        price_value = price.find('span', attrs={'class': 'first-price'})
    data_row.append(price_value.text.strip() if price_value is not None else 'N/A')

    # Remaining details: one entry per bordered list item, in page order.
    items = soup.find('ul', attrs={'class': 'list'})
    if items is not None:
        data_row.extend(
            item.text.strip()
            for item in items.find_all("li", attrs={'class': 'list-bordered'})
        )
    else:
        # No details list on the page: pad with six placeholders.
        data_row.extend(['N/A'] * 6)

    data.append(data_row)



# Save DataFrame as Excel file
#save_location = r"C:\Users\tal66\Desktop\פרויקט טל ושקד מדעי הנתונים\runs\Ford.xlsx"
#df.to_excel(save_location, index=False)


In [12]:
# Tabulate the scraped Ford rows: one row per car, columns numbered by position.
df = pd.DataFrame(data=data)
# Bare expression so the notebook renders the table as the cell output.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,Certified 2020 Ford Edge Titanium w/ Equipment...,31491,"26,278 miles",2.0L 4-Cylinder Turbo Gas Engine,21 City / 28 Highway,Atlas Blue Metallic Exterior,Ebony Leather Seats,8-Speed Automatic Transmission,All wheel drive,View Original Window Sticker,
1,Used 2018 Ford Escape SE w/ SE Sync 3 Package,15498,"74,184 miles",1.5L 4-Cylinder Turbo Gas Engine,23 City / 30 Highway,Ruby Red Metallic Tinted Clearcoat Exterior,Charcoal Black Interior,6-Speed Automatic Transmission,2 wheel drive - front,,
2,Used 2020 Ford Explorer XLT w/ Equipment Group...,32374,"12,155 miles",2.3L 4-Cylinder Turbo Gas Engine,21 City / 28 Highway,Magnetic Exterior,Ebony With Light Slate Uppers Leather Seats,10-Speed Automatic Transmission,2 wheel drive - rear,,
3,Used 2020 Ford Ranger XL w/ Equipment Group 10...,32000,"17,816 miles",2.3L 4-Cylinder Turbo Gas Engine,21 City / 26 Highway,Magnetic Metallic Exterior,Ebony Interior,10-Speed Automatic Transmission,2 wheel drive - rear,"5'1"" Bed Length (Short)",
4,Used 2018 Ford Escape Titanium w/ Ford Safe & ...,22999,"35,309 miles",2.0L 4-Cylinder Turbo Gas Engine,20 City / 27 Highway,Ruby Red Metallic Tinted Clearcoat Exterior,Charcoal Black Leather Seats,6-Speed Automatic Transmission,4 wheel drive,,
...,...,...,...,...,...,...,...,...,...,...,...
995,Used 2015 Ford Transit Connect XLT,21999,"64,804 miles",2.5L 4-Cylinder Gas Engine,21 City / 28 Highway,White Exterior,Black Interior,6-Speed Automatic Transmission,2 wheel drive - front,,
996,Used 2014 Ford Taurus SE,10989,"133,930 miles",3.5L 6-Cylinder Gas Engine,19 City / 28 Highway,Silver Exterior,No Color Interior,6-Speed Automatic Transmission,2 wheel drive - front,,
997,Used 2020 Ford EcoSport SE,18500,"51,270 miles",2.0L 4-Cylinder Gas Engine,23 City / 29 Highway,6-Speed Automatic Transmission,4 wheel drive,,,,
998,Used 2021 Ford Edge SEL,27000,"53,966 miles",2.0L 4-Cylinder Turbo Gas Engine,21 City / 28 Highway,8-Speed Automatic Transmission,All wheel drive,,,,


### GMC

In [13]:
# GMC: collect the detail-page links for the search results.
# The search is requested in pages of 100 records (offsets 0, 100, ..., 900);
# the first page is fetched without a `firstRecord` parameter, every later
# page passes its offset.
linksToPages = []

search_url = "https://www.autotrader.com/cars-for-sale/all-cars/GMC/san-francisco-ca?searchRadius=75&zip=94102&marketExtension=include&isNewSearch=true&showAccelerateBanner=false&sortBy=relevance&numRecords=100"

for first_record in range(0, 1000, 100):
    url = search_url
    if first_record:
        url += f"&firstRecord={first_record}"

    # Without a timeout a stalled connection would block the crawl forever;
    # with one, requests raises instead of hanging.
    response = requests.get(url, timeout=30)
    soup = bs(response.text, 'html.parser')
    my_divs = soup.find_all('div', attrs={'class': "inventory-listing cursor-pointer panel panel-default"})

    for div in my_divs:
        # Take one link per listing card: the first anchor that actually has
        # an href (indexing an anchor without one would raise KeyError).
        anchor = div.find('a', href=True)
        if anchor is not None:
            linksToPages.append(anchor['href'])

    # Pause before requesting the next results page.
    time.sleep(2)

In [14]:
# Show the collected GMC listing links as the cell output.
linksToPages

['/cars-for-sale/vehicledetails.xhtml?listingId=677176963&allListingType=all-cars&makeCodeList=GMC&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fgmc%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',
 '/cars-for-sale/vehicledetails.xhtml?listingId=674234819&allListingType=all-cars&makeCodeList=GMC&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fgmc%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',
 '/cars-for-sa

In [15]:



# Visit every collected GMC listing page and build one row per car:
# [name, price, detail, detail, ...]. Anything not found becomes 'N/A'.
data = []
for link in linksToPages:
    # Only site-relative links are followed; anything else is skipped.
    if not link.startswith('/'):
        continue

    url = 'https://www.autotrader.com' + link
    # Without a timeout a stalled listing page would block the crawl forever.
    res = requests.get(url, timeout=30)
    time.sleep(2)  # pause between listing requests
    soup = bs(res.content, 'html.parser')

    # Name: text of the first <h1> on the page.
    name = soup.find('h1')
    data_row = [name.text.strip() if name is not None else 'N/A']

    # Price: the 'first-price' span inside the pricing component.
    price = soup.find('div', attrs={'data-cmp': 'pricing'})
    price_value = None
    if price is not None:
        price_value = price.find('span', attrs={'class': 'first-price'})
    data_row.append(price_value.text.strip() if price_value is not None else 'N/A')

    # Remaining details: one entry per bordered list item, in page order.
    items = soup.find('ul', attrs={'class': 'list'})
    if items is not None:
        data_row.extend(
            item.text.strip()
            for item in items.find_all("li", attrs={'class': 'list-bordered'})
        )
    else:
        # No details list on the page: pad with six placeholders.
        data_row.extend(['N/A'] * 6)

    data.append(data_row)


# Save DataFrame as Excel file
#save_location = r"C:\Users\tal66\Desktop\פרויקט טל ושקד מדעי הנתונים\runs\GMC.xlsx"
#df.to_excel(save_location, index=False)


In [16]:
# Tabulate the scraped GMC rows: one row per car, columns numbered by position.
df = pd.DataFrame(data=data)
# Bare expression so the notebook renders the table as the cell output.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Used 2019 GMC Acadia Denali w/ Technology Package,35900,"38,105 miles",3.6L 6-Cylinder Gas Engine,17 City / 25 Highway,Ebony Twilight Metallic Exterior,Jet Black Leather Seats,6-Speed Automatic Transmission,All wheel drive,
1,Used 2017 GMC Acadia Denali w/ Technology Package,25888,"61,915 miles",3.6L 6-Cylinder Gas Engine,18 City / 25 Highway,Black Cherry Metallic Exterior,Jet Black Leather Seats,6-Speed Automatic Transmission,All wheel drive,
2,Certified 2020 GMC Acadia AT4,35974,"28,968 miles",3.6L 6-Cylinder Gas Engine,18 City / 25 Highway,Quicksilver Metallic Exterior,Jet Black Interior,9-Speed Automatic Transmission,All wheel drive,
3,Used 2022 GMC Canyon AT4 w/ Driver Alert Package,41783,"10,053 miles",3.6L 6-Cylinder Gas Engine,17 City / 24 Highway,Summit White Exterior,Jet Black/Kalahari Leather Seats,8-Speed Automatic Transmission,4 wheel drive,"5'1"" Bed Length (Short)"
4,Used 2022 GMC Terrain Denali w/ Denali Premium...,34000,"5,320 miles",1.5L 4-Cylinder Turbo Gas Engine,25 City / 28 Highway,Ebony Twilight Metallic Exterior,Jet Black Leather Seats,9-Speed Automatic Transmission,All wheel drive,
...,...,...,...,...,...,...,...,...,...,...
995,New 2024 GMC Sierra 2500 Pro w/ Convenience Pa...,"51,825MSRP",2 miles,6.6L 8-Cylinder Gas Engine,Information Unavailable,Summit White Exterior,Black Interior,10-Speed Automatic Transmission,4 wheel drive,"8'2"" Bed Length (Long)"
996,New 2023 GMC Canyon Elevation,"41,695MSRP",0 miles,2.7L 4-Cylinder Turbo Gas Engine,Information Unavailable,Summit White Exterior,Jet Black Cloth Seat Trim,8-Speed Automatic Transmission,4 wheel drive,"5'1"" Bed Length (Short)"
997,New 2023 GMC Canyon Elevation,"42,190MSRP",0 miles,2.7L 4-Cylinder Turbo Gas Engine,Information Unavailable,Onyx Black Exterior,Jet Black Cloth Seat Trim,8-Speed Automatic Transmission,4 wheel drive,"5'1"" Bed Length (Short)"
998,New 2023 GMC Canyon Elevation w/ Convenience P...,"44,075MSRP",0 miles,2.7L 4-Cylinder Turbo Gas Engine,Information Unavailable,Summit White Exterior,Jet Black Cloth Seat Trim,8-Speed Automatic Transmission,2 wheel drive - rear,"5'1"" Bed Length (Short)"


### Honda

In [17]:
# Honda: collect the detail-page links for the search results.
# The search is requested in pages of 100 records (offsets 0, 100, ..., 900);
# the first page is fetched without a `firstRecord` parameter, every later
# page passes its offset.
linksToPages = []

search_url = "https://www.autotrader.com/cars-for-sale/all-cars/Honda/san-francisco-ca?searchRadius=75&zip=94102&marketExtension=include&isNewSearch=true&showAccelerateBanner=false&sortBy=relevance&numRecords=100"

for first_record in range(0, 1000, 100):
    url = search_url
    if first_record:
        url += f"&firstRecord={first_record}"

    # Without a timeout a stalled connection would block the crawl forever;
    # with one, requests raises instead of hanging.
    response = requests.get(url, timeout=30)
    soup = bs(response.text, 'html.parser')
    my_divs = soup.find_all('div', attrs={'class': "inventory-listing cursor-pointer panel panel-default"})

    for div in my_divs:
        # Take one link per listing card: the first anchor that actually has
        # an href (indexing an anchor without one would raise KeyError).
        anchor = div.find('a', href=True)
        if anchor is not None:
            linksToPages.append(anchor['href'])

    # Pause before requesting the next results page.
    time.sleep(2)

In [18]:
# Show the collected Honda listing links as the cell output.
linksToPages

['/cars-for-sale/vehicledetails.xhtml?listingId=684153785&allListingType=all-cars&makeCodeList=HONDA&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fhonda%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',
 '/cars-for-sale/vehicledetails.xhtml?listingId=682403898&allListingType=all-cars&makeCodeList=HONDA&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fhonda%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',
 '/car

In [19]:

# Visit every collected Honda listing page and build one row per car:
# [name, price, detail, detail, ...]. Anything not found becomes 'N/A'.
data = []
for link in linksToPages:
    # Only site-relative links are followed; anything else is skipped.
    if not link.startswith('/'):
        continue

    url = 'https://www.autotrader.com' + link
    # Without a timeout a stalled listing page would block the crawl forever.
    res = requests.get(url, timeout=30)
    time.sleep(2)  # pause between listing requests
    soup = bs(res.content, 'html.parser')

    # Name: text of the first <h1> on the page.
    name = soup.find('h1')
    data_row = [name.text.strip() if name is not None else 'N/A']

    # Price: the 'first-price' span inside the pricing component.
    price = soup.find('div', attrs={'data-cmp': 'pricing'})
    price_value = None
    if price is not None:
        price_value = price.find('span', attrs={'class': 'first-price'})
    data_row.append(price_value.text.strip() if price_value is not None else 'N/A')

    # Remaining details: one entry per bordered list item, in page order.
    items = soup.find('ul', attrs={'class': 'list'})
    if items is not None:
        data_row.extend(
            item.text.strip()
            for item in items.find_all("li", attrs={'class': 'list-bordered'})
        )
    else:
        # No details list on the page: pad with six placeholders.
        data_row.extend(['N/A'] * 6)

    data.append(data_row)



# Save DataFrame as Excel file
#save_location = r"C:\Users\tal66\Desktop\פרויקט טל ושקד מדעי הנתונים\runs\Honda.xlsx"
#df.to_excel(save_location, index=False)


In [20]:
# Tabulate the scraped Honda rows: one row per car, columns numbered by position.
df = pd.DataFrame(data=data)
# Bare expression so the notebook renders the table as the cell output.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Used 2022 Honda Civic Si,34991,"7,833 miles",1.5L 4-Cylinder Turbo Gas Engine,27 City / 37 Highway,Blazing Orange Pearl Exterior,Black Interior,6-Speed Manual Transmission,2 wheel drive - front,
1,Used 2020 Honda CR-V EX,28483,"28,914 miles",1.5L 4-Cylinder Turbo Gas Engine,28 City / 34 Highway,Platinum White Pearl Exterior,Black Interior,Continuously Variable Automatic Transmission,2 wheel drive - front,
2,Used 2014 Honda Civic LX,14975,"56,885 miles",1.8L 4-Cylinder Gas Engine,29 City / 37 Highway,Maroon Exterior,Tan Interior,Continuously Variable Automatic Transmission,2 wheel drive - front,
3,Used 2018 Honda Civic LX,20991,"43,135 miles",2.0L 4-Cylinder Gas Engine,31 City / 40 Highway,White Exterior,Black/Ivory Interior,Continuously Variable Automatic Transmission,2 wheel drive - front,
4,Used 2019 Honda CR-V EX,27991,"38,190 miles",1.5L 4-Cylinder Turbo Gas Engine,27 City / 33 Highway,Modern Steel Metallic Exterior,Gray Interior,Continuously Variable Automatic Transmission,All wheel drive,
...,...,...,...,...,...,...,...,...,...,...
995,Certified 2021 Honda Civic Sport,29995,"22,957 miles",2.0L 4-Cylinder Gas Engine,29 City / 37 Highway,Silver Exterior,Black Interior,Continuously Variable Automatic Transmission,2 wheel drive - front,
996,Certified 2021 Honda Civic Sport,30995,"28,204 miles",1.5L 4-Cylinder Turbo Gas Engine,29 City / 35 Highway,Platinum White Pearl Exterior,Black Interior,Continuously Variable Automatic Transmission,2 wheel drive - front,
997,Certified 2022 Honda Accord Sport,36995,"12,115 miles",Hybrid: Gas/Electric,44 City / 41 Highway,White Exterior,Black Interior,Continuously Variable Automatic Transmission,2 wheel drive - front,
998,Used 2017 Honda CR-V Touring,24995,"75,676 miles",1.5L 4-Cylinder Turbo Gas Engine,28 City / 34 Highway,Continuously Variable Automatic Transmission,2 wheel drive - front,,,


### Jeep

In [21]:
# Jeep: collect the detail-page links for the search results.
# The search is requested in pages of 100 records (offsets 0, 100, ..., 900);
# the first page is fetched without a `firstRecord` parameter, every later
# page passes its offset.
linksToPages = []

search_url = "https://www.autotrader.com/cars-for-sale/all-cars/Jeep/san-francisco-ca?searchRadius=75&zip=94102&marketExtension=include&isNewSearch=true&showAccelerateBanner=false&sortBy=relevance&numRecords=100"

for first_record in range(0, 1000, 100):
    url = search_url
    if first_record:
        url += f"&firstRecord={first_record}"

    # Without a timeout a stalled connection would block the crawl forever;
    # with one, requests raises instead of hanging.
    response = requests.get(url, timeout=30)
    soup = bs(response.text, 'html.parser')
    my_divs = soup.find_all('div', attrs={'class': "inventory-listing cursor-pointer panel panel-default"})

    for div in my_divs:
        # Take one link per listing card: the first anchor that actually has
        # an href (indexing an anchor without one would raise KeyError).
        anchor = div.find('a', href=True)
        if anchor is not None:
            linksToPages.append(anchor['href'])

    # Pause before requesting the next results page.
    time.sleep(2)

In [22]:
# Show the collected Jeep listing links as the cell output.
linksToPages

['/cars-for-sale/vehicledetails.xhtml?listingId=681942552&allListingType=all-cars&makeCodeList=JEEP&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fjeep%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',
 '/cars-for-sale/vehicledetails.xhtml?listingId=678327155&allListingType=all-cars&makeCodeList=JEEP&city=San%20Francisco&state=CA&zip=94102&searchRadius=75&marketExtension=include&isNewSearch=false&showAccelerateBanner=false&sortBy=relevance&numRecords=100&referrer=%2Fcars-for-sale%2Fall-cars%2Fjeep%2Fsan-francisco-ca%3FsearchRadius%3D75%26zip%3D94102%26marketExtension%3Dinclude%26isNewSearch%3Dfalse%26showAccelerateBanner%3Dfalse%26sortBy%3Drelevance%26numRecords%3D100&clickType=listing',
 '/cars-fo

In [23]:

# Visit every collected Jeep listing page and build one row per car:
# [name, price, detail, detail, ...]. Anything not found becomes 'N/A'.
data = []
for link in linksToPages:
    # Only site-relative links are followed; anything else is skipped.
    if not link.startswith('/'):
        continue

    url = 'https://www.autotrader.com' + link
    # Without a timeout a stalled listing page would block the crawl forever.
    res = requests.get(url, timeout=30)
    time.sleep(2)  # pause between listing requests
    soup = bs(res.content, 'html.parser')

    # Name: text of the first <h1> on the page.
    name = soup.find('h1')
    data_row = [name.text.strip() if name is not None else 'N/A']

    # Price: the 'first-price' span inside the pricing component.
    price = soup.find('div', attrs={'data-cmp': 'pricing'})
    price_value = None
    if price is not None:
        price_value = price.find('span', attrs={'class': 'first-price'})
    data_row.append(price_value.text.strip() if price_value is not None else 'N/A')

    # Remaining details: one entry per bordered list item, in page order.
    items = soup.find('ul', attrs={'class': 'list'})
    if items is not None:
        data_row.extend(
            item.text.strip()
            for item in items.find_all("li", attrs={'class': 'list-bordered'})
        )
    else:
        # No details list on the page: pad with six placeholders.
        data_row.extend(['N/A'] * 6)

    data.append(data_row)



# Save DataFrame as Excel file
#save_location = r"C:\Users\tal66\Desktop\פרויקט טל ושקד מדעי הנתונים\runs\Jeep.xlsx"
#df.to_excel(save_location, index=False)


In [24]:
# Tabulate the scraped Jeep rows: one row per car, columns numbered by position.
df = pd.DataFrame(data=data)
# Bare expression so the notebook renders the table as the cell output.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,Used 2021 Jeep Wrangler Rubicon,44999,"11,247 miles",3.6L 6-Cylinder Gas Engine,Information Unavailable,Firecracker Red Clearcoat Exterior,Dark Saddle/Black Leather Seats,8-Speed Automatic Transmission,4 wheel drive,,
1,Used 2021 Jeep Grand Cherokee Laredo,27900,"16,220 miles",3.6L 6-Cylinder Gas Engine,19 City / 26 Highway,Bright White Clearcoat Exterior,Black Interior,8-Speed Automatic Transmission,2 wheel drive - rear,,
2,Used 2020 Jeep Gladiator Overland w/ LED Light...,39900,"23,339 miles",3.6L 6-Cylinder Gas Engine,17 City / 22 Highway,Black Clearcoat Exterior,Dark Saddle/Black Leather Seats,8-Speed Automatic Transmission,4 wheel drive,5' Bed Length (Short),
3,Used 2020 Jeep Compass Limited w/ Safety & Sec...,26900,"16,908 miles",2.4L 4-Cylinder Gas Engine,22 City / 30 Highway,Sting-Gray Clearcoat Exterior,Black Leather Seats,9-Speed Automatic Transmission,4 wheel drive,,
4,Used 2018 Jeep Compass Limited w/ Navigation G...,24900,"23,345 miles",2.4L 4-Cylinder Gas Engine,22 City / 30 Highway,Billet Silver Metallic Clearcoat Exterior,Black Leather Seats,9-Speed Automatic Transmission,4 wheel drive,,
...,...,...,...,...,...,...,...,...,...,...,...
995,New 2023 Jeep Grand Cherokee Laredo,48430,20 miles,3.6L 6-Cylinder Gas Engine,19 City / 26 Highway,Baltic Gray Metallic Clearcoat Exterior,Global Black Interior,8-Speed Automatic Transmission,4 wheel drive,,
996,New 2023 Jeep Wrangler Unlimited,48920,10 miles,Plug-in Hybrid: Gas/Electric,Information Unavailable,22 EV Mile Range,White Knuckle Clearcoat Exterior,Black Interior,8-Speed Automatic Transmission,4 wheel drive,
997,New 2023 Jeep Wrangler Unlimited,48920,10 miles,Plug-in Hybrid: Gas/Electric,Information Unavailable,22 EV Mile Range,White Knuckle Clearcoat Exterior,Black Interior,8-Speed Automatic Transmission,4 wheel drive,
998,New 2023 Jeep Wrangler Unlimited,49415,10 miles,Plug-in Hybrid: Gas/Electric,Information Unavailable,22 EV Mile Range,Earl Clearcoat Exterior,Black Interior,8-Speed Automatic Transmission,4 wheel drive,
