In [1]:
import requests
import json
import csv
from bs4 import BeautifulSoup
import traceback
import cloudscraper
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
# Substrings that identify each "key spec" list item on a listing card.
# Checked in the order used by _parse_key_specs; the first match wins.
_KEY_SPEC_KEYWORDS = {
    "mileage": ["miles"],
    "BHP": ["BHP"],
    "transmission": ["Automatic", "Manual"],
    "fuel": [
        "Petrol",
        "Diesel",
        "Electric",
        "Hybrid – Diesel/Electric Plug-in",
        "Hybrid – Petrol/Electric",
        "Hybrid – Petrol/Electric Plug-in",
    ],
    "owners": ["owners"],
    "body": [
        "Coupe",
        "Convertible",
        "Estate",
        "Hatchback",
        "MPV",
        "Pickup",
        "SUV",
        "Saloon",
    ],
    "ULEZ": ["ULEZ"],
    "year": [" reg)"],
}


def _mentions(text, field):
    """Return True if ``text`` contains any keyword registered for ``field``."""
    return any(keyword in text for keyword in _KEY_SPEC_KEYWORDS[field])


def _parse_key_specs(spec_texts):
    """Turn the text of each key-spec list item into a dict of car attributes."""
    specs = {}
    for text in spec_texts:
        if _mentions(text, "mileage"):
            specs["mileage"] = int(text[:text.find(" miles")].replace(",", ""))
        elif _mentions(text, "BHP"):
            specs["BHP"] = int(text[:text.find("BHP")])
        elif _mentions(text, "transmission"):
            specs["transmission"] = text
        elif _mentions(text, "fuel"):
            specs["fuel"] = text
        elif _mentions(text, "owners"):
            specs["owners"] = int(text[:text.find(" owners")])
        elif _mentions(text, "body"):
            specs["body"] = text
        elif _mentions(text, "ULEZ"):
            specs["ULEZ"] = text
        elif _mentions(text, "year"):
            specs["year"] = text
        # Engine size looks like "2.0L". The length guard stops short or
        # empty items (e.g. "" or "1.6") from raising IndexError.
        elif len(text) > 3 and text[1] == "." and text[3] == "L":
            specs["engine"] = text
    return specs


def _parse_article(article):
    """Extract one car (name, link, price, seller, key specs) from a listing element."""
    anchor = article.find("a", {"class": "listing-fpa-link"})
    seller_info = article.find("ul", {"class": "product-card-seller-info__specs"}).text.strip()

    car = {
        "name": article.find("h3", {"class": "product-card-details__title"}).text.strip(),
        # Drop the query string. split() leaves the href intact when there is
        # no "?", whereas slicing with find() == -1 would cut the last char.
        "link": "https://www.autotrader.co.uk" + anchor["href"].split("?", 1)[0],
        "price": article.find("div", {"class": "product-card-pricing__price"}).text.strip(),
        # Collapse runs of whitespace/newlines into single spaces.
        "seller": " ".join(seller_info.split()),
    }

    spec_items = article.find("ul", {"class": "listing-key-specs"}).find_all("li")
    car.update(_parse_key_specs(item.text for item in spec_items))
    return car


def get_cars(
    make="BMW",
    model="5 SERIES",
    postcode="SW1A 0AA",
    radius=1500,
    min_year=1995,
    max_year=1995,
    include_writeoff="include",
    max_attempts_per_page=1,
    verbose=False,
    min_price=0,
    max_price=99999,
    max_failed_pages=10,
):
    """Scrape car listings from autotrader.co.uk, one registration year at a time.

    For each year from ``min_year`` to ``max_year`` (inclusive) the search
    results are requested page by page until a page has no listings or the
    final URL no longer ends with the requested page number.

    Parameters
    ----------
    make, model : str
        Sent as the ``make`` / ``model`` query parameters.
    postcode : str
        Sent as the ``postcode`` query parameter.
    radius : int
        Sent as the ``radius`` query parameter (presumably miles; not
        established by this code).
    min_year, max_year : int
        Inclusive range of years to query.
    include_writeoff : str
        ``"include"``, ``"exclude"`` or ``"writeoff-only"``; any other value
        adds no write-off filter to the query.
    max_attempts_per_page : int
        Total number of tries for a page before it is skipped.
    verbose : bool
        Print progress information.
    min_price, max_price : int
        Sent as the ``price-from`` / ``price-to`` query parameters.
    max_failed_pages : int or None
        Number of consecutive skipped pages after which the current year is
        abandoned. Without this limit a site that rejects every request
        would keep the loop running forever. ``None`` disables the limit.

    Returns
    -------
    list of dict
        One dict per listing with keys ``name``, ``link``, ``price``,
        ``seller`` and, when the listing shows them, ``mileage``, ``BHP``,
        ``transmission``, ``fuel``, ``owners``, ``body``, ``ULEZ``, ``year``
        and ``engine``. On ``KeyboardInterrupt`` the listings collected so
        far are returned.
    """
    # To bypass Cloudflare protection
    scraper = cloudscraper.create_scraper()

    results = []
    n_this_year_results = 0

    url = "https://www.autotrader.co.uk/results-car-search"

    # Set up parameters for query to autotrader.co.uk
    params = {
        "sort": "relevance",
        "postcode": postcode,
        "price-from": min_price,
        "price-to": max_price,
        "radius": radius,
        "make": make,
        "model": model,
        "search-results-price-type": "total-price",
        "search-results-year": "select-year",
    }

    if include_writeoff == "include":
        params["writeoff-categories"] = "on"
    elif include_writeoff == "exclude":
        params["exclude-writeoff-categories"] = "on"
    elif include_writeoff == "writeoff-only":
        params["only-writeoff-categories"] = "on"

    year = min_year
    page = 1
    attempt = 1
    failed_pages = 0  # consecutive pages skipped after exhausting attempts

    try:
        while year <= max_year:
            params["year-from"] = year
            params["year-to"] = year
            params["page"] = page

            r = scraper.get(url, params=params)
            if verbose:
                print("Year:     ", year)
                print("Page:     ", page)
                print("Response: ", r)

            page_start = len(results)  # lets a retry undo a half-parsed page
            failed = False

            try:
                if r.status_code != 200:  # e.g. due to bot protection
                    failed = True
                else:
                    j = r.json()
                    s = BeautifulSoup(j["html"], features="html.parser")

                    articles = s.find_all("article", attrs={"data-standout-type": ""})

                    # End of results: no listings, or the final URL does not
                    # end with the requested page number. This relies on
                    # "page" being the last query parameter (it is the last
                    # key added to params); presumably the site redirects
                    # out-of-range pages.
                    if len(articles) == 0 or r.url[r.url.find("page=") + 5:] != str(page):
                        if verbose:
                            print("Found total", n_this_year_results, "results for year", year, "across", page-1, "pages")
                            if year + 1 <= max_year:
                                print("Moving on to year", year + 1)
                                print("---------------------------------")

                        # Increment year and reset relevant variables
                        year = year + 1
                        page = 1
                        attempt = 1
                        n_this_year_results = 0
                        failed_pages = 0
                    else:
                        for article in articles:
                            results.append(_parse_article(article))
                            n_this_year_results = n_this_year_results + 1

                        page = page + 1
                        attempt = 1
                        failed_pages = 0

                        if verbose:
                            print("Car count: ", len(results))
                            print("---------------------------------")

            except KeyboardInterrupt:
                break

            except Exception:
                # Parsing problems are logged and handled as a failed attempt.
                traceback.print_exc()
                failed = True

            if not failed:
                continue

            attempt = attempt + 1
            if attempt <= max_attempts_per_page:
                # The same page is about to be requested again: drop whatever
                # was collected from it so the retry cannot add duplicates.
                n_this_year_results -= len(results) - page_start
                del results[page_start:]
                if verbose:
                    print("Exception. Starting attempt #", attempt, "and keeping at page #", page)
                continue

            page = page + 1
            attempt = 1
            failed_pages = failed_pages + 1
            if verbose:
                print("Exception. All attempts exhausted for this page. Skipping to next page #", page)

            if max_failed_pages is not None and failed_pages >= max_failed_pages:
                # Every recent page failed; abandon this year instead of
                # requesting ever-higher page numbers forever.
                print("Giving up on year", year, "after", failed_pages, "consecutive failed pages")
                year = year + 1
                page = 1
                n_this_year_results = 0
                failed_pages = 0

    except KeyboardInterrupt:
        pass

    return results

In [10]:
# Parameters needed:
#   - location 1 and location 2 to compare
#   - search radius for each location
#   - car price and comparison
#   - minimum and maximum purchase price

In [7]:
# Search every make and model (empty strings) around MK3 6JS, priced
# 500-10000, from get_cars' default min_year (1995) up to 2000.
x = get_cars(
    make="",
    model="",
    postcode="MK3 6JS",
    radius=50,
    max_year=2000,
    min_price=500,
    max_price=10000,
)

In [12]:
# Search settings for the two locations being compared.
# Leave make/model as empty strings to include every make and model in range.
# NOTE(review): the original note here also read "postcode as 10"; its
# meaning is unclear - confirm with the author.
#
# Each setting is a plain assignment with no trailing comma: `name = value,`
# would create a 1-tuple instead of the intended scalar.

# Location 1
make_1 = ""
model_1 = ""
postcode_1 = "MK3 6JS"
radius_1 = 30
min_year_1 = 2000
max_year_1 = 2010
include_writeoff_1 = "include"
max_attempts_per_page_1 = 1
verbose_1 = False
min_price_1 = 0
max_price_1 = 10000

# Location 2
make_2 = ""
model_2 = ""
postcode_2 = "DT1 3GJ"
radius_2 = 30
min_year_2 = 2000
max_year_2 = 2010
include_writeoff_2 = "include"
max_attempts_per_page_2 = 1
verbose_2 = False
min_price_2 = 0
max_price_2 = 10000

# Pass the settings explicitly; a bare get_cars() would ignore all of them
# and run the function's default search for both locations.
location_1 = get_cars(
    make=make_1,
    model=model_1,
    postcode=postcode_1,
    radius=radius_1,
    min_year=min_year_1,
    max_year=max_year_1,
    include_writeoff=include_writeoff_1,
    max_attempts_per_page=max_attempts_per_page_1,
    verbose=verbose_1,
    min_price=min_price_1,
    max_price=max_price_1,
)
location_2 = get_cars(
    make=make_2,
    model=model_2,
    postcode=postcode_2,
    radius=radius_2,
    min_year=min_year_2,
    max_year=max_year_2,
    include_writeoff=include_writeoff_2,
    max_attempts_per_page=max_attempts_per_page_2,
    verbose=verbose_2,
    min_price=min_price_2,
    max_price=max_price_2,
)

Unnamed: 0,name,link,price,seller,year,body,mileage,engine,BHP,transmission,fuel,owners
0,Honda Civic,https://www.autotrader.co.uk/car-details/20230...,"£2,475",wimbledon (48 miles),1995 (M reg),Hatchback,53000.0,1.3L,74.0,Manual,Petrol,3.0
1,BMW 3 Series,https://www.autotrader.co.uk/car-details/20230...,"£1,990",4.1 (59 reviews) peterborough (46 miles),1995 (N reg),Convertible,100000.0,2.0L,150.0,Automatic,Petrol,
2,Volvo 850,https://www.autotrader.co.uk/car-details/20230...,"£1,495",reading (46 miles),1995 (M reg),Saloon,156295.0,2.5L,170.0,Automatic,Petrol,
3,Toyota Corolla,https://www.autotrader.co.uk/car-details/20230...,"£2,495",high wycombe (28 miles),1995 (N reg),Hatchback,41000.0,1.6L,113.0,Automatic,Petrol,6.0
4,BMW 5 Series,https://www.autotrader.co.uk/car-details/20230...,"£4,995",4.5 (142 reviews) leicester (48 miles),1995 (M reg),Saloon,85000.0,1.8L,113.0,Manual,Petrol,
...,...,...,...,...,...,...,...,...,...,...,...,...
368,Suzuki Grand Vitara,https://www.autotrader.co.uk/car-details/20230...,"£1,795",leamington spa (39 miles),2000 (W reg),SUV,99700.0,2.5L,142.0,Automatic,Petrol,2.0
369,Jeep Wrangler,https://www.autotrader.co.uk/car-details/20230...,"£6,995",4.9 (205 reviews) london (22 miles),2000 (X reg),SUV,133000.0,4.0L,174.0,Automatic,Petrol,
370,Alfa Romeo 156,https://www.autotrader.co.uk/car-details/20230...,"£5,980",4.8 (14 reviews) london (48 miles),2000 (W reg),Saloon,80000.0,2.5L,190.0,Manual,Petrol,2.0
371,BMW Z3,https://www.autotrader.co.uk/car-details/20230...,"£5,000",warwick (41 miles),2000 (X reg),Convertible,71000.0,2.2L,170.0,Automatic,Petrol,3.0
