# Data Scraping from Carmax

## MA 705 Individual Project

### Jinru Wei
### B10002915

In [1]:
import os
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd

In [2]:
# Explicit User-Agent header sent with the request.
headers = {'User-Agent': 'Safari'}

# Fetch the CarMax landing page that lists the vehicle categories.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops the notebook on an HTTP error instead of
# silently parsing an error page.
cars_req = requests.get('https://www.carmax.com/cars', headers=headers, timeout=30)
cars_req.raise_for_status()
cars = BeautifulSoup(cars_req.text, 'html.parser')

### Extract the links for the 13 car types

In [3]:
# Every anchor that sits inside a carousel slide on the landing page.
cars_links = cars.select('.carousel__slide a')

# Prefix each href with the site root to get an absolute URL,
# then keep only the first 13 entries.
absolute_links = ['https://www.carmax.com' + anchor['href'] for anchor in cars_links]
cartype_links = absolute_links[:13]

In [4]:
# Display the collected category URLs.
cartype_links

['https://www.carmax.com/cars/sport-utilities',
 'https://www.carmax.com/cars/pickup-trucks',
 'https://www.carmax.com/cars/crossovers',
 'https://www.carmax.com/cars/sedans',
 'https://www.carmax.com/cars/coupes',
 'https://www.carmax.com/cars/convertibles',
 'https://www.carmax.com/cars/luxury-vehicles',
 'https://www.carmax.com/cars/sports-cars',
 'https://www.carmax.com/cars/diesel-engines',
 'https://www.carmax.com/cars/minivans-and-vans',
 'https://www.carmax.com/cars/hybrids',
 'https://www.carmax.com/cars/wagons',
 'https://www.carmax.com/cars/electric-vehicles']

### Extract the first part of car information

Year, Make, Model, Price, Mileage, Availability for current store (Norwood)

In [5]:
# Per-listing fields. Every tile appends exactly one value to each list, so
# index i of every list describes the same listing.
year_make = []
model_trim = []
price = []
miles = []
transfer = []
car_type = []

# `t` is the 1-based position of the category page in `cartype_links`; it is
# stored with every listing found on that page as its category code.
for t, cartype_link in enumerate(cartype_links, start=1):
    # The timeout prevents an indefinite hang on a stalled connection;
    # raise_for_status() fails loudly on an HTTP error rather than letting an
    # error page yield zero tiles unnoticed.
    car_page_req = requests.get(cartype_link, headers=headers, timeout=30)
    car_page_req.raise_for_status()
    car_page = BeautifulSoup(car_page_req.text, 'html.parser')

    car_info1 = car_page.find_all("div", class_="car-tile--content")
    for info in car_info1:
        year_make.append(info.find('span', class_='year-make').text)
        model_trim.append(info.find('span', class_='model-trim').text)
        price.append(info.find('span', class_='price').text)
        miles.append(info.find('span', class_='miles').text)
        transfer.append(info.find('span', class_='transfer').text)
        car_type.append(t)

In [6]:
# Wrap each scraped list in a single-column DataFrame.
transfer = pd.DataFrame(transfer)
miles = pd.DataFrame(miles)
price = pd.DataFrame(price)
model_trim = pd.DataFrame(model_trim)
car_type = pd.DataFrame(car_type)

# The year/make text is split once on whitespace, giving two columns:
# the first token and the remainder of the string.
year_make = pd.DataFrame(year_make).iloc[:, 0].str.split(n=1, expand=True)

### Extract the second part of car information

Description, Car Features, Transmission, Color, Interior Color, Average Review 

In [7]:
# Per-listing detail fields; one value is appended to each list per tile.
Description = []
Car_features = []
Transmission = []
Color = []
Interior_Color = []
Average_Review = []

for cartype_link in cartype_links:
    # The timeout prevents an indefinite hang; raise_for_status() surfaces
    # HTTP errors instead of parsing an error page.
    car_page_req = requests.get(cartype_link, headers=headers, timeout=30)
    car_page_req.raise_for_status()
    car_page = BeautifulSoup(car_page_req.text, 'html.parser')

    car_info2 = car_page.find_all("div", class_="car-tile--extra-content")
    for detail in car_info2:
        # Read the features from the current tile. Indexing car_info2[0] here
        # would copy the first tile's features onto every listing of the page.
        features_node = detail.find(class_='car-features')
        # Keep one entry per tile even when the node is missing so the lists
        # stay the same length.
        Car_features.append(features_node.text if features_node is not None else None)

        # Each value is the node that directly follows the corresponding
        # <span> inside the tile; query the spans once and index into them.
        spans = detail.select('span')
        Description.append(spans[1].next_sibling)
        Transmission.append(spans[2].next_sibling)
        Color.append(spans[3].next_sibling)
        Interior_Color.append(spans[4].next_sibling)
        Average_Review.append(spans[5].next_sibling)

In [8]:
# Wrap each detail list in its own single-column DataFrame, rebinding the
# original names in one step.
(
    Description,
    Car_features,
    Transmission,
    Color,
    Interior_Color,
    Average_Review,
) = map(
    pd.DataFrame,
    (
        Description,
        Car_features,
        Transmission,
        Color,
        Interior_Color,
        Average_Review,
    ),
)

### Extract the third part of car information

Image 

In [9]:
# `src` attribute of every <img class="loaded"> element, in page order.
Car_img = []

for cartype_link in cartype_links:
    # The timeout prevents an indefinite hang; raise_for_status() surfaces
    # HTTP errors instead of parsing an error page.
    car_page_req = requests.get(cartype_link, headers=headers, timeout=30)
    car_page_req.raise_for_status()
    car_page = BeautifulSoup(car_page_req.text, 'html.parser')

    car_info3 = car_page.find_all("img", class_="loaded")
    # .get('src') yields None for an image without the attribute, so one entry
    # is still recorded per matched element.
    Car_img.extend(image.get('src') for image in car_info3)

In [10]:
# Single-column DataFrame holding the collected image `src` values.
Cars3 = pd.DataFrame(Car_img)

### Merge all the car information dataframes

In [11]:
# Place the per-field frames side by side. Concatenating along the columns
# pairs rows by index label, so row i of every frame ends up in row i of the
# merged table.
carmax = pd.concat(
    [
        car_type,
        year_make,
        model_trim,
        price,
        miles,
        Average_Review,
        Transmission,
        Color,
        Interior_Color,
        Car_features,
        transfer,
        Description,
        Cars3,
    ],
    axis='columns',
)

In [12]:
# Assign readable column names, listed in the same order as the merged columns.
carmax.columns = [
    'Type',
    'Year',
    'Make',
    'Model',
    'Price',
    'Milleage',
    'Average Review',
    'Transmission',
    'Color',
    'Interior Color',
    'Features',
    'Availability',
    'Description',
    'Car Image',
]

In [13]:
# Preview the merged table.
carmax

Unnamed: 0,Type,Year,Make,Model,Price,Milleage,Average Review,Transmission,Color,Interior Color,Features,Availability,Description,Car Image
0,1,2011,Honda,CR-V LX,"$14,599",126K mi,(4.506849315068493 reviews)\r\n,Automatic\r\n,Blue\r\n,Gray\r\n,"4WD/AWD, Auxiliary Audio Input, Cruise Control...","Available at your store CarMax Norwood, MA","Used 2011 Honda CR-V LX for sale - $14,599, 1...",https://media-service.carmax.com/img/vehicles/...
1,1,2013,Jeep,Grand Cherokee Laredo,"$23,998",76K mi,(4.451612903225806 reviews)\r\n,Automatic\r\n,Gray\r\n,Black\r\n,"4WD/AWD, Auxiliary Audio Input, Cruise Control...","Available at your store CarMax Norwood, MA",Used 2013 Jeep Grand Cherokee Laredo for sale...,https://media-service.carmax.com/img/vehicles/...
2,1,2017,Dodge,Journey SXT,"$18,998",82K mi,(3.8095238095238098 reviews)\r\n,Automatic\r\n,Gray\r\n,Black\r\n,"4WD/AWD, Auxiliary Audio Input, Cruise Control...","Available at your store CarMax Norwood, MA","Used 2017 Dodge Journey SXT for sale - $18,99...",https://media-service.carmax.com/img/vehicles/...
3,1,2015,Chevrolet,Tahoe LTZ,"$46,998",67K mi,(4.388888888888889 reviews)\r\n,Automatic\r\n,Black\r\n,Black\r\n,"4WD/AWD, Auxiliary Audio Input, Cruise Control...","Available at your store CarMax Norwood, MA","Used 2015 Chevrolet Tahoe LTZ for sale - $46,...",https://media-service.carmax.com/img/vehicles/...
4,1,2019,Jeep,Grand Cherokee SRT,"$70,998",26K mi,(4.714285714285714 reviews)\r\n,Automatic\r\n,Red\r\n,Brown\r\n,"4WD/AWD, Auxiliary Audio Input, Cruise Control...","Available at your store CarMax Norwood, MA",Used 2019 Jeep Grand Cherokee SRT for sale - ...,https://media-service.carmax.com/img/vehicles/...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,13,2021,Tesla,Model 3 Standard Range Plus,"$51,998",4K mi,(0 reviews)\r\n,Automatic\r\n,Gray\r\n,Black\r\n,"4WD/AWD, Leatherette Seats, Parking Sensors, R...","$99 Shipping from CarMax Waterbury, CT",Used 2021 Tesla Model 3 Standard Range Plus f...,https://media-service.carmax.com/img/vehicles/...
293,13,2019,Tesla,Model 3 Performance,"$55,998",19K mi,(0 reviews)\r\n,Automatic\r\n,Gray\r\n,Black\r\n,"4WD/AWD, Leatherette Seats, Parking Sensors, R...","Only available at CarMax Easton, PA",Used 2019 Tesla Model 3 Performance for sale ...,https://media-service.carmax.com/img/vehicles/...
294,13,2018,Tesla,Model 3 Long Range,"$50,998",11K mi,(0 reviews)\r\n,Automatic\r\n,Red\r\n,Black\r\n,"4WD/AWD, Leatherette Seats, Parking Sensors, R...","Only available at CarMax Easton, PA",Used 2018 Tesla Model 3 Long Range for sale -...,https://media-service.carmax.com/img/vehicles/...
295,13,2021,Tesla,Model 3 Performance,"$69,998",2K mi,(0 reviews)\r\n,Automatic\r\n,White\r\n,Black\r\n,"4WD/AWD, Leatherette Seats, Parking Sensors, R...","Coming soon to CarMax Albany, NY",Used 2021 Tesla Model 3 Performance for sale ...,https://media-service.carmax.com/img/vehicles/...


Remove leftover characters from the scraped text columns: trailing line breaks, the parentheses, and the word "reviews".

In [14]:
# Drop the CRLF that the scraped values carry.
# regex=False makes every replacement a literal substring match, independent
# of the pandas version's default for `regex`.
for column in ['Color', 'Interior Color', 'Transmission', 'Average Review']:
    carmax[column] = carmax[column].str.replace('\r\n', '', regex=False)

# Reduce "(<number> reviews)" to just "<number>". '(' and ')' are regex
# metacharacters, so they must be replaced literally.
for token in ['(', ')', ' reviews']:
    carmax['Average Review'] = carmax['Average Review'].str.replace(token, '', regex=False)

In [15]:
# Map each numeric category code to its label in a single pass.
# Values that are not keys of the mapping are left unchanged.
type_labels = {
    1: 'SUV',
    2: 'Truck',
    3: 'Crossover',
    4: 'Sedan',
    5: 'Coupe',
    6: 'Convertible',
    7: 'Luxury',
    8: 'Sport Car',
    9: 'Diesel Engine',
    10: 'Van',
    11: 'Hybrid',
    12: 'Wagon',
    13: 'Electric Vehicle',
}
carmax['Type'] = carmax['Type'].replace(type_labels)

In [16]:
# Preview the table again to check the cleaned columns and type labels.
carmax

Unnamed: 0,Type,Year,Make,Model,Price,Milleage,Average Review,Transmission,Color,Interior Color,Features,Availability,Description,Car Image
0,SUV,2011,Honda,CR-V LX,"$14,599",126K mi,4.506849315068493,Automatic,Blue,Gray,"4WD/AWD, Auxiliary Audio Input, Cruise Control...","Available at your store CarMax Norwood, MA","Used 2011 Honda CR-V LX for sale - $14,599, 1...",https://media-service.carmax.com/img/vehicles/...
1,SUV,2013,Jeep,Grand Cherokee Laredo,"$23,998",76K mi,4.451612903225806,Automatic,Gray,Black,"4WD/AWD, Auxiliary Audio Input, Cruise Control...","Available at your store CarMax Norwood, MA",Used 2013 Jeep Grand Cherokee Laredo for sale...,https://media-service.carmax.com/img/vehicles/...
2,SUV,2017,Dodge,Journey SXT,"$18,998",82K mi,3.8095238095238098,Automatic,Gray,Black,"4WD/AWD, Auxiliary Audio Input, Cruise Control...","Available at your store CarMax Norwood, MA","Used 2017 Dodge Journey SXT for sale - $18,99...",https://media-service.carmax.com/img/vehicles/...
3,SUV,2015,Chevrolet,Tahoe LTZ,"$46,998",67K mi,4.388888888888889,Automatic,Black,Black,"4WD/AWD, Auxiliary Audio Input, Cruise Control...","Available at your store CarMax Norwood, MA","Used 2015 Chevrolet Tahoe LTZ for sale - $46,...",https://media-service.carmax.com/img/vehicles/...
4,SUV,2019,Jeep,Grand Cherokee SRT,"$70,998",26K mi,4.714285714285714,Automatic,Red,Brown,"4WD/AWD, Auxiliary Audio Input, Cruise Control...","Available at your store CarMax Norwood, MA",Used 2019 Jeep Grand Cherokee SRT for sale - ...,https://media-service.carmax.com/img/vehicles/...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,Electric Vehicle,2021,Tesla,Model 3 Standard Range Plus,"$51,998",4K mi,0,Automatic,Gray,Black,"4WD/AWD, Leatherette Seats, Parking Sensors, R...","$99 Shipping from CarMax Waterbury, CT",Used 2021 Tesla Model 3 Standard Range Plus f...,https://media-service.carmax.com/img/vehicles/...
293,Electric Vehicle,2019,Tesla,Model 3 Performance,"$55,998",19K mi,0,Automatic,Gray,Black,"4WD/AWD, Leatherette Seats, Parking Sensors, R...","Only available at CarMax Easton, PA",Used 2019 Tesla Model 3 Performance for sale ...,https://media-service.carmax.com/img/vehicles/...
294,Electric Vehicle,2018,Tesla,Model 3 Long Range,"$50,998",11K mi,0,Automatic,Red,Black,"4WD/AWD, Leatherette Seats, Parking Sensors, R...","Only available at CarMax Easton, PA",Used 2018 Tesla Model 3 Long Range for sale -...,https://media-service.carmax.com/img/vehicles/...
295,Electric Vehicle,2021,Tesla,Model 3 Performance,"$69,998",2K mi,0,Automatic,White,Black,"4WD/AWD, Leatherette Seats, Parking Sensors, R...","Coming soon to CarMax Albany, NY",Used 2021 Tesla Model 3 Performance for sale ...,https://media-service.carmax.com/img/vehicles/...


Export the dataset to a CSV file

In [17]:
# Write the table to carmax2.csv in the working directory; index=False omits
# the row index column from the file.
carmax.to_csv('carmax2.csv',index=False)