### Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd 

### HTTP Request

#### store website in variable

In [2]:
website = 'https://www.cars.com/shopping/results/?stock_type=cpo&makes%5B%5D=mercedes_benz&models%5B%5D=&list_price_max=&maximum_distance=20&zip='

#### Get Request

In [3]:
response = requests.get(website)

#### Status Code

In [4]:
response.status_code

200

### Soup Object

In [5]:
soup = BeautifulSoup(response.content, 'html.parser')

### Results

In [6]:
results = soup.find_all('div', {'class' : 'vehicle-card'})

In [9]:
len(results)

20

### Target necessary data

In [17]:
# Name
# Mileage
# Dealer Name
# Rating
# Rating Count
# Price

#### Name

In [7]:
results[0].find('h2').get_text()

'2018 Mercedes-Benz AMG GLC 63 S 4MATIC Coupe'

#### Mileage

In [8]:
results[0].find('div', {'class':'mileage'}).get_text()

'57,530 mi.'

#### Dealer Name

In [9]:
results[0].find('div', {'class':'dealer-name'}).get_text().strip()

'Mercedes-Benz of South Orlando'

#### Rating

In [17]:
results[0].find('span', {'class':'sds-rating__count'}).get_text()

'4.7'

#### Review Count

In [10]:
results[0].find('span', {'class':'sds-rating__link'}).get_text()

'(1,362 reviews)'

#### Price

In [19]:
results[0].find('span', {'class':'primary-price'}).get_text()

'$34,999'

### Put everything together inside a For-Loop

In [11]:
name = []
mileage = []
dealer_name = []
rating = []
review_count = []
price = []

for result in results:
    
    # name
    try:
        name.append(result.find('h2').get_text()) 
    except:
        name.append('n/a')
    
    # mileage
    try:
        mileage.append(result.find('div', {'class':'mileage'}).get_text())
    except:
        mileage.append('n/a')
    
    # dealer_name
    try:
        dealer_name.append(result.find('div', {'class':'dealer-name'}).get_text().strip())
    except:
        dealer_name.append('n/a')
        
    # rating
    try:
        rating.append(result.find('span', {'class':'sds-rating__count'}).get_text())
    except:
        rating.append('n/a')
    
    # review_count
    try:
        review_count.append(result.find('span', {'class':'sds-rating__link'}).get_text())
    except:
        review_count.append('n/a')
    
    #price 
    try:
        price.append(result.find('span', {'class':'primary-price'}).get_text())
    except:
        price.append('n/a')

### Create Pandas Dataframe

In [13]:
# dictionary
car_dealer = pd.DataFrame({'Name': name, 
                                'Rating': rating,})

In [14]:
car_dealer

Unnamed: 0,Name,Rating
0,2018 Mercedes-Benz AMG GLC 63 S 4MATIC Coupe,4.5
1,2022 Mercedes-Benz AMG GLB 35 Base,4.7
2,2018 Mercedes-Benz GLS 550 Base 4MATIC,4.8
3,2018 Mercedes-Benz E-Class E 300,4.8
4,2021 Mercedes-Benz S-Class S 580 4MATIC,4.2
5,2019 Mercedes-Benz AMG GT 53 Base,4.0
6,2020 Mercedes-Benz Maybach S 650 Base,4.9
7,2022 Mercedes-Benz AMG GLB 35 Base,4.8
8,2022 Mercedes-Benz AMG GLB 35 Base,4.7
9,2022 Mercedes-Benz GLC 300 Base 4MATIC,4.9


#### Data Cleaning

In [35]:
car_dealer['Review Count'] = car_dealer['Review Count'].apply(lambda x: x.strip('reviews)').strip('('))

In [15]:
# dataframe updated
car_dealer

Unnamed: 0,Name,Rating
0,2018 Mercedes-Benz AMG GLC 63 S 4MATIC Coupe,4.5
1,2022 Mercedes-Benz AMG GLB 35 Base,4.7
2,2018 Mercedes-Benz GLS 550 Base 4MATIC,4.8
3,2018 Mercedes-Benz E-Class E 300,4.8
4,2021 Mercedes-Benz S-Class S 580 4MATIC,4.2
5,2019 Mercedes-Benz AMG GT 53 Base,4.0
6,2020 Mercedes-Benz Maybach S 650 Base,4.9
7,2022 Mercedes-Benz AMG GLB 35 Base,4.8
8,2022 Mercedes-Benz AMG GLB 35 Base,4.7
9,2022 Mercedes-Benz GLC 300 Base 4MATIC,4.9


### Output in Excel

In [28]:
car_dealer.to_excel('car_dealer_single_page.xlsx', index=False)

### Part 2 - Pagination 

In [39]:
name = []
mileage = []
dealer_name = []
rating = []
review_count = []
price = []

for i in range (1,11):
    
    # website in variable
    website = 'https://www.cars.com/shopping/results/?page='+ str(i) +'&page_size=20&dealer_id=&list_price_max=&list_price_min=&makes[]=mercedes_benz&maximum_distance=20&mileage_max=&sort=best_match_desc&stock_type=cpo&year_max=&year_min=&zip=' 
    
    # request to website
    response = requests.get(website)
    
    # soup object
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # results
    results = soup.find_all('div', {'class' : 'vehicle-card'})
    
    # loop through results
    for result in results:
    
        # name
        try:
            name.append(result.find('h2').get_text()) 
        except:
            name.append('n/a')

        # mileage
        try:
            mileage.append(result.find('div', {'class':'mileage'}).get_text())
        except:
            mileage.append('n/a')

        # dealer_name
        try:
            dealer_name.append(result.find('div', {'class':'dealer-name'}).get_text().strip())
        except:
            dealer_name.append('n/a')

        # rating
        try:
            rating.append(result.find('span', {'class':'sds-rating__count'}).get_text())
        except:
            rating.append('n/a')

        # review_count
        try:
            review_count.append(result.find('span', {'class':'sds-rating__link'}).get_text())
        except:
            review_count.append('n/a')

        #price 
        try:
            price.append(result.find('span', {'class':'primary-price'}).get_text())
        except:
            price.append('n/a')

In [24]:
# dictionary
car_dealer = pd.DataFrame({'Name': name,
                                'Rating': rating,})

In [29]:
car_dealer

Unnamed: 0,Name,Rating
0,2018 Mercedes-Benz AMG GLC 63 S 4MATIC Coupe,4.5
1,2022 Mercedes-Benz AMG GLB 35 Base,4.7
2,2018 Mercedes-Benz GLS 550 Base 4MATIC,4.8
3,2018 Mercedes-Benz E-Class E 300,4.8
4,2021 Mercedes-Benz S-Class S 580 4MATIC,4.2
5,2019 Mercedes-Benz AMG GT 53 Base,4.0
6,2020 Mercedes-Benz Maybach S 650 Base,4.9
7,2022 Mercedes-Benz AMG GLB 35 Base,4.8
8,2022 Mercedes-Benz AMG GLB 35 Base,4.7
9,2022 Mercedes-Benz GLC 300 Base 4MATIC,4.9


In [48]:
car_dealer['Review Count'] = car_dealer['Review Count'].apply(lambda x: x.strip('reviews)').strip('('))

In [30]:
car_dealer

Unnamed: 0,Name,Rating
0,2018 Mercedes-Benz AMG GLC 63 S 4MATIC Coupe,4.5
1,2022 Mercedes-Benz AMG GLB 35 Base,4.7
2,2018 Mercedes-Benz GLS 550 Base 4MATIC,4.8
3,2018 Mercedes-Benz E-Class E 300,4.8
4,2021 Mercedes-Benz S-Class S 580 4MATIC,4.2
5,2019 Mercedes-Benz AMG GT 53 Base,4.0
6,2020 Mercedes-Benz Maybach S 650 Base,4.9
7,2022 Mercedes-Benz AMG GLB 35 Base,4.8
8,2022 Mercedes-Benz AMG GLB 35 Base,4.7
9,2022 Mercedes-Benz GLC 300 Base 4MATIC,4.9


In [None]:
file.download('car_dealer_single_page.xlsx')

In [None]:
import os
