# WEB SCRAPING NOTEBOOK USING BEAUTIFUL SOUP


# Car Dealership Web Scraping
Author- Riddhi Dinesh Vora

NUID- 002761911

Email- vora.ri@northeastern.edu


### Imports

In [6]:
 # Import the BeautifulSoup library for web scraping.
from bs4 import BeautifulSoup

# Import the requests library for making HTTP requests to web pages.
import requests

# Import the pandas library for data manipulation and analysis.
import pandas as pd

# The code for web scraping and data analysis would be placed here.

### HTTP Request


In [8]:
# Use the 'requests.get()' function to send an HTTP GET request to the specified 'website' URL.
# This function fetches the content of the web page and stores it in the 'response' object.
response = requests.get(website)


### Get Request

In [7]:
# Define the website URL where the search results are located.
website = 'https://www.cars.com/shopping/results/?stock_type=cpo&makes%5B%5D=mercedes_benz&models%5B%5D=&list_price_max=&maximum_distance=20&zip='


### Status Code 

In [11]:
# Show the HTTP status code of the response.
response.status_code

200

### Soup Object

In [12]:
# Build a BeautifulSoup parse tree from the response body, using the
# 'html.parser' parser.
soup = BeautifulSoup(markup=response.content, features='html.parser')


In [13]:
# Gather every <div> carrying the CSS class 'vehicle-card'; the cells below
# read one listing's fields from each of these elements.
results = soup.find_all('div', class_='vehicle-card')

In [14]:
# Calculate the number of elements in the 'results' list, representing the count of vehicle cards found on the web page.
# Show how many 'vehicle-card' elements were collected into 'results'.
len(results)

21

In [15]:
# Drop the first matched element from 'results' in place.
# NOTE(review): presumably the first 'vehicle-card' is not a regular listing
# (e.g. a promoted card) — confirm against the page markup.
# This mutates 'results', so re-running the cell removes one more element
# each time.
del results[0]

### Target necessary data

#### Name
#### Mileage
#### Dealer Name
#### Rating
#### Review Count
#### Price

## Name

In [18]:
# Read the text of the first <h2> inside the first entry of 'results'.
results[0].find(name='h2').get_text()


'2020 Mercedes-Benz AMG GT 63 4-Door'

## Mileage

In [19]:
# Read the text of the first <div class="mileage"> inside the first entry
# of 'results'.
results[0].find('div', class_='mileage').get_text()

'20,157 mi.'

## Dealer Name

In [20]:
# Read the text of the first <div class="dealer-name"> inside the first entry
# of 'results', trimming surrounding whitespace.
results[0].find('div', class_='dealer-name').get_text().strip()


'Mercedes-Benz of Atlantic City'

## Rating

In [22]:
# Read the text of the first <span class="sds-rating__count"> inside the
# first entry of 'results'; this is the value used as the rating.
results[0].find('span', class_='sds-rating__count').get_text()


'4.6'

## Review Count

In [23]:
# Read the text of the first <span class="sds-rating__link"> inside the first
# entry of 'results'; this is the value used as the review count.
results[0].find('span', class_='sds-rating__link').get_text()



'(1,102 reviews)'

## Price

In [24]:
# Read the text of the first <span class="primary-price"> inside the first
# entry of 'results'.
results[0].find('span', class_='primary-price').get_text()



'$109,853'

## Put everything together inside a For-Loop

In [25]:
def _text_or_na(card, tag, css_class=None, strip=False):
    """Return the text of the first matching element in *card*, or 'n/a'.

    Args:
        card: Element to search (one entry of ``results``).
        tag: Tag name to look for, e.g. 'div'.
        css_class: Optional CSS class the element must carry.
        strip: When True, trim leading/trailing whitespace from the text.

    Returns:
        The element's text, or the placeholder 'n/a' when nothing matches.
    """
    attrs = {'class': css_class} if css_class else {}
    node = card.find(tag, attrs)
    # find() returns None when nothing matches. Test for that explicitly
    # instead of hiding every possible error behind a bare ``except:``.
    if node is None:
        return 'n/a'
    text = node.get_text()
    return text.strip() if strip else text


# One list per output column; every pass through the loop appends exactly one
# value to each list, so all six stay the same length.
name = []
mileage = []
dealer_name = []
rating = []
review_count = []
price = []

for result in results:
    name.append(_text_or_na(result, 'h2'))
    mileage.append(_text_or_na(result, 'div', 'mileage'))
    dealer_name.append(_text_or_na(result, 'div', 'dealer-name', strip=True))
    rating.append(_text_or_na(result, 'span', 'sds-rating__count'))
    review_count.append(_text_or_na(result, 'span', 'sds-rating__link'))
    price.append(_text_or_na(result, 'span', 'primary-price'))

## Create Pandas Dataframe

In [26]:
# Assemble the per-column lists into a DataFrame: each dictionary key becomes
# a column header, in the order listed here.
car_dealer = pd.DataFrame(
    data={
        'Name': name,
        'Mileage': mileage,
        'Dealer Name': dealer_name,
        'Rating': rating,
        'Review Count': review_count,
        'Price': price,
    }
)

In [27]:
# Display the assembled DataFrame.
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2020 Mercedes-Benz AMG GT 63 4-Door,"20,157 mi.",Mercedes-Benz of Atlantic City,4.6,"(1,102 reviews)","$109,853"
1,2023 Mercedes-Benz E-Class E 350 4MATIC,"2,971 mi.",Mercedes-Benz of Owings Mills,4.9,"(2,315 reviews)","$59,990"
2,2022 Mercedes-Benz E-Class E 350,"6,473 mi.",Mercedes-Benz of Marin,,(0 reviews),"$48,995"
3,2022 Mercedes-Benz AMG GLE 53 Base,"15,015 mi.",Mercedes-Benz of Atlanta Northeast,4.7,"(2,091 reviews)","$88,884"
4,2023 Mercedes-Benz GLE 350 Base 4MATIC,"3,996 mi.",Mercedes-Benz of State College,,(2 reviews),"$61,991"
5,2020 Mercedes-Benz A-Class A 220 4MATIC,"15,603 mi.",Silver Star Motors,4.5,(826 reviews),"$28,899"
6,2020 Mercedes-Benz C-Class C 300 4MATIC,"48,570 mi.",Mercedes-Benz of Elmbrook,4.5,(199 reviews),"$29,995"
7,2020 Mercedes-Benz E-Class E 350 4MATIC,"17,914 mi.",Silver Star Motors,4.5,(826 reviews),"$38,148"
8,2019 Mercedes-Benz E-Class E 300 4MATIC,"35,561 mi.",Feldmann Imports,4.4,(645 reviews),"$32,948"
9,2020 Mercedes-Benz AMG A 35 Base,"16,257 mi.",Mercedes-Benz Manhattan Inc.,3.4,(472 reviews),"$42,900"


## Data Cleaning

In [30]:
def _clean_review_count(text):
    """Reduce a label such as '(1,102 reviews)' to its digits, e.g. '1102'.

    Values containing no digits (such as the 'n/a' placeholder) are returned
    unchanged.
    """
    digits = ''.join(ch for ch in text if ch.isdigit())
    return digits if digits else text


# The previous str.strip('reviews)') call treated its argument as a set of
# characters, so it stopped at the space before "reviews" and left trailing
# whitespace; it also kept the thousands separator. Keeping only the digits
# avoids both problems.
car_dealer['Review Count'] = car_dealer['Review Count'].apply(_clean_review_count)


In [29]:
# Display the DataFrame again to inspect the cleaned 'Review Count' column.
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2020 Mercedes-Benz AMG GT 63 4-Door,"20,157 mi.",Mercedes-Benz of Atlantic City,4.6,1102,"$109,853"
1,2023 Mercedes-Benz E-Class E 350 4MATIC,"2,971 mi.",Mercedes-Benz of Owings Mills,4.9,2315,"$59,990"
2,2022 Mercedes-Benz E-Class E 350,"6,473 mi.",Mercedes-Benz of Marin,,0,"$48,995"
3,2022 Mercedes-Benz AMG GLE 53 Base,"15,015 mi.",Mercedes-Benz of Atlanta Northeast,4.7,2091,"$88,884"
4,2023 Mercedes-Benz GLE 350 Base 4MATIC,"3,996 mi.",Mercedes-Benz of State College,,2,"$61,991"
5,2020 Mercedes-Benz A-Class A 220 4MATIC,"15,603 mi.",Silver Star Motors,4.5,826,"$28,899"
6,2020 Mercedes-Benz C-Class C 300 4MATIC,"48,570 mi.",Mercedes-Benz of Elmbrook,4.5,199,"$29,995"
7,2020 Mercedes-Benz E-Class E 350 4MATIC,"17,914 mi.",Silver Star Motors,4.5,826,"$38,148"
8,2019 Mercedes-Benz E-Class E 300 4MATIC,"35,561 mi.",Feldmann Imports,4.4,645,"$32,948"
9,2020 Mercedes-Benz AMG A 35 Base,"16,257 mi.",Mercedes-Benz Manhattan Inc.,3.4,472,"$42,900"
