# Web Scraping 

Web scraping is the process of extracting data from websites. It involves using software or tools to access the HTML or other structured data on a web page and then extracting specific information from it.
In this project, we will explore how to extract car-listing data from a specific website: [cars.com](https://www.cars.com/).

In [30]:
# Importing Libraries
from bs4 import BeautifulSoup
import pandas as pd
import requests
import time
from concurrent.futures import ThreadPoolExecutor

In [3]:
# Search-results URL, split around the page number. The query string selects
# Tesla Cybertruck / Model 3 / Model S / Model X listings, 100 per page.
_URL_PREFIX = 'https://www.cars.com/shopping/results/?dealer_id=&keyword=&list_price_max=&list_price_min=&makes[]=tesla&maximum_distance=30&mileage_max=&models[]=tesla-cybertruck&models[]=tesla-model_3&models[]=tesla-model_s&models[]=tesla-model_x&monthly_payment=&page='
_URL_SUFFIX = '&page_size=100&sort=best_match_desc&stock_type=all&year_max=&year_min=&zip='

# (output column, CSS class of the element holding its text), in column order.
_LISTING_FIELDS = (
    ("Stock_type", "stock-type"),
    ("Title", "title"),
    ("Mileage", "mileage"),
    ("Price", "primary-price"),
    ("Price_drop", "price-drop"),
    ("car_battery", "battery-badge"),
    ("Dealer_Name", "dealer-name"),
    ("Location", "miles-from"),
)


def _parse_card(card):
    """Build one row dict from a ``vehicle-details`` element; absent fields become None."""
    row = {}
    for column, css_class in _LISTING_FIELDS:
        node = card.find(class_=css_class)
        row[column] = node.get_text(strip=True) if node is not None else None
    return row


def _fetch_page(page_number, delay, timeout):
    """Download one results page and return its listings as a list of row dicts.

    A page that cannot be fetched (network error, timeout, HTTP error status)
    is reported and yields an empty list, so one bad page does not discard
    the rows collected from the others.
    """
    url = _URL_PREFIX + str(page_number) + _URL_SUFFIX
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Skipping page {page_number}: {exc}")
        return []
    finally:
        # Pause after every request (successful or not) so each worker
        # throttles itself instead of hitting the site back-to-back.
        if delay > 0:
            time.sleep(delay)

    soup = BeautifulSoup(response.text, 'html')
    cards = soup.find_all("div", {"class": 'vehicle-details'})
    return [_parse_card(card) for card in cards]


def scrap(pages=50, max_workers=5, delay=1, timeout=30):
    """Scrape Tesla listings from cars.com search results into a DataFrame.

    Pages 1..``pages`` are fetched concurrently on a thread pool; rows are
    assembled in page order.

    Args:
        pages: Number of result pages to request (a value <= 0 requests none).
        max_workers: Size of the thread pool used for the downloads.
        delay: Seconds each worker waits after a request; 0 disables the pause.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        A DataFrame with one row per listing and the columns Stock_type,
        Title, Mileage, Price, Price_drop, car_battery, Dealer_Name and
        Location. Values are the stripped element text, or None when the
        element is absent from a listing. The columns are present even when
        no listing was found.
    """
    rows = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in submission order, so the row order
        # matches a sequential page-by-page scrape.
        page_results = executor.map(
            lambda number: _fetch_page(number, delay, timeout),
            range(1, pages + 1),
        )
        for page_rows in page_results:
            rows.extend(page_rows)

    # Creating a DataFrame with a fixed column set
    return pd.DataFrame(rows, columns=[column for column, _ in _LISTING_FIELDS])

In [4]:
# Run the scraper and keep the resulting DataFrame for the cells below.
data_cars = scrap()

In [31]:
# Display the scraped DataFrame as the cell output.
data_cars

Unnamed: 0,Stock_type,Title,Mileage,Price,Price_drop,car_battery,Dealer_Name,Location
0,,Used 2016 Tesla Model S 85,"88,871 mi.","$22,800",$700 price drop,EV Battery Rating | Excellent,Sunrise Ford of Fontana,"Fontana, CA"
1,,Used 2016 Tesla Model S P100D,"49,685 mi.","$38,450",,EV Battery Rating | Excellent,NextGear Motors,"Raleigh, NC"
2,,Used 2015 Tesla Model S P85D,"146,663 mi.","$23,995",,EV Battery Rating | Excellent,Brown & Brown Wholesale,"Mesa, AZ"
3,,Used 2017 Tesla Model X 100D,"79,892 mi.","$32,888","$2,607 price drop",EV Battery Rating | Excellent,Star Motor Sales,"Downers Grove, IL"
4,,Used 2022 Tesla Model 3 Performance,"26,400 mi.","$42,900",,,,"Horseheads, NY"
...,...,...,...,...,...,...,...,...
4948,Used,2020 Tesla Model X Long Range Dual Motor All-W...,"49,054 mi.","$46,998","$2,000 price drop",EV Battery Rating | Excellent,HGreg.com Doral,"Doral, FL"
4949,Used,2020 Tesla Model S Long Range Dual Motor All-W...,"39,032 mi.","$42,825",,EV Battery Rating | Excellent,Marietta Auto Sales,"Marietta, GA"
4950,Used,2019 Tesla Model 3 Mid Range,"37,780 mi.","$24,495","$1,500 price drop",EV Battery Rating | Excellent,DecentRide.Com,"West Chester, OH"
4951,Used,2021 Tesla Model 3 Standard Range Plus,"66,736 mi.","$22,999","$1,000 price drop",EV Battery Rating | Excellent,Ever,"San Francisco, CA"


In [35]:
# Write the DataFrame to a CSV file; index=False leaves the row index out of the file.
# NOTE(review): the destination is an absolute, machine-specific Windows path — adjust it before running elsewhere.
data_cars.to_csv(r'C:\Users\HP EliteBook 840 G6\Desktop\Master\S2\ML\data_cars.csv' , index = False)