# Day 1 - Web Scraping

## Task: Scraping Tata Cars Data from AckoDrive (https://ackodrive.com/)

In [1]:
## Uncomment the lines below to install the packages required for this task
## (%pip installs into the environment of the running kernel)

# %pip install requests
# %pip install beautifulsoup4
# %pip install pandas

## Import required libraries:

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Assign target URL:

In [3]:
# Address of the AckoDrive collection page for Tata cars; this is the page requested below.
URL = "https://ackodrive.com/collection/tata-cars/"

## Make GET Request:

In [4]:
# Without a timeout, requests waits indefinitely if the server never answers,
# which would freeze the notebook; 30 seconds is a generous upper bound.
response = requests.get(URL, timeout=30)
# Fail here on a 4xx/5xx reply instead of handing an error page to the parser.
response.raise_for_status()

## Check status code:

In [5]:
# Bare expression: the notebook displays the HTTP status code of the response.
response.status_code

200

## Parse the HTML content of the response:

In [6]:
# Build the parse tree from the raw response bytes using the "html.parser" backend.
soup = BeautifulSoup(markup=response.content, features="html.parser")

## Find all the car listings:

In [7]:
# Collect every <div> whose data-testid attribute is "listingcardesktop"
# (the wrapper element of one car listing card).
car_list = soup.find_all("div", attrs={"data-testid": "listingcardesktop"})

## Check the count of cars in the listing:

In [8]:
# Number of listing cards matched by the find_all call above.
len(car_list)

16

## Analyze the HTML of a single car listing:

In [9]:
# Pretty-print the HTML of one card to see which tags and attributes hold each field.
# Index 13 is just a sample; it raises IndexError if fewer than 14 cards were matched.
print(car_list[13].prettify())

<div class="styles__Wrapper-sc-ad1b3a08-0 iTGjuL" data-testid="listingcardesktop">
 <div class="styles__ModelSection-sc-ad1b3a08-1 cZBYsU">
  <div class="styles__ModelDetails-sc-a6403e05-0 edzyap">
   <a class="styles__AnchorTagWithoutUnderline-sc-a6403e05-33 bXIogR" data-testid="car_model_brand_title" href="https://ackodrive.com/cars/tata-harrier-ev/">
    <div class="styles__ModelStatusWrapper-sc-4db1e074-14 brdlRa">
     <h2 class="styles__ModelTitle-sc-a6403e05-4 ARHdK">
      <span class="styles__Make-sc-a6403e05-5 etWSJY">
       Tata
      </span>
      <span class="styles__ModelName-sc-a6403e05-6 hGuUnc">
       Harrier EV
      </span>
     </h2>
    </div>
   </a>
   <div class="styles__ModelInfo-sc-a6403e05-8 eNUijV">
    <p class="styles__ParaWithoutMargins-sc-a6403e05-34 iAPjDz" data-testid="car_model_body_type">
     SUV
    </p>
    <div class="styles__Dot-sc-a6403e05-9 cNKvzK">
    </div>
    <p class="styles__ParaWithoutMargins-sc-a6403e05-34 iAPjDz" data-testid="car_m

## Initialize data structure for car information:

In [32]:
# One empty list per output column; the extraction loop appends one value
# to each list for every car listing.
car_data = {
    column: []
    for column in (
        "brand",
        "model",
        "body_type",
        "model_seat",
        "model_variant",
        "price",
        "fuel_type",
        "transmission_type",
        "colour_variants",
        "location",
    )
}

## Extract data from each car listing:

In [33]:
def _find_text(parent, name, attrs=None):
    """Return the text of the first matching descendant of ``parent``.

    Returns None when ``parent`` is None or no element matches, instead of
    raising AttributeError on the None that ``find`` returns for a miss.
    """
    if parent is None:
        return None
    node = parent.find(name, attrs or {})
    return node.get_text() if node is not None else None


def _nth_text(nodes, index):
    """Return the text of ``nodes[index]``, or None when the list is too short."""
    return nodes[index].get_text() if len(nodes) > index else None


# Empty the columns first so that re-running this cell does not append the
# same listings a second time to the lists created in the previous cell.
for column_values in car_data.values():
    column_values.clear()

for car in car_list:
    title = car.find("h2")

    # The city label is split on whitespace and its second word is kept
    # (presumably the label reads "<prefix> <city>" - confirm against the page).
    city_label = _nth_text(
        car.find_all("div", {"class": "styles__CityName-sc-a6403e05-17 fZrHFu"}), 0
    )
    city_words = city_label.split() if city_label else []

    # Build the whole row before touching car_data: a listing with a missing
    # element then yields None for that field rather than aborting mid-row and
    # leaving the columns with different lengths.
    row = {
        "brand": _find_text(title, "span", {"class": "styles__Make-sc-a6403e05-5 etWSJY"}),
        "model": _find_text(title, "span", {"class": "styles__ModelName-sc-a6403e05-6 hGuUnc"}),
        "body_type": _find_text(car, "p", {"data-testid": "car_model_body_type"}),
        "model_seat": _find_text(car, "p", {"data-testid": "car_model_seat"}),
        "model_variant": _find_text(car, "h3"),
        "price": _find_text(car, "div", {"class": "styles__Price-sc-a6403e05-18 bsWAfs"}),
        "fuel_type": _find_text(car, "p", {"data-testid": "car_variant_fuel_type"}),
        "transmission_type": _find_text(car, "p", {"data-testid": "car_variant_transmission"}),
        # The colour summary is the sixth paragraph carrying this class.
        "colour_variants": _nth_text(
            car.find_all("p", {"class": "styles__ParaWithoutMargins-sc-a6403e05-34 iAPjDz"}), 5
        ),
        "location": city_words[1] if len(city_words) > 1 else None,
    }

    for column, value in row.items():
        car_data[column].append(value)

## Convert the car data into a DataFrame:

In [34]:
# Each key of car_data becomes a column and its list supplies the rows.
car_list_df = pd.DataFrame(data=car_data)
# Bare expression so the notebook renders the table.
car_list_df

Unnamed: 0,brand,model,body_type,model_seat,model_variant,price,fuel_type,transmission_type,colour_variants,location
0,Tata,Punch,SUV,5 Seater,Pure,₹6.71 L,Petrol,Manual,Available in 2 colors,Delhi
1,Tata,Nexon,SUV,5 Seater,1.2 Smart Plus,₹10.02 L,Petrol,Manual,Available in 4 colors,Delhi
2,Tata,Curvv EV,SUV,5 Seater,Accomplished 55,₹20.28 L,Electric,Automatic,Available in 4 colors,Delhi
3,Tata,Nexon EV,SUV,5 Seater,Empowered Plus 45,₹17.94 L,Electric,Automatic,Available in 4 colors,Delhi
4,Tata,Harrier,SUV,5 Seater,Pure Plus S,₹22.14 L,Diesel,Manual,Available in 2 colors,Delhi
5,Tata,Punch EV,SUV,5 Seater,Adventure LR 3.3,₹13.65 L,Electric,Automatic,Available in 4 colors,Delhi
6,Tata,Safari,SUV,7 Seater,Smart,₹18.37 L,Diesel,Manual,Available in 2 colors,Delhi
7,Tata,Curvv,SUV,5 Seater,1.2 Pure Plus,₹13.12 L,Petrol,Manual,Available in 4 colors,Delhi
8,Tata,Tigor,Sedan,5 Seater,XM,₹6.69 L,Petrol,Manual,Available in 2 colors,Delhi
9,Tata,Tiago EV,Hatchback,5 Seater,XT LR,₹10.93 L,Electric,Automatic,Available in 6 colors,Delhi


## Save the data to a CSV file:

In [13]:
# index=False keeps the DataFrame's row index out of the written file.
car_list_df.to_csv(path_or_buf="output_ipynb.csv", index=False)