# Import Libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import time

# HTTP Request

#### Store website in a variable

In [2]:
# Listings page to scrape, filtered to the Abuja region via the query string.
website = "https://cars45.com/listing?region_subdomain=abuja"

# Headers sent with every request: a desktop Chrome User-Agent string.
headers = {}
headers["User-Agent"] = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/95.0.4638.54 Safari/537.36"
)

#### Get Request

In [3]:
# Fetch the listings page with the custom headers.
# A timeout is passed because requests waits indefinitely by default, which
# would hang the notebook if the server never answers.
r = requests.get(website, headers=headers, timeout=30)

#### Response Status Code

In [4]:
# Display the HTTP status code of the response as this cell's output.
r.status_code

200

Since the status code of `r` is 200, the request succeeded.

# Making the Soup Object

In [5]:
# Parse the raw response body into a BeautifulSoup tree, which exposes the
# page's HTML elements as a nested, searchable data structure.
# The "lxml" parser is used because it is faster than the built-in "html.parser".
# NOTE(review): the printed page text shows mis-decoded characters (e.g. "âœ“"),
# which suggests the bytes are UTF-8 but decoded as something else - consider
# passing from_encoding="utf-8" here and confirm against the live page.
soup = BeautifulSoup(markup=r.content, features="lxml")

In [6]:
# Preview only the beginning of the prettified HTML: printing the whole page
# floods the notebook output and bloats the saved file.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <link href="https://assets.jiji.ng/static/bundles/css/vue-cars-js.00a4b26d13358d289af5.min.css" rel="stylesheet" type="text/css"/>
  <link href="https://assets.jiji.ng/static/cars45_files/favicon.ico" rel="icon"/>
  <meta content="#58B4AC" name="theme-color"/>
  <title>
   Used Cars For Sale In Nigeria âœ“ Price On Cars45.Com
  </title>
  <meta content="Cars45 âœ“ find the best car ðŸš— for sale in nigeria âœ“ prices from â‚¦ 1,800,000 on cars45.com" name="description"/>
  <meta content="noindex,follow" name="robots"/>
 </head>
 <body>
  <div class="js-gulp-svg-bundle">
  </div>
  <script nonce="FHDnU4adUXYeacirGGkVHl6K1bOozYMZ">
   (function (svgBundleName, svgAjaxUrl) {
            var storageSvgHash = localStorage ? localStorage.getItem("JijiUserSvgHashBundleName") : null;
            var storageSvgBundle = localStorage ? localStorage.getItem("JijiUserSvgBundle") : nu

# Results

In [7]:
# Collect every <a> element that carries the "car-feature" class;
# each match is treated as one car listing in the cells below.
listings = soup.find_all(name="a", class_="car-feature")

In [8]:
# The first listing on the web page.
# get_text() returns all text nested inside the listing's <a> element,
# including the surrounding newlines (see the raw output below).
listings[0].get_text()

'\n\n\n\n\nToyota Corolla 2012 Silver\nâ‚¦ 3,400,000\n\nNigerian Used\n\n\n'

In [9]:
# Number of listings found on the page that was just parsed.
len(listings)

15

## The Data I need to scrape

1. Car Make
2. Year of Manufacture and colour
3. Price
4. Condition_mileage
5. Mileage (km)

#### Car Make

In [13]:
# Text of the <p class="car-feature__name"> element in the first listing
# (the listing title: make, model, year and colour in the output below).
listings[0].find("p", class_ = "car-feature__name").get_text()

'Toyota Corolla 2012 Silver'

#### Price

In [25]:
# Text of the <p class="car-feature__amount"> element in the first listing,
# with leading/trailing whitespace stripped (the displayed price).
listings[0].find("p", class_ = "car-feature__amount").get_text().strip()

'â‚¦ 3,400,000'

#### Condition_mileage

In [23]:
# Text of the <div class="car-feature__others"> element in the first listing,
# with leading/trailing whitespace stripped (condition, and mileage when shown).
listings[0].find("div", class_ = "car-feature__others").get_text().strip()

'Nigerian Used'

## Use a For Loop to put everything together

In [30]:
def _text_or_missing(parent, tag_name, css_class, strip=True):
    """Return the text of the first matching child tag, or "missing".

    Parameters
    ----------
    parent : bs4 Tag
        The listing element to search inside.
    tag_name : str
        HTML tag name to look for (e.g. "p", "div").
    css_class : str
        CSS class the tag must carry.
    strip : bool, default True
        Whether to strip leading/trailing whitespace from the text.

    Returns
    -------
    str
        The tag's text, or the placeholder "missing" when no such tag exists.
    """
    node = parent.find(tag_name, class_=css_class)
    # find() returns None when nothing matches; check for that explicitly
    # instead of hiding every possible error behind a bare `except:`.
    if node is None:
        return "missing"
    text = node.get_text()
    return text.strip() if strip else text


# One list per scraped field; position k in every list belongs to listing k.
car_make = []
price = []
condition_mileage = []

for listing in listings:
    # The name is kept un-stripped, exactly as the page provides it.
    car_make.append(_text_or_missing(listing, "p", "car-feature__name", strip=False))
    price.append(_text_or_missing(listing, "p", "car-feature__amount"))
    # Condition, plus mileage on a second line when the listing shows one.
    condition_mileage.append(_text_or_missing(listing, "div", "car-feature__others"))

# Create a Pandas DataFrame to store the Data

In [31]:
# Assemble the three parallel lists into one table, one row per listing.
car_info = pd.DataFrame(
    data=dict(
        Name=car_make,
        Price=price,
        Condition_mileage=condition_mileage,
    )
)

In [32]:
# Display the scraped table (bare last expression -> rich DataFrame rendering).
car_info

Unnamed: 0,Name,Price,Condition_mileage
0,Toyota Corolla 2012 Silver,"â‚¦ 3,400,000",Nigerian Used
1,Toyota Camry 2009 Gray,"â‚¦ 2,500,000",Nigerian Used\n454203 km
2,Jeep Wrangler 2013 Unlimited Sport Blue,"â‚¦ 12,000,000",Foreign Used\n53200 km
3,BMW Z4 2010 sDrive28i White,"â‚¦ 5,300,000",Foreign Used\n98157 km
4,Peugeot 207 2008 Gray,"â‚¦ 1,800,000",Foreign Used
5,New Land Rover Range Rover 2020 Blue,"â‚¦ 75,000,000",Brand New\n794 km
6,Peugeot 307 2007 Silver,"â‚¦ 2,000,000",Foreign Used\n120000 km
7,Mercedes-Benz E350 2012 White,"â‚¦ 4,700,000",Nigerian Used\n85000 km
8,Mazda 3 2014 White,"â‚¦ 5,000,000",Foreign Used
9,Infiniti FX35 2005 Base 4x2 (3.5L 6cyl 5A) Blue,"â‚¦ 1,800,000",Nigerian Used


## Data Cleaning

In [33]:
# Turn the displayed price (currency symbol, spaces, thousands separators)
# into a real integer column.
# - Every non-digit character is removed, so the result does not depend on
#   how the currency symbol happened to be decoded.
# - Rows without a price (the "missing" placeholder) become <NA>.
# - astype(str) first makes the cell safe to re-run on an already-clean column.
# Assumes prices are whole numbers with no decimal part - TODO confirm.
car_info["Price"] = pd.to_numeric(
    car_info["Price"].astype(str).str.replace(r"\D", "", regex=True),
    errors="coerce",
).astype("Int64")

In [34]:
# Display the table again to check the result of the price clean-up.
car_info

Unnamed: 0,Name,Price,Condition_mileage
0,Toyota Corolla 2012 Silver,3400000,Nigerian Used
1,Toyota Camry 2009 Gray,2500000,Nigerian Used\n454203 km
2,Jeep Wrangler 2013 Unlimited Sport Blue,12000000,Foreign Used\n53200 km
3,BMW Z4 2010 sDrive28i White,5300000,Foreign Used\n98157 km
4,Peugeot 207 2008 Gray,1800000,Foreign Used
5,New Land Rover Range Rover 2020 Blue,75000000,Brand New\n794 km
6,Peugeot 307 2007 Silver,2000000,Foreign Used\n120000 km
7,Mercedes-Benz E350 2012 White,4700000,Nigerian Used\n85000 km
8,Mazda 3 2014 White,5000000,Foreign Used
9,Infiniti FX35 2005 Base 4x2 (3.5L 6cyl 5A) Blue,1800000,Nigerian Used


#### Save as CSV file

In [35]:
# Write the single-page results to disk; the row index is not written.
car_info.to_csv(
    path_or_buf="car_info_singlepage.csv",
    index=False,
)

# Scraping Multiple Pages

In [49]:
def _text_or_missing(parent, tag_name, css_class, strip=True):
    """Return the text of the first matching child tag, or "missing".

    Parameters
    ----------
    parent : bs4 Tag
        The listing element to search inside.
    tag_name : str
        HTML tag name to look for (e.g. "p", "div").
    css_class : str
        CSS class the tag must carry.
    strip : bool, default True
        Whether to strip leading/trailing whitespace from the text.

    Returns
    -------
    str
        The tag's text, or the placeholder "missing" when no such tag exists.
    """
    node = parent.find(tag_name, class_=css_class)
    # find() returns None when nothing matches; check for that explicitly
    # instead of hiding every possible error behind a bare `except:`.
    if node is None:
        return "missing"
    text = node.get_text()
    return text.strip() if strip else text


# Accumulators shared by all pages; position k in every list is one listing.
car_make = []
price = []
condition_mileage = []

first_page, last_page = 1, 4   # pages 1..4 inclusive
delay_seconds = 5              # pause between consecutive page requests

for page in range(first_page, last_page + 1):
    # NOTE(review): URL pattern kept exactly as written ("/listing/page<N>?...");
    # confirm that it really returns a different page for each N.
    website = "https://cars45.com/listing/page" + str(page) + "?listing_id=0YcfpLlhS6mDDJRh&region_subdomain=abuja"

    # Request the page; the timeout keeps the notebook from hanging forever.
    r = requests.get(website, headers=headers, timeout=30)

    # Parse the page and collect its listing elements.
    soup = BeautifulSoup(r.content, "lxml")
    listings = soup.find_all("a", class_="car-feature")

    # A distinct loop variable is used so the page counter is never shadowed.
    for listing in listings:
        # The name is kept un-stripped, exactly as the page provides it.
        car_make.append(_text_or_missing(listing, "p", "car-feature__name", strip=False))
        price.append(_text_or_missing(listing, "p", "car-feature__amount"))
        # Condition, plus mileage on a second line when the listing shows one.
        condition_mileage.append(_text_or_missing(listing, "div", "car-feature__others"))

    # Pause once per page request. Sleeping inside the listing loop would wait
    # for every single car and add minutes without reducing server load.
    if page < last_page:
        time.sleep(delay_seconds)

In [50]:
# Assemble the lists gathered across all pages into one table, one row per listing.
car_info = pd.DataFrame(
    data=dict(
        Name=car_make,
        Price=price,
        Condition_mileage=condition_mileage,
    )
)

In [51]:
# Preview the first rows only; the multi-page table is too long to dump in full.
car_info.head(10)

Unnamed: 0,Name,Price,Condition_mileage
0,Mercedes-Benz C250 2015 Black,"â‚¦ 7,800,000",Foreign Used
1,Toyota Corolla 2007 S Blue,"â‚¦ 2,100,000",Nigerian Used
2,Jeep Wrangler 2007 Silver,"â‚¦ 5,000,000",Foreign Used
3,Ford Edge 2014 Beige,"â‚¦ 2,500,000",Nigerian Used\n100000 km
4,Toyota Camry 2013 Gray,"â‚¦ 3,700,000",Nigerian Used
5,Mercedes-Benz E350 2006 Beige,"â‚¦ 3,200,000",Foreign Used
6,New JAC S2 2019 Black,"â‚¦ 6,500,000",Brand New\n16983 km
7,Toyota Camry 2014 Green,"â‚¦ 3,800,000",Nigerian Used\n60542 km
8,Honda Accord 2003 Automatic Gold,"â‚¦ 1,900,000",Foreign Used\n150000 km
9,Toyota Camry 2017 Gray,"â‚¦ 9,000,000",Foreign Used\n56974 km


# Data Cleaning

In [52]:
# Turn the displayed price (currency symbol, spaces, thousands separators)
# into a real integer column.
# - Every non-digit character is removed, so the result does not depend on
#   how the currency symbol happened to be decoded.
# - Rows without a price (the "missing" placeholder) become <NA>.
# - astype(str) first makes the cell safe to re-run on an already-clean column.
# Assumes prices are whole numbers with no decimal part - TODO confirm.
car_info["Price"] = pd.to_numeric(
    car_info["Price"].astype(str).str.replace(r"\D", "", regex=True),
    errors="coerce",
).astype("Int64")

In [53]:
# Preview the first rows to check the result of the price clean-up.
car_info.head(10)

Unnamed: 0,Name,Price,Condition_mileage
0,Mercedes-Benz C250 2015 Black,7800000,Foreign Used
1,Toyota Corolla 2007 S Blue,2100000,Nigerian Used
2,Jeep Wrangler 2007 Silver,5000000,Foreign Used
3,Ford Edge 2014 Beige,2500000,Nigerian Used\n100000 km
4,Toyota Camry 2013 Gray,3700000,Nigerian Used
5,Mercedes-Benz E350 2006 Beige,3200000,Foreign Used
6,New JAC S2 2019 Black,6500000,Brand New\n16983 km
7,Toyota Camry 2014 Green,3800000,Nigerian Used\n60542 km
8,Honda Accord 2003 Automatic Gold,1900000,Foreign Used\n150000 km
9,Toyota Camry 2017 Gray,9000000,Foreign Used\n56974 km


### Save as csv file

In [55]:
# Write the multi-page results to disk; the row index is not written.
car_info.to_csv(
    path_or_buf="car_info_multiplepages.csv",
    index=False,
)