In [13]:
import re

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser

In [14]:
# Start a Splinter-controlled Chrome session used for all page loads below
browser = Browser(driver_name='chrome')

In [15]:
# Open the EV Database comparison page. Everything after '#' is the page's
# own filter/sort/paging state, written one setting per line for readability;
# adjacent string literals are concatenated into a single URL.
url = (
    'https://ev-database.org/uk/compare/newest-upcoming-electric-vehicle'
    '#sort:path~type~order=.id~number~desc'
    '|range-slider-range:prev~next=0~600'
    '|range-slider-towweight:prev~next=0~2500'
    '|range-slider-acceleration:prev~next=2~23'
    '|range-slider-fastcharge:prev~next=0~1100'
    '|range-slider-eff:prev~next=150~500'
    '|range-slider-topspeed:prev~next=60~260'
    '|paging:currentPage=0'
    '|paging:number=all'
)
browser.visit(url)

In [16]:
# Take the HTML currently held by the browser and build a BeautifulSoup tree
# from it using Python's built-in 'html.parser'
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [17]:
# Locate the first <div class="list"> element, which wraps the vehicle entries
table = soup.find(name='div', attrs={'class': 'list'})
table

<div class="list">
<div class="list-item">
<div class="data-wrapper">
<!-- img -->
<div class="img">
<a href="/uk/car/2033/Volkswagen-ID5-GTX"><img alt="Volkswagen ID.5 GTX" data-src="/img/auto/Volkswagen_ID5_GTX_2023/Volkswagen_ID5_GTX_2023-01-thumb.jpg" data-src-retina="/img/auto/Volkswagen_ID5_GTX_2023/Volkswagen_ID5_GTX_2023-01-thumb@2x.jpg" src="/img/auto/Volkswagen_ID5_GTX_2023/Volkswagen_ID5_GTX_2023-01-thumb@2x.jpg"/></a> </div>
<!-- item-data -->
<div class="title-wrap">
<h2><a class="title" href="/uk/car/2033/Volkswagen-ID5-GTX"><span class="volkswagen">Volkswagen</span> <span class="model">ID.5 GTX</span></a></h2><span class="date_from hidden">1698793200</span><span class="date_to hidden"></span><span class="rank hidden">11</span><span class="id hidden">2033</span> <span class="not-current">(coming soon)</span>
<div class="subtitle">
<span class="BEV hidden">Battery Electric Vehicle</span>
<span class="battery hidden">770</span>
          77 kWh |
          £<span class="pri

In [18]:
# Collect every element with class "list-item" inside the table (one per vehicle)
listitem = table.find_all(attrs={'class': 'list-item'})
listitem

[<div class="list-item">
 <div class="data-wrapper">
 <!-- img -->
 <div class="img">
 <a href="/uk/car/2033/Volkswagen-ID5-GTX"><img alt="Volkswagen ID.5 GTX" data-src="/img/auto/Volkswagen_ID5_GTX_2023/Volkswagen_ID5_GTX_2023-01-thumb.jpg" data-src-retina="/img/auto/Volkswagen_ID5_GTX_2023/Volkswagen_ID5_GTX_2023-01-thumb@2x.jpg" src="/img/auto/Volkswagen_ID5_GTX_2023/Volkswagen_ID5_GTX_2023-01-thumb@2x.jpg"/></a> </div>
 <!-- item-data -->
 <div class="title-wrap">
 <h2><a class="title" href="/uk/car/2033/Volkswagen-ID5-GTX"><span class="volkswagen">Volkswagen</span> <span class="model">ID.5 GTX</span></a></h2><span class="date_from hidden">1698793200</span><span class="date_to hidden"></span><span class="rank hidden">11</span><span class="id hidden">2033</span> <span class="not-current">(coming soon)</span>
 <div class="subtitle">
 <span class="BEV hidden">Battery Electric Vehicle</span>
 <span class="battery hidden">770</span>
           77 kWh |
           £<span class="price-ran

In [19]:
# Store the following items: Title, PriceRange, Efficiency, Rapidcharge, Range, MSRP

Title = []
PriceRange = []
Efficiency = []
Rapidcharge = []
Range = []
MSRP = []

# Matches a number at the very start of an element's text, allowing leading
# whitespace and an optional pound sign, e.g. "\n£57,000 ..." or "291 Wh/mi".
# Anchoring at the start avoids picking up an unrelated number later in the text.
_LEADING_NUMBER = re.compile(r"\s*£?\s*(\d[\d,]*)")


def _require_tag(evcar, css_class):
    """Return the first descendant of ``evcar`` with class ``css_class``.

    Raises:
        ValueError: if the list item has no such element, so a page-layout
            change is reported by field name rather than as a bare
            AttributeError on None.
    """
    tag = evcar.find(class_=css_class)
    if tag is None:
        raise ValueError(
            "list item has no element with class {!r}".format(css_class)
        )
    return tag


def _leading_number(evcar, css_class):
    """Return the leading number of the ``css_class`` element as a digit string.

    Thousands separators are removed, so the result can be passed straight to
    ``int``. Unit suffixes such as "Wh/mi", "mph" or "mi" are ignored.

    Raises:
        ValueError: if the element is missing or its text does not start with
            a number.
    """
    text = _require_tag(evcar, css_class).text
    match = _LEADING_NUMBER.match(text)
    if match is None:
        raise ValueError(
            "element with class {!r} has no leading number: {!r}".format(
                css_class, text
            )
        )
    return match.group(1).replace(",", "")


# Loop through list items to build data for items; keep only the numeric part
# of each value (as a digit string) so the columns can be cast to integers later.
# NOTE: str.strip("Wh/mi") removes any of those *characters* from both ends
# rather than a suffix, and a fixed-width slice truncates long prices, so the
# number is extracted explicitly instead.
for evcar in listitem:
    Title.append(_require_tag(evcar, 'title').text)
    PriceRange.append(_leading_number(evcar, 'price-range'))
    Efficiency.append(_leading_number(evcar, 'efficiency'))
    Rapidcharge.append(_leading_number(evcar, 'fastcharge_speed_print'))
    Range.append(_leading_number(evcar, 'erange_real'))
    MSRP.append(_leading_number(evcar, 'price_buy'))

In [20]:
# Combine the scraped lists into one DataFrame, one column per list
df = pd.DataFrame(
    dict(
        Model=Title,
        PriceRange=PriceRange,
        Efficiency=Efficiency,
        Rapidcharge=Rapidcharge,
        Range=Range,
        MSRP=MSRP,
    )
)
df.head(228)


Unnamed: 0,Model,PriceRange,Efficiency,Rapidcharge,Range,MSRP
0,Volkswagen ID.5 GTX,215,291,390,265,57000
1,Volkswagen ID.5 Pro,182,270,420,285,52000
2,Volkswagen ID.4 GTX,204,296,390,260,53000
3,Volkswagen ID.4 Pro,167,285,400,270,45000
4,Volkswagen ID.4 Pure,223,297,220,175,39000
...,...,...,...,...,...,...
223,BMW i4 eDrive40,181,252,490,320,57890
224,Mercedes EQV 300,463,474,190,190,87995
225,Honda e Advance,356,271,110,105,37395
226,Renault Zoe ZE50 R135,158,274,140,190,29995


In [22]:
# Cast every numeric column to int (Model stays as text) so the data is ready
# for loading into a database
df = df.astype(
    dict.fromkeys(["PriceRange", "Efficiency", "Rapidcharge", "Range", "MSRP"], int)
)

In [23]:
# Double-check the conversion: every column except Model should now report an
# integer dtype (Model remains object)
df.dtypes

Model          object
PriceRange      int32
Efficiency      int32
Rapidcharge     int32
Range           int32
MSRP            int32
dtype: object

In [24]:
# Write the DataFrame to a CSV file in the working directory; the row index is
# written as the first column
df.to_csv(path_or_buf='EV_Analysis_Database.csv', index=True)

In [25]:
# Scraping is finished: shut down the Splinter browser opened earlier in the notebook
browser.quit()