### Importing Modules

In [1]:
# Address of the HTML page to scrape (served from localhost on port 8000).
Page = "http://localhost:8000/auto_mpg.html"

In [2]:
import requests

In [3]:
# Fetch the page. Always pass a timeout: without one, requests waits
# indefinitely if the server never responds, which would hang the notebook.
result = requests.get(Page, timeout=10)

In [4]:
# Check what kind of object requests.get handed back.
type(result)

requests.models.Response

In [5]:
# HTTP status code of the response; 200 indicates success.
result.status_code

200

In [6]:
# Peek at the first 10 bytes of the raw (undecoded) response body.
result.content[:10]

b'\n<!DOCTYPE'

In [7]:
# .content is the undecoded body, so this reports a bytes object.
type(result.content)

bytes

In [8]:
# .text is the response body decoded to a str; keep it for parsing.
source = result.text

In [9]:
# Confirm the decoded body is a str.
type(source)

str

In [10]:
from bs4 import BeautifulSoup

### Understanding scraped data

In [11]:
# Parse the HTML text using Python's built-in 'html.parser' backend.
soup = BeautifulSoup(source, 'html.parser')

In [14]:
# Show only the first 300 characters of the pretty-printed document.
print(soup.prettify()[:300])

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Example Car Dataset
  </title>
  <style>
   body {
        background-color: rgb(0, 0, 0);
        color: rgb(211, 211, 211);
        font-family:'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        margin: 0 20px;
 


In [17]:
# Text of the page's <title> element.
soup.title.text

'Example Car Dataset'

In [21]:
# Count the <div class="car_block"> elements on the page.
len(soup.find_all('div', class_='car_block'))

406

In [22]:
# Grab the first car block to explore its structure. find() returns the first
# match directly instead of building the full result list just to index [0].
div = soup.find('div', class_='car_block')

In [23]:
# Pretty-print the markup of the selected car block.
print(div.prettify())

<div class="car_block" id="car-1">
 <span class="car_name">
  Chevrolet Chevelle Malibu
 </span>
 <span class="from">
  (1970, USA)
 </span>
 <br/>
 Achieves
 <span class="mpg">
  18.0 mpg
 </span>
 with
 <span class="cylinders">
  8
 </span>
 cylinders backed by
 <span class="horsepower">
  130
 </span>
 hp, 307.0 cubic inches of displacement, weighing
 <span class="weight">
  3,504
 </span>
 lbs with 0-60 mph acceleration in
 <span class="acceleration">
  12.0
 </span>
 seconds
</div>



In [24]:
# All text inside the block, concatenated into one string.
div.text

'Chevrolet Chevelle Malibu (1970, USA)  Achieves 18.0 mpg with 8 cylinders backed by 130 hp, 307.0 cubic inches of displacement, weighing 3,504 lbs with 0-60 mph acceleration in 12.0 seconds'

In [25]:
# The block's individual text fragments, each with surrounding whitespace stripped.
list(div.stripped_strings)

['Chevrolet Chevelle Malibu',
 '(1970, USA)',
 'Achieves',
 '18.0 mpg',
 'with',
 '8',
 'cylinders backed by',
 '130',
 'hp, 307.0 cubic inches of displacement, weighing',
 '3,504',
 'lbs with 0-60 mph acceleration in',
 '12.0',
 'seconds']

In [26]:
# Every <span> element inside the block.
div.find_all('span')

[<span class="car_name">Chevrolet Chevelle Malibu</span>,
 <span class="from">(1970, USA)</span>,
 <span class="mpg">18.0 mpg</span>,
 <span class="cylinders">8</span>,
 <span class="horsepower">130</span>,
 <span class="weight">3,504</span>,
 <span class="acceleration">12.0</span>]

In [27]:
# First <span> in the block whose class is "mpg".
div.find('span',class_='mpg')

<span class="mpg">18.0 mpg</span>

In [28]:
# Text content of the block's mpg <span> (value and unit together).
div.find('span',class_='mpg').text

'18.0 mpg'

In [29]:
# Import the regular-expression module
import re

In [30]:
# The greedy .* matches everything from the start of the text up to ' cubic inches'.
re.findall('.* cubic inches', div.text)

['Chevrolet Chevelle Malibu (1970, USA)  Achieves 18.0 mpg with 8 cylinders backed by 130 hp, 307.0 cubic inches']

In [31]:
# Capture the decimal number that precedes ' cubic inches'.
# Raw string so `\d` reaches the regex engine intact (in a plain string it is
# an invalid escape sequence), and `\.` so only a literal decimal point matches.
re.findall(r'.* (\d+\.\d+) cubic inches', div.text)

['307.0']

In [35]:
# Collect the text of every mpg <span> found on the page.
mpg_list = [span.text for span in soup.find_all('span', class_='mpg')]

In [39]:
# Dump every scraped mpg string; entries with no value show up as '- mpg'.
print(mpg_list)

['18.0 mpg', '15.0 mpg', '18.0 mpg', '16.0 mpg', '17.0 mpg', '15.0 mpg', '14.0 mpg', '14.0 mpg', '14.0 mpg', '15.0 mpg', '- mpg', '- mpg', '- mpg', '- mpg', '- mpg', '15.0 mpg', '14.0 mpg', '- mpg', '15.0 mpg', '14.0 mpg', '24.0 mpg', '22.0 mpg', '18.0 mpg', '21.0 mpg', '27.0 mpg', '26.0 mpg', '25.0 mpg', '24.0 mpg', '25.0 mpg', '26.0 mpg', '21.0 mpg', '10.0 mpg', '10.0 mpg', '11.0 mpg', '9.0 mpg', '27.0 mpg', '28.0 mpg', '25.0 mpg', '25.0 mpg', '- mpg', '19.0 mpg', '16.0 mpg', '17.0 mpg', '19.0 mpg', '18.0 mpg', '14.0 mpg', '14.0 mpg', '14.0 mpg', '14.0 mpg', '12.0 mpg', '13.0 mpg', '13.0 mpg', '18.0 mpg', '22.0 mpg', '19.0 mpg', '18.0 mpg', '23.0 mpg', '28.0 mpg', '30.0 mpg', '30.0 mpg', '31.0 mpg', '35.0 mpg', '27.0 mpg', '26.0 mpg', '24.0 mpg', '25.0 mpg', '23.0 mpg', '20.0 mpg', '21.0 mpg', '13.0 mpg', '14.0 mpg', '15.0 mpg', '14.0 mpg', '17.0 mpg', '11.0 mpg', '13.0 mpg', '12.0 mpg', '13.0 mpg', '19.0 mpg', '15.0 mpg', '13.0 mpg', '13.0 mpg', '14.0 mpg', '18.0 mpg', '22.0 mpg', '