# Miles Per Gallon

There are many different features of a car that can help determine how many miles per gallon it has. In this activity, we will be creating a scatterplot to chart some of these relationships.

* The scatterplot that we want to create will compare 'mpg' to 'horsepower'. A reference image can be found at the bottom of this notebook, but how we go about creating this chart is completely up to you!

    * When reading in the data, there are 6 rows that are missing values in 'horsepower'. It is up to you to figure out what the author put in their place and drop the rows.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

In [2]:
file_name = os.path.join("Resources", "MPG.csv")
car_data = pd.read_csv(file_name)
car_data.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [3]:
car_data[car_data['horsepower'] == '?']

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
32,25.0,4,98.0,?,2046,19.0,71,1,ford pinto
126,21.0,6,200.0,?,2875,17.0,74,1,ford maverick
330,40.9,4,85.0,?,1835,17.3,80,2,renault lecar deluxe
336,23.6,4,140.0,?,2905,14.3,80,1,ford mustang cobra
354,34.5,4,100.0,?,2320,15.8,81,2,renault 18i
374,23.0,4,151.0,?,3035,20.5,82,1,amc concord dl


In [8]:
# Remove the rows with missing values in horsepower
car_data_clean = car_data[car_data['horsepower'] != '?']
car_data_clean.head(40)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino
5,15.0,8,429.0,198,4341,10.0,70,1,ford galaxie 500
6,14.0,8,454.0,220,4354,9.0,70,1,chevrolet impala
7,14.0,8,440.0,215,4312,8.5,70,1,plymouth fury iii
8,14.0,8,455.0,225,4425,10.0,70,1,pontiac catalina
9,15.0,8,390.0,190,3850,8.5,70,1,amc ambassador dpl


In [5]:
# Set the 'car name' as our index
car_data_reorg = car_data_clean.set_index("car name")

car_data_reorg.head()

Unnamed: 0_level_0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin
car name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
chevrolet chevelle malibu,18.0,8,307.0,130,3504,12.0,70,1
buick skylark 320,15.0,8,350.0,165,3693,11.5,70,1
plymouth satellite,18.0,8,318.0,150,3436,11.0,70,1
amc rebel sst,16.0,8,304.0,150,3433,12.0,70,1
ford torino,17.0,8,302.0,140,3449,10.5,70,1


In [6]:
# Remove the 'origin' column
car_data_reorg.drop('origin', axis=1, inplace=True)

car_data_reorg.head(100)

Unnamed: 0_level_0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year
car name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
chevrolet chevelle malibu,18.0,8,307.0,130,3504,12.0,70
buick skylark 320,15.0,8,350.0,165,3693,11.5,70
plymouth satellite,18.0,8,318.0,150,3436,11.0,70
amc rebel sst,16.0,8,304.0,150,3433,12.0,70
ford torino,17.0,8,302.0,140,3449,10.5,70
ford galaxie 500,15.0,8,429.0,198,4341,10.0,70
chevrolet impala,14.0,8,454.0,220,4354,9.0,70
plymouth fury iii,14.0,8,440.0,215,4312,8.5,70
pontiac catalina,14.0,8,455.0,225,4425,10.0,70
amc ambassador dpl,15.0,8,390.0,190,3850,8.5,70


In [7]:
# Convert the "horsepower" column to numeric so the data can be used
car_data_reorg['horsepower'] = pd.to_numeric(car_data['horsepower'])
car_data_reorg.dtypes

ValueError: Unable to parse string "?" at position 32

In [9]:
# Create a scatter plot which compares MPG to horsepower with the size being model year