# Bubble Plots

In [1]:
# A bubble plot is like a scatter plot, with a third variable encoded as the marker size.
# A further variable can be shown by coloring the points (e.g. by category).

In [2]:
import pandas as pd
import numpy as np
import plotly.offline as pyo
import plotly.graph_objs as go

In [3]:
# Read the mpg dataset from the CSV in the working directory and preview
# its first five rows.
df = pd.read_csv(filepath_or_buffer='mpg.csv')
df.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [4]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(398, 9)

In [5]:
# Column labels available for plotting.
df.columns

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model_year', 'origin', 'name'],
      dtype='object')

In [6]:
# Summary statistics; with default arguments describe() reports only the
# numeric columns of a mixed-dtype frame, so object columns are left out.
df.describe()

Unnamed: 0,mpg,cylinders,displacement,weight,acceleration,model_year,origin
count,398.0,398.0,398.0,398.0,398.0,398.0,398.0
mean,23.514573,5.454774,193.425879,2970.424623,15.56809,76.01005,1.572864
std,7.815984,1.701004,104.269838,846.841774,2.757689,3.697627,0.802055
min,9.0,3.0,68.0,1613.0,8.0,70.0,1.0
25%,17.5,4.0,104.25,2223.75,13.825,73.0,1.0
50%,23.0,4.0,148.5,2803.5,15.5,76.0,1.0
75%,29.0,8.0,262.0,3608.0,17.175,79.0,2.0
max,46.6,8.0,455.0,5140.0,24.8,82.0,3.0


In [8]:
# Per-column dtypes and non-null counts; useful for spotting columns that
# were not parsed as numbers.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
mpg             398 non-null float64
cylinders       398 non-null int64
displacement    398 non-null float64
horsepower      398 non-null object
weight          398 non-null int64
acceleration    398 non-null float64
model_year      398 non-null int64
origin          398 non-null int64
name            398 non-null object
dtypes: float64(3), int64(4), object(2)
memory usage: 28.1+ KB


In [9]:
# Bubble chart of mpg against horsepower; marker size encodes cylinder count.

# 'horsepower' is loaded as an object (string) column, so convert it to
# numbers explicitly rather than relying on plotly to guess the axis type.
# Entries that cannot be parsed become NaN.
horsepower = pd.to_numeric(df['horsepower'], errors='coerce')

data = [go.Scatter(x=horsepower,
                   y=df['mpg'],
                   text=df['name'],  # hover label: car name
                   mode='markers',
                   # multiply cylinder values by 2 to make the bubbles bigger
                   marker=dict(size=2*df['cylinders']))]

# Label both axes so the figure can be read on its own.
layout = go.Layout(title='Engine Cylinders BubbleChart',
                   xaxis=dict(title='HORSEPOWER'),
                   yaxis=dict(title='Miles Per Gallon'))

fig = go.Figure(data=data, layout=layout)

# Writes the figure to a standalone HTML file.
pyo.plot(fig, filename='EngBubbleChart.html')

'EngBubbleChart.html'

In [10]:
# Bubble chart of mpg against horsepower; marker size encodes vehicle weight.

# 'horsepower' is loaded as an object (string) column, so convert it to
# numbers explicitly rather than relying on plotly to guess the axis type.
# Entries that cannot be parsed become NaN.
horsepower = pd.to_numeric(df['horsepower'], errors='coerce')

data = [go.Scatter(x=horsepower,
                   y=df['mpg'],
                   text=df['name'],  # hover label: car name
                   mode='markers',
                   # divide weight by 100 to bring the bubbles down to a usable size
                   marker=dict(size=df['weight']/100))]

# Label both axes so the figure can be read on its own.
layout = go.Layout(title='Engine Weight BubbleChart',
                   xaxis=dict(title='HORSEPOWER'),
                   yaxis=dict(title='Miles Per Gallon'))

fig = go.Figure(data=data, layout=layout)

# Writes the figure to a standalone HTML file.
pyo.plot(fig, filename='EngBubbleChart2.html')

# The cars that weigh the most (bottom right of the chart)
# also have the worst miles per gallon.

'EngBubbleChart2.html'

In [16]:
# Bubble chart of mpg against horsepower; marker size encodes vehicle weight
# and marker color adds more detail by encoding the cylinder count.

# 'horsepower' is loaded as an object (string) column, so convert it to
# numbers explicitly rather than relying on plotly to guess the axis type.
# Entries that cannot be parsed become NaN.
horsepower = pd.to_numeric(df['horsepower'], errors='coerce')

data = [go.Scatter(x=horsepower,
                   y=df['mpg'],
                   text=df['name'],  # hover label: car name
                   mode='markers',
                   # divide weight by 100 to bring the bubbles down to a usable size
                   marker=dict(size=df['weight']/100,
                               color=df['cylinders'],
                               # draw the color bar so the colors can be decoded
                               showscale=True))]

layout = go.Layout(title='Engine Weight BubbleChart',
                   xaxis={'title': 'HORSEPOWER'},
                   yaxis=dict(title='Miles Per Gallon'))

fig = go.Figure(data=data, layout=layout)

# Writes the figure to a standalone HTML file.
pyo.plot(fig, filename='EngBubbleChart3.html')

# The cars that weigh the most (bottom right of the chart)
# also have the worst miles per gallon.

'EngBubbleChart3.html'