# Plotting Mean Car Prices of Manufacturers

<b> Import dependencies </b>

In [1]:
import warnings

# Install a blanket "ignore" filter so no warning of any category is displayed
# in the notebook output for the rest of the session.
warnings.simplefilter('ignore')

In [2]:
import pandas as pd
from bokeh.io import output_notebook
output_notebook()

<b> Load the automobiles.csv dataset </b>

In [3]:
# Read the automobiles dataset (path is relative to the notebook's working
# directory) and preview the first rows.
automobiles_csv = 'Datasets/automobiles.csv'
df = pd.read_csv(automobiles_csv)
df.head()

Unnamed: 0,make,fuel-type,num-of-doors,body-style,engine-location,length,width,height,num-of-cylinders,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,alfa-romero,gas,two,convertible,front,168.8,64.1,48.8,four,111,5000,21,27,13495
1,alfa-romero,gas,two,convertible,front,168.8,64.1,48.8,four,111,5000,21,27,16500
2,alfa-romero,gas,two,hatchback,front,171.2,65.5,52.4,six,154,5000,19,26,16500
3,audi,gas,four,sedan,front,176.6,66.2,54.3,four,102,5500,24,30,13950
4,audi,gas,four,sedan,front,176.6,66.4,54.3,five,115,5500,18,22,17450


<b> Import figure and show from Bokeh's plotting interface </b>

In [4]:
from bokeh.plotting import figure, show

<b> Add a new column named Index, holding each row's index, to our dataset </b>

In [5]:
# Copy the row index into a regular column named 'Index'; the scatter plot in
# the next cell uses this column as its x coordinate.
df['Index'] = df.index

<b> Create a new figure and plot each car using a scatter plot with the index and price column </b>

In [6]:
# One marker per car: x is the row index, y is the car's price.
plot = figure(
    title='Car prices',
    x_axis_label='Car Index',
    y_axis_label='Price',
)
car_index, car_price = df['Index'], df['price']
plot.scatter(car_index, car_price)
show(plot)

<b> Group the dataset using groupby and the column make. Then use the mean method to get the mean value for each column </b>

In [7]:
# Average every numeric column per manufacturer.
# numeric_only=True is needed on pandas >= 2.0, where averaging the string
# columns (e.g. fuel-type, body-style) raises a TypeError instead of silently
# dropping them; older pandas versions accept the argument as well.
grouped_average = df.groupby('make', as_index=False).mean(numeric_only=True)
grouped_average

Unnamed: 0,make,length,width,height,city-mpg,highway-mpg,price,Index
0,alfa-romero,169.6,64.566667,50.0,20.333333,26.666667,15498.333333,1.0
1,audi,184.766667,68.85,54.833333,19.333333,24.5,17859.166667,5.5
2,bmw,184.5,66.475,54.825,19.375,25.375,26118.75,12.5
3,chevrolet,151.933333,62.5,52.4,41.0,46.333333,6007.0,18.0
4,dodge,160.988889,64.166667,51.644444,28.0,34.111111,7875.444444,24.0
5,honda,160.769231,64.384615,53.238462,30.384615,35.461538,8184.692308,35.0
6,isuzu,171.65,63.5,52.45,24.0,29.0,8916.5,42.5
7,jaguar,196.966667,69.933333,51.133333,14.333333,18.333333,34600.0,45.0
8,mazda,170.805882,65.588235,53.358824,25.705882,31.941176,10652.882353,55.0
9,mercedes-benz,195.2625,71.0625,55.725,18.5,21.0,33647.0,67.5


<b> Create a new figure with a title of Car Manufacturer Mean Prices, an x-axis of Car Manufacturer, and a y-label of Mean Price </b>

In [8]:
# Categorical x-axis: the manufacturer names are used both as the axis range
# (factors) and as the x coordinate of each marker.
manufacturers = grouped_average['make']
grouped_plot = figure(
    title='Car Manufacturer Mean Prices',
    x_axis_label='Car Manufacturer',
    y_axis_label='Mean Price',
    x_range=manufacturers,
)
grouped_plot.scatter(manufacturers, grouped_average['price'])
show(grouped_plot)

<b> Assign the value of vertical to the xaxis.major_label_orientation attribute of our grouped_plot </b>

In [9]:
# Set the x-axis tick labels (the manufacturer names) to vertical orientation,
# then render the figure again with the new setting.
grouped_plot.xaxis.major_label_orientation = 'vertical'
show(grouped_plot)

<b> Import and set up a new LinearColorMapper object with a palette of Magma256, and the min and max prices for the low and high arguments </b>

In [10]:
from bokeh.models import LinearColorMapper

# Map mean prices linearly onto the Magma256 palette, from the lowest to the
# highest manufacturer mean price. Series.min()/max() are vectorized and skip
# NaN values, unlike the Python built-ins min()/max().
color_mapper = LinearColorMapper(palette='Magma256',
                                 low=grouped_average['price'].min(),
                                 high=grouped_average['price'].max())

<b> Create a new figure with the same title, labels, and x_range as before, and color each point by its mean price using the color mapper </b>

In [11]:
# Same figure as before, but each marker is colored by its y value (the mean
# price) through the color mapper defined above.
price_color = {'field': 'y', 'transform': color_mapper}

grouped_colored_plot = figure(
    title='Car Manufacturer Mean Prices',
    x_axis_label='Car Manufacturer',
    y_axis_label='Mean Price',
    x_range=grouped_average['make'],
)
grouped_colored_plot.scatter(
    grouped_average['make'],
    grouped_average['price'],
    color=price_color,
    size=15,
)
grouped_colored_plot.xaxis.major_label_orientation = "vertical"
show(grouped_colored_plot)

# Extending Plots with Widgets

<b> Enable notebook output using the bokeh.io interface. Import pandas and load the dataset </b>

In [12]:
import pandas as pd
from bokeh.io import output_notebook
output_notebook()

In [13]:
# Read the Rio 2016 athletes dataset and preview the first rows.
# Note: this rebinds `df`, which previously held the automobiles data.
olympia_csv = 'Datasets/olympia2016_athletes.csv'
df = pd.read_csv(olympia_csv)
df.head()

Unnamed: 0,id,name,nationality,sex,dob,height,weight,sport,gold,silver,bronze
0,736041664,A Jesus Garcia,ESP,male,10/17/69,1.72,64.0,athletics,0,0,0
1,532037425,A Lam Shin,KOR,female,9/23/86,1.68,56.0,fencing,0,0,0
2,435962603,Aaron Brown,CAN,male,5/27/92,1.98,79.0,athletics,0,0,1
3,521041435,Aaron Cook,MDA,male,1/2/91,1.83,80.0,taekwondo,0,0,0
4,33922579,Aaron Gate,NZL,male,11/26/90,1.81,71.0,cycling,0,0,0


<b> Import figure, show, and ColumnDataSource from Bokeh, and interact and widgets from ipywidgets </b>

In [14]:
from bokeh.plotting import figure, show, ColumnDataSource
from ipywidgets import interact, widgets

<b> Create two IntSlider widgets: one for the maximum number of athletes (vertical orientation) and one for the maximum number of medals (horizontal orientation) </b>

In [15]:
# Distinct nationalities in order of first appearance. Missing values are
# dropped because groupby() excludes them, so they would have no entry in the
# two per-country aggregates below.
countries = df['nationality'].dropna().unique()
# Number of athletes (rows) per nationality.
athletes_per_country = df.groupby('nationality').size()
# Gold/silver/bronze totals per nationality. The columns must be selected with
# a list: selecting with a bare tuple ('gold', 'silver', 'bronze') was
# deprecated in pandas 1.0 and raises an error on pandas >= 2.0.
medals_per_country = df.groupby('nationality')[['gold', 'silver', 'bronze']].sum()

In [16]:
# Upper bounds for the two sliders: the largest total medal count
# (gold + silver + bronze) and the largest team size of any country.
medal_totals_per_country = medals_per_country.sum(axis='columns')
max_medals = medal_totals_per_country.max()
max_athletes = athletes_per_country.max()

In [17]:
# Settings shared by both sliders: whole-number steps starting at 0, and the
# callback fires only when the handle is released (continuous_update=False).
slider_defaults = dict(min=0, step=1, continuous_update=False)

# Vertical slider limiting the number of athletes per country.
max_athletes_slider = widgets.IntSlider(
    value=max_athletes,
    max=max_athletes,
    description='Max. Athletes:',
    orientation='vertical',
    layout={'width': '100px'},
    **slider_defaults,
)

# Horizontal slider limiting the total number of medals per country.
max_medals_slider = widgets.IntSlider(
    value=max_medals,
    max=max_medals,
    description='Max. Medals:',
    orientation='horizontal',
    **slider_defaults,
)

<b> Set up the function decorated with @interact </b>

In [18]:
# NOTE: get_plot is defined in a later cell of this notebook; that cell has to
# be executed before this callback can render a plot.
@interact(max_athletes=max_athletes_slider, max_medals=max_medals_slider)
def get_olympia_stats(max_athletes, max_medals):
    """Show the medal-comparison plot for the current slider values."""
    olympia_plot = get_plot(max_athletes, max_medals)
    show(olympia_plot)

interactive(children=(IntSlider(value=567, continuous_update=False, description='Max. Athletes:', layout=Layou…

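<b> After implementing the decorated function, implement the get_plot function that it calls. get_plot must be defined before the @interact cell can render a plot, so re-run that cell afterwards </b>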

In [19]:
def get_plot(max_athletes, max_medals):
    """Build the medal-comparison plot for the countries within both limits.

    Args:
        max_athletes: Inclusive upper bound on a country's number of athletes.
        max_medals: Inclusive upper bound on a country's total medal count
            (gold + silver + bronze).

    Returns:
        A Bokeh figure with one circular marker per remaining country, placed
        at (total medals, number of athletes), with hover tooltips.
    """
    # Sum the medal columns once per country instead of on every loop pass.
    medal_totals = medals_per_country.sum(axis=1)
    filtered_countries = [
        country for country in countries
        if athletes_per_country[country] <= max_athletes
        and medal_totals[country] <= max_medals
    ]

    data_source = get_datasource(filtered_countries)

    # Each '@name' refers to a column of data_source.
    TOOLTIPS = [
        ('Country', '@countries'),
        ('Num of Athletes', '@y'),
        ('Gold', '@gold'),
        ('Silver', '@silver'),
        ('Bronze', '@bronze')
    ]

    # width/height replace plot_width/plot_height, which were removed in
    # Bokeh 3.0.
    plot = figure(title='Rio Olympics 2016 - Medal comparison',
                  x_axis_label='Number of Medals',
                  y_axis_label='Num of Athletes',
                  width=800,
                  height=500,
                  tooltips=TOOLTIPS)

    # scatter() draws circular markers by default and matches the other plots
    # in this notebook; circle() with a screen-unit size is deprecated in
    # recent Bokeh releases.
    plot.scatter('x', 'y', source=data_source, size=20, color='color', alpha=0.5)
    return plot

In [20]:
import random


def get_random_color():
    """Return a random color as a '#rrggbb' hexadecimal string."""
    rgb_value = random.randint(0, 0xFFFFFF)
    return f'#{rgb_value:06x}'

<b> Create our ColumnDataSource and use it for the tooltips and for drawing the circle glyphs </b>

In [21]:
def get_datasource(filtered_countries):
    """Assemble the ColumnDataSource that backs the medal-comparison plot.

    Args:
        filtered_countries: Nationality codes of the countries to include.

    Returns:
        A ColumnDataSource with one entry per country: a random color, the
        country code, its gold/silver/bronze counts, x (sum of the country's
        medal row) and y (its entry in athletes_per_country).
    """
    # Look up each country's medal row once and reuse it for every column.
    medal_rows = [medals_per_country.loc[country] for country in filtered_countries]

    columns = {
        'color': [get_random_color() for _ in filtered_countries],
        'countries': filtered_countries,
        'gold': [row['gold'] for row in medal_rows],
        'silver': [row['silver'] for row in medal_rows],
        'bronze': [row['bronze'] for row in medal_rows],
        'x': [row.sum() for row in medal_rows],
        'y': [athletes_per_country.loc[country].sum() for country in filtered_countries],
    }
    return ColumnDataSource(data=columns)

In [22]:
# Apply the decorator again now that get_plot and get_datasource are defined,
# so the sliders drive a working plot.
@interact(max_athletes=max_athletes_slider, max_medals=max_medals_slider)
def get_olympia_stats(max_athletes, max_medals):
    """Render the medal-comparison plot for the given slider limits."""
    filtered_plot = get_plot(max_athletes, max_medals)
    show(filtered_plot)

interactive(children=(IntSlider(value=567, continuous_update=False, description='Max. Athletes:', layout=Layou…