# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [1]:
import pandas as pd
import numpy as np

import plotly.plotly as py
import cufflinks as cf

import seaborn as sns
import matplotlib.pyplot as plt

cf.go_offline()

import ipywidgets as widgets
from ipywidgets import interact, interact_manual
from IPython.display import display

In [2]:
# Load the Online Retail workbook through a path relative to the notebook's
# working directory, so no os.chdir() call is required.

data = pd.read_excel('../data/Online Retail.xlsx')

In [3]:
# Preview the first rows to check which columns were loaded.
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [4]:
# Half-open window [2011-04-01, 2011-05-01). InvoiceDate carries a time of day,
# so an upper bound of <= '2011-04-30' (i.e. midnight) would silently drop
# every invoice raised during April 30.
in_april_2011 = (data['InvoiceDate'] >= '2011-04-01') & (data['InvoiceDate'] < '2011-05-01')
outside_uk = data['Country'] != 'United Kingdom'

# Total quantity and revenue per country for the selected rows.
data_bar = (
    data[in_april_2011 & outside_uk]
    .groupby('Country', as_index=False)
    .agg({'Quantity': 'sum', 'Revenue': 'sum'})
)
data_bar.iplot(kind='bar', x='Country', xTitle='Country', title="Quantity&Revenue by country for 04-2011 excl UK")

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [5]:
# Half-open window [2011-01-01, 2011-06-01). InvoiceDate carries a time of day,
# so an upper bound of <= '2011-05-31' (i.e. midnight) would silently drop
# every invoice raised during May 31.
in_jan_to_may_2011 = (data['InvoiceDate'] >= '2011-01-01') & (data['InvoiceDate'] < '2011-06-01')
sold_to_france = data['Country'] == 'France'

# Quantity and revenue summed per invoice timestamp.
data_line = (
    data[in_jan_to_may_2011 & sold_to_france]
    .groupby('InvoiceDate', as_index=False)
    .agg({'Quantity': 'sum', 'Revenue': 'sum'})
)

data_line.iplot(kind='line', x='InvoiceDate', xTitle='Invoice Date')

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [6]:
# Mean quantity and mean unit price per country, restricted to PARTY BUNTING rows.
is_party_bunting = data['Description'] == 'PARTY BUNTING'
data_scatter = (
    data.loc[is_party_bunting]
    .groupby('Country', as_index=False)
    .agg({'Quantity': 'mean', 'UnitPrice': 'mean'})
)

# One point per country; `categories` colour-codes the points by country.
data_scatter.iplot(
    x='Quantity',
    y='UnitPrice',
    categories='Country',
    xTitle='Average quantity',
    yTitle='Average Unit Price',
    title='Average quantity vs Average Unit Price per Country',
)

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [7]:
# Rows = invoice, columns = country, cells = total quantity on that invoice.
data_hist_set = data.pivot_table(values='Quantity', columns='Country', index='InvoiceNo', aggfunc='sum')
data_hist_set = data_hist_set[['EIRE', 'Germany', 'France', 'Netherlands']]

# Each histogram bins the per-invoice quantity, so the x-axis is a quantity and,
# because of histnorm='percent', the y-axis is a share of invoices. The axis
# titles are labelled accordingly (they previously read InvoiceNo / Quantity).
data_hist_set.iplot(kind='hist', histnorm='percent', xTitle='Quantity per invoice', subplots=True,
                    yTitle='Percent of invoices', title='Quantity per invoice')

## 5. Create an interactive side-by-side bar chart showing revenue for each of the countries listed below (bars), grouped by each of the products listed below.

In [8]:
# Products compared in the side-by-side bar chart.
product_list = [
    'JUMBO BAG RED RETROSPOT',
    'CREAM HANGING HEART T-LIGHT HOLDER',
    'REGENCY CAKESTAND 3 TIER',
]

# Countries drawn as one bar each per product.
country_list = [
    'EIRE',
    'Germany',
    'France',
    'Netherlands',
]


In [9]:
# Keep only rows for the requested countries and products.
in_scope = data['Country'].isin(country_list) & data['Description'].isin(product_list)

# Rows = product, columns = country, cells = summed revenue. reset_index()
# turns Description back into a regular column so it can be used as the x-axis.
# Chained instead of inplace=True so re-running the cell always rebuilds the
# frame from `data`; the stray bare `data_bar_side` expression (a no-op in the
# middle of a cell) has been removed.
data_bar_side = (
    data[in_scope]
    .pivot_table(values='Revenue', columns='Country', index='Description', aggfunc='sum')
    .reset_index()
)

data_bar_side.iplot(kind='bar', x='Description', xTitle='Description',
                    yTitle='Revenue', title='Revenue by product by country')

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [10]:
# Break the invoice timestamp into calendar parts; the index is built once and
# reused for all three columns.
invoice_stamps = pd.DatetimeIndex(data['InvoiceDate'])
data['Year'] = invoice_stamps.year
data['Month'] = invoice_stamps.month
data['Day'] = invoice_stamps.day

# United Kingdom rows only, including the new Year/Month/Day columns.
is_uk = data['Country'] == 'United Kingdom'
uk = data.loc[is_uk]
uk.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Year,Month,Day
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom,2010,12,1
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom,2010,12,1


In [11]:
# Total UK quantity for each (Year, Month, Day) combination, with the three
# grouping keys restored as ordinary columns.
day_keys = ['Year', 'Month', 'Day']
quantity_per_day = uk.groupby(day_keys)[['Quantity']].sum()
data_uk = quantity_per_day.reset_index()

In [12]:
# Rebuild a single real date from the Year/Month/Day parts so the chart has one
# continuous, chronologically ordered x-axis instead of a list of three columns.
# pd.to_datetime assembles dates from year/month/day columns; the columns are
# lower-cased to match the names it documents. assign() leaves data_uk itself
# untouched for the cells below.
date_parts = data_uk[['Year', 'Month', 'Day']].rename(columns=str.lower)
data_uk_dated = data_uk.assign(Date=pd.to_datetime(date_parts))

data_uk_dated.iplot(kind='line', x='Date', y='Quantity', xTitle='Date',
                    yTitle='Quantity', title='Quantity by day for UK')

In [25]:
# Long version, built by hand from widgets: two dropdowns drive a seaborn line
# chart of daily UK quantity.

# Output areas: `output` receives the filtered table, `plot_output` the chart.
output = widgets.Output()
plot_output = widgets.Output()

dropdown_year = widgets.Dropdown(options = data_uk.Year.unique())
dropdown_month = widgets.Dropdown(options = data_uk.Month.unique())


def common_filtering(year, month):
    """Redraw the table and the chart for the selected year and month.

    Parameters
    ----------
    year, month
        Values currently selected in the Year and Month dropdowns.
    """
    output.clear_output()
    plot_output.clear_output()
    common_filter = data_uk[(data_uk.Year == year) & (data_uk.Month == month)]

    with output:
        display(common_filter)

    with plot_output:
        # Plot the *filtered* rows. Plotting data_uk here (as before) ignored
        # the dropdown selection, so the chart never changed.
        sns.lineplot(x='Day', y='Quantity', data=common_filter)
        plt.show()


def dropdown_year_eventhandler(change):
    """Observer for the Year dropdown; `change.new` is the newly selected year."""
    common_filtering(change.new, dropdown_month.value)


def dropdown_month_eventhandler(change):
    """Observer for the Month dropdown; `change.new` is the newly selected month."""
    common_filtering(dropdown_year.value, change.new)


# Bind the handlers to the dropdowns:
dropdown_year.observe(dropdown_year_eventhandler, names='value')
dropdown_month.observe(dropdown_month_eventhandler, names='value')

display(dropdown_year)
display(dropdown_month)
display(plot_output)

# Draw once for the initial selection; otherwise the plot area stays empty
# until the user changes one of the dropdowns.
common_filtering(dropdown_year.value, dropdown_month.value)

Dropdown(options=(2010, 2011), value=2010)

Dropdown(options=(12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), value=12)

Output()

In [23]:
# Compact solution: interact() builds the Year and Month dropdowns and re-runs
# the function whenever either selection changes.

@interact(Year=uk['Year'].unique(),
          Month=uk['Month'].unique())
def linechart(Year, Month):
    """Plot total UK quantity per day for the selected year and month.

    Parameters
    ----------
    Year, Month
        Values chosen in the dropdowns generated by interact().
    """
    df = uk[(uk['Year']==Year) & (uk['Month']==Month)]
    # Select Quantity *before* summing. Summing every column first also tried
    # to add up the text and datetime columns, which is wasted work and raises
    # on pandas versions that no longer drop non-numeric columns silently.
    grouped = df.groupby('Day')['Quantity'].sum()
    grouped.iplot(kind='line', title='UK Sales Quantity by Day')

interactive(children=(Dropdown(description='Year', options=(2010, 2011), value=2010), Dropdown(description='Mo…

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [85]:
# Per-product aggregations: distinct invoices and customers, summed quantity
# and revenue, mean unit price.
agg_func = dict(
    InvoiceNo='nunique',
    Quantity='sum',
    UnitPrice='mean',
    Revenue='sum',
    CustomerID='nunique',
)

# One row per product description, computed over UK rows only.
per_product = uk.groupby('Description').agg(agg_func)
products = per_product.reset_index()

In [86]:
# Preview the per-product aggregates built in the previous cell.
products.head()

Unnamed: 0,Description,InvoiceNo,Quantity,UnitPrice,Revenue,CustomerID
0,4 PURPLE FLOCK DINNER CANDLES,35,134,2.318421,255.46,30
1,50'S CHRISTMAS GIFT BAG LARGE,100,1721,1.2479,2067.25,98
2,DOLLY GIRL BEAKER,100,661,1.25,826.25,77
3,I LOVE LONDON MINI BACKPACK,55,181,4.15,751.15,46
4,NINE DRAWER OFFICE TIDY,25,44,14.761538,628.4,24


In [108]:
# Slider bounds are cast to plain Python ints so interact() receives ordinary
# integers rather than NumPy scalars.
@interact(Invoices=(int(products['InvoiceNo'].min()), int(products['InvoiceNo'].max())),
          Customers=(int(products['CustomerID'].min()), int(products['CustomerID'].max())))
def scatter_plot(Invoices, Customers):
    """Scatter products by invoice count (x) and customer count (y).

    Parameters
    ----------
    Invoices
        Lower bound (inclusive) on a product's number of distinct invoices.
    Customers
        Lower bound (inclusive) on a product's number of distinct customers.
    """
    # Inclusive bounds: with a strict '>' the products sitting exactly on the
    # slider minimum could never be displayed, even with both sliders at their
    # lowest setting.
    df = products[(products['InvoiceNo']>=Invoices) & (products['CustomerID']>=Customers)]

    # The two thresholds together can exclude every product; report that
    # instead of handing an empty frame to the plotting call.
    if df.empty:
        print('No products at or above the selected thresholds.')
        return

    df.iplot(kind='scatter', x='InvoiceNo', y='CustomerID', categories='Description', mode='markers',
             xTitle = 'Number of invoices', yTitle = 'Number of customers',
             title='Products by number of invoices & customers')

interactive(children=(IntSlider(value=946, description='Invoices', max=1891, min=1), IntSlider(value=411, desc…

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show only the products that contain the entered text in their description.

In [134]:
# Total revenue per product description. Computed once here rather than inside
# the callback, which runs again on every change to the text box.
data_rev_prod = data.groupby(['Description'])['Revenue'].sum().reset_index()


@interact(product='')
def barchart(product):
    """Bar chart of revenue for products whose description contains `product`.

    Parameters
    ----------
    product
        Text typed into the widget. It is matched as a literal,
        case-insensitive substring; an empty string matches every product.
    """
    # regex=False: typed text such as '(' or '+' is matched literally instead
    # of being compiled as a regular expression (which raised on partial input).
    # case=False: also matches descriptions that are not entirely upper-case.
    # na=False: a non-text description counts as "no match" rather than
    # producing a NaN in the boolean mask.
    matches = data_rev_prod['Description'].str.contains(product, case=False, regex=False, na=False)
    df = data_rev_prod[matches]

    # Nothing matched the text; say so instead of plotting an empty frame.
    if df.empty:
        print('No product description contains: ' + product)
        return

    df.iplot(kind='bar', x='Description', xTitle='Product', yTitle='Revenue', title='Revenue by product')

interactive(children=(Text(value='', description='product'), Output()), _dom_classes=('widget-interact',))