# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [191]:
import pandas as pd
import numpy as np

import plotly.plotly as py
import cufflinks as cf
from ipywidgets import widgets
from ipywidgets import interact

cf.go_offline()

In [192]:
#os.chdir(r'../data') #chose the directory to look in

data = pd.read_excel('../data/Online Retail.xlsx')

In [193]:
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [159]:
data_bar = data[(data['InvoiceDate']>='2011-04-01')&(data['InvoiceDate']<='2011-04-30')&
                (data['Country']!='United Kingdom')]

data_bar = data_bar.groupby('Country', as_index=False).agg({'Quantity':'sum', 'Revenue':'sum'})
data_bar.iplot(kind='bar', x='Country', xTitle='Country', title="Quantity&Revenue by country for 04-2011 excl UK")

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [160]:
data_line = data[(data['InvoiceDate']>='2011-01-01')&(data['InvoiceDate']<='2011-05-31')
                 &(data['Country']=='France')]
data_line = data_line.groupby('InvoiceDate', as_index=False).agg({'Quantity':'sum', 'Revenue':'sum'})

data_line.iplot(kind='line', x='InvoiceDate', xTitle='Invoice Date')

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [161]:
data_scatter = data[data['Description']=='PARTY BUNTING']
data_scatter = data_scatter.groupby('Country', as_index=False).agg({'Quantity':'mean', 'UnitPrice':'mean'})

data_scatter.iplot(x='Quantity', y='UnitPrice', categories='Country',
           xTitle='Average quantity', yTitle='Average Unit Price',
           title='Average quantity vs Average Unit Price per Country')

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [162]:
data_hist_set = data.pivot_table(values='Quantity', columns='Country', index='InvoiceNo', aggfunc='sum')
data_hist_set = data_hist_set[['EIRE', 'Germany', 'France', 'Netherlands']]

data_hist_set.iplot(kind='hist', histnorm='percent', xTitle='InvoiceNo', subplots=True,
                    yTitle='Quantity', title='Quantity per invoice')

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [163]:
product_list = ['JUMBO BAG RED RETROSPOT', 
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']

country_list = ['EIRE', 'Germany', 'France', 'Netherlands']


In [164]:
data_bar_side = data[(data['Country'].isin(country_list))&(data['Description'].isin(product_list))]
data_bar_side = data_bar_side.pivot_table(values='Revenue', columns='Country', 
                         index='Description', aggfunc='sum')

data_bar_side.reset_index(inplace=True)
data_bar_side
data_bar_side.iplot(kind='bar', x='Description', xTitle='Description', 
           yTitle='Revenue', title='Revenue by product by country')

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [194]:
data['Year'] = pd.DatetimeIndex(data['InvoiceDate']).year
data['Month'] = pd.DatetimeIndex(data['InvoiceDate']).month
data['Day'] = pd.DatetimeIndex(data['InvoiceDate']).day

uk = data[data['Country']=='United Kingdom']
uk.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Year,Month,Day
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom,2010,12,1
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom,2010,12,1


In [195]:
data_uk = uk.groupby(['Year','Month','Day']).agg({'Quantity':'sum'}).reset_index()
data_uk.iplot(kind='line', x=['Year','Month','Day'], xTitle='Date',
           yTitle='Quantity', title='Quantity by day for UK')

In [208]:
def choose_date():
    return widgets.Dropdown(options=['2010', '2011'], value='2010', description='Year:', disabled=False)
    return widgets.Dropdown(options=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'], 
                                value='2', description='Month:', disabled=False)
choose_date()

Dropdown(description='Year:', options=('2010', '2011'), value='2010')

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [100]:
agg_func = {'InvoiceNo':'nunique',
            'Quantity':'sum',
            'UnitPrice':'mean',
            'Revenue':'sum',
            'CustomerID':'nunique'}

products = uk.groupby('Description').agg(agg_func)

In [None]:
data_scat_slide = 

data6.iplot(x='tenure', y='TotalCharges', categories='InternetService',
           xTitle='Tenure', yTitle='Total Charges',
           title='Charges vs. Tenure: One Year Contract, Credit Card Customers')

In [123]:
month = widgets.IntSlider(
    value=1.0,
    min=1.0,
    max=12.0,
    step=1.0,
    description='Month:',
    continuous_update=False
)
month

IntSlider(value=1, continuous_update=False, description='Month:', max=12, min=1)

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the product that contain the text entered in their description.

In [125]:
data_rev_prod = data.groupby(['Description'])['Revenue'].sum().reset_index()
data_rev_prod_plot=data_rev_prod.iplot(kind='bar', x='Description', xTitle='Product', 
           yTitle='Revenue', title='Revenue by product')

In [126]:
textbox = widgets.Text(
    value='Input Product name',
    placeholder='Input Product name',
    description='Product:',
    disabled=False)

textbox

Text(value='Input Product name', description='Product:', placeholder='Input Product name')

In [127]:
def validate():
    if textbox in data_rev_prod['Description'].unique():
        return True
    else:
        return False

In [None]:
def response(change):
    if validate():
        if use_date.value:
            filter_list = [i and j and k for i, j, k in
                           zip(df['month'] == month.value, df['carrier'] == textbox.value,
                               df['origin'] == origin.value)]
            temp_df = df[filter_list]

        else:
            filter_list = [i and j for i, j in
                           zip(df['carrier'] == 'DL', df['origin'] == origin.value)]
            temp_df = df[filter_list]
        x1 = temp_df['arr_delay']
        x2 = temp_df['dep_delay']
        with g.batch_update():
            g.data[0].x = x1
            g.data[1].x = x2
            g.layout.barmode = 'overlay'
            g.layout.xaxis.title = 'Delay in Minutes'
            g.layout.yaxis.title = 'Number of Delays'