# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [3]:
import pandas as pd
import plotly.plotly as py
import cufflinks as cf
from ipywidgets import interact
import numpy as np

cf.go_offline()

In [4]:
data = pd.read_excel('../data/Online Retail.xlsx')
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [5]:
data['InvoiceDate'] = data['InvoiceDate'].apply(lambda x: pd.to_datetime(x,format='%Y%m%d'))

In [6]:
data.dtypes

InvoiceNo               int64
InvoiceDate    datetime64[ns]
StockCode              object
Description            object
Quantity                int64
UnitPrice             float64
Revenue               float64
CustomerID              int64
Country                object
dtype: object

In [7]:
countries_list = list(data['Country'].unique())
countries_list.remove('United Kingdom')

In [8]:
data.loc[(data['InvoiceDate']>'2011-04-30') &\
         (data['Country'].isin(countries_list))].pivot_table(index = 'Country',\
                                                             values = ('Quantity','Revenue'),\
                                                             aggfunc = 'sum').iplot(kind = 'bar',\
                                                                                   xTitle = 'Country',
                                                                                   yTitle = 'Quantity',
                                                                                   title = 'Units & Revenue by Country',
                                                                                   subplots = True)

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [9]:
data.loc[(data['InvoiceDate'] >= '2011-01-01') & (data['InvoiceDate'] <= '2011-05-31') \
         & (data['Country']=='France')].pivot_table(index = 'InvoiceDate'
                                                    ,values = ('Quantity','Revenue')).iplot(kind = 'line',
                                                                                             xTitle = 'Dates',
                                                                                             yTitle = 'Quantity & Revenue',
                                                                                             title = 'Qty & Rev evolution sold to France')

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [10]:
# Just checking that the description exists
data.loc[(data['Description']=='PARTY BUNTING')].head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
252864,536956,2010-12-03 12:43:00,47566,PARTY BUNTING,5,4.65,23.25,14210,United Kingdom
252865,537065,2010-12-05 11:57:00,47566,PARTY BUNTING,5,4.65,23.25,12567,France
252866,537128,2010-12-05 12:15:00,47566,PARTY BUNTING,2,4.65,9.3,12841,United Kingdom
252867,537142,2010-12-05 12:57:00,47566,PARTY BUNTING,1,4.65,4.65,12748,United Kingdom
252868,537420,2010-12-06 15:18:00,47566,PARTY BUNTING,5,4.65,23.25,17519,United Kingdom


In [11]:
pivot_PB= data.loc[(data['Description']=='PARTY BUNTING')].pivot_table(columns='Country',
                                                             values = ('Quantity','UnitPrice'),
                                                             aggfunc = 'mean')

In [12]:
data.loc[data['Description']=='PARTY BUNTING',['Country','Quantity','UnitPrice']].groupby(['Country'], as_index = False).agg('mean')


Unnamed: 0,Country,Quantity,UnitPrice
0,Australia,33.125,4.7125
1,Austria,8.0,4.95
2,Belgium,4.0,4.95
3,Channel Islands,13.333333,4.95
4,Cyprus,2.333333,4.75
5,Denmark,12.0,4.95
6,EIRE,21.210526,4.739474
7,Finland,6.0,4.95
8,France,5.727273,4.922727
9,Germany,6.8,4.89


In [13]:
data.loc[data['Description']=='PARTY BUNTING',
         ['Country','Quantity','UnitPrice']].groupby(['Country'],as_index = False).agg('mean').iplot(x = 'Quantity', 
                                                                                    y = 'UnitPrice',
                                                                                    xTitle = 'Quantity',
                                                                                    yTitle = 'UnitPrice',
                                                                                    title = 'Relationshio btween Qty & UnitPrice',
                                                                                    categories = 'Country')

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [14]:
# First lets create a list of countries to be included in the analysis
country_list = ['EIRE','Germany','France','Netherlands']

In [15]:
# Now lets plot a graph for each country
data.loc[(data['Country'].isin(country_list))].pivot_table(index = 'InvoiceNo',columns = 'Country',
                                                           values = ('Quantity'),
                                                           aggfunc = 'count').iplot(kind = 'hist',
                                                                           xTitle = 'Invoice',
                                                                           yTitle = 'Qty per Invoice',
                                                                           title = 'Distrib of Qty per Invoice per country',
                                                                           subplots = True)

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [16]:
product_list = ['JUMBO BAG RED RETROSPOT', 
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']

country_list = ['EIRE', 'Germany', 'France', 'Netherlands']

In [17]:
data.loc[(data['Country'].isin(country_list)) &\
         data['Description'].isin(product_list)
         ,['Country','Description','Revenue']].groupby(['Country','Description'],as_index=False).agg({'Revenue':'sum'})

Unnamed: 0,Country,Description,Revenue
0,EIRE,CREAM HANGING HEART T-LIGHT HOLDER,2740.8
1,EIRE,JUMBO BAG RED RETROSPOT,278.72
2,EIRE,REGENCY CAKESTAND 3 TIER,7388.55
3,France,CREAM HANGING HEART T-LIGHT HOLDER,131.75
4,France,JUMBO BAG RED RETROSPOT,903.37
5,France,REGENCY CAKESTAND 3 TIER,2816.85
6,Germany,CREAM HANGING HEART T-LIGHT HOLDER,35.4
7,Germany,JUMBO BAG RED RETROSPOT,1072.76
8,Germany,REGENCY CAKESTAND 3 TIER,9061.95
9,Netherlands,CREAM HANGING HEART T-LIGHT HOLDER,1167.0


In [18]:
data.loc[(data['Country'].isin(country_list)) &\
         data['Description'].isin(product_list)
         ,['Country','Description','Revenue']].groupby(['Country','Description'],as_index=False).agg({'Revenue':'sum'}).\
         iplot(kind = 'bar',
               x = ('Country'),
               y = 'Revenue',
              color = ['blue','red','green'])


In [19]:
table_prod_country = data.loc[(data['Country'].isin(country_list)) &\
         data['Description'].isin(product_list)
         ,['Country','Description','Revenue']].groupby(['Country','Description'],as_index=False).agg({'Revenue':'sum'})

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [20]:
uk = data[data['Country']=='United Kingdom']
uk['Year'] = pd.DatetimeIndex(uk['InvoiceDate']).year
uk['Month'] = pd.DatetimeIndex(uk['InvoiceDate']).month
uk['Day'] = pd.DatetimeIndex(uk['InvoiceDate']).day

In [21]:
from plotly import graph_objs as go
from ipywidgets import interact, interactive, fixed, interact_manual

In [22]:
uk.pivot_table(index = ('InvoiceDate'),values = 'Quantity',aggfunc = 'sum').reset_index().head()

Unnamed: 0,InvoiceDate,Quantity
0,2010-12-01 08:26:00,40
1,2010-12-01 08:28:00,12
2,2010-12-01 08:34:00,98
3,2010-12-01 08:35:00,3
4,2010-12-01 09:00:00,80


In [23]:
# With the interact @ we define the two variables that will have a dropdown menu
@interact(Year=list(uk['Year'].unique()), 
          Month=list(uk['Month'].unique()))

# We have to keep in mind that by default it will select a month and a year, its not that we can do multiselection
def line(Year, Month):
    uk_filtered = uk[(uk['Month']==Month) & 
              (uk['Year']==Year)]
    
    uk_filtered.pivot_table(index = ('Day'),values = 'Quantity',aggfunc = 'sum').iplot(kind = 'line',
                                                                                 xTitle = 'Dates',
                                                                                 yTitle = 'Qty Sold',
                                                                                 title = 'Units Sold by date in UK')

interactive(children=(Dropdown(description='Year', options=(2010, 2011), value=2010), Dropdown(description='Mo…

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [24]:
agg_func = {'InvoiceNo':'nunique',
            'CustomerID':'nunique'}

products = uk.groupby('Description').agg(agg_func).reset_index()

In [25]:
# I am reducing the product Series because otherwise it takes ages to execute
product_list = ['JUMBO BAG RED RETROSPOT', 
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']

In [26]:
products.head()

Unnamed: 0,Description,InvoiceNo,CustomerID
0,4 PURPLE FLOCK DINNER CANDLES,35,30
1,50'S CHRISTMAS GIFT BAG LARGE,100,98
2,DOLLY GIRL BEAKER,100,77
3,I LOVE LONDON MINI BACKPACK,55,46
4,NINE DRAWER OFFICE TIDY,25,24


In [27]:
# With the interact @ we kind of define a variable that the plot will look at when creating the boundaries
@interact(bins_x = (0, len(products['InvoiceNo'])),
          bins_y = (0, len(products['CustomerID'])))

def scatter(bins_x,bins_y):
    products[products['Description'].isin(product_list)].pivot_table(index = 'Description'
                                ,values = ('InvoiceNo','CustomerID')
                                ,aggfunc = 'sum').reset_index().iplot(kind = 'scatter' ,
                                                                      x = 'InvoiceNo',
                                                                      y = 'CustomerID',
                                                                     categories='Description')

interactive(children=(IntSlider(value=1810, description='bins_x', max=3620), IntSlider(value=1810, description…

## 8. Creat an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the product that contain the text entered in their description.

In [28]:
# Again to reduce computational use, I am gonna filter by three products
product_list = ['JUMBO BAG RED RETROSPOT', 
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']

In [33]:
@interact(Product='')

def chart(Product):
    data_product = data[data['PaymentMethod'].str.contains(Payment)]
    data = data.groupby('TenureLevel').agg({'MonthlyCharges':'sum'}).reset_index()
    custom_dict = {'New': 0, 'Regular': 1, 'Loyal': 2, 'Very Loyal' : 3}  
    data = data.iloc[data['TenureLevel'].map(custom_dict).argsort()].set_index('TenureLevel')
    
    data.iplot(kind='bar', xTitle='Values')
    data.loc[data['Description'].isin(product_list)].pivot_table(index = 'Description', 
                                                             values = 'Revenue', 
                                                             aggfunc = 'sum').reset_index().iplot(kind = 'bar',
                                                                                                  x = 'Description',
                                                                                                  y = 'Revenue',
                                                                                                  xTitle = 'Product',
                                                                                                  yTitle = 'Revenues',
                                                                                                  title = 'Revenues by product')