# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [1]:
import pandas as pd
import chart_studio.plotly as py
import cufflinks as cf
from ipywidgets import interact

cf.go_offline()

In [2]:
df = pd.read_excel('../data/Online Retail.xlsx')

In [3]:
df.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 396034 entries, 0 to 396033
Data columns (total 9 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   InvoiceNo    396034 non-null  int64         
 1   InvoiceDate  396034 non-null  datetime64[ns]
 2   StockCode    396034 non-null  object        
 3   Description  396034 non-null  object        
 4   Quantity     396034 non-null  int64         
 5   UnitPrice    396034 non-null  float64       
 6   Revenue      396034 non-null  float64       
 7   CustomerID   396034 non-null  int64         
 8   Country      396034 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(3), object(3)
memory usage: 27.2+ MB


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [5]:
data = df[df.Country!='United Kingdom']
data = data.loc[(data['InvoiceDate'] >= '2011-4-1') & (data['InvoiceDate'] <= '2011-4-30')]

data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
724,549667,2011-04-11 12:20:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.7,14911,EIRE
807,551163,2011-04-26 15:52:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,9,2.95,26.55,12573,France
3320,550899,2011-04-21 12:07:00,22752,SET 7 BABUSHKA NESTING BOXES,2,8.5,17.0,13505,Switzerland
4880,550527,2011-04-19 10:48:00,84879,ASSORTED COLOUR BIRD ORNAMENT,40,1.69,67.6,12476,Germany
4882,550620,2011-04-19 13:39:00,84879,ASSORTED COLOUR BIRD ORNAMENT,32,1.69,54.08,12585,Germany


In [6]:
data = data.groupby('Country').sum()
data = data[['Quantity', 'Revenue']]
data.iplot(kind='bar', xTitle='Country', title='Total quantity and revenue by country')

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [7]:
data = df[df.Country=='France']
data = data.loc[(data['InvoiceDate'] >= '2011-5-31') & (data['InvoiceDate'] <= '2011-6-1')]
data = data.sort_values(by = 'InvoiceDate')
data

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
84676,555096,2011-05-31 13:32:00,20726,LUNCH BAG WOODLAND,10,1.65,16.5,12682,France
340181,555096,2011-05-31 13:32:00,23174,REGENCY SUGAR BOWL GREEN,4,4.15,16.6,12682,France
339909,555096,2011-05-31 13:32:00,23175,REGENCY MILK JUG PINK,4,3.25,13.0,12682,France
338293,555096,2011-05-31 13:32:00,23202,JUMBO BAG VINTAGE LEAF,10,2.08,20.8,12682,France
337209,555096,2011-05-31 13:32:00,23203,JUMBO BAG DOILEY PATTERNS,10,2.08,20.8,12682,France
336658,555096,2011-05-31 13:32:00,23200,JUMBO BAG PEARS,10,2.08,20.8,12682,France
335712,555096,2011-05-31 13:32:00,23209,LUNCH BAG DOILEY PATTERN,10,1.65,16.5,12682,France
341760,555096,2011-05-31 13:32:00,23084,RABBIT NIGHT LIGHT,72,1.79,128.88,12682,France
334621,555096,2011-05-31 13:32:00,23298,"BUNTING , SPOTTY",6,4.95,29.7,12682,France
323488,555096,2011-05-31 13:32:00,23155,KNICKERBOCKERGLORY MAGNET ASSORTED,12,0.83,9.96,12682,France


In [8]:
data1 = data.groupby('Country').sum()
data1 = data1[['Quantity', 'Revenue']]
display(data1)
data1.iplot(kind='line', x='', xTitle='', color='blue',  
           title='Quantity and revenue sold to France between January 1st and May 31st 2011')

Unnamed: 0_level_0,Quantity,Revenue
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
France,200,404.84


In [9]:
data2 = data[['Quantity', 'Revenue', 'Description']]
data2.iplot(kind='line', x='Description', xTitle='', color='blue',  
           title='Quantity and revenue sold to France between January 1st and May 31st 2011')

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [10]:
data = df.loc[df.Description=='PARTY BUNTING']
data = data.groupby('Country').mean()
data = data[['Quantity', 'UnitPrice']]
data.reset_index(inplace=True)
data.head()

Unnamed: 0,Country,Quantity,UnitPrice
0,Australia,33.125,4.7125
1,Austria,8.0,4.95
2,Belgium,4.0,4.95
3,Channel Islands,13.333333,4.95
4,Cyprus,2.333333,4.75


In [11]:
data.iplot(x='Quantity', y='UnitPrice', categories='Country', xTitle='Average quantity', 
           yTitle='Average unit price', color='blue',  title='PARTY BUNTING by Country')


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead



## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [12]:
eire = df[df.Country=='EIRE']
eire = eire.groupby('InvoiceNo').sum()
display(eire.head())
eire = eire[['Quantity']]
eire.iplot(kind='hist', title='Eire - Quantity per invoice')

Unnamed: 0_level_0,Quantity,UnitPrice,Revenue,CustomerID
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
536540,230,82.39,490.38,283309
536541,12,1.25,15.0,14911
536803,6,3.75,22.5,14911
536890,1548,1.86,322.2,42468
536975,827,302.8,1705.65,1058681


In [13]:
ger = df[df.Country=='Germany']
ger = ger.groupby('InvoiceNo').sum()
display(ger.head())
ger = ger[['Quantity']]
ger.iplot(kind='hist', title='Germany - Quantity per invoice')

Unnamed: 0_level_0,Quantity,UnitPrice,Revenue,CustomerID
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
536527,156,26.37,243.48,177268
536840,147,25.45,137.35,127380
536861,76,31.25,249.5,111843
536967,30,1.95,58.5,12600
536983,59,28.64,157.53,101696


In [14]:
fra = df[df.Country=='France']
fra = fra.groupby('InvoiceNo').sum()
display(fra.head())
fra = fra[['Quantity']]
fra.iplot(kind='hist', title='France - Quantity per invoice')

Unnamed: 0_level_0,Quantity,UnitPrice,Revenue,CustomerID
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
536370,446,37.29,801.86,239077
536852,106,4.81,71.14,76116
536974,130,49.07,300.24,190230
537065,602,269.19,1364.92,779154
537463,581,91.23,961.52,507240


In [15]:
neth = df[df.Country=='Netherlands']
neth = neth.groupby('InvoiceNo').sum()
display(neth.head())
neth = neth[['Quantity']]
neth.iplot(kind='hist', title='Netherlands - Quantity per invoice')

Unnamed: 0_level_0,Quantity,UnitPrice,Revenue,CustomerID
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
536403,96,1.85,177.6,12791
539491,45,31.33,55.96,219690
539731,6668,98.51,8520.92,790884
541206,8210,125.58,10389.06,1157034
541570,6072,56.78,7722.04,483318


## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [16]:
product_list = ['JUMBO BAG RED RETROSPOT', 
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']

country_list = ['EIRE', 'Germany', 'France', 'Netherlands']

In [17]:
data = df.loc[df.Description.isin(product_list)]
data = data.loc[data.Country.isin(country_list)]
data = data[['Country', 'Description', 'Revenue']]
data = data.pivot_table(values='Revenue', columns='Description', 
                        index='Country', aggfunc='sum')

data.reset_index(inplace=True)
data.iplot(kind='bar', x='Country', title='Revenue by country and product', xTitle='Country', yTitle='Revenue')

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [18]:
data = df.copy()
data['Year'] = pd.DatetimeIndex(data['InvoiceDate']).year
data['Month'] = pd.DatetimeIndex(data['InvoiceDate']).month
data['Day'] = pd.DatetimeIndex(data['InvoiceDate']).day
uk = data[data['Country']=='United Kingdom']
uk.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Year,Month,Day
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom,2010,12,1
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom,2010,12,1


In [19]:
uk = uk.pivot_table(values='Quantity', columns='Month', 
                        index='Day', aggfunc='sum').fillna(0)
uk.reset_index(inplace=True)
uk.head()

Month,Day,1,2,3,4,5,6,7,8,9,10,11,12
0,1,0.0,13536.0,7614.0,17149.0,3172.0,6091.0,6078.0,10484.0,22003.0,0.0,14648.0,44810.0
1,2,0.0,8184.0,8148.0,0.0,0.0,12587.0,0.0,10371.0,13732.0,8375.0,22681.0,53947.0
2,3,0.0,14502.0,7842.0,5667.0,9566.0,6429.0,3117.0,15124.0,0.0,24420.0,30681.0,7646.0
3,4,6659.0,10226.0,11186.0,10937.0,15624.0,0.0,9438.0,35474.0,7516.0,22678.0,31205.0,9855.0
4,5,17635.0,0.0,0.0,10564.0,13906.0,10412.0,17063.0,9982.0,16021.0,32641.0,0.0,49477.0


In [20]:
uk.iplot(kind='line', x='Day', title='Quantity sold by day for the United Kingdom',
        xTitle='Day', yTitle='Quantity')

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [21]:
data = df.copy()
data['Year'] = pd.DatetimeIndex(data['InvoiceDate']).year
data['Month'] = pd.DatetimeIndex(data['InvoiceDate']).month
data['Day'] = pd.DatetimeIndex(data['InvoiceDate']).day
uk = data[data['Country']=='United Kingdom']

agg_func = {'InvoiceNo':'nunique',
            'Quantity':'sum',
            'UnitPrice':'mean',
            'Revenue':'sum',
            'CustomerID':'nunique'}

products = uk.groupby('Description').agg(agg_func)

In [22]:
products.reset_index(inplace=True)
products = products.sort_values('CustomerID')
products.head()

Unnamed: 0,Description,InvoiceNo,Quantity,UnitPrice,Revenue,CustomerID
2401,PINK/AMETHYST/GOLD NECKLACE,1,4,6.95,27.8,1
2285,PINK BOUDOIR T-LIGHT HOLDER,1,12,0.85,10.2,1
492,BLUE/YELLOW CERAMIC CANDLE HOLDER,1,4,1.65,6.6,1
491,BLUE/NAT SHELL NECKLACE W PENDANT,1,1,5.95,5.95,1
490,BLUE/GREEN SHELL NECKLACE W PENDANT,1,1,5.95,5.95,1


In [24]:
products.iplot(x='InvoiceNo', y='CustomerID', kind='scatter')

## 8. Creat an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the product that contain the text entered in their description.