# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [105]:
import pandas as pd
import chart_studio.plotly as py
import cufflinks as cf
from ipywidgets import interact
import numpy as np

cf.go_offline()

In [2]:
data = pd.read_excel('../Data/Online Retail.xlsx')

In [3]:
display(data.head())
print(data.shape)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


(396034, 9)


In [4]:
data.dtypes

InvoiceNo               int64
InvoiceDate    datetime64[ns]
StockCode              object
Description            object
Quantity                int64
UnitPrice             float64
Revenue               float64
CustomerID              int64
Country                object
dtype: object

## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [5]:
dataone = data[data["Country"] != "United Kingdom"]
dataone.shape

(42051, 9)

In [6]:
dataone = dataone[(dataone['InvoiceDate'] >= '2011-04-01') & (dataone['InvoiceDate'] < '2011-05-01')]
dataone.shape

(1688, 9)

In [7]:
plotone = dataone.groupby("Country").sum().drop(columns=["InvoiceNo","UnitPrice","CustomerID"])
plotone.head()

Unnamed: 0_level_0,Quantity,Revenue
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Australia,224,421.6
Austria,308,584.78
Belgium,1170,1788.48
Brazil,356,1143.6
Channel Islands,96,243.0


In [8]:
plotone.iplot(kind='bar', yTitle='Units', title='Quantity & Revenue')

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [9]:
datatwo = data[data["Country"] == "France"]
print(datatwo.shape)
datatwo = datatwo[(datatwo['InvoiceDate'] >= '2011-01-01') & (datatwo['InvoiceDate'] <= '2011-05-31')]
print(datatwo.shape)

(8034, 9)
(2454, 9)


In [10]:
#pandas.resample unifica en días (D) o weeks (W) la columna tipo datetime.
plottwo = datatwo.resample('W', on='InvoiceDate').sum().drop(columns=["InvoiceNo","UnitPrice","CustomerID"])
plottwo.head()

Unnamed: 0_level_0,Quantity,Revenue
InvoiceDate,Unnamed: 1_level_1,Unnamed: 2_level_1
2011-01-09,1835,3063.29
2011-01-16,3339,5272.76
2011-01-23,1468,3156.07
2011-01-30,651,1144.03
2011-02-06,2992,5710.94


In [11]:
plottwo.iplot(kind='line', yTitle='Units', title='Quantity & Revenue')

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [12]:
data.Description.value_counts()

CREAM HANGING HEART T-LIGHT HOLDER    2035
REGENCY CAKESTAND 3 TIER              1721
JUMBO BAG RED RETROSPOT               1618
ASSORTED COLOUR BIRD ORNAMENT         1405
PARTY BUNTING                         1396
                                      ... 
GOLD/AMBER DROP EARRINGS W LEAF          1
BLACK 3 BEAD DROP EARRINGS               1
FIRE POLISHED GLASS BRACELET RED         1
GLASS BEAD HOOP EARRINGS BLACK           1
BLACK FINE BEAD NECKLACE W TASSEL        1
Name: Description, Length: 3640, dtype: int64

In [13]:
datathree = data[data["Description"] == 'PARTY BUNTING'] 
print(datathree.shape)
plotthree = datathree.groupby("Country").mean().drop(columns=["InvoiceNo","Revenue","CustomerID"]).reset_index(drop=False)
plotthree.head()

(1396, 9)


Unnamed: 0,Country,Quantity,UnitPrice
0,Australia,33.125,4.7125
1,Austria,8.0,4.95
2,Belgium,4.0,4.95
3,Channel Islands,13.333333,4.95
4,Cyprus,2.333333,4.75


In [14]:
plotthree.iplot(kind='scatter', mode='markers', x='Quantity', y='UnitPrice',categories="Country",
                xTitle="Quantity", yTitle='Unit Price', title='Relationship between Quantity and Unit Price')

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [15]:
lst_countries = ['EIRE', 'Germany', 'France','Netherlands']
datafour = data[data["Country"].isin(lst_countries)]
print(datafour.shape)
datafour.head()

(26150, 9)


Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
179,539320,2010-12-16 19:16:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.8,14911,EIRE
198,539722,2010-12-21 13:45:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.8,14911,EIRE
304,541570,2011-01-19 12:34:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,256,2.55,652.8,14646,Netherlands
322,541979,2011-01-24 14:54:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.7,14911,EIRE
367,542777,2011-02-01 08:31:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.7,14911,EIRE


In [23]:
plotfour = datafour.pivot_table(values="Quantity",index="InvoiceNo", columns="Country",aggfunc='sum')
plotfour.head()

Country,EIRE,France,Germany,Netherlands
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
536370,,446.0,,
536403,,,,96.0
536527,,,156.0,
536540,230.0,,,
536541,12.0,,,


In [94]:
plotfour.iplot(kind='hist',subplots=True, shape=(2, 2), xTitle='Quantities',
           yTitle='Repeat', title='Histogram distribution of quantities by countries')

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [40]:
product_list = ['JUMBO BAG RED RETROSPOT', 
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']

country_list = ['EIRE', 'Germany', 'France', 'Netherlands']

In [45]:
datafive = data[data["Country"].isin(country_list)]
print(datafive.shape)

collist = ['InvoiceNo','InvoiceDate','StockCode','UnitPrice','Quantity','CustomerID']
datafive = datafive[datafive["Description"].isin(product_list)].drop(columns=collist)
print(datafive.shape)
datafive.head()

(26150, 9)
(338, 3)


Unnamed: 0,Description,Revenue,Country
179,CREAM HANGING HEART T-LIGHT HOLDER,70.8,EIRE
198,CREAM HANGING HEART T-LIGHT HOLDER,70.8,EIRE
304,CREAM HANGING HEART T-LIGHT HOLDER,652.8,Netherlands
322,CREAM HANGING HEART T-LIGHT HOLDER,17.7,EIRE
367,CREAM HANGING HEART T-LIGHT HOLDER,17.7,EIRE


In [47]:
plotfive = datafive.pivot_table(values="Revenue",index="Description", columns="Country",aggfunc='sum')
plotfive.head()

Country,EIRE,France,Germany,Netherlands
Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CREAM HANGING HEART T-LIGHT HOLDER,2740.8,131.75,35.4,1167.0
JUMBO BAG RED RETROSPOT,278.72,903.37,1072.76,3468.0
REGENCY CAKESTAND 3 TIER,7388.55,2816.85,9061.95,3166.35


In [49]:
plotfive.iplot(kind='bar',title='Bar chart revenues by countries', xTitle='Product',yTitle='Revenues')
#,subplots=True, shape=(4, 1), xTitle='Quantities',
#          yTitle='Repeat', title='Histogram distribution of quantities by countries')

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [84]:
data['Year'] = pd.DatetimeIndex(data['InvoiceDate']).year
data['Month'] = pd.DatetimeIndex(data['InvoiceDate']).month
data['Day'] = pd.DatetimeIndex(data['InvoiceDate']).day
uk = data[data['Country']=='United Kingdom']

In [85]:
uk.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Year,Month,Day
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010,12,1
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom,2010,12,1
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom,2010,12,1


In [86]:
def f(x):
    return x

interact(f, x=['apples','oranges']);

'apples'

In [87]:
collist = ['Description','InvoiceNo','InvoiceDate','StockCode','UnitPrice','Revenue','CustomerID',"Country"]
uk = uk.drop(columns=collist)
uk.head()

Unnamed: 0,Quantity,Year,Month,Day
0,6,2010,12,1
1,6,2010,12,1
2,6,2010,12,1
3,64,2010,12,1
4,32,2010,12,1


In [103]:
plot_uk = uk.pivot_table(values="Quantity",index="Day",
                         columns=["Year","Month"].cumsum(),aggfunc='sum')

AttributeError: 'list' object has no attribute 'cumsum'

In [97]:
plot_uk.head()

Year,2010,2011,2011,2011,2011,2011,2011,2011,2011,2011,2011,2011,2011
Month,12,1,2,3,4,5,6,7,8,9,10,11,12
Day,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
1,21308.0,,13536.0,7614.0,17149.0,3172.0,6091.0,6078.0,10484.0,22003.0,,14648.0,23502.0
2,30987.0,,8184.0,8148.0,,,12587.0,,10371.0,13732.0,8375.0,22681.0,22960.0
3,7646.0,,14502.0,7842.0,5667.0,9566.0,6429.0,3117.0,15124.0,,24420.0,30681.0,
4,,6659.0,10226.0,11186.0,10937.0,15624.0,,9438.0,35474.0,7516.0,22678.0,31205.0,9855.0
5,13603.0,17635.0,,,10564.0,13906.0,10412.0,17063.0,9982.0,16021.0,32641.0,,35874.0


In [98]:
plot_uk.iplot(kind="line")

In [101]:
plot_uk2 = uk.pivot_table(values="Quantity",index=["Year","Month","Day"],aggfunc='sum')
plot_uk2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Quantity
Year,Month,Day,Unnamed: 3_level_1
2010,12,1,21308
2010,12,2,30987
2010,12,3,7646
2010,12,5,13603
2010,12,6,15515
...,...,...,...
2011,12,5,35874
2011,12,6,25909
2011,12,7,29078
2011,12,8,25441


In [102]:
plot_uk2.iplot(kind="line")

In [109]:
df = pd.DataFrame(np.random.randn(1000, 2), columns=['A', 'B'])
df.cumsum()
df

Unnamed: 0,A,B
0,-0.764544,-0.542783
1,-0.863830,0.285083
2,0.722402,2.299839
3,0.667659,0.158144
4,-1.904743,1.393602
...,...,...
995,0.698764,0.099505
996,0.035879,-1.852805
997,-0.750238,0.881456
998,1.215495,0.936314


## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [None]:
agg_func = {'InvoiceNo':'nunique',
            'Quantity':'sum',
            'UnitPrice':'mean',
            'Revenue':'sum',
            'CustomerID':'nunique'}

products = uk.groupby('Description').agg(agg_func)

## 8. Creat an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the product that contain the text entered in their description.