# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [189]:
#%pip install chart-studio
#%pip install cufflinks

In [2]:
import pandas as pd
import cufflinks as cf
from ipywidgets import interact
import chart_studio.plotly as py
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display

cf.go_offline()

In [3]:
data = pd.read_excel('../data/Online Retail.xlsx')
data.head(3)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [192]:
data1 = data[data['Country']!='United Kingdom'] 

In [193]:
data1 = data1.loc[((data1.InvoiceDate >= '2011-4-1') & (data1.InvoiceDate <= '2011-4-30'))]
data1.head(2)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
724,549667,2011-04-11 12:20:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.7,14911,EIRE
807,551163,2011-04-26 15:52:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,9,2.95,26.55,12573,France


In [194]:
data1 = data1.groupby('Country').agg({'Quantity':'sum','Revenue':'sum'})
data1.head(3)

Unnamed: 0_level_0,Quantity,Revenue
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Australia,224,421.6
Austria,308,584.78
Belgium,1170,1788.48


In [195]:
data1.iplot(kind='bar', xTitle='Country', title='Quantiy and Revenue')

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [196]:
data2 = data[(data['Country'] == 'France') & (data.InvoiceDate >= '2011-1-1') & (data.InvoiceDate <= '2011-5-31')]
data2.head(3)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
807,551163,2011-04-26 15:52:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,9,2.95,26.55,12573,France
2645,540976,2011-01-12 15:00:00,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,4,3.75,15.0,12652,France
3290,545181,2011-02-28 14:45:00,22752,SET 7 BABUSHKA NESTING BOXES,4,8.5,34.0,12509,France


In [197]:
data2 = data2.groupby('Country').agg({'Quantity':'sum','Revenue':'sum'})

In [198]:
data2.iplot(kind='bar', xTitle='Country', title='Quantity and Revenue')

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [199]:
data3 = data[data['Description'] =='PARTY BUNTING']

In [200]:
data3 = data3.groupby('Country').agg({'Quantity':'mean','UnitPrice':'mean'})
data3.head(3)

Unnamed: 0_level_0,Quantity,UnitPrice
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Australia,33.125,4.7125
Austria,8.0,4.95
Belgium,4.0,4.95


In [201]:
data3.reset_index(inplace=True)

In [202]:
data3.iplot (x='Quantity', 
             y='UnitPrice', 
            categories='Country',
            xTitle='Avg qty', 
            yTitle='Avg unit price',
            color='blue', 
            title='Avg by Country')


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead.


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead.



## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [203]:
countrylist = ['EIRE', 'Germany', 'France','Netherlands']

In [204]:
data4 = data.loc[data.Country.isin(countrylist)]
data4.head(2)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
179,539320,2010-12-16 19:16:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.8,14911,EIRE
198,539722,2010-12-21 13:45:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.8,14911,EIRE


In [205]:
fig = px.histogram(data4, 
                   x='Quantity', 
                   color='Country', 
                   facet_col='Country',
                   title='Distribution of Quantity per Invoice for Selected Countries',
                   labels={'Quantity': 'Quantity per Invoice', 'Country': 'Country'})

fig.update_xaxes(range=[0, 700])
fig.update_yaxes(range=[0, 1000])
fig.show()

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [206]:
#I will assume you meant this list as per class
productlist=['JUMBO BAG RED RETROSPOT', 
          'CREAM HANGING HEART T-LIGHT HOLDER',
          'REGENCY CAKESTAND 3 TIER']

In [207]:
#I will also assume you want to limit the countries, otherwise it crashes 
countrylist = ['EIRE', 'Germany', 'France','Netherlands']

In [208]:
data5 = data.loc[data.Description.isin(productlist)]
data5.head(2)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom


In [209]:
data5 = data5.loc[data5.Country.isin(countrylist)]
data5.head(2)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
179,539320,2010-12-16 19:16:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.8,14911,EIRE
198,539722,2010-12-21 13:45:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.8,14911,EIRE


In [210]:
data5 = data5.pivot_table(index= 'Country',
                        columns='Description',
                        values='Revenue',
                        aggfunc='sum')
data5.head()

Description,CREAM HANGING HEART T-LIGHT HOLDER,JUMBO BAG RED RETROSPOT,REGENCY CAKESTAND 3 TIER
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
EIRE,2740.8,278.72,7388.55
France,131.75,903.37,2816.85
Germany,35.4,1072.76,9061.95
Netherlands,1167.0,3468.0,3166.35


In [211]:
data5.reset_index(inplace=True)

In [212]:
data5.iplot(kind='bar', 
           x='Country', 
           title='Revenue by Country',
           xTitle='Country', 
           yTitle='Revenue')

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [213]:
data6 = data[data['Country'] == 'United Kingdom']
data6.head(2)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom


In [214]:
#I want a column with dates only (not data and time)
data6['InvoiceDate'] = pd.to_datetime(data6['InvoiceDate'])
data6['InvoiceDayOnly'] = data6['InvoiceDate'].dt.date
data6.head(2)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,InvoiceDayOnly
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01


In [215]:
data6['InvoiceDayOnly'] = pd.to_datetime(data6['InvoiceDayOnly'])

In [216]:
data6['year'] = data6['InvoiceDayOnly'].dt.year

In [217]:
data6['month'] = data6['InvoiceDayOnly'].dt.month

In [218]:
data6.head(2)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,InvoiceDayOnly,year,month
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01,2010,12
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01,2010,12


In [219]:
#now I can group by day
data6 = data6.pivot_table(index= ['InvoiceDayOnly','year','month'],
                        values='Quantity',
                        aggfunc='sum')

data6.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Quantity
InvoiceDayOnly,year,month,Unnamed: 3_level_1
2010-12-01,2010,12,21308
2010-12-02,2010,12,30987
2010-12-03,2010,12,7646


In [220]:
data6.reset_index(inplace=True)

In [221]:
fig = px.line(data6, x='InvoiceDayOnly', y='Quantity', title='Quantity Sold by Day for the United Kingdom')
fig.show()

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [222]:
data7 = data.groupby('Description').agg({'CustomerID':'nunique','InvoiceNo':'nunique'})
data7.head(2)

Unnamed: 0_level_0,CustomerID,InvoiceNo
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
4 PURPLE FLOCK DINNER CANDLES,31,36
50'S CHRISTMAS GIFT BAG LARGE,108,111


In [223]:
data7.reset_index(inplace=True)

In [224]:
# Create the initial plot
fig = px.scatter(data7, x='InvoiceNo', y='CustomerID', title='Scatter Plot of Number of Invoices vs Number of Customers',
                 labels={'InvoiceNo': 'Number of Invoices', 'CustomerID': 'Number of Customers', 'Description': 'Product'},
                 color='Description')

# Define the sliders for controlling x and y-axis ranges
x_range_slider = widgets.FloatRangeSlider(
    value=[data7['InvoiceNo'].min(), data7['InvoiceNo'].max()],
    min=data7['InvoiceNo'].min(),
    max=data7['InvoiceNo'].max(),
    step=1,
    description='X-axis Range:',
    continuous_update=False
)

y_range_slider = widgets.FloatRangeSlider(
    value=[data7['CustomerID'].min(), data7['CustomerID'].max()],
    min=data7['CustomerID'].min(),
    max=data7['CustomerID'].max(),
    step=1,
    description='Y-axis Range:',
    continuous_update=False
)

# Function to update the plot based on slider values
def update_plot(x_range, y_range):
    fig.update_xaxes(range=x_range)
    fig.update_yaxes(range=y_range)

# Display the sliders
display(x_range_slider)
display(y_range_slider)

# Update the plot based on slider values
widgets.interactive(update_plot, x_range=x_range_slider, y_range=y_range_slider)

# Show the plot
fig.show()

FloatRangeSlider(value=(1.0, 1978.0), continuous_update=False, description='X-axis Range:', max=1978.0, min=1.…

FloatRangeSlider(value=(1.0, 879.0), continuous_update=False, description='Y-axis Range:', max=879.0, min=1.0,…

## 8. Creat an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the product that contain the text entered in their description.

In [5]:
data8 = data.groupby('Description').agg({'Revenue':'sum'})
data8.head(3)

Unnamed: 0_level_0,Revenue
Description,Unnamed: 1_level_1
4 PURPLE FLOCK DINNER CANDLES,270.76
50'S CHRISTMAS GIFT BAG LARGE,2302.25
DOLLY GIRL BEAKER,2759.5


In [9]:
data8.reset_index(inplace=True)

In [10]:
data8.iplot(kind='bar', 
           x='Description', 
           title='Revenue by Product',
           xTitle='Product', 
           yTitle='Revenue')

In [13]:
fig = px.bar(data8, x='Description', y='Revenue', title='Revenue by Product',
             labels={'Description': 'Product', 'Revenue': 'Revenue'},
             text='Revenue');

fig.update_layout(yaxis_title='Revenue')  # Set the y-axis title

# Function to update the bar chart based on the text entered
def filter_description(change):
    filtered_data = data8[data8['Description'].str.contains(change.new, case=False)]
    print("Filtered Data:", filtered_data)
    fig.data[0].x = filtered_data['Description']
    fig.data[0].y = filtered_data['Revenue']
    fig.update_layout(transition_duration=500);

# Add the text field widget
text = widgets.Text(placeholder='Enter text to filter', description='Filter:')
display(text)

# Define the action on text value change
text.observe(filter_description, names='value');

# Display the initial bar chart
fig.show();

Text(value='', description='Filter:', placeholder='Enter text to filter')

Filtered Data:       index                       Description   Revenue
4         4           NINE DRAWER OFFICE TIDY    792.85
5         5        OVAL WALL MIRROR DIAMANTE    1059.95
7         7   SET 2 TEA TOWELS I LOVE LONDON   13064.95
13       13         12 DAISY PEGS IN WOOD BOX    539.55
14       14         12 EGG HOUSE PAINTED WOOD   1425.27
...     ...                               ...       ...
3633   3633  ZINC SWEETHEART WIRE LETTER RACK    923.63
3636   3636    ZINC TOP  2 DOOR WOODEN SHELF     169.50
3637   3637  ZINC WILLIE WINKIE  CANDLE STICK   2176.95
3638   3638       ZINC WIRE KITCHEN ORGANISER    156.80
3639   3639  ZINC WIRE SWEETHEART LETTER TRAY    253.24

[1158 rows x 3 columns]
Filtered Data:       index                     Description  Revenue
263     263          BALLOONS  WRITING SET   1262.20
316     316      BIRTHDAY BANQUET GIFT WRAP   383.00
382     382    BLOSSOM IMAGES GIFT WRAP SET   663.60
464     464              BLUE POLKADOT WRAP  2798.00
473     

Filtered Data:       index         Description  Revenue
3591   3591  WRAP MAGIC FOREST     451.5
3592   3592   WRAP MONSTER FUN     514.5
Filtered Data:       index         Description  Revenue
3591   3591  WRAP MAGIC FOREST     451.5
Filtered Data:       index         Description  Revenue
3591   3591  WRAP MAGIC FOREST     451.5
Filtered Data:       index         Description  Revenue
3591   3591  WRAP MAGIC FOREST     451.5
Filtered Data:       index         Description  Revenue
3591   3591  WRAP MAGIC FOREST     451.5
