# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [1]:
import pandas as pd
import plotly.express as px
import cufflinks as cf
from ipywidgets import interact


cf.go_offline()

## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [2]:
retail_df = pd.read_excel('../data/Online retail.xlsx')
retail_df.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


In [3]:
data = retail_df[retail_df.Country != 'United Kingdom']  # Filter out the United Kingdom
data = data[(data.InvoiceDate >= '2011-04-01') & (data.InvoiceDate <= '2011-04-30')]  # Filter by date range

# Group by 'Country' and sum 'Quantity' and 'Revenue'
data = data.groupby('Country').agg({'Quantity': 'sum', 'Revenue': 'sum'}).reset_index()

data.sort_values(by='Revenue', ascending=False)

Unnamed: 0,Country,Quantity,Revenue
8,Germany,5702,10994.79
5,EIRE,4129,7270.5
12,Japan,8058,6548.5
7,France,2265,3899.31
13,Netherlands,1762,2886.56
16,Singapore,1384,2118.74
2,Belgium,1170,1788.48
19,Switzerland,878,1756.94
15,Portugal,951,1687.75
17,Spain,948,1645.65


In [4]:
filtered_data = retail_df[(retail_df['InvoiceDate'] >= '2011-04-01') & (retail_df['InvoiceDate'] <= '2011-04-30') & (retail_df['Country'] != 'United Kingdom')]

# Group the data by 'Country' and calculate the total quantity and revenue
grouped_data = filtered_data.groupby('Country').agg({'Quantity': 'sum', 'Revenue': 'sum'}).reset_index().sort_values(by='Revenue')

# Create an interactive bar chart using Plotly Express
fig = px.bar(grouped_data, x='Country', y=['Quantity', 'Revenue'],
             labels={'Quantity': 'Total Quantity', 'Revenue': 'Total Revenue'},
             title='Total Quantity and Revenue by Country (April 2011, Excluding UK)')

fig.update_layout(xaxis_title='Country', yaxis_title='Total', barmode='group')
fig.show()

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [5]:
data = retail_df[retail_df['Country'] == 'France']  # Filter data for France
data = data[(data['InvoiceDate'] >= '2011-01-01') & (data['InvoiceDate'] <= '2011-05-31')]  # Filter by date range

data[['Quantity', 'Revenue']].iplot(
    kind='line',
    xTitle='InvoiceDate',
    title='Quantity and Revenue Sold to France (Jan 1 - May 31, 2011)'
)


## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [6]:
# Filter the data for 'PARTY BUNTING' product
data = retail_df[retail_df['Description'] == 'PARTY BUNTING']

# Group by 'Country' and calculate the mean for 'Quantity' and 'UnitPrice'
data = data.groupby('Country').agg({'Quantity': 'mean', 'UnitPrice': 'mean'}).reset_index()

# Create an interactive scatter plot
fig = px.scatter(data, x='Quantity', y='UnitPrice', color='Country',
                 title='Average Quantity vs Average Unit Price by Country for PARTY BUNTING',
                 labels={'Quantity': 'Average Quantity', 'UnitPrice': 'Average Unit Price'})

# Show the plot
fig.show()


## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [7]:
countries =['EIRE', 'Germany', 'France']

data=retail_df.loc[retail_df.Country.isin(countries)]

data = data.pivot_table(values='InvoiceNo', columns='Quantity', index='Country', aggfunc='count')

data.iplot(kind='hist', xTitle='Invoice', yTitle='Quantity', title='Distributions of Quantity per Invoice')

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [8]:
countries=['Germany', 'EIRE', 'France']
products=['CREAM HANGING HEART T-LIGHT HOLDER', 'JUMBO BAG RED RETROSPOT', 'REGENCY CAKESTAND 3 TIER']

data=retail_df.loc[retail_df.Description.isin(products)]

data=data.loc[data.Country.isin(countries)]

data=data.pivot_table(values='Revenue', 
                      columns='Description',
                      index='Country', 
                      aggfunc='sum')

data.reset_index(inplace=True)

data.iplot(kind='bar', 
           x='Country', 
           title='Revenue by Country',
           xTitle='Country', 
           yTitle='Revenue')

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. 
## Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [9]:
## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. 
## Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

data = retail_df[retail_df['Country'] == 'United Kingdom']

data = data.groupby('InvoiceDate').agg({'Quantity':'sum'}).reset_index()

fig = px.line(data, x='InvoiceDate', y='Quantity')

fig.show()

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [10]:
import pandas as pd
import plotly.express as px

# Assuming you have a DataFrame called 'retail_df' containing your retail data

# Group the data by product and count the number of invoices and customers for each product
product_data = retail_df.groupby('Description').agg({'InvoiceNo': 'nunique', 'CustomerID': 'nunique'}).reset_index()

# Create the scatter plot
fig = px.scatter(product_data, x='InvoiceNo', y='CustomerID', text='Description', title='Invoices vs. Customers by Product')
fig.update_traces(textposition='top center')

# Update axis labels
fig.update_xaxes(title_text='Number of Invoices')
fig.update_yaxes(title_text='Number of Customers')

# Show the interactive plot
fig.show()


## 8. Creat an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the product that contain the text entered in their description.

In [11]:
fig = px.bar(df, x='Description', y='Revenue', title='Revenue by Product Description')


filter_text = widgets.Text(
    value='',
    placeholder='Filter by product description',
    description='Filter:'
)

# Function to update the bar chart based on filter text
def update_chart(change):
    clear_output(wait=True)
    filtered_df = df[df['Description'].str.contains(filter_text.value, case=False)]
    fig = px.bar(filtered_df, x='Description', y='Revenue', title='Revenue by Product Description')
    fig.show()

filter_text.observe(update_chart, names='value')

# Display the widgets and initial chart
display(filter_text)
fig.show()

NameError: name 'df' is not defined