# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [1]:
#%pip install plotly chart-studio
#%pip install pandas plotly chart-studio cufflinks
#%pip install openpyxl

import pandas as pd
import chart_studio.plotly as py
import cufflinks as cf
from ipywidgets import interact
import plotly.express as px

cf.go_offline()

In [2]:
retail=pd.read_excel('../data/Online Retail.xlsx')
retail.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [3]:
# Filtrar los datos para el mes de abril de 2011 y excluyendo el Reino Unido
data_filtered = retail[(retail['InvoiceDate'] >= '2011-04-01') & (retail['InvoiceDate'] <= '2011-04-30') & (retail['Country'] != 'United Kingdom')]
data_filtered

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
724,549667,2011-04-11 12:20:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.70,14911,EIRE
807,551163,2011-04-26 15:52:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,9,2.95,26.55,12573,France
3320,550899,2011-04-21 12:07:00,22752,SET 7 BABUSHKA NESTING BOXES,2,8.50,17.00,13505,Switzerland
4880,550527,2011-04-19 10:48:00,84879,ASSORTED COLOUR BIRD ORNAMENT,40,1.69,67.60,12476,Germany
4882,550620,2011-04-19 13:39:00,84879,ASSORTED COLOUR BIRD ORNAMENT,32,1.69,54.08,12585,Germany
...,...,...,...,...,...,...,...,...,...
328231,550665,2011-04-20 09:31:00,23204,CHARLOTTE BAG APPLES DESIGN,50,0.85,42.50,12530,Germany
328238,550911,2011-04-21 13:11:00,23204,CHARLOTTE BAG APPLES DESIGN,10,0.85,8.50,12354,Spain
328244,551258,2011-04-27 12:02:00,23204,CHARLOTTE BAG APPLES DESIGN,20,0.85,17.00,12621,Germany
328681,551346,2011-04-28 09:12:00,23208,LUNCH BAG VINTAGE LEAF DESIGN,10,1.65,16.50,12362,Belgium


In [5]:
data_filtered2 = pd.pivot_table(data_filtered, values=['Quantity', 'Revenue'], index='Country', aggfunc='sum')
data_filtered2.iplot(kind='bar', xTitle='Country', title='Total quantity and revenue by Country')

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [8]:
retail_france = retail[retail.Country == 'France']
retail_france = retail_france[(retail_france.InvoiceDate >= '2011-01-01') & (retail_france.InvoiceDate <= '2011-05-31')]
retail_france.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
807,551163,2011-04-26 15:52:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,9,2.95,26.55,12573,France
2645,540976,2011-01-12 15:00:00,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,4,3.75,15.0,12652,France
3290,545181,2011-02-28 14:45:00,22752,SET 7 BABUSHKA NESTING BOXES,4,8.5,34.0,12509,France
4614,542629,2011-01-31 09:57:00,84879,ASSORTED COLOUR BIRD ORNAMENT,160,1.45,232.0,12731,France
4795,548409,2011-03-31 10:27:00,84879,ASSORTED COLOUR BIRD ORNAMENT,160,1.45,232.0,12731,France


In [9]:
retail_france = retail_france.groupby('InvoiceDate').sum(numeric_only=True)

retail_france[['Quantity', 'Revenue']].iplot(kind='line', xTitle='Date', title='Quantity and revenue sold to France')

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [12]:
retail_quant_price = retail[retail['Description'] == 'PARTY BUNTING']
retail_quant_price = retail_quant_price.groupby('Country').agg({'Quantity': 'mean', 'UnitPrice': 'mean'}).reset_index()
img = px.scatter(retail_quant_price, x='Quantity', y='UnitPrice', color='Country',
                 labels={'Quantity': 'Average Quantity', 'UnitPrice': 'Average Unit Price'},
                 title='Average Quantity and Unit Price for PARTY BUNTING')
img.show()

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [25]:
lst = ['EIRE', 'Germany', 'France', 'Netherlands']
retail_countries = retail[retail['Country'].isin(lst)]
retail_quantity = retail_countries.groupby(['Country', 'InvoiceNo'])['Quantity']

In [29]:
img = px.histogram(
    retail_quantity, 
    x='Quantity', 
    color='Country',
    facet_col='Country',
    title='Distribution of quantity per invoice for filtered countries',
    labels={'Quantity': 'Quantity per Invoice'}
)
img.show()

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of [0, 1] but received: Quantity

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [None]:
product_list = ['JUMBO BAG RED RETROSPOT',
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']
country_list = ['EIRE', 'Germany', 'France', 'Netherlands']

rev = retail[retail['Description'].isin(product_list) & retail['Country'].isin(country_list)]
rev = rev.pivot_table( values = 'Revenue', columns = 'Description', index = 'Country', aggfunc = 'sum').reset_index()
rev.iplot(
    kind='bar',
    x='Country',
    title='Revenue by Country for Selected Products',
    xTitle='Countries',
    yTitle='Revenues',
    colors=['blue', 'read', 'yellow'])

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [None]:
uk = retail[retail['Country'] == 'United Kingdom']
uk['InvoiceDate'] = uk['InvoiceDate'].dt.date
uk.groupby('InvoiceDate')['Quantity'].sum().reset_index()
uk['InvoiceDate'] = pd.to_datetime(uk['InvoiceDate'])

@interact(year=widgets.Dropdown(options=uk['InvoiceDate'].dt.year.unique(), description='Year'),
          month=widgets.Dropdown(options=uk['InvoiceDate'].dt.month_name().unique(), description='Month'))
def update_plot(year, month):
    filtered_df = uk[(uk['InvoiceDate'].dt.year == year) & (uk['InvoiceDate'].dt.month_name() == month)]
    
    fig = px.line(filtered_df, x='InvoiceDate', y='Quantity', title=f'Daily Sales for {month} {year}')
    fig.show()

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [None]:
import plotly.express as px

fig = px.scatter(
    retail,
    x='InvoiceNo',
    y='CustomerID',
    color='Quantity',
    title='Scatter Plot of Invoices vs. Customers',
    labels={'InvoiceNo': 'Number of Invoices', 'Quantity': 'Number of Customers'},
    size_max=15
)
fig.update_xaxes(rangeslider_visible=True)
fig.update_yaxes(rangemode='tozero')

fig.show()

#no puedo seguir porque el ordenador me peta

## 8. Creat an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the product that contain the text entered in their description.