# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [1]:
# Uncomment the next line to install the Chart Studio client if it is missing.
#!pip install chart-studio
import pandas as pd
import chart_studio.plotly as py
import cufflinks as cf
from ipywidgets import interact
import plotly.graph_objs as go
from plotly.offline import iplot
import plotly.express as px
from plotly.subplots import make_subplots
# NOTE(review): `interact` is already imported above; this line additionally brings in `widgets`.
from ipywidgets import interact, widgets
# Enable Cufflinks offline mode so charts are rendered locally in the notebook.
cf.go_offline()

In [2]:
# Location of the Online Retail workbook.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs on another machine after it has been adjusted.
retail_xlsx_path = '/Users/davidledo/Desktop/Ejercicios/Semana_5/5.2-lab_interactive_visualization/data/Online Retail.xlsx'

# Load the workbook into the DataFrame used by every exercise below.
retail = pd.read_excel(retail_xlsx_path)

In [3]:
# Display the loaded DataFrame to inspect its columns and a sample of rows.
retail

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.30,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.30,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.30,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.20,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.60,13408,United Kingdom
...,...,...,...,...,...,...,...,...,...
396029,580691,2011-12-05 15:48:00,90214W,"LETTER ""W"" BLING KEY RING",12,0.29,3.48,13790,United Kingdom
396030,580691,2011-12-05 15:48:00,90214Z,"LETTER ""Z"" BLING KEY RING",12,0.29,3.48,13790,United Kingdom
396031,580865,2011-12-06 11:58:00,90089,PINK CRYSTAL SKULL PHONE CHARM,12,0.19,2.28,17914,United Kingdom
396032,580865,2011-12-06 11:58:00,90089,PINK CRYSTAL SKULL PHONE CHARM,12,0.19,2.28,17914,United Kingdom


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [4]:
# List the column names available for the exercises below.
retail.columns

Index(['InvoiceNo', 'InvoiceDate', 'StockCode', 'Description', 'Quantity',
       'UnitPrice', 'Revenue', 'CustomerID', 'Country'],
      dtype='object')

In [5]:
# Parse the invoice timestamps into a local Series instead of rewriting the
# column on `retail`, so re-running this cell never alters the shared frame.
invoice_dates = pd.to_datetime(retail['InvoiceDate'])

# Keep April 2011 sales made outside the United Kingdom.
is_april_2011 = (invoice_dates.dt.year == 2011) & (invoice_dates.dt.month == 4)
is_not_uk = retail['Country'] != 'United Kingdom'
filtered_df = retail[is_april_2011 & is_not_uk]

# Total quantity and total revenue per country.
grouped_df = filtered_df.groupby('Country').agg({'Quantity':'sum', 'Revenue':'sum'}).reset_index()

# One bar trace per measure.
trace1 = go.Bar(x=grouped_df['Country'], y=grouped_df['Quantity'], name='Quantity')
trace2 = go.Bar(x=grouped_df['Country'], y=grouped_df['Revenue'], name='Revenue')

# Draw the two measures side by side: quantity (units) and revenue (currency)
# are different quantities, so stacking them would produce a meaningless total.
layout = go.Layout(title='Total Quantity and Revenue by Country for April 2011', barmode='group',
                   xaxis=dict(title='Country'), yaxis=dict(title='Total'))

# Build the figure and render it in the notebook.
fig = go.Figure(data=[trace1, trace2], layout=layout)
iplot(fig)

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [6]:
# Make sure the invoice timestamps are datetimes before using the .dt accessor.
retail['InvoiceDate'] = pd.to_datetime(retail['InvoiceDate'])

# Boolean masks: year 2011, months January through May, sold to France.
invoice_ts = retail['InvoiceDate']
in_2011 = invoice_ts.dt.year == 2011
jan_to_may = invoice_ts.dt.month.between(1, 5)
to_france = retail['Country'] == 'France'
filtered_df = retail[in_2011 & jan_to_may & to_france]

# Sum quantity and revenue for each distinct invoice timestamp.
grouped_df = (
    filtered_df
    .groupby('InvoiceDate')
    .agg({'Quantity': 'sum', 'Revenue': 'sum'})
    .reset_index()
)

# One line trace per measure, both plotted against the invoice timestamp.
trace1, trace2 = (
    go.Scatter(x=grouped_df['InvoiceDate'], y=grouped_df[measure], mode='lines', name=measure)
    for measure in ('Quantity', 'Revenue')
)

# Chart title.
layout = go.Layout(title='Quantity and Revenue Sold to France from January to May 2011')

# Build the figure and render it in the notebook.
fig = go.Figure(data=[trace1, trace2], layout=layout)
iplot(fig)

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [7]:
# Rows for the 'PARTY BUNTING' product only.
is_party_bunting = retail['Description'] == 'PARTY BUNTING'
filtered_df = retail[is_party_bunting]

# Mean quantity and mean unit price per country.
grouped_df = (
    filtered_df
    .groupby('Country')
    .agg({'Quantity': 'mean', 'UnitPrice': 'mean'})
    .reset_index()
)

# Interactive scatter plot with one colour-coded point per country.
axis_labels = {'Quantity':'Average Quantity', 'UnitPrice':'Average Unit Price'}
fig = px.scatter(
    grouped_df,
    x='Quantity',
    y='UnitPrice',
    color='Country',
    title='Relationship between Average Quantity and Average Unit Price for PARTY BUNTING',
    labels=axis_labels,
)

fig.show()

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [8]:
# Countries to compare, in subplot order (left to right, top to bottom).
countries = ['EIRE', 'Germany', 'France', 'Netherlands']

# Keep only the rows for those countries.
in_selected_countries = retail['Country'].isin(countries)
filtered_df = retail[in_selected_countries]

# Total quantity per invoice, kept separate for each country.
grouped_df = (
    filtered_df
    .groupby(['Country', 'InvoiceNo'])
    .agg({'Quantity': 'sum'})
    .reset_index()
)

# 2 x 2 grid with one titled panel per country.
fig = make_subplots(rows=2, cols=2, subplot_titles=list(countries))

for position, country in enumerate(countries):
    # divmod maps positions 0..3 onto zero-based (row, col); plotly grids are 1-based.
    grid_row, grid_col = divmod(position, 2)
    country_df = grouped_df[grouped_df['Country'] == country]
    histogram = go.Histogram(x=country_df['Quantity'], name=country, nbinsx=20)
    fig.add_trace(histogram, row=grid_row + 1, col=grid_col + 1)

fig.update_layout(title_text='Distributions of Quantity per Invoice', height=700, showlegend=False)
fig.show()

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [9]:
# Countries and products of interest. The exercise did not specify the
# products, so these were picked arbitrarily.
countries = ['EIRE', 'Germany', 'France', 'Netherlands']
products = ['CREAM HANGING HEART T-LIGHT HOLDER', 'HAND WARMER UNION JACK', 'DOORMAT NEW ENGLAND', 'JAM MAKING SET WITH JARS']

# Keep rows that match both a selected country and a selected product.
in_scope = retail['Country'].isin(countries) & retail['Description'].isin(products)
filtered_df = retail[in_scope]

# Revenue summed per (country, product) pair.
grouped_df = (
    filtered_df
    .groupby(['Country', 'Description'])
    .agg({'Revenue': 'sum'})
    .reset_index()
)


def _revenue_bar(product):
    """Build the bar trace holding one product's revenue per country."""
    rows = grouped_df[grouped_df['Description'] == product]
    return go.Bar(x=rows['Country'], y=rows['Revenue'], name=product)


# One trace per product, in the order the products are listed above.
traces = [_revenue_bar(product) for product in products]

# Grouped (side-by-side) bars with labelled axes.
layout = go.Layout(
    title='Revenue by Country for Each Product',
    barmode='group',
    xaxis=dict(title='Country'),
    yaxis=dict(title='Revenue'),
)

# Build the figure and render it in the notebook.
fig = go.Figure(data=traces, layout=layout)
iplot(fig)

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [11]:
# United Kingdom rows, restricted to the two columns this chart needs.
# `.loc` with a column list returns a new frame, and `.assign` below returns
# another one, so no column is ever written onto a slice of `retail`
# (the original pattern triggered pandas' SettingWithCopyWarning).
uk_sales = retail.loc[retail['Country'] == 'United Kingdom', ['InvoiceDate', 'Quantity']]
uk_dates = pd.to_datetime(uk_sales['InvoiceDate'])

# Quantity summed per calendar day. The frame gets its own name (instead of
# the generic `grouped_df` that other cells reassign) because the widget
# callback below looks it up every time a dropdown changes.
uk_daily_quantity = (
    uk_sales
    .assign(Year=uk_dates.dt.year, Month=uk_dates.dt.month, Day=uk_dates.dt.day)
    .groupby(['Year', 'Month', 'Day'])
    .agg({'Quantity': 'sum'})
    .reset_index()
)


def _months_for(year):
    """Return the sorted months that have UK sales in `year`."""
    months = uk_daily_quantity.loc[uk_daily_quantity['Year'] == year, 'Month'].unique()
    return sorted(int(month) for month in months)


# Dropdowns for year and month. Plain Python ints are used as options, and the
# month list only offers months that actually have data for the selected year.
year_widget = widgets.Dropdown(
    options=sorted(int(year) for year in uk_daily_quantity['Year'].unique()),
    description='Year:',
)
month_widget = widgets.Dropdown(options=_months_for(year_widget.value), description='Month:')


def _sync_month_options(change):
    """Refresh the month dropdown whenever a different year is selected."""
    month_widget.options = _months_for(change['new'])


year_widget.observe(_sync_month_options, names='value')


# Redraw the chart whenever either dropdown changes.
@interact(Year=year_widget, Month=month_widget)
def update_plot(Year, Month):
    """Plot the UK quantity sold on each day of the selected year and month."""
    selected = uk_daily_quantity[
        (uk_daily_quantity['Year'] == Year) & (uk_daily_quantity['Month'] == Month)
    ]
    trace = go.Scatter(x=selected['Day'], y=selected['Quantity'], mode='lines')
    layout = go.Layout(title='Quantity Sold by Day for the United Kingdom', xaxis=dict(title='Day'), yaxis=dict(title='Quantity'))
    fig = go.Figure(data=[trace], layout=layout)
    iplot(fig)

interactive(children=(Dropdown(description='Year:', options=(2010, 2011), value=2010), Dropdown(description='M…

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [12]:
# Number of distinct invoices and distinct customers per product description.
# The frame gets its own name (instead of the generic `grouped_df` that other
# cells reassign) because the slider callback below looks it up on every move.
product_counts = retail.groupby('Description').agg({'InvoiceNo':'nunique', 'CustomerID':'nunique'}).reset_index()


def _full_range_slider(column, description):
    """Build an integer range slider spanning every value of `column`."""
    # Convert numpy integers to plain ints before handing them to the widget.
    lowest = int(product_counts[column].min())
    highest = int(product_counts[column].max())
    return widgets.IntRangeSlider(value=[lowest, highest], min=lowest, max=highest, description=description)


# Sliders controlling the visible x-axis and y-axis ranges.
x_range_slider = _full_range_slider('InvoiceNo', 'X Range:')
y_range_slider = _full_range_slider('CustomerID', 'Y Range:')


# Redraw the chart whenever either slider changes.
@interact(X_Range=x_range_slider, Y_Range=y_range_slider)
def update_plot(X_Range, Y_Range):
    """Scatter products by invoice count (x) and customer count (y) within the selected ranges."""
    in_x_range = product_counts['InvoiceNo'].between(X_Range[0], X_Range[1])
    in_y_range = product_counts['CustomerID'].between(Y_Range[0], Y_Range[1])
    visible = product_counts[in_x_range & in_y_range]
    # The product description is attached as hover text for each point.
    trace = go.Scatter(x=visible['InvoiceNo'], y=visible['CustomerID'], mode='markers', text=visible['Description'])
    layout = go.Layout(
        title='Number of Invoices vs. Number of Customers',
        xaxis=dict(title='Number of Invoices', range=list(X_Range)),
        yaxis=dict(title='Number of Customers', range=list(Y_Range)),
    )
    fig = go.Figure(data=[trace], layout=layout)
    iplot(fig)

interactive(children=(IntRangeSlider(value=(1, 1978), description='X Range:', max=1978, min=1), IntRangeSlider…

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the products that contain the entered text in their description.

In [13]:
# Total revenue per product description. The frame gets its own name (instead
# of the generic `grouped_df` that other cells reassign) because the text-box
# callback below looks it up on every keystroke.
revenue_by_product = retail.groupby('Description').agg({'Revenue':'sum'}).reset_index()

# Text box used to filter the product descriptions.
text_widget = widgets.Text(value='', description='Filter:')


# Redraw the chart whenever the filter text changes.
@interact(Filter=text_widget)
def update_plot(Filter):
    """Plot revenue for the products whose description contains `Filter` (case-insensitive)."""
    # regex=False matches the typed text literally; with the default regex
    # matching, characters such as '(' or '+' raise an error while typing.
    matches = revenue_by_product['Description'].str.contains(Filter, case=False, na=False, regex=False)
    visible = revenue_by_product[matches]
    trace = go.Bar(x=visible['Description'], y=visible['Revenue'])
    layout = go.Layout(title='Revenue by Product Description', xaxis=dict(title='Product Description'), yaxis=dict(title='Revenue'))
    fig = go.Figure(data=[trace], layout=layout)
    iplot(fig)

interactive(children=(Text(value='', description='Filter:'), Output()), _dom_classes=('widget-interact',))

In [5]:
# Display the DataFrame again.
# NOTE(review): this trailing cell repeats the preview shown after loading the
# data, and its execution count is out of sequence — consider removing it.
retail