# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [7]:
%pip install cufflinks
%pip install ipywidgets

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.

Collecting cufflinks
  Downloading cufflinks-0.17.3.tar.gz (81 kB)
     ---------------------------------------- 0.0/81.7 kB ? eta -:--:--
     ---------------------------------------- 81.7/81.7 kB 4.5 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting colorlover>=0.2.1 (from cufflinks)
  Downloading colorlover-0.3.0-py3-none-any.whl (8.9 kB)
Building wheels for collected packages: cufflinks
  Building wheel for cufflinks (setup.py): started
  Building wheel for cufflinks (setup.py): finished with status 'done'
  Created wheel for cufflinks: filename=cufflinks-0.17.3-py3-none-any.whl size=68725 sha256=57c3dcb941d77f7eac07dc21bcf85e9f8586dd995986a3c44d2c00171698a21f
  Stored in directory: c:\users\álvaro\appdata\local\pip\cache\wheels\c3\bf\8f\afa0730b29e347e8c1071049

In [24]:
%pip install dash

Defaulting to user installation because normal site-packages is not writeable
Collecting dash
  Obtaining dependency information for dash from https://files.pythonhosted.org/packages/00/69/d0ee9c6524678e98d336464d5c898182794c6b2ba1e5507bc7010a126ce2/dash-2.14.1-py3-none-any.whl.metadata
  Downloading dash-2.14.1-py3-none-any.whl.metadata (11 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting ansi2html (from dash)
  Downloading ansi2html-1.8.0-py3-none-any.whl (16 kB)
Downloading dash-2.14.1-py3-none-any.whl (10.4 MB)
   ---------------------------------------- 0.0/10.4 MB ? eta -:--:--
   ---------------------------------------- 0.1/10.4 MB 2.4 MB/s eta 0:00:05
   - ---------------------------



In [27]:
import pandas as pd
import plotly.express as px
import cufflinks as cf
from ipywidgets import interact
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import datetime
import plotly.graph_objects as go
from ipywidgets import interact, widgets
from IPython.display import display

cf.go_offline()

## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [11]:
# Load the retail transactions from the sibling data folder.
# Forward slashes are used because a backslash-separated relative path only
# resolves on Windows, while '/' is accepted on Windows and POSIX alike.
df = pd.read_excel('../data/Online Retail.xlsx')
df.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


In [14]:
# Make sure invoice timestamps are datetimes so the .dt accessors work.
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])

# The selection lives under a name specific to this exercise. The callback
# below looks the frame up each time the dropdown changes, so a generic
# global name that another cell reassigns would silently change what is drawn.
april_2011_non_uk = df[
    (df['InvoiceDate'].dt.month == 4)
    & (df['InvoiceDate'].dt.year == 2011)
    & (df['Country'] != 'United Kingdom')
]


@interact
def interactive_bar_chart(metric=['Quantity', 'Revenue']):
    """Draw the per-country total of ``metric`` for April 2011, excluding the UK.

    Parameters
    ----------
    metric : str
        Column to total; ``interact`` renders the list default as a dropdown
        offering 'Quantity' and 'Revenue'.
    """
    chart_data = april_2011_non_uk.groupby('Country')[metric].sum().reset_index()

    fig = px.bar(chart_data, x='Country', y=metric, title=f'Total {metric} by Country (April 2011)',
                 labels={'Country': 'Country', metric: f'Total {metric}'},
                 color_discrete_sequence=['blue'])
    fig.show()

interactive(children=(Dropdown(description='metric', options=('Quantity', 'Revenue'), value='Quantity'), Outpu…

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [16]:
# Make sure invoice timestamps are datetimes before comparing against dates.
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])

# Exercise-specific names: the callback resolves these globals every time the
# dropdown changes, so they must not be shared with (and overwritten by)
# other cells.
france_start = pd.to_datetime('2011-01-01')
france_end = pd.to_datetime('2011-05-31')
france_country = 'France'

# `france_end` is midnight at the start of May 31st. Comparing with
# "< end + 1 day" keeps every invoice issued during May 31st, which a plain
# "<= france_end" would drop.
france_sales = df[
    (df['InvoiceDate'] >= france_start)
    & (df['InvoiceDate'] < france_end + pd.Timedelta(days=1))
    & (df['Country'] == france_country)
]


@interact
def interactive_line_chart(metric=['Quantity', 'Revenue']):
    """Draw the daily total of ``metric`` sold to France, Jan 1 - May 31 2011.

    Parameters
    ----------
    metric : str
        Column to total per day; ``interact`` renders the list default as a
        dropdown offering 'Quantity' and 'Revenue'.
    """
    # Group on the calendar date so each point on the line is one day.
    chart_data = france_sales.groupby(france_sales['InvoiceDate'].dt.date)[metric].sum().reset_index()

    fig = px.line(chart_data, x='InvoiceDate', y=metric,
                  title=f'{metric} sold to {france_country} ({france_start.date()} to {france_end.date()})',
                  labels={'InvoiceDate': 'Date', metric: f'Total {metric}'},
                  color_discrete_sequence=['orange'])

    fig.show()

interactive(children=(Dropdown(description='metric', options=('Quantity', 'Revenue'), value='Quantity'), Outpu…

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [18]:
product_name = 'PARTY BUNTING'

# The axes are labelled as averages, so reduce the product's line items to one
# point per country holding the mean quantity and mean unit price.
party_bunting_by_country = (
    df[df['Description'] == product_name]
    .groupby('Country', as_index=False)[['Quantity', 'UnitPrice']]
    .mean()
)

fig = px.scatter(
    party_bunting_by_country,
    x='Quantity',
    y='UnitPrice',
    color='Country',
    title=f'Relationship between Quantity and Unit Price for {product_name}',
    labels={'Quantity': 'Average Quantity', 'UnitPrice': 'Average Unit Price'},
    color_discrete_sequence=px.colors.qualitative.Set1,
)

fig.show()

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [20]:
countries_of_interest = ['EIRE', 'Germany', 'France', 'Netherlands']

# One histogram per country. The loop variable has its own name so it does not
# overwrite a global `country` that an interactive callback may still read.
for hist_country in countries_of_interest:
    # Each row of `df` is a single invoice line; total the lines of every
    # invoice so the histogram shows quantity *per invoice*.
    invoice_quantities = (
        df[df['Country'] == hist_country]
        .groupby('InvoiceNo', as_index=False)['Quantity']
        .sum()
    )

    fig = px.histogram(invoice_quantities, x='Quantity',
                       title=f'Distribution of Quantity per Invoice in {hist_country}',
                       labels={'Quantity': 'Quantity per Invoice'},
                       marginal='rug',
                       color_discrete_sequence=px.colors.qualitative.Set1)

    fig.show()

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [21]:
products_of_interest = ['85123A', '71053']
countries_of_interest = ['United Kingdom', 'France', 'Germany']

# Compare stock codes as text: purely numeric codes may have been parsed from
# the spreadsheet as integers (NOTE(review): confirm against the file), in
# which case they would never match the string codes listed above.
stock_codes = df['StockCode'].astype(str)

# Total revenue for each (country, product) pair so every bar is one value.
revenue_by_country_product = (
    df.assign(StockCode=stock_codes)[
        stock_codes.isin(products_of_interest) & df['Country'].isin(countries_of_interest)
    ]
    .groupby(['Country', 'StockCode'], as_index=False)['Revenue']
    .sum()
)

# barmode='group' places the products next to each other for each country;
# without it the bars of the different products are stacked.
fig = px.bar(revenue_by_country_product, x='Country', y='Revenue', color='StockCode',
             barmode='group',
             title='Revenue by Country for Each Product',
             labels={'Revenue': 'Total Revenue', 'StockCode': 'Product Code'},
             color_discrete_sequence=px.colors.qualitative.Set1)

fig.show()

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [None]:
# Make sure invoice timestamps are datetimes so the .dt accessors work.
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])

# .copy() yields an independent frame, so adding the helper columns below does
# not write into a slice of `df` (which raises SettingWithCopyWarning).
df_uk = df[df['Country'] == 'United Kingdom'].copy()
df_uk['Year'] = df_uk['InvoiceDate'].dt.year
df_uk['Month'] = df_uk['InvoiceDate'].dt.month

# Sorted plain-int choices: the dropdowns list years/months in calendar order
# and the option values are ordinary Python ints.
uk_years = sorted(int(year) for year in df_uk['Year'].unique())
uk_months = sorted(int(month) for month in df_uk['Month'].unique())

app = dash.Dash(__name__)

app.layout = html.Div([
    dcc.Dropdown(
        id='year-dropdown',
        options=[{'label': str(year), 'value': year} for year in uk_years],
        value=uk_years[-1],
        multi=False,
        style={'width': '50%'}
    ),
    dcc.Dropdown(
        id='month-dropdown',
        # The year/day passed to datetime.date are placeholders; only the
        # month name produced by '%B' is used as the label.
        options=[{'label': datetime.date(1900, month, 1).strftime('%B'), 'value': month} for month in uk_months],
        value=uk_months[-1],
        multi=False,
        style={'width': '50%'}
    ),
    dcc.Graph(id='line-chart')
])


@app.callback(
    Output('line-chart', 'figure'),
    [Input('year-dropdown', 'value'),
     Input('month-dropdown', 'value')]
)
def update_chart(selected_year, selected_month):
    """Build the daily-quantity line chart for the selected year and month.

    Parameters
    ----------
    selected_year : int
        Value of the year dropdown.
    selected_month : int
        Value of the month dropdown (1-12).

    Returns
    -------
    plotly figure
        Line chart with one point per calendar day holding the total quantity
        sold in the United Kingdom on that day.
    """
    in_period = df_uk[(df_uk['Year'] == selected_year) & (df_uk['Month'] == selected_month)]

    # Sum the invoice lines of each calendar day so the line shows quantity
    # sold *by day* rather than one point per invoice line.
    daily_quantity = (
        in_period.groupby(in_period['InvoiceDate'].dt.date)['Quantity']
        .sum()
        .reset_index()
    )

    fig = px.line(daily_quantity, x='InvoiceDate', y='Quantity',
                  title=f'Quantity Sold by Day in the United Kingdom ({selected_year}, {selected_month})',
                  labels={'Quantity': 'Quantity Sold', 'InvoiceDate': 'Date'},
                  line_shape='linear',
                  height=600)
    return fig


if __name__ == '__main__':
    # `app.run` is the current entry point; `run_server` is its legacy alias.
    app.run(debug=True)

# About 90% of this was written with help from ChatGPT; I did not fully understand how to build the dropdowns

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [None]:
# One row per product: how many distinct invoices and how many distinct
# customers it appears on. Stock codes are cast to text so that codes of mixed
# type group and display consistently.
product_reach = (
    df.assign(StockCode=df['StockCode'].astype(str))
    .groupby('StockCode', sort=False)[['InvoiceNo', 'CustomerID']]
    .nunique()
    .rename(columns={'InvoiceNo': 'Invoices', 'CustomerID': 'Customers'})
    .reset_index()
)


def plot_scatter(x_range, y_range):
    """Scatter number of invoices vs. number of customers, one point per product.

    Parameters
    ----------
    x_range : tuple of int
        (low, high) limits applied to the x axis (number of invoices).
    y_range : tuple of int
        (low, high) limits applied to the y axis (number of customers).
    """
    # The stock code is shown on hover instead of as a colour: one colour per
    # product would create a separate legend entry for every product.
    fig = px.scatter(product_reach, x='Invoices', y='Customers',
                     hover_name='StockCode',
                     title='Number of Invoices vs. Number of Customers',
                     labels={'Invoices': 'Number of Invoices', 'Customers': 'Number of Customers'},
                     range_x=list(x_range),
                     range_y=list(y_range),
                     height=600)

    fig.show()


max_invoices = int(product_reach['Invoices'].max())
max_customers = int(product_reach['Customers'].max())

x_range_slider = widgets.IntRangeSlider(value=[0, max_invoices],
                                        min=0,
                                        max=max_invoices,
                                        description='Invoice Range:')
y_range_slider = widgets.IntRangeSlider(value=[0, max_customers],
                                        min=0,
                                        max=max_customers,
                                        description='Customer Range:')

# `interact` already renders both sliders together with the plot output, so
# they are not displayed a second time. The trailing ';' hides the returned
# function's repr.
interact(plot_scatter, x_range=x_range_slider, y_range=y_range_slider);

# I removed the output because rendering it made Visual Studio close

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the products that contain the entered text in their description.

In [None]:
def plot_bar_chart(filter_text):
    """Draw total revenue per product description, filtered by a text fragment.

    Parameters
    ----------
    filter_text : str
        Text that must occur in the description (case-insensitive). An empty
        string matches every product that has a description.
    """
    # regex=False treats the typed text literally, so characters such as '('
    # or '+' entered in the text box cannot raise a regex error.
    matches = df[df['Description'].str.contains(filter_text, case=False, na=False, regex=False)]

    # One bar per description holding its summed revenue, largest first.
    revenue_by_product = (
        matches.groupby('Description', as_index=False)['Revenue']
        .sum()
        .sort_values('Revenue', ascending=False)
    )

    fig = px.bar(revenue_by_product, x='Description', y='Revenue',
                 title='Revenue by Product Description',
                 labels={'Revenue': 'Revenue'},
                 height=400)

    fig.show()


# continuous_update=False redraws when the text is submitted instead of on
# every keystroke.
text_widget = widgets.Text(value='', placeholder='Enter text to filter', description='Filter:',
                           continuous_update=False)

# `interact` shows the text box, draws the initial chart, and captures the
# figure produced on each change in the cell's output area. The trailing ';'
# hides the returned function's repr.
interact(plot_bar_chart, filter_text=text_widget);

# The output is not displayed because it caused problems again