# Interactive Time Series Visualization Assignment

In [3]:
%reload_ext nb_black

<IPython.core.display.Javascript object>

In [4]:
import pandas as pd
import plotly_express as px
from ipywidgets import interact

<IPython.core.display.Javascript object>

### Load the Online Retail.xlsx Excel file into a Pandas dataframe.

In [6]:
# Read the Online Retail workbook into a DataFrame (path is relative to the working directory).
web_df = pd.read_excel(io="../data/Online Retail.xlsx")

<IPython.core.display.Javascript object>

In [7]:
# Preview the first five rows to sanity-check the load.
web_df.head(n=5)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


<IPython.core.display.Javascript object>

In [9]:
# Parse the invoice timestamp once, then derive the calendar parts from it.
web_df["date"] = pd.to_datetime(web_df["InvoiceDate"])

# Each lambda reads the still-datetime "date" column; "date" itself is truncated
# to a plain calendar date last so the earlier `.dt` lookups keep working.
web_df = web_df.assign(
    month=lambda frame: frame["date"].dt.month,
    day_of_month=lambda frame: frame["date"].dt.day,
    hour=lambda frame: frame["date"].dt.hour,
    weekday=lambda frame: frame["date"].dt.weekday,
    year=lambda frame: frame["date"].dt.year,
    date=lambda frame: frame["date"].dt.date,
)


<IPython.core.display.Javascript object>

In [11]:
# List every column with its non-null count and dtype to confirm the derived date columns were added.
web_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 396034 entries, 0 to 396033
Data columns (total 15 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   InvoiceNo     396034 non-null  int64         
 1   InvoiceDate   396034 non-null  datetime64[ns]
 2   StockCode     396034 non-null  object        
 3   Description   396034 non-null  object        
 4   Quantity      396034 non-null  int64         
 5   UnitPrice     396034 non-null  float64       
 6   Revenue       396034 non-null  float64       
 7   CustomerID    396034 non-null  int64         
 8   Country       396034 non-null  object        
 9   date          396034 non-null  object        
 10  month         396034 non-null  int64         
 11  day_of_month  396034 non-null  int64         
 12  hour          396034 non-null  int64         
 13  weekday       396034 non-null  int64         
 14  year          396034 non-null  int64         
dtypes: datetime64[ns]

<IPython.core.display.Javascript object>

### Create an interactive histogram to explore the distributions of daily (by date) Quantity, UnitPrice, and Revenue. Include a dropdown widget where you can switch between the three fields.

**Hint:** You will need to create a Date field and then aggregate on it. The Quantity and Revenue fields should be summed and the UnitPrice field should be averaged (mean).

In [17]:
# One row per calendar day: Quantity and Revenue are summed, UnitPrice is averaged.
# `as_index=False` keeps "date" as a regular column instead of the index.
quant_rev_up = web_df.groupby("date", as_index=False).agg(
    {"Quantity": "sum", "Revenue": "sum", "UnitPrice": "mean"}
)


<IPython.core.display.Javascript object>

In [29]:
fields = ["Quantity", "UnitPrice", "Revenue"]


@interact(field=fields)
def histogram(field):
    """Plot the distribution of one daily metric as a histogram.

    Parameters
    ----------
    field : str
        One of "Quantity", "Revenue" (summed per day) or "UnitPrice"
        (averaged per day); chosen through the dropdown widget.

    Raises
    ------
    ValueError
        If ``field`` is not one of the supported metrics.
    """
    # Aggregation applied to each metric when collapsing transactions to days.
    daily_agg = {"Quantity": "sum", "Revenue": "sum", "UnitPrice": "mean"}
    if field not in daily_agg:
        raise ValueError(f"Unsupported field: {field!r}")

    daily = web_df.groupby("date").agg({field: daily_agg[field]}).reset_index()

    # Bin the daily values themselves (x=field). Binning on x="date" with
    # y=field would draw a time-binned total, not the distribution of the metric.
    fig = px.histogram(daily, x=field, title=f"Distribution of daily {field}")
    fig.update_traces(marker_line_color="black", marker_line_width=1)
    fig.show()

interactive(children=(Dropdown(description='field', options=('Quantity', 'UnitPrice', 'Revenue'), value='Quant…

<IPython.core.display.Javascript object>

### Create an interactive line chart where you can explore Quantity and Revenue by day (date) for each country. Include two dropdown widgets - one to choose between the Quantity and Revenue fields and the other to choose the Country.

In [40]:
# Dropdown choices: every distinct country in alphabetical order, and the two summable metrics.
countries_list = sorted(web_df["Country"].drop_duplicates())
fields = ["Quantity", "Revenue"]


@interact(country=countries_list, field=fields)
def linechart(country, field):
    """Draw the daily total of ``field`` for a single ``country`` as a line chart.

    Both arguments are supplied by the dropdown widgets created by ``interact``.
    """
    in_country = web_df["Country"] == country
    # Sum the chosen metric per calendar day for the selected country only.
    daily_totals = web_df.loc[in_country].groupby("date")[field].sum().reset_index()

    px.line(data_frame=daily_totals, x="date", y=field).show()

interactive(children=(Dropdown(description='country', options=('Australia', 'Austria', 'Bahrain', 'Belgium', '…

<IPython.core.display.Javascript object>

### Create an interactive scatter plot showing the relationships between daily Quantity, UnitPrice, and Revenue for the United Kingdom. Include two dropdown boxes that let you choose between the 3 fields - one for the x axis and one for the y axis of your scatter plot. Size the data points according to Revenue.

In [49]:
fields = ["Quantity", "UnitPrice", "Revenue"]

# Daily aggregates for the United Kingdom only. Computed once here so the
# widget callback does not redo the groupby on every dropdown change.
uk_daily = (
    web_df[web_df["Country"] == "United Kingdom"]
    .groupby("date")
    .agg({"Quantity": "sum", "Revenue": "sum", "UnitPrice": "mean"})
    .reset_index()
)


@interact(x_field=fields, y_field=fields)
def inscatter(x_field, y_field):
    """Scatter two daily United Kingdom metrics against each other.

    Parameters
    ----------
    x_field, y_field : str
        Column names from ``fields`` to place on the x and y axes; chosen
        through the two dropdown widgets.

    Marker size is driven by the daily Revenue total.
    """
    fig = px.scatter(data_frame=uk_daily, x=x_field, y=y_field, size="Revenue")
    fig.show()

interactive(children=(Dropdown(description='x_field', options=('Quantity', 'UnitPrice', 'Revenue'), value='Qua…

<IPython.core.display.Javascript object>

### Create a bar chart showing the top X products sold (by quantity) in the United Kingdom in a specific month. Use a dropdown box to select the month name and a slider to show the top X products. The range for X should be from 5 to 25. Make sure the bars are sorted in descending order according to their total quantity sold for the month.

Bonus points if you can figure out how to ensure that the month names in the dropdown appear in their correct order (January, February, March, April, etc.).

In [86]:
months = sorted(web_df["InvoiceDate"].dt.month.unique())
web_df["month_name"] = web_df["InvoiceDate"].dt.month_name()

# Map month number -> month name, ordered by month number so the dropdown
# lists the names in calendar order rather than alphabetically.
month_lookup = web_df[["month", "month_name"]].drop_duplicates().sort_values("month")
month_labels = dict(zip(month_lookup["month"].tolist(), month_lookup["month_name"]))
# (label, value) pairs: the widget displays the name and passes the number.
month_options = [(name, number) for number, name in month_labels.items()]


@interact(num_products=(5, 25), month=month_options)
def barchart(num_products, month):
    """Bar chart of the top-selling United Kingdom products for one month.

    Parameters
    ----------
    num_products : int
        How many products to show; the slider ranges from 5 to 25.
    month : int
        Calendar month number (1-12) selected by name in the dropdown.

    Products are ranked by total Quantity and drawn in descending order.
    NOTE(review): rows are matched on month number only, so the same
    calendar month from different years is pooled; confirm this is intended.
    """
    uk_month = web_df[
        (web_df["Country"] == "United Kingdom") & (web_df["month"] == month)
    ]
    top_products = (
        # sort=False: group-key order is irrelevant because we rank by Quantity next.
        uk_month.groupby("StockCode", sort=False)["Quantity"]
        .sum()
        .reset_index()
        .sort_values(by="Quantity", ascending=False)
        .head(num_products)
        # Stock codes mix numbers and strings; cast so every bar label is text.
        .astype({"StockCode": str})
    )

    fig = px.bar(
        top_products,
        x="StockCode",
        y="Quantity",
        title=(
            f"Top {num_products} products by quantity - United Kingdom, "
            f"{month_labels.get(month, month)}"
        ),
    )
    # Force a categorical axis (numeric-looking codes would otherwise be placed
    # on a number line) and pin the bars to descending total quantity.
    fig.update_xaxes(type="category", categoryorder="total descending")
    fig.show()

interactive(children=(IntSlider(value=15, description='num_products', max=25, min=5), Dropdown(description='mo…

<IPython.core.display.Javascript object>

In [66]:
# Preview the first five rows, now including the derived date columns.
web_df.head(n=5)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,date,month,day_of_month,hour,weekday,year,month_name
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01,12,1,8,2,2010,December
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01,12,1,9,2,2010,December
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01,12,1,9,2,2010,December
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom,2010-12-01,12,1,10,2,2010,December
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom,2010-12-01,12,1,10,2,2010,December


<IPython.core.display.Javascript object>

In [81]:
# Scratch check: total quantity per product for month 1 across all countries,
# largest first. The original row labels from the grouped frame are kept.
df = (
    web_df.groupby(["month", "StockCode"])
    .agg({"Quantity": "sum"})
    .reset_index()
    .loc[lambda totals: totals["month"] == 1, ["StockCode", "Quantity"]]
    .sort_values(by="Quantity", ascending=False)
)
df

Unnamed: 0,StockCode,Quantity
2030,85123A,5467
262,21212,3329
205,21108,3240
21,17003,3174
1031,22469,2830
...,...,...
518,21711,1
1640,84974,1
1647,85006,1
1834,72801G,1


<IPython.core.display.Javascript object>