# Tutorial 08 A Dashboards

In [8]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import panel as pn # for making dashboards
import hvplot.pandas
pn.extension()

### 1. Dashboard for website

In [9]:
# Daily hit counts: the first CSV column becomes the index and is parsed as
# dates; the remaining columns are page ids ('001', '002', ...).
data = pd.read_csv('https://tinyurl.com/ChrisCoDV/Pages/DailyHits.csv', index_col=0)
data.index = pd.to_datetime(data.index)
print(data.head())

# Per-page metric tables, each loaded with its first column as the index.
exit_rate = pd.read_csv('https://tinyurl.com/ChrisCoDV/Pages/PageExitRate.csv', index_col=0)
page_revenue = pd.read_csv('https://tinyurl.com/ChrisCoDV/Pages/PageRevenue.csv', index_col=0)
page_size = pd.read_csv('https://tinyurl.com/ChrisCoDV/Pages/PageSize.csv', index_col=0)
page_speed = pd.read_csv('https://tinyurl.com/ChrisCoDV/Pages/PageSpeed.csv', index_col=0)
page_viewing_time = pd.read_csv('https://tinyurl.com/ChrisCoDV/Pages/PageViewingTime.csv', index_col=0)

# One summary row per page id (the columns of `data`).
# NOTE(review): `.values` assigns positionally, not by index label, so this
# assumes every metric file lists the pages in the same order as
# `data.columns` and has a single value column -- TODO confirm.
summary_data = pd.DataFrame(index=data.columns)
summary_data['Size'] = page_size.values
summary_data['Hits'] = data.sum().values  # total hits per page over all dates
summary_data['Revenue'] = page_revenue.values
summary_data['Viewing'] = page_viewing_time.values
summary_data['Download'] = page_speed.values
summary_data['Exit'] = exit_rate.values
print(summary_data.head())

             001  002  003  004  005  006  007  008  009  010  ...  158  159  \
Date                                                           ...             
2019-01-01  1062   33   67   42    0    4    0    0    1   69  ...    0    0   
2019-01-02   976   41   72   19    2    0    0   14    1   62  ...    5    0   
2019-01-03   956   27   71   28    4    6   11    0    0   64  ...    0    0   
2019-01-04   976   30   57   29    0    0    3    0    2   52  ...    0    2   
2019-01-05   956   45   65   27    4    1    0    0    0   55  ...   12    0   

            160  161  162  163  164  165  166  167  
Date                                                
2019-01-01    4    0   10    2    2    2    0    0  
2019-01-02    0    7    0    0    0    0    1    3  
2019-01-03    0    0    6    0    0    3    0    0  
2019-01-04    0    0    0    0    0    0    0    7  
2019-01-05    0    4    0    0    0    0    5    2  

[5 rows x 167 columns]
           Size    Hits      Revenue     Vie

In [19]:
# Names of the summary metrics held in `summary_data`.
variables = summary_data.columns.tolist()
variables

# Pages kept for the dashboard: the high-volume pair first, then the
# medium-volume pages.
high_volume_pages = ['001', '015']
medium_volume_pages = ['002', '003', '004', '010', '048', '080', '155', '156']
selected = high_volume_pages + medium_volume_pages

# Restrict both tables to the selected pages (columns of `data`, rows of
# `summary_data`).
data = data.loc[:, selected]
summary_data = summary_data.loc[selected, :]

### 2. Modify the time series

In [20]:
# High-volume pages: ['001', '015']
# Medium-volume pages: ['002', '003', '004', '010', '048', '080', '155', '156']

In [21]:
## function for drawing line plots between two selected groups
# Page ids belonging to each traffic-volume group.
_VOLUME_PAGES = {
    'high': ['001', '015'],
    'medium': ['002', '003', '004', '010', '048', '080', '155', '156'],
}


def get_line_plot(volume, window_size, date_range):
    """Return a rolling-average line plot of daily hits for one volume group.

    Parameters
    ----------
    volume : str
        Which group of pages to draw: ``'high'`` or ``'medium'``.
    window_size : int
        Number of days in the rolling-mean window.
    date_range : sequence
        ``(start, end)`` bounds; each is passed through ``pd.to_datetime``
        and both ends are included in the slice.

    Returns
    -------
    The hvplot line plot of the smoothed series, one line per page.

    Raises
    ------
    ValueError
        If ``volume`` is not one of the known groups.
    """
    # Fail early with a clear message instead of hitting an unbound local
    # further down when an unexpected group name is passed.
    if volume not in _VOLUME_PAGES:
        raise ValueError(
            f"unknown volume {volume!r}; expected one of {sorted(_VOLUME_PAGES)}"
        )
    selected = _VOLUME_PAGES[volume]

    df = data[selected]  # restrict to selected pages
    df = df.loc[pd.to_datetime(date_range[0]):pd.to_datetime(date_range[1])]  # restrict to date range
    # The rolling mean is taken after slicing, so the first window_size - 1
    # rows of the selected range are NaN and are not drawn.
    df = df.rolling(window=window_size).mean()
    return df.hvplot.line(frame_height=200, frame_width=600, grid=True)


In [26]:
time_title = '### Time-Series'

volumes = ['high', 'medium']

# The dates covered by `data` bound the slider and form its initial selection.
time_start, time_end = data.index[0], data.index[-1]

# Controls: volume group, rolling-average window (days) and date range.
volume = pn.widgets.Select(name='Volume', options=volumes, width=200)
day_avg = pn.widgets.IntSlider(name='Day average', value=7, start=1, end=21, width=200)
time_date_range = pn.widgets.DateRangeSlider(
    name='Dates',
    width=200,
    value=(time_start, time_end),
    start=time_start,
    end=time_end,
)

# Row layout: controls on the left, the initial plot on the right. The plot
# is kept at position 1 of the row.
time_controls = pn.Column(time_title, volume, day_avg, time_date_range)
initial_time_plot = get_line_plot(volume.options[0], day_avg.value, time_date_range.value)
time_row = pn.Row(time_controls, initial_time_plot)


In [23]:
def update_time_row(event):
    """Redraw the time-series plot from the current widget values.

    ``event`` is the change notification supplied by the watcher; it is not
    read here because the plot is rebuilt from the widgets' current values.
    """
    refreshed_plot = get_line_plot(volume.value, day_avg.value, time_date_range.value)
    # Position 1 of the row holds the plot pane (position 0 is the controls).
    time_row[1].object = refreshed_plot

# Re-render the plot whenever any of the three controls changes value.
for time_widget in (volume, day_avg, time_date_range):
    time_widget.param.watch(update_time_row, 'value')

time_row.servable()

In [24]:
### 3. Restrict the dataset to high- and medium-volume pages

In [25]:
def get_correlation_plots(product_x, product_y, date_range):
    """Return a scatter plot of two pages next to a correlation heatmap.

    Parameters
    ----------
    product_x, product_y : str
        Column names in ``data`` used for the scatter plot's x and y axes.
    date_range : sequence
        ``(start, end)`` bounds; each is passed through ``pd.to_datetime``
        and both ends are included in the slice.

    Returns
    -------
    The scatter plot and the heatmap of ``df.corr()`` combined with ``+``.
    """
    start = pd.to_datetime(date_range[0])
    end = pd.to_datetime(date_range[1])
    df = data.loc[start:end]  # restrict to date range

    scatter = df.hvplot.scatter(
        x=product_x,
        y=product_y,
        title=' vs '.join((product_x, product_y)),
        frame_height=250,
        frame_width=250,
        size=10,
        padding=0.1,
    )

    # Correlations between all columns over the same date range; the colour
    # scale is fixed to [-1, 1] and the y axis is inverted.
    heatmap = df.corr().hvplot.heatmap(
        title='Page correlations',
        frame_height=250,
        frame_width=250,
        rot=90,
        cmap='coolwarm',
    ).opts(invert_yaxis=True, clim=(-1, 1))

    return scatter + heatmap


correlation_title = '### Correlations'

# The dates covered by `data` bound the slider and form its initial selection.
corr_start, corr_end = data.index[0], data.index[-1]

# Controls: one page selector per scatter axis, plus the date range.
product_x = pn.widgets.Select(name='x-axis', options=data.columns.tolist(), width=200)
product_y = pn.widgets.Select(name='y-axis', options=data.columns.tolist(), width=200)
corr_date_range = pn.widgets.DateRangeSlider(
    name='Dates',
    width=200,
    value=(corr_start, corr_end),
    start=corr_start,
    end=corr_end,
)

# Row layout: controls on the left, the initial plots on the right. The
# plots are kept at position 1 of the row.
correlation_controls = pn.Column(correlation_title, product_x, product_y, corr_date_range)
initial_correlation_plots = get_correlation_plots(
    product_x.value, product_y.value, corr_date_range.value
)
correlation_row = pn.Row(correlation_controls, initial_correlation_plots)

def update_correlation_row(event):
    """Redraw the correlation plots from the current widget values.

    ``event`` is the change notification supplied by the watcher; it is not
    read here because the plots are rebuilt from the widgets' current values.
    """
    refreshed_plots = get_correlation_plots(
        product_x.value, product_y.value, corr_date_range.value
    )
    # Position 1 of the row holds the plot pane (position 0 is the controls).
    correlation_row[1].object = refreshed_plots

# Re-render the plots whenever any of the three controls changes value.
for corr_widget in (product_x, product_y, corr_date_range):
    corr_widget.param.watch(update_correlation_row, 'value')

correlation_row.servable()