In [1]:
import pandas as pd
import plotly.express as px
from ipywidgets import interact

In [2]:
# Raw web-event log; it is downloaded over HTTPS each time this cell runs.
WEB_EVENTS_URL = 'https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/web_events.csv'
web_df = pd.read_csv(WEB_EVENTS_URL)

In [4]:
# Interpret the raw `timestamp` values as milliseconds since the Unix epoch
# (unit='ms') and overwrite the column with the parsed datetimes.
web_df['timestamp'] = pd.to_datetime(web_df['timestamp'], unit='ms')
# Preview the first rows to check the conversion.
web_df.head()

Unnamed: 0,timestamp,visitorid,event,itemid,transactionid
0,2015-06-02 05:02:12.117,257597,view,355908,
1,2015-06-02 05:50:14.164,992329,view,248676,
2,2015-06-02 05:13:19.827,111016,view,318965,
3,2015-06-02 05:12:35.914,483717,view,253185,
4,2015-06-02 05:02:17.106,951259,view,367447,


In [85]:
# Derive calendar features from the parsed timestamp; the group-bys and pivot
# table further down the notebook key on these columns.
event_time = web_df['timestamp'].dt
web_df['Date'] = event_time.date              # datetime.date objects (object dtype)
web_df['Month'] = event_time.strftime('%B')   # full month name, e.g. 'June'
web_df['Day'] = event_time.day                # day of the month
# `.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` replaces it
# and yields the same English day names ('Monday', 'Tuesday', ...).
web_df['Weekday'] = event_time.day_name()
# `Hour` is needed by the Hour/Month pivot table later in the notebook but was
# not created by any remaining cell, so a fresh "Run All" failed with KeyError.
web_df['Hour'] = event_time.hour

In [108]:
# Preview the frame again to confirm the derived calendar columns.
web_df.head()

Unnamed: 0,timestamp,visitorid,event,itemid,transactionid,Date,Month,Day,Weekday,Hour
0,2015-06-02 05:02:12.117,257597,view,355908,,2015-06-02,June,2,Tuesday,5
1,2015-06-02 05:50:14.164,992329,view,248676,,2015-06-02,June,2,Tuesday,5
2,2015-06-02 05:13:19.827,111016,view,318965,,2015-06-02,June,2,Tuesday,5
3,2015-06-02 05:12:35.914,483717,view,253185,,2015-06-02,June,2,Tuesday,5
4,2015-06-02 05:02:17.106,951259,view,367447,,2015-06-02,June,2,Tuesday,5


In [113]:
def ilinechart(df, x, y, groups=None, title=''):
    """Show an interactive plotly line chart of ``y`` against ``x``.

    Parameters
    ----------
    df : data handed directly to ``px.line``.
    x, y : column names used for the horizontal and vertical axes.
    groups : optional column name, forwarded as ``color`` so that each
        distinct value is drawn as its own coloured line.
    title : chart title.

    The figure is displayed with ``show()``; nothing is returned.
    """
    line_options = dict(x=x, y=y, color=groups, title=title, template='none')
    # The title is left at plotly's default position (no centring applied).
    px.line(df, **line_options).show()

In [114]:
# Number of events for every (day-of-month, month) pair; `as_index=False`
# keeps the grouping keys as ordinary columns so they can be plotted.
events_by_day_month = web_df.groupby(['Day', 'Month'], as_index=False)
df = events_by_day_month.agg({'event': 'count'})

In [115]:
# Preview the per-(Day, Month) event counts.
df.head()

Unnamed: 0,Day,Month,event
0,1,August,15459
1,1,July,20764
2,1,June,22796
3,1,September,18178
4,2,August,15220


In [116]:
# One coloured line per month, traced across the days of that month.
ilinechart(
    df,
    x='Day',
    y='event',
    groups='Month',
    title='Number of Events by Day',
)

In [35]:
def ihistogram(df, field, bins=None, title=''):
    """Show an interactive plotly histogram of the ``field`` column.

    Parameters
    ----------
    df : data handed directly to ``px.histogram``.
    field : column name whose values are binned along the x axis.
    bins : forwarded as ``nbins``; ``None`` leaves the choice to plotly.
    title : chart title, centred horizontally.

    The figure is displayed with ``show()``; nothing is returned.
    """
    fig = px.histogram(df, x=field, nbins=bins, title=title, template='none')

    # Centre the title and label the count axis.
    fig.update_layout(title=dict(x=0.5))
    fig.update_yaxes(title_text='Number of Records')

    # Light-blue bars with a thin black outline.
    bar_style = dict(
        marker_color='lightskyblue',
        marker_line_color='black',
        marker_line_width=1,
    )
    fig.update_traces(**bar_style)

    fig.show()

In [117]:
# Distribution of the per-(Day, Month) event counts computed above.
ihistogram(
    df,
    field='event',
    bins=50,
    title='Daily Web Events Distribution',
)

In [41]:
def ibarchart(df, x, y, order=None, title=''):
    """Show an interactive plotly bar chart of ``y`` for each ``x`` category.

    Parameters
    ----------
    df : data handed directly to ``px.bar``.
    x, y : column names for the category axis and the bar heights.
    order : sequence of category labels, forwarded as the x axis
        ``categoryarray`` (with ``categoryorder='array'``); ``None`` is
        forwarded unchanged.
    title : chart title.

    The figure is displayed with ``show()``; nothing is returned.
    """
    fig = px.bar(df, x=x, y=y, title=title, template='none')

    # Light-blue bars with a thin black outline.
    bar_style = dict(
        marker_color='lightskyblue',
        marker_line_color='black',
        marker_line_width=1,
    )
    fig.update_traces(**bar_style)

    # Lay the categories out in the caller-supplied order.
    x_axis = dict(categoryorder='array', categoryarray=order)
    fig.update_layout(xaxis=x_axis)

    fig.show()

In [45]:
# Total number of events per weekday name; the key stays a regular column.
events_by_weekday = web_df.groupby(['Weekday'], as_index=False)
newdf = events_by_weekday.agg({'event': 'count'})

In [119]:
# Calendar order for the x axis, passed to ibarchart as the category order.
order = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]
ibarchart(newdf, x='Weekday', y='event', order=order)

In [86]:
def iscatter(df, x, y, color=None, size=None, title=''):
    """Show an interactive plotly scatter plot of ``y`` against ``x``.

    Parameters
    ----------
    df : data handed directly to ``px.scatter``.
    x, y : column names for the two axes.
    color : optional column name used to colour the markers.
    size : optional column name used to scale the markers.
    title : chart title.

    The figure is displayed with ``show()``; nothing is returned.
    """
    scatter_options = dict(x=x, y=y, color=color, size=size,
                           title=title, template='none')
    fig = px.scatter(df, **scatter_options)

    # Thin black outline so overlapping markers stay distinguishable.
    fig.update_traces(marker_line_color='black', marker_line_width=1)

    fig.show()

In [87]:
# One row per (Hour, Month) and one column per event type; each cell holds the
# count of `visitorid` entries for that combination.
new_df = (
    web_df
    .pivot_table(
        index=['Hour', 'Month'],
        columns=['event'],
        values='visitorid',
        aggfunc='count',
    )
    .reset_index()
)

In [88]:
# Preview the pivoted per-(Hour, Month) event-type counts.
new_df.head()

event,Hour,Month,addtocart,transaction,view
0,0,August,751,225,27577
1,0,July,951,288,35425
2,0,June,773,254,30557
3,0,May,826,226,30412
4,0,September,424,126,15488


In [89]:
# Views vs. transactions for each (Hour, Month) row; marker colour encodes the
# month and marker size the view count.
iscatter(new_df, x='view', y='transaction', color='Month', size='view')

In [126]:
# Every calendar date present in the data, oldest first.
dates = sorted(web_df['Date'].unique())

# BegDate defaults to the earliest date and EndDate to the latest one, because
# a list passed to `interact` becomes a dropdown whose first entry is selected.
@interact(BegDate=dates, EndDate=dates[::-1])
def viz(BegDate, EndDate):
    """Plot the number of events per day for the selected date range.

    Parameters
    ----------
    BegDate : datetime.date
        First day to include.
    EndDate : datetime.date
        Last day to include (the whole day, not just its midnight).

    The chart is displayed through ``ilinechart``; nothing is returned. If
    ``BegDate`` is later than ``EndDate`` the selection is empty.
    """
    range_start = pd.Timestamp(BegDate)
    # Half-open upper bound at the midnight *after* EndDate. The previous
    # strict comparison against EndDate's own midnight silently dropped every
    # event that happened on the selected end date.
    range_end = pd.Timestamp(EndDate) + pd.Timedelta(days=1)

    in_range = (web_df['timestamp'] >= range_start) & (web_df['timestamp'] < range_end)
    grouped_df = web_df[in_range].groupby('Date', as_index=False).agg({'event': 'count'})
    ilinechart(grouped_df, x='Date', y='event')

interactive(children=(Dropdown(description='BegDate', options=(datetime.date(2015, 5, 3), datetime.date(2015, …

In [93]:
# Inspect the list of dates offered by the BegDate / EndDate dropdowns.
dates

[datetime.date(2015, 5, 3),
 datetime.date(2015, 5, 4),
 datetime.date(2015, 5, 5),
 datetime.date(2015, 5, 6),
 datetime.date(2015, 5, 7),
 datetime.date(2015, 5, 8),
 datetime.date(2015, 5, 9),
 datetime.date(2015, 5, 10),
 datetime.date(2015, 5, 11),
 datetime.date(2015, 5, 12),
 datetime.date(2015, 5, 13),
 datetime.date(2015, 5, 14),
 datetime.date(2015, 5, 15),
 datetime.date(2015, 5, 16),
 datetime.date(2015, 5, 17),
 datetime.date(2015, 5, 18),
 datetime.date(2015, 5, 19),
 datetime.date(2015, 5, 20),
 datetime.date(2015, 5, 21),
 datetime.date(2015, 5, 22),
 datetime.date(2015, 5, 23),
 datetime.date(2015, 5, 24),
 datetime.date(2015, 5, 25),
 datetime.date(2015, 5, 26),
 datetime.date(2015, 5, 27),
 datetime.date(2015, 5, 28),
 datetime.date(2015, 5, 29),
 datetime.date(2015, 5, 30),
 datetime.date(2015, 5, 31),
 datetime.date(2015, 6, 1),
 datetime.date(2015, 6, 2),
 datetime.date(2015, 6, 3),
 datetime.date(2015, 6, 4),
 datetime.date(2015, 6, 5),
 datetime.date(2015, 6, 6)

In [102]:
# Check the column dtypes and memory footprint of the enriched frame.
web_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2756101 entries, 0 to 2756100
Data columns (total 10 columns):
timestamp        datetime64[ns]
visitorid        int64
event            object
itemid           int64
transactionid    float64
Date             object
Month            object
Day              int64
Weekday          object
Hour             int64
dtypes: datetime64[ns](1), float64(1), int64(4), object(4)
memory usage: 210.3+ MB


In [98]:
# Stand-alone check of the date-range filter, outside the widget callback.
# `BegDate` / `EndDate` only exist as parameters of `viz`, so they are bound
# explicitly here (to the full data range) to keep this cell runnable on a
# fresh kernel instead of raising NameError.
BegDate, EndDate = web_df['Date'].min(), web_df['Date'].max()
# Compare datetimes with datetimes: convert the date bounds to Timestamps and
# use the midnight after EndDate so that the end day itself is included.
range_start = pd.Timestamp(BegDate)
range_end = pd.Timestamp(EndDate) + pd.Timedelta(days=1)
filtered_df = web_df[(web_df['timestamp'] >= range_start) & (web_df['timestamp'] < range_end)]
# NOTE(review): a bare GroupBy object has no tabular display; chain an
# aggregation such as `.count()` if per-day numbers are wanted here.
filtered_df.groupby('Date')

Unnamed: 0,timestamp,visitorid,event,itemid,transactionid,Date,Month,Day,Weekday,Hour
0,2015-06-02 05:02:12.117,257597,view,355908,,2015-06-02,June,2,Tuesday,5
1,2015-06-02 05:50:14.164,992329,view,248676,,2015-06-02,June,2,Tuesday,5
2,2015-06-02 05:13:19.827,111016,view,318965,,2015-06-02,June,2,Tuesday,5
3,2015-06-02 05:12:35.914,483717,view,253185,,2015-06-02,June,2,Tuesday,5
4,2015-06-02 05:02:17.106,951259,view,367447,,2015-06-02,June,2,Tuesday,5
...,...,...,...,...,...,...,...,...,...,...
2756096,2015-08-01 03:13:05.939,591435,view,261427,,2015-08-01,August,1,Saturday,3
2756097,2015-08-01 03:30:13.142,762376,view,115946,,2015-08-01,August,1,Saturday,3
2756098,2015-08-01 02:57:00.527,1251746,view,78144,,2015-08-01,August,1,Saturday,2
2756099,2015-08-01 03:08:50.703,1184451,view,283392,,2015-08-01,August,1,Saturday,3


In [128]:
@interact(View=True, AddtoCart=False, Transaction=False)
def viz(View, AddtoCart, Transaction):
    """Plot daily event counts for the event types ticked in the checkboxes.

    Parameters
    ----------
    View, AddtoCart, Transaction : bool
        Whether to include 'view', 'addtocart' and 'transaction' events.

    The chart is displayed through ``ilinechart`` with one line per selected
    event type; nothing is returned. With no box ticked the chart is empty.
    """
    selected = {'view': View, 'addtocart': AddtoCart, 'transaction': Transaction}
    events = [event_name for event_name, enabled in selected.items() if enabled]

    # The original filtered an undefined `data` frame (NameError) and then
    # discarded the result; filter the loaded events and chart them instead.
    filtered_data = web_df[web_df['event'].isin(events)]
    daily_counts = (
        filtered_data
        .groupby(['Date', 'event'])
        .size()
        .reset_index(name='count')
    )
    ilinechart(daily_counts, x='Date', y='count', groups='event',
               title='Daily Events by Type')
    
    

interactive(children=(Checkbox(value=True, description='View'), Checkbox(value=False, description='AddtoCart')…