In [101]:

import datetime as dt
import pandas as pd
from plotly_chart_generator import bar_chart, line_chart, display_chart
from plotly_chart_generator.chart_styles import chart_colors, chart_styles

# Render floats with a thousands separator and two decimals, e.g. 1,234.50.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [230]:
def create_df(path='origin-trail-jobs-data.csv', blockchain='xDai'):
    """Load the jobs CSV, keep a single blockchain and add ISO calendar columns.

    Parameters
    ----------
    path : str or path-like
        CSV file to read. Defaults to the file this notebook was written for.
    blockchain : str
        Value of ``BlockchainDisplayName`` to keep; all other rows are dropped.

    Returns
    -------
    pandas.DataFrame
        The selected columns for the chosen blockchain, plus the ``year``,
        ``week`` and ``day`` columns produced by ``Series.dt.isocalendar()``
        for ``CreatedTimestamp``.
    """
    use_cols = [
        'CreatedTimestamp',
        'DataSetSizeInBytes',
        'HoldingTimeInMinutes',
        'TokenAmountPerHolder',
        'BlockchainDisplayName',
    ]
    jobs = pd.read_csv(path, usecols=use_cols, parse_dates=['CreatedTimestamp'])
    jobs = jobs.loc[jobs.BlockchainDisplayName == blockchain]

    # isocalendar() keeps the original index, so an index-on-index merge simply
    # appends year / week / day next to each job.
    iso_calendar = jobs.CreatedTimestamp.dt.isocalendar()
    return pd.merge(jobs, iso_calendar, left_index=True, right_index=True)

# Load the xDai jobs once; the chart cells below are all called with this frame.
df = create_df()

In [258]:

def filter_full_weeks_num_days(df, days):
    """Keep the jobs from complete ISO weeks inside the trailing look-back window.

    The window covers the ``days`` days before the newest ``CreatedTimestamp``
    in ``df``. Within that window a week is kept only when jobs were created on
    all seven weekdays. Weeks are identified by (``year``, ``week``) when a
    ``year`` column is present, so the same week number in two different years
    is never merged; without a ``year`` column only ``week`` is used.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``CreatedTimestamp``, ``week`` and ``day`` columns.
    days : int
        Size of the look-back window in days.

    Returns
    -------
    (pandas.DataFrame, int)
        The filtered rows and the number of days they span (first to last
        job, inclusive), capped at ``days``.

    Raises
    ------
    ValueError
        If no job falls in the window or the window holds no complete week.
    """
    # Series.max() skips NaT, unlike the builtin max() which can get stuck on it.
    newest = df.CreatedTimestamp.max()
    recent = df[df.CreatedTimestamp > newest - pd.Timedelta(days=days)]
    if recent.empty:
        raise ValueError(f'No jobs found in the last {days} days')

    # Judge fullness on the windowed rows so that a week cut in half by the
    # start of the window is not reported as complete.
    week_keys = ['year', 'week'] if 'year' in recent.columns else ['week']
    weekdays_seen = recent.groupby(week_keys)['day'].transform('nunique')
    full_weeks = recent[weekdays_seen == 7]
    if full_weeks.empty:
        raise ValueError(f'No complete week found in the last {days} days')

    span = full_weeks.CreatedTimestamp.max() - full_weeks.CreatedTimestamp.min()
    num_days = min(days, span.days + 1)
    return full_weeks, num_days

def jobs_per_day(df, days=90, height=500, width=700, chart_type='line'):
    """Chart the number of jobs created on each calendar date.

    The frame is first reduced with ``filter_full_weeks_num_days(df, days)``.
    ``chart_type='line'`` draws markers joined by lines; any other value draws
    bars. Returns whatever ``display_chart`` returns.
    """
    recent_jobs, num_days = filter_full_weeks_num_days(df, days)

    # Single-row frame: one column per date, holding that date's job count.
    by_date = recent_jobs.groupby(recent_jobs.CreatedTimestamp.dt.date)
    daily_counts = by_date[['day']].count().transpose()

    style_options = dict(
        color_palette=['orange'],
        title=f'Jobs per day (last {num_days} days)',
        x_title='Date',
        showlegend=False,
        y_title='Jobs',
        width=width,
        height=height,
    )
    layout = chart_styles(**style_options)

    if chart_type != 'line':
        traces = bar_chart(daily_counts)
    else:
        traces = line_chart(daily_counts, mode='markers+lines')
    return display_chart(traces=traces, layout=layout)

In [259]:
def jobs_per_week(df, height=500, width=700, days=360, agg='sum'):
    """Bar chart of jobs per week number.

    With ``agg='sum'`` each bar is the number of jobs in that week; with any
    other value each bar is that count divided by 7 (average jobs per day).
    The frame is first reduced with ``filter_full_weeks_num_days``.
    Returns whatever ``display_chart`` returns.
    """
    full_weeks_df, _ = filter_full_weeks_num_days(df=df, days=days)

    weekly = full_weeks_df.week.value_counts()
    if agg == 'sum':
        title = 'Jobs per week'
    else:
        weekly = weekly.div(7)
        title = 'Average number of jobs per day (grouped by week)'

    # Single-row frame ordered by week number, without the final column.
    # NOTE(review): the highest-numbered week is dropped even though only
    # complete weeks remain after filtering — confirm this is intended.
    jobs_by_week = pd.DataFrame(weekly.sort_index()).transpose().iloc[:, :-1]

    style_options = dict(
        color_palette=['orange'],
        title=title,
        x_title='Week number',
        showlegend=False,
        y_title='Jobs',
        height=height,
        width=width,
    )
    layout = chart_styles(**style_options)

    traces = bar_chart(jobs_by_week)
    return display_chart(traces=traces, layout=layout)

In [266]:
def average_job_length(df, days=360, chart_type='line', measure='months', width=800, height=500):
    """Chart the mean job holding time per week number.

    Parameters
    ----------
    df : pandas.DataFrame
        Jobs frame with ``CreatedTimestamp``, ``week``, ``day`` and
        ``HoldingTimeInMinutes`` columns.
    days : int
        Look-back window passed to ``filter_full_weeks_num_days``.
    chart_type : str
        ``'line'`` draws markers joined by lines; any other value draws bars.
    measure : str
        Unit for the y-axis: ``'minutes'``, ``'hours'``, ``'days'``,
        ``'weeks'`` or ``'months'`` (case-insensitive). A month is taken as
        30 days.
    width, height : int
        Chart size forwarded to ``chart_styles``.

    Returns
    -------
    Whatever ``display_chart`` returns.

    Raises
    ------
    ValueError
        If ``measure`` is not one of the supported units. Previously every
        value other than ``'days'`` was silently converted as months while the
        chart title still showed the caller's unit.
    """
    minutes_per_unit = {
        'minutes': 1,
        'hours': 60,
        'days': 1440,
        'weeks': 10080,
        'months': 43200,  # 30 days
    }
    unit = str(measure).lower()
    if unit not in minutes_per_unit:
        raise ValueError(
            f'Unsupported measure {measure!r}; expected one of {sorted(minutes_per_unit)}'
        )

    df, num_days = filter_full_weeks_num_days(df=df, days=days)

    # Single-row frame: one column per week number, mean holding time in `unit`.
    holding_time = (
        df.groupby('week')[['HoldingTimeInMinutes']]
        .mean()
        .div(minutes_per_unit[unit])
        .round(2)
        .transpose()
    )

    layout = chart_styles(
        color_palette=['orange'],
        title=f'Average job length in {measure} (last {num_days} days)',
        x_title='Week number',
        showlegend=False,
        y_title=f'Job length in {measure.capitalize()}',
        width=width,
        height=height)

    if chart_type == 'line':
        traces = line_chart(holding_time, mode='markers+lines')
    else:
        traces = bar_chart(holding_time)

    return display_chart(traces=traces, layout=layout)

In [267]:

def job_distribution(df, days=360, width=700, height=600):
    """Bar chart of the average number of jobs created on each weekday.

    The frame is first reduced with ``filter_full_weeks_num_days(df, days)``;
    the job count of every weekday is then divided by the number of distinct
    weeks that remain. Returns whatever ``display_chart`` returns.

    Parameters
    ----------
    df : pandas.DataFrame
        Jobs frame with ``CreatedTimestamp``, ``week`` and ``day`` columns.
    days : int
        Look-back window in days (previously ignored in favour of a
        hard-coded 360).
    width, height : int
        Chart size forwarded to ``chart_styles``.
    """
    # The helper returns (frame, num_days); the original assigned the whole
    # tuple to `df`, which made the next line fail with AttributeError.
    df, _ = filter_full_weeks_num_days(df, days=days)

    # Count calendar weeks, not week numbers, so the same week number in two
    # different years is counted twice when a `year` column is available.
    week_keys = ['year', 'week'] if 'year' in df.columns else ['week']
    num_weeks = len(df[week_keys].drop_duplicates())

    col_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    job_dist = (
        df.groupby(df.CreatedTimestamp.dt.day_name())[['day']]
        .count()
        .div(num_weeks)
        .transpose()[col_order]
    )

    layout = chart_styles(
        color_palette=['orange'],
        title='Number of jobs grouped by day of week',
        x_title='Day',
        showlegend=False,
        y_title='Jobs',
        width=width,
        height=height)
    traces = bar_chart(df=job_dist)
    return display_chart(traces=traces, layout=layout)
    

## Number of jobs per day

In [268]:
# 60-day look-back from the newest job, drawn as bars instead of the default line.
jobs_per_day(df=df, days=60, chart_type='bar')

## Number of jobs per week

In [269]:
# Function defaults apply: 360-day look-back and agg='sum' (total jobs per week).
jobs_per_week(df=df)

## Average number of jobs per day - grouped by week

In [270]:
# Any agg other than 'sum' selects the per-day average (weekly count divided by 7).
jobs_per_week(df=df, agg='mean')

## Average job length

In [271]:
# Function defaults apply: 360-day look-back, line chart, lengths expressed in months.
average_job_length(df)

In [61]:
# Total TokenAmountPerHolder per ISO week number, drawn as a bar chart.
# The earlier version of this cell called `chart.layout` / `chart.bar`, but no
# `chart` object is imported or defined in this notebook, so it failed on a
# fresh kernel; it now uses the same helpers as the other chart cells.
tokens_per_week = df.groupby('week')[['TokenAmountPerHolder']].sum().transpose()
layout = chart_styles(
    color_palette=['orange'],
    title='Token amount per holder per week',
    x_title='Week number',
    showlegend=False,
    y_title='Tokens',  # was 'Jobs', but the bars show summed token amounts
    width=700,
    height=500)
traces = bar_chart(tokens_per_week)
display_chart(traces=traces, layout=layout)