In [None]:
import altair as alt 
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the raw report log. `header=[0]` takes the first CSV row as the column
# names; `skipinitialspace=True` drops blanks that directly follow a delimiter.
reports = pd.read_csv(
    './reports.csv',
    header=[0],
    skipinitialspace=True,
)

# Preview the first rows to sanity-check the parse.
reports.head()

In [None]:
# Column labels of the loaded frame, as a plain Python list.
reports.columns.tolist()

In [None]:
# Parse the report start/end timestamp columns into pandas datetimes.
# Re-running this cell is harmless: already-parsed columns convert to themselves.
for _timestamp_col in ('ReportStartDateTime', 'ReportEndDateTime'):
    reports[_timestamp_col] = pd.to_datetime(reports[_timestamp_col])

In [None]:
# "Normal" reports: rows whose DayOfWeek is not Saturday/Sunday and whose
# HourOfDay lies in 6..18 (`between` is inclusive at both ends by default).
is_weekday = ~reports['DayOfWeek'].isin(['Saturday', 'Sunday'])
in_day_hours = reports['HourOfDay'].between(6, 18)
reports_normal = reports[is_weekday & in_day_hours]

In [None]:
# Inspect the LagTime values in ascending order (unfiltered frame).
reports['LagTime'].sort_values()

## Goals:
* Show the distribution of all reports by run time
* Show the average wait time by report run hour


In [None]:
# Violin plot of the hour of day at which reports ran, one group per weekday,
# coloured by schedule frequency. Quartiles are drawn inside each violin.
sns.set_style("dark")
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']

fig, ax = plt.subplots(figsize=(20, 7))
sns.violinplot(
    data=reports_normal,
    x="DayOfWeek",
    y="HourOfDay",
    hue="SchedFreq",
    order=weekday_order,
    inner="quart",
    ax=ax,
)
sns.despine(left=True)

ax.set_title('Distribution of Runtime by Schedule Frequency')

In [None]:
# Violin plot of the hour of day at which reports ran, one violin per day of
# the month. No hue is used here, so the title describes the day-of-month
# breakdown rather than schedule frequency.
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(25, 10))
sns.violinplot(
    data=reports_normal,
    x="DayOfMonth",
    y="HourOfDay",
    inner="quart",
    ax=ax,
)
sns.despine(left=True)

ax.set_title('Distribution of Runtime by Day of Month')

In [None]:
# Check which Python/NumPy type LagTime values were parsed as, using the row
# labelled 0.
type(reports.loc[0, 'LagTime'])

In [None]:
# Line plot of LagTime against day of the month, one line per schedule
# frequency, with a 95% confidence band around each line.
# Note: set_context("talk") changes seaborn's global scaling, so it also
# affects every figure drawn after this cell.
# NOTE(review): `ci` is deprecated in newer seaborn releases in favour of
# `errorbar` - confirm the installed version before switching.
sns.set_style("darkgrid")
sns.set_context("talk")

fig, ax = plt.subplots(figsize=(25, 17))
sns.lineplot(
    data=reports_normal,
    x="DayOfMonth",
    y="LagTime",
    hue="SchedFreq",
    ci=95,
    ax=ax,
)

ax.set_title('Average Wait Time vs Day of the Month')

In [None]:

# Bar chart of ReportDeliveryTime per report category, split by schedule
# frequency. Bar heights use seaborn's default estimator (the mean).
sns.set_style("darkgrid")

fig, ax = plt.subplots(figsize=(25, 17))
sns.barplot(
    data=reports_normal,
    x="ReportCategory",
    y="ReportDeliveryTime",
    hue="SchedFreq",
    ax=ax,
)
ax.set_title('Average Delivery Time by Report Type')
# Tilt the category labels so long names do not overlap.
ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:

# Bar chart of ReportBuildTime per report category, split by schedule
# frequency. Bar heights use seaborn's default estimator (the mean).
# The title names build time because that is the metric on the y axis.
sns.set_style("darkgrid")

fig, ax = plt.subplots(figsize=(25, 17))
sns.barplot(
    # Sorting by category puts the x-axis categories in sorted order.
    data=reports_normal.sort_values('ReportCategory'),
    x="ReportCategory",
    y="ReportBuildTime",
    hue="SchedFreq",
    ax=ax,
)
ax.set_title('Average Build Time by Report Type')
# Tilt the category labels so long names do not overlap.
ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Reinsurance-only subset. It is NOT used by the figure below; the assignment
# is kept so the name stays defined for any cell that relies on it.
reinsurance = reports_normal[reports_normal['ReportCategory'] == 'Reinsurance']

# Line plot of ReportDeliveryTime against day of the month, one line per
# report category, drawn from all of `reports_normal`. The title therefore
# describes every category, not just Reinsurance.
# NOTE(review): if a Reinsurance-only view was intended, pass
# `data=reinsurance` instead and restore the Reinsurance title.
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(25, 10))
sns.lineplot(
    data=reports_normal,
    x="DayOfMonth",
    y="ReportDeliveryTime",
    hue="ReportCategory",
    ax=ax,
)
sns.despine(left=True)

ax.set_title('Average Delivery Time by Day of Month, per Report Category')

In [None]:
# Reshape the three timing columns into long form: one row per
# (report, metric) pair, with the metric name in `Type` and its value in
# `Seconds`. The descriptive columns are carried along as identifiers.
descriptor_cols = ['ReportCategory', 'SchedFreq', 'DayOfWeek', 'DayOfMonth', 'HourOfDay']
timing_cols = ['ReportBuildTime', 'QueueTime', 'ReportDeliveryTime']

report_metrics = reports_normal.melt(
    id_vars=descriptor_cols,
    value_vars=timing_cols,
    var_name='Type',
    value_name='Seconds',
)

report_metrics

In [None]:

# Bar chart of Seconds per report category, split by metric type (build,
# queue and delivery time). Bar heights use seaborn's default estimator
# (the mean). The title names all three metrics, not just delivery time.
sns.set_style("darkgrid")

fig, ax = plt.subplots(figsize=(25, 17))
sns.barplot(
    data=report_metrics,
    x="ReportCategory",
    y="Seconds",
    hue="Type",
    ax=ax,
)
ax.set_title('Average Build, Queue and Delivery Time by Report Type')
# Tilt the category labels so long names do not overlap.
ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Line plot of Seconds against day of the month, one line per metric type
# (build, queue, delivery), with no confidence band. The data is the full
# `report_metrics` frame, so the title covers all categories rather than
# Reinsurance only.
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(25, 10))
sns.lineplot(
    data=report_metrics,
    x="DayOfMonth",
    y="Seconds",
    hue="Type",
    ci=None,
    ax=ax,
)
sns.despine(left=True)

ax.set_title('Average Build, Queue and Delivery Time by Day of Month')

In [None]:
# Line plot of Seconds against day of the month, one line per report
# category, with no confidence band. Each point pools the build, queue and
# delivery rows of `report_metrics`. The data is not filtered, so the title
# covers every category rather than Reinsurance only.
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(25, 10))
sns.lineplot(
    data=report_metrics,
    x="DayOfMonth",
    y="Seconds",
    hue="ReportCategory",
    ci=None,
    ax=ax,
)
sns.despine(left=True)

ax.set_title('Average Report Time by Day of Month, per Report Category')

In [None]:
# Same day-of-month line plot, restricted to every category except
# Reinsurance; one line per remaining category, no confidence band.
sns.set_style("dark")
non_reinsurance_metrics = report_metrics[report_metrics['ReportCategory'].ne('Reinsurance')]

fig, ax = plt.subplots(figsize=(25, 10))
sns.lineplot(
    data=non_reinsurance_metrics,
    x="DayOfMonth",
    y="Seconds",
    hue="ReportCategory",
    ci=None,
    ax=ax,
)
sns.despine(left=True)

ax.set_title('Distribution of Non-Reinsurance Report Runtimes')

In [None]:
# Line plot of Seconds against hour of day, one line per report category,
# with no confidence band. The data is the full `report_metrics` frame
# (Reinsurance included), so the title must not claim a Non-Reinsurance filter.
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(25, 10))
sns.lineplot(
    data=report_metrics,
    x="HourOfDay",
    y="Seconds",
    hue="ReportCategory",
    ci=None,
    ax=ax,
)
sns.despine(left=True)

ax.set_title('Average Report Time by Hour of Day, per Report Category')

In [None]:
# Hour-of-day line plot restricted to every category except Reinsurance;
# one line per remaining category, no confidence band.
sns.set_style("dark")
keep_rows = report_metrics['ReportCategory'] != 'Reinsurance'

fig, ax = plt.subplots(figsize=(25, 10))
sns.lineplot(
    data=report_metrics.loc[keep_rows],
    x="HourOfDay",
    y="Seconds",
    hue="ReportCategory",
    ci=None,
    ax=ax,
)
sns.despine(left=True)

ax.set_title('Distribution of Non-Reinsurance Report Runtimes')

In [None]:
# Reinsurance-only subset of the filtered reports.
reinsurance = reports_normal.loc[reports_normal['ReportCategory'].eq('Reinsurance')]

# Violin plot of the hour of day at which Reinsurance reports ran, one violin
# per day of the month, with quartiles drawn inside each violin.
sns.set_style("dark")

fig, ax = plt.subplots(figsize=(25, 10))
sns.violinplot(
    data=reinsurance,
    x="DayOfMonth",
    y="HourOfDay",
    inner="quart",
    ax=ax,
)
sns.despine(left=True)

ax.set_title('Distribution of Reinsurance Report Runtimes')