In [1]:
import os, sys, inspect
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import warnings
# Silence every warning for the whole notebook session.
# The first call has no category argument, so it already ignores all warning
# categories; the DeprecationWarning filter and the simplefilter call after it
# install further "ignore" rules and do not change the outcome.
# NOTE(review): blanket suppression also hides warnings raised by the training
# code — confirm that this is intended.
warnings.filterwarnings('ignore')
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.simplefilter('ignore')
from IPython.display import display, Markdown, clear_output, HTML
import ipywidgets as widgets
from qgrid import show_grid
import matplotlib.gridspec as gridspec

import importlib

class Args:
    """Empty attribute holder; fields can be attached to an instance at runtime."""

# Single shared instance created for the notebook session.
args = Args()

In [2]:
import src
import src.train
# Re-execute src.train before importing names from it, so the from-imports
# below bind whatever the reloaded module defines.
# Presumably this is here so edits to the module are picked up without
# restarting the kernel — confirm.
importlib.reload(src.train)
from src.train import Train
from src.train import Model

from src.Config import Config

Using TensorFlow backend.


In [3]:
# Build the report layout: one tab per source, each tab holding an accordion
# with one (initially empty) Output pane per report section.
# NOTE(review): the anchor emitted here has id "top", while the "[Home]" links
# written into the panes further down target "#toc" — confirm which id is intended.
display(Markdown('<a id="top"></a>'))
sources = ['District']

# The "Feature Importance" section only exists for supervised models.
if Config.MODELLING_CONFIG["MODEL_TYPE"] == "Supervised":
    sections = ["Summary", "Metrics", "Feature Importance", "Actual-vs-Predict"]
else:
    sections = ["Summary", "Metrics", "Actual-vs-Predict"]

accordions = {}
for source in sources:
    accordion = widgets.Accordion(children=[widgets.Output() for _ in sections])
    # Plain loops instead of list comprehensions: set_title is called only for
    # its side effect, so there is no list worth building.
    for isection, section in enumerate(sections):
        accordion.set_title(isection, section)
    accordions[source] = accordion

tab_fields = widgets.Tab(children=list(accordions.values()))
for i, source in enumerate(accordions):
    tab_fields.set_title(i, source)

<a id="top"></a>

In [4]:
# Show the tab widget as this cell's output (bare last expression).
# Its Output panes are empty at this point; the code further down fills them by
# writing into accordions[source].children.
tab_fields

Tab(children=(Accordion(children=(Output(), Output(), Output(), Output()), _titles={'0': 'Summary', '1': 'Metr…

In [5]:
%matplotlib agg

# Fill the accordion panes built earlier in the notebook.
#
# The Supervised and Forecasting flows share the Summary, Metrics and
# Actual-vs-Predict panes; they differ only in the algorithm list, the dataset
# passed to read_csv_file, the district count, and whether a Feature Importance
# pane exists. The shared rendering lives in the helpers below so both flows
# run the same code.


def _show_home_link():
    """Display the "[Home]" markdown link that ends every report pane."""
    display(Markdown('[Home](#toc)'))


def _fill_summary(pane, train, algs, variable, fname):
    """Read the data, run the algorithms and save the models inside `pane`.

    Parameters:
        pane: Output widget used as the capturing context.
        train: Train instance to operate on.
        algs: list of algorithm names handed to train.run.
        variable: value forwarded as the `vars` keyword of train.read_csv_file.
        fname: value forwarded as the `fname` keyword of train.read_csv_file.
    """
    with pane:
        clear_output()
        train.read_csv_file(vars=variable, fname=fname)
        train.run(algs)
        train.save_models()
        _show_home_link()


def _fill_metrics(pane, train, source):
    """Display the per-metric plots inside `pane`.

    train.results is grouped by its 'metric_name' column. Every metric gets a
    boxplot; the barplot, pie chart and boxplot_best are shown only when
    train.models holds more than one entry.

    Parameters:
        pane: Output widget used as the capturing context.
        train: Train instance whose results and models are plotted.
        source: source name interpolated into the captions.
    """
    with pane:
        clear_output()
        grouped = train.results.groupby('metric_name')
        for number, (metric, group_data) in enumerate(grouped, start=1):
            display(Markdown(r'<h3> {}. {} </h3>'.format(number, metric)))
            display(Markdown(r'<p> Boxplot of {0} performance segregated by algorithms for cluster in {1}. </p>'.format(metric, source)))
            # boxplot_metric / barplot_metric are called on the class, as in the original.
            display(Train.boxplot_metric(group_data, metric))
            n_models = len(train.models)
            if n_models > 1:
                display(Markdown(r'<p> Barplot of {0} performance by algorithm for each cluster. Overall, there are <b>{1}</b> cluster. </p>'.format(metric, n_models)))
                display(Train.barplot_metric(group_data, metric))
                display(Markdown(r'<p> Pie chart showing the percentage of best algorithm(s) for {1} cluster in terms of {0} metric. </p>'.format(metric, n_models)))
                display(train.piechart_metric(metric))
                display(Markdown(r'<p> Boxplot of {0} performance from best algorithm in {1}. </p>'.format(metric, source)))
                display(train.boxplot_best(metric))
        _show_home_link()


def _fill_feature_importance(pane, train, n_districts):
    """Display feature_importance_plot() of each district's first model inside `pane`.

    Parameters:
        pane: Output widget used as the capturing context.
        train: Train instance providing the districts and models.
        n_districts: argument forwarded to train.get_districts_for_plt.
    """
    with pane:
        clear_output()
        districts = train.get_districts_for_plt(n_districts)
        for number, district in enumerate(districts, start=1):
            display(Markdown(r'<h3>{}. {} </h3>'.format(number, district)))
            display(train.models[district][0].feature_importance_plot())
        _show_home_link()


def _fill_actual_vs_predict(pane, train, n_districts):
    """Display actual-vs-predicted, residual and time-series plots per district.

    Parameters:
        pane: Output widget used as the capturing context.
        train: Train instance providing the districts, models and plots.
        n_districts: argument forwarded to train.get_districts_for_plt.
    """
    with pane:
        clear_output()
        # get_districts_for_plt is called once per pane, as in the original.
        districts = train.get_districts_for_plt(n_districts)
        for number, district in enumerate(districts, start=1):
            display(Markdown(r'<h3>{}. {} </h3>'.format(number, district)))
            display(train.actual_pred_plot(district))
            first_model = train.models[district][0]
            display(first_model.residual_plot())
            display(train.time_series_plot(first_model.date, district))
        _show_home_link()


_model_type = Config.MODELLING_CONFIG["MODEL_TYPE"]

# `algs` stays a cell-level name, as in the original. Any other model type
# leaves it undefined and renders nothing.
if _model_type == 'Supervised':
    algs = ["XGBR", "XGBR_tuned", "LGBMR", "LGBMR_tuned"]
    _report = dict(variable='Primax_95', fname='primax_95',
                   n_districts=20, feature_importance=True)
elif _model_type == 'Forecasting':
    algs = ["ARIMA", "SARIMA", "HOLT_WINTER"]
    # NOTE(review): vars is 'Biodiesel_50' while fname is 'biodiesel_B10' —
    # confirm that this pairing is intended.
    _report = dict(variable='Biodiesel_50', fname='biodiesel_B10',
                   n_districts=40, feature_importance=False)
else:
    _report = None

if _report is not None:
    for source in accordions:
        train = Train("Prod_Sales")
        # Never read in this cell; kept so the notebook namespace matches the original.
        results = None
        panes = accordions[source].children

        _fill_summary(panes[0], train, algs, _report['variable'], _report['fname'])
        _fill_metrics(panes[1], train, source)

        # Pane order follows the section titles: Feature Importance (supervised
        # only) sits at index 2 and moves Actual-vs-Predict to index 3.
        if _report['feature_importance']:
            _fill_feature_importance(panes[2], train, _report['n_districts'])
            _fill_actual_vs_predict(panes[3], train, _report['n_districts'])
        else:
            _fill_actual_vs_predict(panes[2], train, _report['n_districts'])