# Setup

## Import packages

In [1]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
from evidently.dashboard import Dashboard
from evidently.model_profile import Profile
from evidently.profile_sections import (DataDriftProfileSection,
                                        NumTargetDriftProfileSection,
                                        RegressionPerformanceProfileSection)
from evidently.tabs import (DataDriftTab, NumTargetDriftTab,
                            RegressionPerformanceTab)
from sklearn.ensemble import RandomForestRegressor


## Install and Enable Evidently Jupyter Notebook Extension

In [2]:
# Install Evidently's notebook extension into the active environment
# (--sys-prefix), symlinking the assets instead of copying them (--symlink)
# and replacing any previous install (--overwrite); then enable it.
# NOTE(review): presumably needed for Dashboard.show() to render inline in the
# classic notebook — confirm against the installed Evidently version.
!jupyter nbextension install --sys-prefix --symlink --overwrite --py evidently
!jupyter nbextension enable evidently --py --sys-prefix

Installing /Users/shabenga/anaconda3/envs/mle/lib/python3.7/site-packages/evidently/nbextension/static -> evidently
Removing: /Users/shabenga/anaconda3/envs/mle/share/jupyter/nbextensions/evidently
Symlinking: /Users/shabenga/anaconda3/envs/mle/share/jupyter/nbextensions/evidently -> /Users/shabenga/anaconda3/envs/mle/lib/python3.7/site-packages/evidently/nbextension/static
- Validating: OK

    To initialize this nbextension in the browser every time the notebook (or other app) loads:
    
          jupyter nbextension enable evidently --py --sys-prefix
    
Enabling notebook extension evidently/extension...
      - Validating: OK


# Read the Data

In [4]:
# Load the train and test splits; in both files the column labelled
# 'Unnamed: 0' is used as the row index.
df_train, df_test = (
    pd.read_csv(csv_path, index_col='Unnamed: 0')
    for csv_path in ('train.csv', 'test.csv')
)

In [5]:
# Column mapping shared by the model code and by Evidently: it names the
# target and declares which features are numerical and which are categorical,
# so that Evidently can apply the appropriate statistical tests.
column_mapping = {
    'target': 'Weekly_Sales',
    'numerical_features': [
        'Week_Number',
        'Quarter_Number',
        'Temperature',
        'Fuel_Price',
        'CPI',
        'Unemployment',
        'Size',
    ],
    'categorical_features': ['Type_A', 'Type_B', 'Type_C', 'IsHoliday'],
}

In [6]:
# Target and feature names are derived from column_mapping, so the model and
# the Evidently dashboards always refer to the same columns (previously the
# target name was repeated as a separate literal that could drift out of sync).
target = column_mapping['target']
features = column_mapping['numerical_features'] + column_mapping['categorical_features']

In [7]:
# Keep only the model inputs and the target. The explicit .copy() makes
# df_train an independent frame, so adding the 'prediction' column later does
# not write into a slice of the loaded data (avoids SettingWithCopyWarning).
df_train = df_train.loc[:, features + [target]].copy()

# Train the Model

In [8]:
# Random forest with 100 trees, trained on 4 parallel jobs. random_state is
# fixed so that a fresh "Restart & Run All" reproduces the same model, and
# therefore the same predictions and dashboards.
rfr = RandomForestRegressor(
    n_estimators=100,
    criterion='squared_error',
    n_jobs=4,
    random_state=42,
)

In [9]:
# Fit the regressor on the training feature columns against the target column.
rfr.fit(
    df_train[features],
    df_train[target],
)

RandomForestRegressor(n_jobs=4)

# Perform Predictions

## On Train Data

In [10]:
# Score the training rows with the fitted model and store the result next to
# the inputs in a 'prediction' column.
train_predictions = rfr.predict(df_train[features])
df_train['prediction'] = train_predictions

In [11]:
# Register the name of the prediction column in the shared column mapping.
column_mapping.update(prediction='prediction')

## On Test Data

In [12]:
# Score the test rows with the same fitted model and store the result in a
# 'prediction' column, mirroring the training frame.
test_predictions = rfr.predict(df_test[features])
df_test['prediction'] = test_predictions

# Evidently Dashboards

## Regression Performance

In [14]:
# Regression-performance dashboard: df_train is passed first and df_test
# second, together with the shared column mapping.
# NOTE: the variable name keeps its existing spelling because later cells
# reference it.
performance_tabs = [RegressionPerformanceTab]
regression_perfomance_dashboard = Dashboard(tabs=performance_tabs)
regression_perfomance_dashboard.calculate(
    df_train,
    df_test,
    column_mapping=column_mapping,
)

In [15]:
# Write the regression-performance dashboard to 'performance.html'.
regression_perfomance_dashboard.save('performance.html')

In [22]:
# Show the regression-performance dashboard as this cell's output.
regression_perfomance_dashboard.show()

## Data Drift

In [23]:
# Data-drift dashboard comparing df_train with df_test.
# The shared column_mapping is passed explicitly, as for the other dashboards,
# so that the declared numerical/categorical feature lists (and the target and
# prediction columns) are honoured instead of being inferred by Evidently.
drift_dashboard = Dashboard(tabs=[DataDriftTab])
drift_dashboard.calculate(df_train, df_test, column_mapping=column_mapping)

In [30]:
# Show the data-drift dashboard as this cell's output.
drift_dashboard.show()

## Target Drift

In [36]:
# Write the data-drift dashboard (drift_dashboard) to 'drift.html'.
drift_dashboard.save('drift.html')

In [32]:
# Target-drift dashboard for the numerical target.
# column_mapping must be passed here: it is the only place Evidently is told
# that 'Weekly_Sales' is the target column and 'prediction' the model output.
# Without it the target-drift tab has no declared target to analyse.
concept_drift_dashboard = Dashboard(tabs=[NumTargetDriftTab])
concept_drift_dashboard.calculate(df_train, df_test, column_mapping=column_mapping)

## Bringing it all together

In [39]:
# Single dashboard combining the target-drift, data-drift and
# regression-performance tabs, calculated with the shared column mapping.
combined_tabs = [NumTargetDriftTab, DataDriftTab, RegressionPerformanceTab]
evidently_dashboard = Dashboard(tabs=combined_tabs)
evidently_dashboard.calculate(df_train, df_test, column_mapping=column_mapping)

In [40]:
# Write the combined dashboard to 'dashboard.html'.
evidently_dashboard.save('dashboard.html')