# Report Examples

Let's see how to create an HTML report for different models.

## Import libraries and prepare data

In [1]:
# Import key libraries we potentially need to start this pipeline.

import numpy as np
import pandas as pd
from hyperopt import hp
from sklearn.metrics import mean_gamma_deviance, mean_squared_error

# InsolverDataFrame is a special class based on the pandas DataFrame with additional properties, which allow you to use additional methods on data frames. We will look at these methods later.
from insolver import InsolverDataFrame

# Insolver transformations are used for data transformation, especially during inference.
# After you experiment with a dataset, you can register your own transformations and then use them anywhere. They are also extremely useful during model implementation.
from insolver.transforms import (
    InsolverTransform,
    TransformExp,
    TransformAge,
    TransformMapValues,
    TransformPolynomizer,
    TransformAgeGender,
)

# Special wrappers allow you to create models with simple interfaces.
# Here we import GLM models, which are very often used in insurance, GBM models, which have become very popular in recent years, and Trivial models to compare our models with trivial baselines.

from insolver.wrappers import InsolverGLMWrapper, InsolverGBMWrapper, InsolverTrivialWrapper, InsolverRFWrapper
from insolver.model_tools import ModelMetricsCompare, deviance_gamma, download_dataset

In [2]:
# We can also define our own (user) transformations
class TransformSocioCateg:
    """User-defined transformation that truncates a string column to its first four characters.

    Parameters:
        column_socio_categ: name of the column to truncate.

    Attributes:
        priority: set to 0 on construction.
        column_socio_categ: the column name given to the constructor.
    """

    def __init__(self, column_socio_categ):
        self.column_socio_categ = column_socio_categ
        self.priority = 0

    def __call__(self, df):
        """Overwrite the configured column of `df` with its 4-character prefix and return `df`.

        The frame is modified in place; the returned object is the same `df`.
        """
        column = self.column_socio_categ
        truncated = df[column].str[:4]
        df[column] = truncated
        return df

In [3]:
# Redefine the experience transformation (this class shadows the `TransformExp` imported from insolver.transforms above)
class TransformExp:
    """User-defined transformation that rescales and caps a driving-experience column.

    Each value of the column is integer-divided by 12 (presumably converting
    months to whole years - confirm against the dataset description) and capped
    at `exp_max`. Missing and negative values become None.

    Parameters:
        column_driver_minexp: name of the column to transform.
        exp_max: upper bound applied after the division by 12 (default 52).

    Attributes:
        priority: set to 1 on construction.
    """

    def __init__(self, column_driver_minexp, exp_max=52):
        self.priority = 1
        self.column_driver_minexp = column_driver_minexp
        self.exp_max = exp_max

    @staticmethod
    def _exp(exp, exp_max):
        """Transform a single value.

        Returns None for null or negative input; otherwise `exp // 12`,
        limited to at most `exp_max`.
        """
        # Local import kept from the original implementation so the method
        # does not depend on a module-level `pd` name.
        import pandas as pd

        # Return early for invalid input: the original fell through to the
        # `exp > exp_max` comparison with exp = None, which raises TypeError.
        if pd.isnull(exp) or exp < 0:
            return None

        scaled = exp // 12
        return exp_max if scaled > exp_max else scaled

    def __call__(self, df):
        """Apply `_exp` element-wise to the configured column of `df` (in place) and return `df`."""
        df[self.column_driver_minexp] = df[self.column_driver_minexp].apply(self._exp, args=(self.exp_max,))
        return df

In [4]:
# Render floats with two decimals whenever a frame is displayed
pd.options.display.float_format = '{:.2f}'.format

# Download the freMPL-R dataset and read it into a pandas dataframe, keeping
# only sub-datasets 5-9, dropping columns that contain no values at all and
# keeping only rows with a positive claim amount
download_dataset('freMPL-R')
df = (
    pd.read_csv('./datasets/freMPL-R.csv', low_memory=False)
    .loc[lambda frame: frame.Dataset.isin([5, 6, 7, 8, 9])]
    .dropna(how='all', axis=1)
    .loc[lambda frame: frame.ClaimAmount > 0]
)

# Wrap the frame in an InsolverDataFrame to get the extra analytics and transformation methods
InsDataFrame = InsolverDataFrame(df)

# Combine every transformation (library-provided and user-defined) into a single object
InsTransforms = InsolverTransform(
    InsDataFrame,
    [
        TransformSocioCateg('SocioCateg'),
        TransformAge('DrivAge', 18, 75),
        TransformExp('LicAge', 57),
        TransformMapValues('Gender', {'Male': 0, 'Female': 1}),
        TransformMapValues('MariStat', {'Other': 0, 'Alone': 1}),
        TransformAgeGender('DrivAge', 'Gender', 'Age_m', 'Age_f', age_default=18, gender_male=0, gender_female=1),
        TransformPolynomizer('Age_m'),
        TransformPolynomizer('Age_f'),
    ],
)

# Apply the transformations
InsTransforms.ins_transform()

# Shuffled train / validation / test split with a fixed seed (15% validation, 15% test)
train, valid, test = InsTransforms.split_frame(val_size=0.15, test_size=0.15, random_state=0, shuffle=True)

# Model inputs and the target column
features = [
    'LicAge', 'Gender', 'MariStat', 'DrivAge',
    'HasKmLimit', 'BonusMalus', 'RiskArea',
    'Age_m', 'Age_f', 'Age_m_2', 'Age_f_2',
]
target = 'ClaimAmount'

# Feature matrices, targets and Exposure columns for each of the three splits
x_train, x_valid, x_test = train[features], valid[features], test[features]
y_train, y_valid, y_test = train[target], valid[target], test[target]
offset_train, offset_valid, offset_test = train['Exposure'], valid['Exposure'], test['Exposure']

## Report creation

Let's train some models and see how we can create reports for them 

To create a report we need the `insolver.report.Report` class.

In [5]:
from insolver.report import Report

**Random Forest**

In [None]:
# Fit a random forest regressor (sklearn backend) on the training split.
# NOTE(review): no random seed is passed here, so results may vary between runs -
# confirm whether the wrapper forwards a `random_state` argument.
irf = InsolverRFWrapper(backend='sklearn', task='reg')
irf.fit(x_train, y_train)

# Predict once per split. The original cell predicted on x_test twice;
# `predict_rf` is kept as an alias of the test predictions so that any code
# relying on that name keeps working.
predict_rf_train = irf.predict(x_train)
predict_rf_test = irf.predict(x_test)
predict_rf = predict_rf_test

# To use Report we need to pass as parameters
# model:             model instance
# task:              'reg' for regression and 'class' for classification
# X_train, y_train:  train dataset
# predicted_train:   model predictions for train dataset
# X_test, y_test:    test dataset
# predicted_test:    model predictions for test dataset

r = Report(
    model=irf,
    task='reg',
    X_train=x_train,
    y_train=y_train,
    predicted_train=pd.Series(predict_rf_train),
    X_test=x_test,
    y_test=y_test,
    predicted_test=pd.Series(predict_rf_test),
)

# To create an html file we use the `Report.to_html()` method
# Parameters are:
# path:        existing directory to save report (default '.')
# report_name: name of created report directory (default 'report')

r.to_html(report_name='0_random_forest_report')

---

Now you can open `report.html` from the report directory that was just created (here `0_random_forest_report`).

---

In the same way, you can create reports for other models and dataframes.

In [None]:
# iglm_h2o: gamma GLM with a log link on the h2o backend,
# fitted with the Exposure columns as sample weights and a validation split
iglm = InsolverGLMWrapper(backend='h2o', family='gamma', link='log')
iglm.fit(
    x_train,
    y_train,
    sample_weight=offset_train,
    X_valid=x_valid,
    y_valid=y_valid,
    sample_weight_valid=offset_valid,
)

# Predictions for the train and test splits, each with its own Exposure column as sample_weight
predict_glm_train = iglm.predict(x_train, sample_weight=offset_train)
predict_glm_test = iglm.predict(x_test, sample_weight=offset_test)

# Build the report from the fitted model, both splits and their predictions, then export it
r = Report(
    model=iglm,
    task='reg',
    X_train=x_train,
    X_test=x_test,
    y_train=y_train,
    y_test=y_test,
    predicted_train=pd.Series(predict_glm_train),
    predicted_test=pd.Series(predict_glm_test),
)
r.to_html(report_name='1_glm_h2o_report')

In [None]:
# iglm_sklearn: gamma GLM with a log link on the sklearn backend (standardize=True),
# fitted with the training Exposure column as sample weights
iglm2 = InsolverGLMWrapper(
    backend='sklearn',
    family='gamma',
    link='log',
    standardize=True,
)
iglm2.fit(x_train, y_train, sample_weight=offset_train)

# Predictions for the train and test splits, each with its own Exposure column as sample_weight
predict_glm2_train = iglm2.predict(x_train, sample_weight=offset_train)
predict_glm2_test = iglm2.predict(x_test, sample_weight=offset_test)

# Build the report from the fitted model, both splits and their predictions, then export it
r = Report(
    model=iglm2,
    task='reg',
    X_train=x_train,
    X_test=x_test,
    y_train=y_train,
    y_test=y_test,
    predicted_train=pd.Series(predict_glm2_train),
    predicted_test=pd.Series(predict_glm2_test),
)
r.to_html(report_name='2_glm_sklearn_report')

In [None]:
# igbm_xgboost: gradient boosting regressor on the xgboost backend
# (100 estimators, gamma objective, 'hist' tree method)
igbm = InsolverGBMWrapper(
    backend='xgboost',
    task='reg',
    n_estimators=100,
    objective='gamma',
    tree_method='hist',
)
igbm.fit(x_train, y_train, sample_weight=offset_train)

# Predictions for the train and test splits
predict_gbm_train = igbm.predict(x_train)
predict_gbm_test = igbm.predict(x_test)

# Build the report from the fitted model, both splits and their predictions, then export it
r = Report(
    model=igbm,
    task='reg',
    X_train=x_train,
    X_test=x_test,
    y_train=y_train,
    y_test=y_test,
    predicted_train=pd.Series(predict_gbm_train),
    predicted_test=pd.Series(predict_gbm_test),
)
r.to_html(report_name='3_gbm_xgboost_report')

In [None]:
# igbm_lightgbm: gradient boosting regressor on the lightgbm backend
# (100 estimators, gamma objective, gamma deviance metric, 'goss' boosting type)
igbm2 = InsolverGBMWrapper(
    backend='lightgbm',
    task='reg',
    n_estimators=100,
    objective='gamma',
    metric='gamma_deviance',
    boosting_type='goss',
)
igbm2.fit(x_train, y_train, sample_weight=offset_train)

# Predictions for the train and test splits
predict_gbm2_train = igbm2.predict(x_train)
predict_gbm2_test = igbm2.predict(x_test)

# Build the report from the fitted model, both splits and their predictions, then export it
r = Report(
    model=igbm2,
    task='reg',
    X_train=x_train,
    X_test=x_test,
    y_train=y_train,
    y_test=y_test,
    predicted_train=pd.Series(predict_gbm2_train),
    predicted_test=pd.Series(predict_gbm2_test),
)
r.to_html(report_name='4_gbm_lightgbm_report')

In [None]:
# igbm3: gradient boosting regressor on the catboost backend
# (100 estimators, gamma objective, silent=True)
igbm3 = InsolverGBMWrapper(
    backend='catboost',
    task='reg',
    n_estimators=100,
    objective='gamma',
    silent=True,
)
igbm3.fit(x_train, y_train, sample_weight=offset_train)

# Predictions for the train and test splits
predict_gbm3_train = igbm3.predict(x_train)
predict_gbm3_test = igbm3.predict(x_test)

# Build the report from the fitted model, both splits and their predictions, then export it
r = Report(
    model=igbm3,
    task='reg',
    X_train=x_train,
    X_test=x_test,
    y_train=y_train,
    y_test=y_test,
    predicted_train=pd.Series(predict_gbm3_train),
    predicted_test=pd.Series(predict_gbm3_test),
)
r.to_html(report_name='5_gbm_catboost_report')