In [1]:
import pandas as pd

In [2]:
# Load Insolver

from insolver.frame import InsolverDataFrame
from insolver.transforms import InsolverTransform
from insolver.transforms import (
    TransformPolynomizer,
    TransformAge,
    TransformExp,
    TransformMapValues,
    TransformGetDummies,
    TransformAgeGender,
)

from insolver.model_tools import download_dataset

In [3]:
# Download the freMPL-R dataset; the call reports that the file is saved
# into the local "datasets" folder, which the load cell below reads from.
download_dataset('freMPL-R')

'Dataset freMPL-R saved to "datasets" folder'

In [4]:
# Load the downloaded CSV into a plain pandas DataFrame.
# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, which avoids mixed-dtype inference on columns such as VehAge
# (it holds both plain numbers and ranges like '6-7').
df = pd.read_csv('./datasets/freMPL-R.csv', low_memory=False)

# InsolverDataFrame

In [5]:
# Wrap the raw pandas DataFrame in insolver's InsolverDataFrame.
ins_df = InsolverDataFrame(df)

In [6]:
# Preview the first rows of the wrapped frame.
ins_df.head()

Unnamed: 0,Exposure,LicAge,RecordBeg,RecordEnd,VehAge,Gender,MariStat,SocioCateg,VehUsage,DrivAge,...,ClaimInd,Dataset,DeducType,ClaimNbResp,ClaimNbNonResp,ClaimNbParking,ClaimNbFireTheft,ClaimNbWindscreen,OutUseNb,RiskArea
0,0.583,366,2004-06-01,,2,Female,Other,CSP1,Professional,55,...,0,1,,,,,,,,
1,0.2,187,2004-10-19,,0,Male,Alone,CSP55,Private+trip to office,34,...,0,1,,,,,,,,
2,0.083,169,2004-07-16,2004-08-16,1,Female,Other,CSP1,Professional,33,...,0,1,,,,,,,,
3,0.375,170,2004-08-16,,1,Female,Other,CSP1,Professional,34,...,0,1,,,,,,,,
4,0.5,224,2004-01-01,2004-07-01,3,Male,Other,CSP47,Professional,53,...,1,1,,,,,,,,


In [7]:
# Summarise the columns: dtype and non-null count for each.
ins_df.info()

<class 'insolver.frame.frame.InsolverDataFrame'>
RangeIndex: 343080 entries, 0 to 343079
Data columns (total 31 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Exposure           343080 non-null  float64
 1   LicAge             343080 non-null  int64  
 2   RecordBeg          343080 non-null  object 
 3   RecordEnd          181115 non-null  object 
 4   VehAge             177880 non-null  object 
 5   Gender             343080 non-null  object 
 6   MariStat           343080 non-null  object 
 7   SocioCateg         343080 non-null  object 
 8   VehUsage           343080 non-null  object 
 9   DrivAge            343080 non-null  int64  
 10  HasKmLimit         343080 non-null  int64  
 11  BonusMalus         343080 non-null  int64  
 12  VehBody            145780 non-null  object 
 13  VehPrice           145780 non-null  object 
 14  VehEngine          145780 non-null  object 
 15  VehEnergy          145780 non-null  obje

In [8]:
# Show insolver's metadata dict for the frame: its type, its length and,
# for every column, the name, dtype and a 'use' field.
ins_df.get_meta_info()

{'type': 'InsolverDataFrame',
 'len': 343080,
 'columns': [{'name': 'Exposure', 'dtype': dtype('float64'), 'use': 'unknown'},
  {'name': 'LicAge', 'dtype': dtype('int64'), 'use': 'unknown'},
  {'name': 'RecordBeg', 'dtype': dtype('O'), 'use': 'unknown'},
  {'name': 'RecordEnd', 'dtype': dtype('O'), 'use': 'unknown'},
  {'name': 'VehAge', 'dtype': dtype('O'), 'use': 'unknown'},
  {'name': 'Gender', 'dtype': dtype('O'), 'use': 'unknown'},
  {'name': 'MariStat', 'dtype': dtype('O'), 'use': 'unknown'},
  {'name': 'SocioCateg', 'dtype': dtype('O'), 'use': 'unknown'},
  {'name': 'VehUsage', 'dtype': dtype('O'), 'use': 'unknown'},
  {'name': 'DrivAge', 'dtype': dtype('int64'), 'use': 'unknown'},
  {'name': 'HasKmLimit', 'dtype': dtype('int64'), 'use': 'unknown'},
  {'name': 'BonusMalus', 'dtype': dtype('int64'), 'use': 'unknown'},
  {'name': 'VehBody', 'dtype': dtype('O'), 'use': 'unknown'},
  {'name': 'VehPrice', 'dtype': dtype('O'), 'use': 'unknown'},
  {'name': 'VehEngine', 'dtype': dtype(

# InsolverTransforms

In [9]:
# User-defined override of the existing insolver transform with the same name


class TransformExp:
    """Convert an experience column to whole units of 12, capped at ``exp_max``.

    This class deliberately reuses the name of the ``TransformExp`` imported
    from ``insolver.transforms`` so that it replaces it in this notebook.

    Parameters
    ----------
    column_driver_minexp : str
        Name of the column holding the experience value. The value is
        integer-divided by 12, i.e. it is treated as months and converted to
        years (NOTE(review): confirm the unit against the dataset docs).
    exp_max : int, default 52
        Upper cap applied to the converted value.

    Attributes
    ----------
    priority : int
        Set to 1; read by ``InsolverTransform``.
    """

    def __init__(self, column_driver_minexp, exp_max=52):
        self.priority = 1
        self.column_driver_minexp = column_driver_minexp
        self.exp_max = exp_max

    @staticmethod
    def _exp(exp, exp_max):
        """Convert one experience value.

        Parameters
        ----------
        exp : number or None
            Raw experience value.
        exp_max : number
            Cap for the converted value.

        Returns
        -------
        number or None
            ``None`` when ``exp`` is missing or negative, otherwise
            ``exp // 12`` limited to at most ``exp_max``.
        """
        # Kept as a function-local import, as in the original, so the method
        # does not rely on a module-level ``pd`` being present.
        import pandas as pd

        # Return early for invalid input: the previous version set ``exp`` to
        # None and then evaluated ``None > exp_max``, which raises TypeError.
        if pd.isnull(exp) or exp < 0:
            return None

        converted = exp // 12
        return exp_max if converted > exp_max else converted

    def __call__(self, df):
        """Apply the conversion to ``column_driver_minexp`` of ``df``.

        The column is overwritten in place and the same ``df`` object is
        returned.
        """
        df[self.column_driver_minexp] = df[self.column_driver_minexp].apply(self._exp, args=(self.exp_max,))
        return df

In [10]:
# New user transform


class TransformSocioCateg:
    """Shorten a string column to its first four characters.

    Parameters
    ----------
    column_socio_categ : str
        Name of the string column to truncate.

    Attributes
    ----------
    priority : int
        Set to 0; read by ``InsolverTransform``.
    """

    def __init__(self, column_socio_categ):
        self.priority = 0
        self.column_socio_categ = column_socio_categ

    def __call__(self, df):
        """Overwrite the column in ``df`` with its truncated values and return ``df``."""
        target = self.column_socio_categ
        shortened = df[target].str.slice(start=0, stop=4)
        df[target] = shortened
        return df

In [11]:
# Initialising transforms class
#
# The list mixes insolver's built-in transforms with the two user-defined
# classes from the cells above: TransformExp (which shadows the imported one)
# and TransformSocioCateg.
# NOTE(review): the raw pandas `df` is passed here rather than `ins_df` —
# confirm this is intended.
# NOTE(review): each transform exposes a `priority` attribute; presumably
# InsolverTransform uses it (not list position) to order execution — confirm
# against the insolver documentation.

ins_transf = InsolverTransform(
    df,
    [
        TransformPolynomizer('DrivAge'),
        TransformAge('DrivAge', 18, 75),  # age_min=18, age_max=75
        TransformExp('LicAge', 57),  # exp_max=57
        TransformMapValues('Gender', {'Male': 0, 'Female': 1}),
        TransformMapValues('MariStat', {'Other': 0, 'Alone': 1}),
        TransformSocioCateg('SocioCateg'),  # shorten categories before dummy encoding
        TransformGetDummies('SocioCateg'),
        # gender_male / gender_female match the codes produced by the Gender mapping above
        TransformAgeGender('DrivAge', 'Gender', 'Age_m', 'Age_f', age_default=18, gender_male=0, gender_female=1),
        TransformPolynomizer('Age_m'),
        TransformPolynomizer('Age_f'),
    ],
)

In [12]:
# Show an example request payload: a dict holding one record's column
# values under the 'df' key.
ins_transf.sample_request()

{'df': {'Exposure': 0.739,
  'LicAge': 346,
  'RecordBeg': '2004-04-05',
  'RecordEnd': None,
  'VehAge': None,
  'Gender': 'Male',
  'MariStat': 'Other',
  'SocioCateg': 'CSP50',
  'VehUsage': 'Private+trip to office',
  'DrivAge': 48,
  'HasKmLimit': 0,
  'BonusMalus': 50,
  'VehBody': None,
  'VehPrice': None,
  'VehEngine': None,
  'VehEnergy': None,
  'VehMaxSpeed': None,
  'VehClass': None,
  'ClaimAmount': 0.0,
  'RiskVar': None,
  'Garage': None,
  'ClaimInd': 0,
  'Dataset': 9,
  'DeducType': None,
  'ClaimNbResp': 0.0,
  'ClaimNbNonResp': 0.0,
  'ClaimNbParking': 0.0,
  'ClaimNbFireTheft': 0.0,
  'ClaimNbWindscreen': 0.0,
  'OutUseNb': 0.0,
  'RiskArea': 7.0}}

In [13]:
# Applying transforms
# The call returns a dict keyed by an integer index that describes each
# transform: its class name and its attributes (including `priority`).

ins_transf.ins_transform()



{0: {'name': 'TransformPolynomizer',
  'attributes': {'column_param': 'DrivAge', 'n': 2, 'priority': 3}},
 1: {'name': 'TransformAge',
  'attributes': {'age_max': 75,
   'age_min': 18,
   'column_driver_minage': 'DrivAge',
   'priority': 1}},
 2: {'name': 'TransformExp',
  'attributes': {'column_driver_minexp': 'LicAge',
   'exp_max': 57,
   'priority': 1}},
 3: {'name': 'TransformMapValues',
  'attributes': {'column_param': 'Gender',
   'dictionary': {'Male': 0, 'Female': 1},
   'priority': 1}},
 4: {'name': 'TransformMapValues',
  'attributes': {'column_param': 'MariStat',
   'dictionary': {'Other': 0, 'Alone': 1},
   'priority': 1}},
 5: {'name': 'TransformSocioCateg',
  'attributes': {'column_socio_categ': 'SocioCateg', 'priority': 0}},
 6: {'name': 'TransformGetDummies',
  'attributes': {'column_param': 'SocioCateg',
   'drop_first': False,
   'dummy_columns': ['SocioCateg_CSP1',
    'SocioCateg_CSP2',
    'SocioCateg_CSP3',
    'SocioCateg_CSP4',
    'SocioCateg_CSP5',
    'Socio

In [14]:
# Save to 'transforms_freMPL-R.pickle' in the working directory.
# NOTE(review): presumably this stores the transform pipeline for later reuse
# — confirm what exactly is serialised. Only load pickle files you trust.
ins_transf.save('transforms_freMPL-R.pickle')

In [15]:
# Preview the first 7 rows after the transforms; the dummy columns,
# Age_m / Age_f and their squared versions are now present.
# NOTE(review): the stored output shows LicAge 366 -> 7, which does not match
# `exp // 12` in the user-defined TransformExp (366 // 12 == 30). The saved
# output looks stale — re-run the notebook from a fresh kernel to confirm.
ins_transf.head(7)

Unnamed: 0,Exposure,LicAge,RecordBeg,RecordEnd,VehAge,Gender,MariStat,SocioCateg,VehUsage,DrivAge,...,SocioCateg_CSP4,SocioCateg_CSP5,SocioCateg_CSP6,SocioCateg_CSP7,SocioCateg_CSP8,SocioCateg_CSP9,Age_m,Age_f,Age_m_2,Age_f_2
0,0.583,7,2004-06-01,,2,1,0,CSP1,Professional,55,...,0,0,0,0,0,0,18,55,324,3025
1,0.2,3,2004-10-19,,0,0,1,CSP5,Private+trip to office,34,...,0,1,0,0,0,0,34,18,1156,324
2,0.083,3,2004-07-16,2004-08-16,1,1,0,CSP1,Professional,33,...,0,0,0,0,0,0,18,33,324,1089
3,0.375,3,2004-08-16,,1,1,0,CSP1,Professional,34,...,0,0,0,0,0,0,18,34,324,1156
4,0.5,4,2004-01-01,2004-07-01,3,0,0,CSP4,Professional,53,...,1,0,0,0,0,0,53,18,2809,324
5,0.499,4,2004-07-01,,3,0,0,CSP4,Professional,53,...,1,0,0,0,0,0,53,18,2809,324
6,0.218,3,2004-01-01,2004-03-20,6-7,0,0,CSP5,Private+trip to office,32,...,0,1,0,0,0,0,32,18,1024,324


In [16]:
# Column summary after the transforms (the stored output lists 45 columns,
# up from 31 before).
ins_transf.info()

<class 'insolver.transforms.core.InsolverTransform'>
RangeIndex: 343080 entries, 0 to 343079
Data columns (total 45 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Exposure           343080 non-null  float64
 1   LicAge             343080 non-null  int64  
 2   RecordBeg          343080 non-null  object 
 3   RecordEnd          181115 non-null  object 
 4   VehAge             177880 non-null  object 
 5   Gender             343080 non-null  int64  
 6   MariStat           343080 non-null  int64  
 7   SocioCateg         343080 non-null  object 
 8   VehUsage           343080 non-null  object 
 9   DrivAge            343080 non-null  int64  
 10  HasKmLimit         343080 non-null  int64  
 11  BonusMalus         343080 non-null  int64  
 12  VehBody            145780 non-null  object 
 13  VehPrice           145780 non-null  object 
 14  VehEngine          145780 non-null  object 
 15  VehEnergy          145780 non-null  