In [2]:
# !pip install "git+ssh://git@github.com/MindSetLib/MS-InsuranceScoring.git@InsolverPackage#egg=insolver"

In [4]:
import pandas as pd

In [28]:
from insolver.InsolverDataFrame import InsolverDataFrame
from insolver.InsolverTransforms import (
    TransformExp,
    InsolverTransformMain,
    InsolverTransforms,
    TransformAge,
    TransformMapValues,
    TransformPolynomizer,
    TransformAgeGender,
)

In [13]:
# Load the freMPL-R CSV into a plain pandas frame. low_memory=False makes pandas
# parse the file in one pass instead of in chunks, which avoids mixed-dtype inference.
df = pd.read_csv('freMPL-R.csv', low_memory=False)

In [14]:
# Wrap the raw pandas frame in the insolver container used by the transforms below.
InsDataFrame = InsolverDataFrame(df)

In [15]:
# Preview the first seven rows of the untransformed data.
InsDataFrame.head(7)

Unnamed: 0,Exposure,LicAge,RecordBeg,RecordEnd,VehAge,Gender,MariStat,SocioCateg,VehUsage,DrivAge,...,ClaimInd,Dataset,DeducType,ClaimNbResp,ClaimNbNonResp,ClaimNbParking,ClaimNbFireTheft,ClaimNbWindscreen,OutUseNb,RiskArea
0,0.583,366,2004-06-01,,2,Female,Other,CSP1,Professional,55,...,0,1,,,,,,,,
1,0.2,187,2004-10-19,,0,Male,Alone,CSP55,Private+trip to office,34,...,0,1,,,,,,,,
2,0.083,169,2004-07-16,2004-08-16,1,Female,Other,CSP1,Professional,33,...,0,1,,,,,,,,
3,0.375,170,2004-08-16,,1,Female,Other,CSP1,Professional,34,...,0,1,,,,,,,,
4,0.5,224,2004-01-01,2004-07-01,3,Male,Other,CSP47,Professional,53,...,1,1,,,,,,,,
5,0.499,230,2004-07-01,,3,Male,Other,CSP47,Professional,53,...,0,1,,,,,,,,
6,0.218,169,2004-01-01,2004-03-20,6-7,Male,Other,CSP50,Private+trip to office,32,...,1,1,,,,,,,,


In [16]:
# Show column dtypes and non-null counts before any transformation.
InsDataFrame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 343080 entries, 0 to 343079
Data columns (total 31 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Exposure           343080 non-null  float64
 1   LicAge             343080 non-null  int64  
 2   RecordBeg          343080 non-null  object 
 3   RecordEnd          181115 non-null  object 
 4   VehAge             177880 non-null  object 
 5   Gender             343080 non-null  object 
 6   MariStat           343080 non-null  object 
 7   SocioCateg         343080 non-null  object 
 8   VehUsage           343080 non-null  object 
 9   DrivAge            343080 non-null  int64  
 10  HasKmLimit         343080 non-null  int64  
 11  BonusMalus         343080 non-null  int64  
 12  VehBody            145780 non-null  object 
 13  VehPrice           145780 non-null  object 
 14  VehEngine          145780 non-null  object 
 15  VehEnergy          145780 non-null  object 
 16  Ve

In [17]:
# Metadata reported by the wrapper: its type, the column index and the row count.
InsDataFrame.get_meta_info()

{'type': 'InsolverDataFrame',
 'columns': Index(['Exposure', 'LicAge', 'RecordBeg', 'RecordEnd', 'VehAge', 'Gender',
        'MariStat', 'SocioCateg', 'VehUsage', 'DrivAge', 'HasKmLimit',
        'BonusMalus', 'VehBody', 'VehPrice', 'VehEngine', 'VehEnergy',
        'VehMaxSpeed', 'VehClass', 'ClaimAmount', 'RiskVar', 'Garage',
        'ClaimInd', 'Dataset', 'DeducType', 'ClaimNbResp', 'ClaimNbNonResp',
        'ClaimNbParking', 'ClaimNbFireTheft', 'ClaimNbWindscreen', 'OutUseNb',
        'RiskArea'],
       dtype='object'),
 'len': 343080}

In [18]:
@staticmethod
def exp_new(exp, exp_max):
    """Rescale a single experience value and cap it at ``exp_max``.

    Intended as a replacement for ``TransformExp._exp`` (hence the
    ``staticmethod`` wrapper, so no instance is bound when it is called
    through the class).

    Parameters:
        exp: Raw experience value for one row. May be null.
        exp_max: Upper bound applied to the rescaled value.

    Returns:
        ``None`` when ``exp`` is null or negative; otherwise
        ``exp * 7 // 365`` limited to at most ``exp_max``.
    """
    # Invalid inputs leave early: comparing None with exp_max below would
    # raise TypeError on Python 3.
    if pd.isnull(exp) or exp < 0:
        return None

    # NOTE(review): the 7 / 365 factor is kept as-is; the unit of the input
    # is not visible here - confirm against the data dictionary.
    exp = exp * 7 // 365
    if exp > exp_max:
        exp = exp_max
    return exp

# Monkey-patch: replace the library's TransformExp._exp with the custom exp_new
# defined in this cell, so every TransformExp created afterwards uses it.
# NOTE(review): TransformExp comes from the insolver import cell, which must
# have been executed before this one - confirm the notebook runs top to bottom.
TransformExp._exp = exp_new

In [22]:
class TransformSocioCateg(InsolverTransformMain):
    """Keep only the first four characters of a socio-category code column.

    Parameters:
        column_socio_categ: Label of the column whose string values are cut.
    """

    def __init__(self, column_socio_categ):
        # The priority is assigned before the base initialiser runs; this
        # ordering is kept exactly as it was.
        self.priority = 0
        super().__init__()
        self.column_socio_categ = column_socio_categ

    def __call__(self, df):
        """Overwrite the configured column with its truncated values and return ``df``."""
        target = self.column_socio_categ
        shortened = df[target].str.slice(start=0, stop=4)
        df[target] = shortened
        return df

In [23]:
class TransformToNumeric(InsolverTransformMain):
    """Convert one column to a numeric dtype with ``pandas.to_numeric``.

    Parameters:
        column_param: Label of the column to convert.
        downcast: Passed through to ``pd.to_numeric`` (default ``'integer'``).
        errors: Passed through to ``pd.to_numeric``. The default ``'raise'``
            keeps the previous behaviour; ``'coerce'`` turns unparsable
            values (for example a range such as ``'6-7'``) into NaN instead
            of aborting the whole pipeline.
    """

    def __init__(self, column_param, downcast='integer', errors='raise'):
        # The priority is assigned before the base initialiser runs, as in the other
        # custom transforms of this notebook.
        self.priority = 0
        super().__init__()
        self.column_param = column_param
        self.downcast = downcast
        self.errors = errors

    def __call__(self, df):
        """Replace the configured column with its numeric conversion and return ``df``."""
        df[self.column_param] = pd.to_numeric(
            df[self.column_param],
            downcast=self.downcast,
            errors=self.errors,
        )
        return df

In [29]:
# Build the transformation pipeline over the raw data held by InsDataFrame.
# The list below is the declaration order only; the names printed by
# InsTransforms.transform() show TransformSocioCateg running first
# (presumably because of its priority attribute - TODO confirm in insolver).
InsTransforms = InsolverTransforms(InsDataFrame.get_data(), [
    TransformAge('DrivAge', 18, 75),
    TransformExp('LicAge', 57),
    TransformMapValues('Gender', {'Male':0, 'Female':1}),
    TransformMapValues('MariStat', {'Other':0, 'Alone':1}),
    TransformSocioCateg('SocioCateg'),
    TransformAgeGender('DrivAge', 'Gender', 'Age_m', 'Age_f', age_default=18, gender_male=0, gender_female=1),
    TransformPolynomizer('Age_m'),
    TransformPolynomizer('Age_f'),
    # Disabled steps for VehAge (the column contains non-numeric values such as '6-7'):
    # TransformToNumeric('VehAge'),
    # T.TransformVehAge('VehAge', 20),
    ])

In [30]:
# Inspect the transform objects registered on the pipeline.
InsTransforms.transforms

[<insolver.InsolverTransforms.TransformAge at 0x7fe40b2731f0>,
 <insolver.InsolverTransforms.TransformExp at 0x7fe40b1a9370>,
 <insolver.InsolverTransforms.TransformMapValues at 0x7fe40b1a9eb0>,
 <insolver.InsolverTransforms.TransformMapValues at 0x7fe40b1a9d90>,
 <__main__.TransformSocioCateg at 0x7fe40b2128e0>,
 <insolver.InsolverTransforms.TransformAgeGender at 0x7fe40b212eb0>,
 <insolver.InsolverTransforms.TransformPolynomizer at 0x7fe40b212a00>,
 <insolver.InsolverTransforms.TransformPolynomizer at 0x7fe40b212b50>]

In [31]:
# Run the pipeline; the displayed result is the list of transform class names it reports.
InsTransforms.transform()

['TransformSocioCateg',
 'TransformAge',
 'TransformExp',
 'TransformMapValues',
 'TransformMapValues',
 'TransformAgeGender',
 'TransformPolynomizer',
 'TransformPolynomizer']

In [32]:
# Preview the first seven rows after the transforms have been applied.
InsTransforms.head(7)

Unnamed: 0,Exposure,LicAge,RecordBeg,RecordEnd,VehAge,Gender,MariStat,SocioCateg,VehUsage,DrivAge,...,ClaimNbNonResp,ClaimNbParking,ClaimNbFireTheft,ClaimNbWindscreen,OutUseNb,RiskArea,Age_m,Age_f,Age_m_2,Age_f_2
0,0.583,7,2004-06-01,,2,1,0,CSP1,Professional,55,...,,,,,,,18,55,324,3025
1,0.2,3,2004-10-19,,0,0,1,CSP5,Private+trip to office,34,...,,,,,,,34,18,1156,324
2,0.083,3,2004-07-16,2004-08-16,1,1,0,CSP1,Professional,33,...,,,,,,,18,33,324,1089
3,0.375,3,2004-08-16,,1,1,0,CSP1,Professional,34,...,,,,,,,18,34,324,1156
4,0.5,4,2004-01-01,2004-07-01,3,0,0,CSP4,Professional,53,...,,,,,,,53,18,2809,324
5,0.499,4,2004-07-01,,3,0,0,CSP4,Professional,53,...,,,,,,,53,18,2809,324
6,0.218,3,2004-01-01,2004-03-20,6-7,0,0,CSP5,Private+trip to office,32,...,,,,,,,32,18,1024,324


In [33]:
# Column dtypes and non-null counts after transformation (compare with the earlier info() cell).
InsTransforms.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 343080 entries, 0 to 343079
Data columns (total 35 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Exposure           343080 non-null  float64
 1   LicAge             343080 non-null  int64  
 2   RecordBeg          343080 non-null  object 
 3   RecordEnd          181115 non-null  object 
 4   VehAge             177880 non-null  object 
 5   Gender             343080 non-null  int64  
 6   MariStat           343080 non-null  int64  
 7   SocioCateg         343080 non-null  object 
 8   VehUsage           343080 non-null  object 
 9   DrivAge            343080 non-null  int64  
 10  HasKmLimit         343080 non-null  int64  
 11  BonusMalus         343080 non-null  int64  
 12  VehBody            145780 non-null  object 
 13  VehPrice           145780 non-null  object 
 14  VehEngine          145780 non-null  object 
 15  VehEnergy          145780 non-null  object 
 16  Ve

In [34]:
# Metadata after transformation; the column index now includes the derived Age_* columns.
InsTransforms.get_meta_info()

{'type': 'InsolverDataFrame',
 'columns': Index(['Exposure', 'LicAge', 'RecordBeg', 'RecordEnd', 'VehAge', 'Gender',
        'MariStat', 'SocioCateg', 'VehUsage', 'DrivAge', 'HasKmLimit',
        'BonusMalus', 'VehBody', 'VehPrice', 'VehEngine', 'VehEnergy',
        'VehMaxSpeed', 'VehClass', 'ClaimAmount', 'RiskVar', 'Garage',
        'ClaimInd', 'Dataset', 'DeducType', 'ClaimNbResp', 'ClaimNbNonResp',
        'ClaimNbParking', 'ClaimNbFireTheft', 'ClaimNbWindscreen', 'OutUseNb',
        'RiskArea', 'Age_m', 'Age_f', 'Age_m_2', 'Age_f_2'],
       dtype='object'),
 'len': 343080}

In [35]:
# Retrieve the transformed data; the notebook shows a truncated view of it.
InsTransforms.get_data()

Unnamed: 0,Exposure,LicAge,RecordBeg,RecordEnd,VehAge,Gender,MariStat,SocioCateg,VehUsage,DrivAge,...,ClaimNbNonResp,ClaimNbParking,ClaimNbFireTheft,ClaimNbWindscreen,OutUseNb,RiskArea,Age_m,Age_f,Age_m_2,Age_f_2
0,0.583,7,2004-06-01,,2,1,0,CSP1,Professional,55,...,,,,,,,18,55,324,3025
1,0.200,3,2004-10-19,,0,0,1,CSP5,Private+trip to office,34,...,,,,,,,34,18,1156,324
2,0.083,3,2004-07-16,2004-08-16,1,1,0,CSP1,Professional,33,...,,,,,,,18,33,324,1089
3,0.375,3,2004-08-16,,1,1,0,CSP1,Professional,34,...,,,,,,,18,34,324,1156
4,0.500,4,2004-01-01,2004-07-01,3,0,0,CSP4,Professional,53,...,,,,,,,53,18,2809,324
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
343075,0.217,5,2004-10-13,,3,0,0,CSP5,Professional,40,...,0.0,0.0,1.0,0.0,1.0,10.0,40,18,1600,324
343076,0.225,11,2004-10-10,,4,0,0,CSP6,Private,67,...,0.0,0.0,0.0,0.0,0.0,7.0,67,18,4489,324
343077,0.510,9,2004-01-01,2004-07-05,2,0,0,CSP6,Private,65,...,0.0,0.0,0.0,0.0,0.0,7.0,65,18,4225,324
343078,0.833,14,2004-03-01,,5,0,0,CSP6,Private,75,...,0.0,0.0,0.0,0.0,0.0,7.0,75,18,5625,324
