In [1]:
import pandas as pd

from insolver import InsolverDataFrame
from insolver.transforms import (
    InsolverTransform,
    TransformExp,
    TransformAge,
    TransformMapValues,
    TransformPolynomizer,
    TransformAgeGender,
)

pd.options.display.float_format = '{:.2f}'.format

In [2]:
df = pd.read_csv('data/df.csv', low_memory=False)

In [3]:
df = df[df.Dataset.isin([5, 6, 7, 8, 9])]
df.dropna(how='all', axis=1, inplace=True)
df = df[df.ClaimAmount > 0]


InsDataFrame = InsolverDataFrame(df)

In [4]:
@staticmethod
def new_exp(exp, exp_max):
    if pd.isnull(exp):
        exp = None
    elif exp < 0:
        exp = None
    else:
        exp = exp // 12
    if exp > exp_max:
        exp = exp_max
    return exp


TransformExp._exp = new_exp

In [5]:
# We can set up user transformations
class TransformSocioCateg:
    def __init__(self, column_socio_categ):
        self.priority = 0
        self.column_socio_categ = column_socio_categ

    def __call__(self, df):
        df[self.column_socio_categ] = df[self.column_socio_categ].str.slice(0, 4)
        return df

In [6]:
# After that we can combine all transformations into one one object
InsTransforms = InsolverTransform(
    InsDataFrame,
    [
        TransformAge('DrivAge', 18, 75),
        TransformExp('LicAge', 57),
        TransformMapValues('Gender', {'Male': 0, 'Female': 1}),
        TransformMapValues('MariStat', {'Other': 0, 'Alone': 1}),
        TransformSocioCateg('SocioCateg'),
        TransformAgeGender('DrivAge', 'Gender', 'Age_m', 'Age_f', age_default=18, gender_male=0, gender_female=1),
        TransformPolynomizer('Age_m'),
        TransformPolynomizer('Age_f'),
    ],
)

In [7]:
InsTransforms.transforms

[<insolver.transforms.InsolverTransforms.TransformAge at 0x7f968816f7c0>,
 <insolver.transforms.InsolverTransforms.TransformExp at 0x7f968816fac0>,
 <insolver.transforms.InsolverTransforms.TransformMapValues at 0x7f968816f6a0>,
 <insolver.transforms.InsolverTransforms.TransformMapValues at 0x7f968816f610>,
 <__main__.TransformSocioCateg at 0x7f968816f550>,
 <insolver.transforms.InsolverTransforms.TransformAgeGender at 0x7f968816f8e0>,
 <insolver.transforms.InsolverTransforms.TransformPolynomizer at 0x7f968816fb20>,
 <insolver.transforms.InsolverTransforms.TransformPolynomizer at 0x7f968816fc10>]

In [8]:
InsTransforms.ins_transform()






Check the order of transforms. Transforms with higher priority should be done first.




{0: {'name': 'TransformAge',
  'attributes': {'age_max': 75,
   'age_min': 18,
   'column_driver_minage': 'DrivAge',
   'priority': 1}},
 1: {'name': 'TransformExp',
  'attributes': {'column_driver_minexp': 'LicAge',
   'exp_max': 57,
   'priority': 1}},
 2: {'name': 'TransformMapValues',
  'attributes': {'column_param': 'Gender',
   'dictionary': {'Male': 0, 'Female': 1},
   'priority': 1}},
 3: {'name': 'TransformMapValues',
  'attributes': {'column_param': 'MariStat',
   'dictionary': {'Other': 0, 'Alone': 1},
   'priority': 1}},
 4: {'name': 'TransformSocioCateg',
  'attributes': {'column_socio_categ': 'SocioCateg', 'priority': 0}},
 5: {'name': 'TransformAgeGender',
  'attributes': {'age_default': 18,
   'column_age': 'DrivAge',
   'column_age_f': 'Age_f',
   'column_age_m': 'Age_m',
   'column_gender': 'Gender',
   'gender_female': 1,
   'gender_male': 0,
   'priority': 2}},
 6: {'name': 'TransformPolynomizer',
  'attributes': {'column_param': 'Age_m', 'n': 2, 'priority': 3}},
 7

In [9]:
InsTransforms.to_csv('data/df_transformed.csv', index=False)

print('Transforms done')

Transforms done


