In [12]:
import polars as pl

import rustystats as rs


data = pl.read_parquet("https://raw.githubusercontent.com/PricingFrontier/pricing-data-example/917c853e256df8d5814721ab56f72889a908bb08/data/processed/frequency_set.parquet")

In [13]:
data

IDpol,VehPower,VehAge,DrivAge,BonusMalus,VehBrand,VehGas,Area,Density,Region,Group,Exposure,ClaimCount
i64,f64,f64,f64,f64,str,str,str,f64,str,str,f64,i32
2124053,5.0,1.0,31.0,60.0,"""B2""","""Diesel""","""C""",393.0,"""Centre""","""5""",0.53,0
1049168,4.0,2.0,73.0,50.0,"""B12""","""Regular""","""D""",983.0,"""Pays-de-la-Loire""","""2""",0.1,0
134313,4.0,11.0,60.0,62.0,"""B1""","""Regular""","""E""",3744.0,"""Provence-Alpes-Cotes-D'Azur""","""1""",1.0,0
1145209,7.0,9.0,37.0,50.0,"""B12""","""Regular""","""C""",204.0,"""Pays-de-la-Loire""","""2""",0.06,0
2281532,5.0,4.0,43.0,54.0,"""B1""","""Diesel""","""E""",3317.0,"""Provence-Alpes-Cotes-D'Azur""","""3""",0.5,0
…,…,…,…,…,…,…,…,…,…,…,…,…
4134506,6.0,4.0,61.0,50.0,"""B2""","""Diesel""","""C""",220.0,"""Rhone-Alpes""","""4""",1.0,0
1037983,8.0,11.0,36.0,72.0,"""B10""","""Diesel""","""C""",282.0,"""Centre""","""2""",0.04,0
3197389,7.0,11.0,50.0,50.0,"""B2""","""Diesel""","""A""",9.0,"""Centre""","""5""",1.0,0
25934,7.0,20.0,34.0,52.0,"""B1""","""Diesel""","""C""",176.0,"""Provence-Alpes-Cotes-D'Azur""","""3""",1.0,0


In [14]:
model = rs.glm(
    formula="ClaimCount ~ VehPower + VehAge + C(Area) + C(Region)",
    data=data,
    family="poisson"
).fit()

In [15]:
print(model.summary())

                                 GLM Results                                  

Family:              Poisson         No. Observations:        678012
Link Function:       (default)       Df Residuals:            677984
Method:              IRLS            Df Model:                    27
Scale:               1.0000          Iterations:                   6

Log-Likelihood:         -140874.1999 Deviance:                212748.2537
AIC:                     281804.3999 Null Deviance:           214041.4441
BIC:                     282124.3537 Pearson chi2:              716467.57
Converged:           True           

------------------------------------------------------------------------------
Variable                             Coef    Std.Err        z    P>|z|                 95% CI     
------------------------------------------------------------------------------
Intercept                         -2.7969     0.0902  -31.012  <0.0001   [ -2.9737,  -2.6202]  ***
VehPower                   

In [16]:
result = rs.glm(
    "ClaimCount ~ bs(VehAge, df=5) + ns(VehPower, df=4) + C(Region)",
    data=data,
    family="poisson",
    offset="Exposure"
).fit()

In [17]:
print(result.summary())

                                 GLM Results                                  

Family:              Poisson         No. Observations:        678012
Link Function:       (default)       Df Residuals:            677984
Method:              IRLS            Df Model:                    27
Scale:               1.0000          Iterations:                   7

Log-Likelihood:         -142538.2937 Deviance:                216076.4413
AIC:                     285132.5875 Null Deviance:           214041.4441
BIC:                     285452.5412 Pearson chi2:             1631233.18
Converged:           True           

------------------------------------------------------------------------------
Variable                             Coef    Std.Err        z    P>|z|                 95% CI     
------------------------------------------------------------------------------
Intercept                         -1.4463     0.0884  -16.365  <0.0001   [ -1.6195,  -1.2731]  ***
Region[T.Aquitaine]        

In [18]:
# Continuous × Continuous interaction (main effects + interaction)
result = rs.glm(
    "ClaimCount ~ VehAge*VehPower",  # Equivalent to Age + VehPower + Age:VehPower
    data, family="poisson", offset="Exposure"
).fit()

print(result.summary())

                                 GLM Results                                  

Family:              Poisson         No. Observations:        678012
Link Function:       (default)       Df Residuals:            678008
Method:              IRLS            Df Model:                     3
Scale:               1.0000          Iterations:                   6

Log-Likelihood:         -143853.3626 Deviance:                218706.5790
AIC:                     287714.7251 Null Deviance:           214041.4441
BIC:                     287760.4328 Pearson chi2:             1743718.16
Converged:           True           

------------------------------------------------------------------------------
Variable               Coef    Std.Err        z    P>|z|                 95% CI     
------------------------------------------------------------------------------
Intercept           -1.9977     0.0273  -73.205  <0.0001   [ -2.0512,  -1.9442]  ***
VehAge              -0.0513     0.0033  -15.588  <0.000

In [19]:
# Categorical × Continuous interaction
result = rs.glm(
    "ClaimCount ~ C(Area)*VehAge",  # Each area level has different age effect
    data, family="poisson", offset="Exposure"
).fit()

print(result.summary())

                                 GLM Results                                  

Family:              Poisson         No. Observations:        678012
Link Function:       (default)       Df Residuals:            678000
Method:              IRLS            Df Model:                    11
Scale:               1.0000          Iterations:                   6

Log-Likelihood:         -143552.3722 Deviance:                218104.5983
AIC:                     287128.7445 Null Deviance:           214041.4441
BIC:                     287265.8675 Pearson chi2:             1727075.48
Converged:           True           

------------------------------------------------------------------------------
Variable               Coef    Std.Err        z    P>|z|                 95% CI     
------------------------------------------------------------------------------
Intercept           -2.2116     0.0243  -90.924  <0.0001   [ -2.2593,  -2.1639]  ***
Area[T.B]            0.0291     0.0367    0.794   0.427

In [20]:
# Categorical × Categorical interaction
result = rs.glm(
    "ClaimCount ~ C(Area)*C(VehBrand)",
    data, family="poisson", offset="Exposure"
).fit()

print(result.summary())

                                 GLM Results                                  

Family:              Poisson         No. Observations:        678012
Link Function:       (default)       Df Residuals:            677946
Method:              IRLS            Df Model:                    65
Scale:               1.0000          Iterations:                   6

Log-Likelihood:         -143819.8831 Deviance:                218639.6201
AIC:                     287771.7662 Null Deviance:           214041.4441
BIC:                     288525.9430 Pearson chi2:             1709888.91
Converged:           True           

------------------------------------------------------------------------------
Variable                        Coef    Std.Err        z    P>|z|                 95% CI     
------------------------------------------------------------------------------
Intercept                    -2.5768     0.0264  -97.511  <0.0001   [ -2.6286,  -2.5250]  ***
Area[T.B]                     0.0711 

In [21]:
# Pure interaction (no main effects added)
result = rs.glm(
    "ClaimCount ~ VehAge + C(Area):VehPower",  # Area-specific VehPower slopes
    data, family="poisson", offset="Exposure"
).fit()

In [22]:
print(result.summary())

                                 GLM Results                                  

Family:              Poisson         No. Observations:        678012
Link Function:       (default)       Df Residuals:            678005
Method:              IRLS            Df Model:                     6
Scale:               1.0000          Iterations:                   6

Log-Likelihood:         -143616.1679 Deviance:                218232.1896
AIC:                     287246.3358 Null Deviance:           214041.4441
BIC:                     287326.3242 Pearson chi2:             1735303.91
Converged:           True           

------------------------------------------------------------------------------
Variable                 Coef    Std.Err        z    P>|z|                 95% CI     
------------------------------------------------------------------------------
Intercept             -2.1616     0.0134 -161.427  <0.0001   [ -2.1878,  -2.1354]  ***
VehAge                -0.0373     0.0010  -35.720  