In [1]:
import polars as pl

import rustystats as rs

# Source parquet for the frequency data. The URL embeds a commit hash, so the
# same file revision is requested on every run.
FREQUENCY_SET_URL = "https://raw.githubusercontent.com/PricingFrontier/pricing-data-example/917c853e256df8d5814721ab56f72889a908bb08/data/processed/frequency_set.parquet"
data = pl.read_parquet(FREQUENCY_SET_URL)

In [2]:
# Creates analysis/exploration.json.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so it was
# stopped before finishing at least once -- it may be slow on the full data.

# Factors handed to the exploration step, grouped by how they are declared.
exploration_categoricals = ["Region", "Area", "VehBrand", "VehGas"]
exploration_continuous = ["VehPower", "VehAge", "DrivAge", "BonusMalus"]

exploration = rs.explore_data(
    data=data,
    response="ClaimCount",
    exposure="Exposure",
    categorical_factors=exploration_categoricals,
    continuous_factors=exploration_continuous,
)

KeyboardInterrupt: 

In [None]:
# Fits model: VehPower and VehAge as plain terms, Area and Region wrapped in C().
# NOTE(review): the raw "Exposure" column is passed as the offset -- confirm
# whether rustystats expects it pre-logged for this family.
linear_formula = "ClaimCount ~ VehPower + VehAge + C(Area) + C(Region)"

linear_spec = rs.glm(
    formula=linear_formula,
    data=data,
    family="negbinomial",
    offset="Exposure",
)
model = linear_spec.fit()



In [None]:
# Refits `model` with C(Area) and a bs() term on VehAge (df=4).
# This rebinds `model`, replacing whatever an earlier cell stored under that name.
spline_formula = "ClaimCount ~ C(Area) + bs(VehAge, df=4)"

spline_spec = rs.glm(
    formula=spline_formula,
    data=data,
    family="negbinomial",
    offset="Exposure",
)
model = spline_spec.fit()

In [3]:

# Fits the model used by the summary and diagnostics cells below:
# C(Region), VehPower and BonusMalus, bs() terms (df=4) on VehAge and DrivAge,
# and TE() terms on Area and VehBrand.
# This rebinds `model`, replacing whatever an earlier cell stored under that name.
full_formula = "ClaimCount ~ C(Region) + VehPower + bs(VehAge, df=4) + bs(DrivAge, df=4) + BonusMalus + TE(Area) + TE(VehBrand)"

full_spec = rs.glm(
    formula=full_formula,
    data=data,
    family="negbinomial",
    offset="Exposure",
)
model = full_spec.fit()

In [4]:
# Print the fitted model's summary table as plain text.
summary_report = model.summary()
print(summary_report)

                                 GLM Results                                  

Family:              NegativeBinomial(theta=2.1124) No. Observations:        678012
Link Function:       (default)       Df Residuals:            677981
Method:              IRLS            Df Model:                    30
Scale:               0.2926          Iterations:                  10

Log-Likelihood:         -269754.2121 Deviance:                198408.7541
AIC:                     539570.4243 Null Deviance:           199142.5592
BIC:                     539924.6588 Pearson chi2:             2408052.55
Converged:           True           

------------------------------------------------------------------------------
Variable                             Coef    Std.Err        z    P>|z|                 95% CI     
------------------------------------------------------------------------------
Intercept                         -4.2205     0.0584  -72.216  <0.0001   [ -4.3351,  -4.1060]  ***
Region[T.Aqu

In [5]:
# Writes analysis/diagnostics.
# Factors handed to the diagnostics step, grouped by how they are declared.
diagnostic_categoricals = ["Region", "Area", "VehBrand", "VehGas"]
diagnostic_continuous = ["VehPower", "VehAge", "DrivAge", "BonusMalus"]

# Kept as the cell's final expression so any returned value is still displayed.
model.diagnostics(
    data=data,
    categorical_factors=diagnostic_categoricals,
    continuous_factors=diagnostic_continuous,
)

