In [1]:
import polars as pl

import rustystats as rs

# Source of the frequency dataset: a raw GitHub URL whose path contains a
# fixed revision hash, so the same file is fetched on every run.
FREQUENCY_SET_URL = "https://raw.githubusercontent.com/PricingFrontier/pricing-data-example/917c853e256df8d5814721ab56f72889a908bb08/data/processed/frequency_set.parquet"

# Load the full dataset into a polars DataFrame.
data = pl.read_parquet(FREQUENCY_SET_URL)

In [9]:
# Rows whose "Group" value equals this label form the hold-out (test) set;
# every other row is used for training.
HOLDOUT_GROUP = "5"
group_col = pl.col("Group")

train_data = data.filter(group_col != HOLDOUT_GROUP)
test_data = data.filter(group_col == HOLDOUT_GROUP)

In [None]:
# Exploratory analysis of the training split against the claim-count response.
# Per the original note, this creates analysis/exploration.json.
explore_categoricals = ["Region", "Area", "VehBrand", "VehGas"]
explore_continuous = ["VehPower", "VehAge", "DrivAge", "BonusMalus"]

exploration = rs.explore_data(
    data=train_data,
    response="ClaimCount",
    categorical_factors=explore_categoricals,
    continuous_factors=explore_continuous,
    exposure="Exposure",
)

In [13]:
# Specify and fit a negative-binomial GLM on the training split.
# Area and Region enter the formula wrapped in C(...); VehPower and VehAge
# enter as plain terms.
# NOTE(review): "Exposure" is passed as the offset column as-is — confirm
# whether rustystats applies the log transform to it internally.
claim_formula = "ClaimCount ~ VehPower + VehAge + C(Area) + C(Region)"

model_spec = rs.glm(
    formula=claim_formula,
    data=train_data,
    family="negbinomial",
    offset="Exposure",
)
model = model_spec.fit()

In [14]:
# Render the fitted model's summary and print it as plain text.
summary_text = model.summary()
print(summary_text)

                                 GLM Results                                  

Family:              NegativeBinomial No. Observations:        542055
Link Function:       (default)       Df Residuals:            542027
Method:              IRLS + Ridge    Df Model:                    27
Scale:               0.2917          Alpha (λ):               0.0000
L1 Ratio:            0.00            Iterations:                   6
Non-zero coefs:      27             

Log-Likelihood:         -114419.6762 Deviance:                158131.7470
AIC:                     228895.3524 Null Deviance:           149116.5514
BIC:                     229209.0399 Pearson chi2:             1334514.71
Converged:           True           

------------------------------------------------------------------------------
Variable                             Coef    Std.Err        z    P>|z|                 95% CI     
------------------------------------------------------------------------------
Intercept         

In [None]:
# Run diagnostics for the fitted model on both the training and hold-out
# splits. Per the original note, this writes analysis/diagnostics.
diagnostic_categoricals = ["Region", "Area", "VehBrand", "VehGas"]
diagnostic_continuous = ["VehPower", "VehAge", "DrivAge", "BonusMalus"]

model.diagnostics(
    train_data=train_data,
    test_data=test_data,
    categorical_factors=diagnostic_categoricals,
    continuous_factors=diagnostic_continuous,
)