In [1]:
import polars as pl

import rustystats as rs

# Source parquet for the frequency data set. The URL embeds a full commit
# hash, so it points at one fixed revision of the file.
DATA_URL = "https://raw.githubusercontent.com/PricingFrontier/pricing-data-example/917c853e256df8d5814721ab56f72889a908bb08/data/processed/frequency_set.parquet"
data = pl.read_parquet(DATA_URL)

In [2]:
# Explore the data set (creates analysis/exploration.json).
# The factor columns are grouped in one mapping and unpacked into the call.
factor_columns = {
    "categorical_factors": ["Region", "Area", "VehBrand", "VehGas"],
    "continuous_factors": ["VehPower", "VehAge", "DrivAge", "BonusMalus"],
}
exploration = rs.explore_data(
    data=data,
    response="ClaimCount",
    exposure="Exposure",
    **factor_columns,
)

In [3]:
# Baseline fit: negative binomial GLM of ClaimCount on vehicle power, vehicle
# age, Area and Region, with the "Exposure" column passed as the offset.
# NOTE(review): whether the library log-transforms the offset column itself
# is not visible from this notebook -- confirm against the rustystats docs.
baseline_spec = rs.glm(
    formula="ClaimCount ~ VehPower + VehAge + C(Area) + C(Region)",
    data=data,
    family="negbinomial",
    offset="Exposure",
)
model = baseline_spec.fit()

In [4]:
# Run diagnostics on the fitted model (writes analysis/diagnostics).
# The call is kept as the last expression so its result is still the cell output.
diagnostic_factors = dict(
    categorical_factors=["Region", "Area", "VehBrand", "VehGas"],
    continuous_factors=["VehPower", "VehAge", "DrivAge", "BonusMalus"],
)
model.diagnostics(data=data, **diagnostic_factors)



In [5]:
# Additive specification: C(Area) plus a bs(VehAge, df=4) term.
# Rebinds `model`, replacing whatever fit it referred to before.
additive_formula = "ClaimCount ~ C(Area) + bs(VehAge, df=4)"
model = rs.glm(
    formula=additive_formula,
    data=data,
    family="negbinomial",
    offset="Exposure",
).fit()

In [6]:

# Interaction specification: C(Area):bs(VehAge, df=4) only.
# Rebinds `model`, replacing whatever fit it referred to before.
fit_options = {"data": data, "family": "negbinomial", "offset": "Exposure"}
model = rs.glm(
    formula="ClaimCount ~ C(Area):bs(VehAge, df=4)",
    **fit_options,
).fit()

In [7]:
# Print the summary for whichever fit `model` currently refers to.
summary_text = model.summary()
print(summary_text)

                                 GLM Results                                  

Family:              NegativeBinomial(theta=1.2840) No. Observations:        678012
Link Function:       (default)       Df Residuals:            677996
Method:              IRLS            Df Model:                    15
Scale:               0.2904          Iterations:                   7

Log-Likelihood:         -268298.8899 Deviance:                196866.1067
AIC:                     536629.7799 Null Deviance:           191531.0335
BIC:                     536812.6106 Pearson chi2:             1747655.92
Converged:           True           

------------------------------------------------------------------------------
Variable                        Coef    Std.Err        z    P>|z|                 95% CI     
------------------------------------------------------------------------------
Intercept                    -2.1776     0.0050 -433.279  <0.0001   [ -2.1875,  -2.1678]  ***
Area[T.B]:bs(VehAge, 2

In [2]:
import time

# Time the additive and interaction fits and report iterations/convergence.
# Elapsed time uses time.perf_counter(): it is a monotonic, high-resolution
# clock, whereas a difference of time.time() readings can be distorted by
# system clock adjustments.
t0 = time.perf_counter()
model1 = rs.glm(
    formula="ClaimCount ~ C(Area) + bs(VehAge, df=4)",
    data=data,
    family="negbinomial",
    offset="Exposure",
).fit()
print(f"Additive: {time.perf_counter()-t0:.2f}s, Iterations: {model1.iterations}, Converged: {model1.converged}")

t0 = time.perf_counter()
model2 = rs.glm(
    formula="ClaimCount ~ C(Area):bs(VehAge, df=4)",
    data=data,
    family="negbinomial",
    offset="Exposure",
).fit()
print(f"Interaction: {time.perf_counter()-t0:.2f}s, Iterations: {model2.iterations}, Converged: {model2.converged}")

Additive: 24.29s, Iterations: 25, Converged: False
Interaction: 6.25s, Iterations: 1, Converged: True


In [6]:
# Refit the additive specification, passing max_iter=100 to fit(), then
# report how many iterations were used and whether the fit converged.
additive_spec = rs.glm(
    formula="ClaimCount ~ C(Area) + bs(VehAge, df=4)",
    data=data,
    family="negbinomial",
    offset="Exposure",
)
model1 = additive_spec.fit(max_iter=100)
print(f"Iterations: {model1.iterations}, Converged: {model1.converged}")

Iterations: 2, Converged: True


In [2]:
import time

# Repeat of the additive-vs-interaction timing comparison.
# Elapsed time uses time.perf_counter(): it is a monotonic, high-resolution
# clock, whereas a difference of time.time() readings can be distorted by
# system clock adjustments.
t0 = time.perf_counter()
model1 = rs.glm(
    formula="ClaimCount ~ C(Area) + bs(VehAge, df=4)",
    data=data,
    family="negbinomial",
    offset="Exposure",
).fit()
print(f"Additive: {time.perf_counter()-t0:.2f}s, Iterations: {model1.iterations}, Converged: {model1.converged}")

t0 = time.perf_counter()
model2 = rs.glm(
    formula="ClaimCount ~ C(Area):bs(VehAge, df=4)",
    data=data,
    family="negbinomial",
    offset="Exposure",
).fit()
print(f"Interaction: {time.perf_counter()-t0:.2f}s, Iterations: {model2.iterations}, Converged: {model2.converged}")

Additive: 6.12s, Iterations: 1, Converged: True
Interaction: 6.46s, Iterations: 2, Converged: True
