# Example: Fit DMetrics

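This notebook loads the bundled `dmetrics1` metrics data set, fits three dGLM models (logit, probit, and complementary log-log) to it, and compares their fitted parameters and AIC values.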

In [1]:
import importlib.resources as resources

import pandas as pd
import numpy as np

from pysrat.nhpp.data import DMetricsData
from pysrat.nhpp.models.dglm import dGLMLogit, dGLMProbit, dGLMCloglog

In [2]:
# Locate the CSV that ships inside the installed `pysrat` package.
data_path = resources.files("pysrat").joinpath("datasets/dmetrics/dmetrics1.csv")

# `resources.files` returns a Traversable, which is only guaranteed to be a real
# filesystem path when the package is installed unpacked. `as_file` yields a
# concrete path in every case (extracting to a temporary file if necessary), so
# the read also works when the package is imported from a zip or wheel.
with resources.as_file(data_path) as csv_file:
    df = pd.read_csv(csv_file)
df

Unnamed: 0,day,fault,tc,ctc,cov,ccov
0,1,3,5,5,0.018,0.018
1,2,16,175,180,0.409,0.427
2,3,9,186,366,0.066,0.493
3,4,3,67,433,0.132,0.625
4,5,2,14,447,0.013,0.638
5,6,0,54,501,0.055,0.693
6,7,2,28,529,0.021,0.714
7,8,1,15,544,0.018,0.732
8,9,1,23,567,0.024,0.756
9,10,2,17,584,0.026,0.782


In [4]:
# Every column other than the fault column is passed as a metric.
metric_columns = df.columns.drop("fault").tolist()
data = DMetricsData.from_dataframe(df, fault="fault", metrics=metric_columns)

In [5]:
# One instance of each dGLM variant, keyed by a short name used in later cells.
models = dict(
    logit=dGLMLogit(),
    probit=dGLMProbit(),
    cloglog=dGLMCloglog(),
)

# Fit every candidate on the same data so their results can be compared below.
for model in models.values():
    model.fit(data, verbose=True)

  from .autonotebook import tqdm as notebook_tqdm
                                                                     

In [6]:
# Collect one record per fitted model: its name, a copy of the parameter
# vector, and the AIC converted to a plain Python float.
summary = []
for name, model in models.items():
    entry = dict(
        name=name,
        params_=model.params_.copy(),
        aic_=float(model.aic_),
    )
    summary.append(entry)

summary

[{'name': 'logit',
  'params_': array([ 6.60331317e+01, -3.67104467e+00,  5.22474322e-01,  1.77179246e-02,
         -6.34912977e-03, -4.57378491e-01, -4.23183289e-01]),
  'aic_': 82.89452065150746},
 {'name': 'probit',
  'params_': array([ 6.61238755e+01, -1.94479253e+00,  2.17540585e-01,  7.84299151e-03,
         -5.45702469e-04,  8.43977501e-01, -1.74933953e+00]),
  'aic_': 84.77255949216769},
 {'name': 'cloglog',
  'params_': array([ 6.60006949e+01, -3.72501426e+00,  5.52901578e-01,  1.81100945e-02,
         -9.24633065e-03, -1.70250033e+00,  1.44403488e+00]),
  'aic_': 81.44716274407051}]

In [7]:
# Fitted logit parameters keyed by name: omega, intercept, then one entry per metric column.
models['logit'].params_dict_

{'omega': 66.03313173117347,
 'intercept': -3.6710446707999123,
 'day': 0.5224743216701413,
 'tc': 0.017717924618563557,
 'ctc': -0.006349129772215396,
 'cov': -0.4573784912351742,
 'ccov': -0.4231832893358709}

In [8]:
# Evaluate the fitted logit model's `dmvf` at the values 1, 2, 3, 4.
# NOTE(review): presumably these are day indices and the result is the expected
# number of faults per day (compare with df['fault']) — confirm against the pysrat docs.
models['logit'].dmvf(range(1,5))

array([ 2.82594148, 16.55453642,  9.4475872 ,  1.12486294])

In [17]:
# Observed fault counts from the loaded data.
df['fault']

0      3
1     16
2      9
3      3
4      2
5      0
6      2
7      1
8      1
9      2
10     9
11     3
12     2
13     1
14     2
15     7
16     3
17     0
18     0
19     0
Name: fault, dtype: int64