In [1]:
!pip install mord



In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Si no tienes mord instalado, ejecuta: !pip install mord
from mord import LogisticIT 

# --- STEP 0: BUILD EXAMPLE DATA (stand-in for accidentsFull.csv) ---
# (Only here so the code can be run right away without the real file.)
# Each column is a 10-value pattern repeated 10 times, giving 100 rows.
data_sim = {
    column: list(pattern) * 10
    for column, pattern in (
        ('MAX_SEV_IR', (0, 1, 2, 0, 1, 0, 2, 0, 1, 0)),
        ('ALCHL_I', (0, 1, 1, 0, 0, 1, 1, 0, 0, 1)),
        ('WEATHER_R', (1, 1, 2, 2, 1, 2, 1, 1, 2, 1)),
    )
}
data = pd.DataFrame(data_sim)

# --- START OF THE BOOK'S CODE (TABLE 10.5) ---

# data = pd.read_csv('accidentsFull.csv')  # Uncomment if you have the file
outcome = 'MAX_SEV_IR'
predictors = ['ALCHL_I', 'WEATHER_R']
X, y = data[predictors], data[outcome]

# The book fits on the full data set (no hold-out split) to keep the example simple.
train_X = X
train_y = y

# --- 1. NOMINAL LOGISTIC REGRESSION ---
print('Nominal logistic regression')
# A very large C (inverse regularization strength) makes sklearn's default L2
# penalty negligible, so the fit is effectively unregularized.
# `multi_class` is intentionally not passed: it is deprecated in recent
# scikit-learn releases, and with solver='lbfgs' a target with more than two
# classes is already fitted as a multinomial model (scikit-learn >= 0.22).
logit = LogisticRegression(solver='lbfgs', C=1e24)
logit.fit(train_X, train_y)

print(' intercept', logit.intercept_)
print(' coefficients', logit.coef_)
print()

# Predicted class probabilities: one column of `probs` per entry of
# logit.classes_, in that order. The P(<class>) columns are derived from the
# fitted classes instead of being hard-coded to three labels.
probs = logit.predict_proba(train_X)
results = pd.DataFrame({
    # Compare against the same target the model was fitted on.
    'actual': train_y,
    'predicted': logit.predict(train_X),
    **{
        f'P({label})': probs[:, position]
        for position, label in enumerate(logit.classes_)
    },
})
print("Resultados Modelo Nominal (Primeras 5 filas):")
print(results.head())
print('\n' + '-'*30 + '\n')

# --- 2. ORDINAL LOGISTIC REGRESSION ---
print('Ordinal logistic regression')
# alpha=0 switches the regularization off
logit_ord = LogisticIT(alpha=0)
logit_ord.fit(train_X, train_y)

print(' theta (thresholds)', logit_ord.theta_)
print(' coefficients', logit_ord.coef_)
print()

# Predicted probabilities: take the first three columns of the probability
# matrix and label them P(0), P(1), P(2) next to the actual/predicted classes.
probs_ord = logit_ord.predict_proba(train_X)
results_ord = pd.DataFrame({
    'actual': y,
    'predicted': logit_ord.predict(train_X),
    **{'P(%d)' % col: probs_ord[:, col] for col in range(3)},
})
print("Resultados Modelo Ordinal (Primeras 5 filas):")
print(results_ord.head())

Nominal logistic regression




 intercept [ 4.92085506  4.94532536 -9.86618042]
 coefficients [[-4.54316895 -0.09500056]
 [-4.85273088 -0.40412985]
 [ 9.39589983  0.49913041]]

Resultados Modelo Nominal (Primeras 5 filas):
   actual  predicted      P(0)      P(1)          P(2)
0       0          0  0.570688  0.429312  3.912898e-07
1       1          0  0.429588  0.237129  3.332826e-01
2       2          2  0.355799  0.144173  5.000272e-01
3       0          0  0.644233  0.355766  8.001491e-07
4       1          0  0.570688  0.429312  3.912898e-07

------------------------------

Ordinal logistic regression
 theta (thresholds) [1.19103125 1.30825767]
 coefficients [0.96460531 0.21382451]

Resultados Modelo Ordinal (Primeras 5 filas):
   actual  predicted      P(0)      P(1)      P(2)
0       0          0  0.726554  0.022662  0.250784
1       1          0  0.503150  0.029261  0.467589
2       2          2  0.449863  0.029150  0.520987
3       0          0  0.682088  0.024861  0.293052
4       1          0  0.726554  0