In [74]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from lifelines import ExponentialFitter, WeibullFitter, CoxPHFitter


In [75]:
# Restore the variables previously persisted with IPython's %store magic.
# NOTE(review): df_cox is reassigned from `records` further down before it is
# read, so restoring it here looks unnecessary; only base_cox is consumed by
# the cells that follow — confirm before removing.
%store -r df_cox
%store -r base_cox

In [76]:
def _quarters_between(start, end):
    """Return the number of calendar quarters elapsed from ``start`` to ``end``.

    Both arguments must expose ``.year`` and ``.quarter`` (e.g. ``pd.Timestamp``).
    The result is negative when ``end`` lies in an earlier quarter than ``start``.
    """
    return (end.year - start.year) * 4 + (end.quarter - start.quarter)


def build_recession_spells(panel, cutoff):
    """Build one ``[state, duration, event]`` record per recession spell.

    Parameters
    ----------
    panel : pandas.DataFrame
        Long table with the columns ``Estado``, ``Fecha``, ``recession_start``
        and ``recession_end``.
    cutoff : pandas.Timestamp
        End of the observation window. Spells that end after it (or never end
        in the data) are right-censored at the cutoff.

    Returns
    -------
    list
        ``[state, duration, event]`` lists, where ``duration`` is measured in
        quarters and ``event`` is 1 when the recession end was observed on or
        before ``cutoff`` and 0 when the spell is censored.
    """
    spells = []

    for state, group in panel.groupby('Estado'):
        # Start/end pairing is only meaningful in chronological order, so do
        # not rely on the row order of the incoming table.
        group = group.sort_values('Fecha', kind='stable').reset_index(drop=True)
        start_date = None

        for fecha, is_start, is_end in zip(
            group['Fecha'], group['recession_start'], group['recession_end']
        ):
            if is_start:
                # A recession that begins after the cutoff is outside the
                # window; recording it would yield a negative duration.
                if fecha <= cutoff:
                    start_date = fecha

            elif is_end and start_date is not None:
                if fecha > cutoff:
                    # The end falls outside the window: censor at the cutoff.
                    end_date, event = cutoff, 0
                else:
                    end_date, event = fecha, 1
                spells.append([state, _quarters_between(start_date, end_date), event])
                start_date = None

        # Still in recession at the end of the data -> right-censored.
        if start_date is not None:
            end_date = min(group['Fecha'].max(), cutoff)
            spells.append([state, _quarters_between(start_date, end_date), 0])

    return spells


# Observation window ends at 2009Q2; durations are expressed in quarters.
fecha_corte = pd.Timestamp("2009-06-30")
records = build_recession_spells(base_cox, fecha_corte)
        


In [77]:
# Column layout of the spell records and the covariates attached to them.
spell_columns = ['id', 'duration', 'event']
covariate_columns = ['GDP', 'Personal_Income', 'TasaDesempleo']

# Per-state covariates: mean of each column over all of the state's rows in
# base_cox.
# NOTE(review): the mean is taken over every date present in base_cox, not
# only those up to fecha_corte — confirm this is intended.
covariables = (
    base_cox
    .groupby('Estado')[covariate_columns]
    .mean()
    .reset_index()
)

# Attach the covariates to each spell, then collapse to one row per state.
# GroupBy.first() returns the first non-null value of each column per group.
df_cox = (
    pd.DataFrame(records, columns=spell_columns)
    .merge(covariables, left_on='id', right_on='Estado', how='left')
    .drop(columns='Estado')
    .groupby('id', as_index=False)
    .first()
)

df_cox

Unnamed: 0,id,duration,event,GDP,Personal_Income,TasaDesempleo
0,Alabama,6,0,12.196037,11.964237,8.009091
1,Alaska,6,0,10.863448,10.404198,7.351515
2,Arizona,6,0,12.574947,12.289487,7.875758
3,Arkansas,1,1,11.629167,11.427889,6.681818
4,California,6,0,14.582742,14.266541,9.578788
5,Colorado,5,0,12.538412,12.228448,6.684848
6,Connecticut,5,0,12.509942,12.270401,7.20303
7,Delaware,5,0,11.120425,10.504921,6.612121
8,Florida,6,0,13.672976,13.472931,8.463636
9,Georgia,6,0,13.068082,12.713747,8.266667


# Ventana de tiempo 2007Q4 a 2009Q2

In [78]:
# Keep only the rows whose duration is strictly positive, then display them.
has_positive_duration = df_cox['duration'] > 0
df_cox = df_cox.loc[has_positive_duration]
df_cox

Unnamed: 0,id,duration,event,GDP,Personal_Income,TasaDesempleo
0,Alabama,6,0,12.196037,11.964237,8.009091
1,Alaska,6,0,10.863448,10.404198,7.351515
2,Arizona,6,0,12.574947,12.289487,7.875758
3,Arkansas,1,1,11.629167,11.427889,6.681818
4,California,6,0,14.582742,14.266541,9.578788
5,Colorado,5,0,12.538412,12.228448,6.684848
6,Connecticut,5,0,12.509942,12.270401,7.20303
7,Delaware,5,0,11.120425,10.504921,6.612121
8,Florida,6,0,13.672976,13.472931,8.463636
9,Georgia,6,0,13.068082,12.713747,8.266667


In [79]:
# Summary statistics for the numeric columns of the filtered df_cox.
df_cox.describe()

Unnamed: 0,duration,event,GDP,Personal_Income,TasaDesempleo
count,50.0,50.0,50.0,50.0,50.0
mean,4.7,0.12,12.183424,11.895107,7.022303
std,1.446318,0.328261,1.03463,1.033115,1.582588
min,1.0,0.0,10.307443,10.138916,3.530303
25%,5.0,0.0,11.276881,11.029417,5.892424
50%,5.0,0.0,12.17232,11.935946,7.042424
75%,6.0,0.0,12.966376,12.673769,8.251515
max,6.0,1.0,14.582742,14.266541,10.99697


In [80]:
# Parametric model: exponential fit on the spell durations, with `event`
# marking which durations were observed (1) rather than censored (0).
exp_model = ExponentialFitter().fit(
    durations=df_cox['duration'],
    event_observed=df_cox['event'],
)
summary = exp_model.summary
exp_model.print_summary()


0,1
model,lifelines.ExponentialFitter
number of observations,50
number of events observed,6
log-likelihood,-28.01
hypothesis,lambda_ != 0

Unnamed: 0,coef,se(coef),coef lower 95%,coef upper 95%,cmp to,z,p,-log2(p)
lambda_,39.17,15.99,7.83,70.51,0.0,2.45,0.01,6.13

0,1
AIC,58.01


In [81]:
# Weibull fit on the same durations / event indicator as the exponential model.
weibull_model = WeibullFitter().fit(
    durations=df_cox['duration'],
    event_observed=df_cox['event'],
)
summary = weibull_model.summary
weibull_model.print_summary()

0,1
model,lifelines.WeibullFitter
number of observations,50
number of events observed,6
log-likelihood,-27.78
hypothesis,"lambda_ != 1, rho_ != 1"

Unnamed: 0,coef,se(coef),coef lower 95%,coef upper 95%,cmp to,z,p,-log2(p)
lambda_,71.09,84.2,-93.93,236.12,1.0,0.83,0.41,1.3
rho_,0.78,0.31,0.17,1.38,1.0,-0.73,0.47,1.1

0,1
AIC,59.57


In [82]:
# Cox proportional-hazards model with GDP as the only covariate.
cox = CoxPHFitter().fit(
    df_cox,
    duration_col='duration',
    event_col='event',
    formula="GDP",
)
summary = cox.summary
cox.print_summary()


0,1
model,lifelines.CoxPHFitter
duration col,'duration'
event col,'event'
baseline estimation,breslow
number of observations,50
number of events observed,6
partial log-likelihood,-20.81
time fit was run,2025-11-10 14:46:59 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
GDP,-0.96,0.38,0.49,-1.92,-0.0,0.15,1.0,0.0,-1.96,0.05,4.32

0,1
Concordance,0.75
Partial AIC,43.62
log-likelihood ratio test,4.70 on 1 df
-log2(p) of ll-ratio test,5.05


In [83]:
# Cox proportional-hazards model with GDP and Personal_Income as covariates.
# NOTE(review): the recorded output shows very wide confidence intervals for
# both coefficients, which may point to collinearity between them — verify.
cox = CoxPHFitter().fit(
    df_cox,
    duration_col='duration',
    event_col='event',
    formula="GDP+Personal_Income",
)
summary = cox.summary
cox.print_summary()

0,1
model,lifelines.CoxPHFitter
duration col,'duration'
event col,'event'
baseline estimation,breslow
number of observations,50
number of events observed,6
partial log-likelihood,-20.66
time fit was run,2025-11-10 14:46:59 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
GDP,-3.2,0.04,4.3,-11.64,5.24,0.0,187.77,0.0,-0.74,0.46,1.13
Personal_Income,2.27,9.67,4.34,-6.24,10.78,0.0,48125.15,0.0,0.52,0.6,0.73

0,1
Concordance,0.77
Partial AIC,45.32
log-likelihood ratio test,5.00 on 2 df
-log2(p) of ll-ratio test,3.61


In [84]:
# Cox proportional-hazards model with all three covariates:
# GDP, Personal_Income and TasaDesempleo.
cox = CoxPHFitter().fit(
    df_cox,
    duration_col='duration',
    event_col='event',
    formula="GDP+Personal_Income+TasaDesempleo",
)
summary = cox.summary
cox.print_summary()

0,1
model,lifelines.CoxPHFitter
duration col,'duration'
event col,'event'
baseline estimation,breslow
number of observations,50
number of events observed,6
partial log-likelihood,-18.33
time fit was run,2025-11-10 14:46:59 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
GDP,-3.68,0.03,5.04,-13.55,6.2,0.0,491.48,0.0,-0.73,0.47,1.1
Personal_Income,3.42,30.55,5.08,-6.53,13.37,0.0,640000.0,0.0,0.67,0.5,1.0
TasaDesempleo,-0.75,0.47,0.39,-1.51,0.02,0.22,1.02,0.0,-1.92,0.05,4.19

0,1
Concordance,0.83
Partial AIC,42.66
log-likelihood ratio test,9.66 on 3 df
-log2(p) of ll-ratio test,5.53


In [85]:
# Cox proportional-hazards model with GDP and TasaDesempleo (Personal_Income
# left out).
cox = CoxPHFitter().fit(
    df_cox,
    duration_col='duration',
    event_col='event',
    formula="GDP+TasaDesempleo",
)
summary = cox.summary
cox.print_summary()

0,1
model,lifelines.CoxPHFitter
duration col,'duration'
event col,'event'
baseline estimation,breslow
number of observations,50
number of events observed,6
partial log-likelihood,-18.58
time fit was run,2025-11-10 14:46:59 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
GDP,-0.29,0.75,0.57,-1.41,0.84,0.24,2.31,0.0,-0.5,0.62,0.69
TasaDesempleo,-0.73,0.48,0.39,-1.48,0.03,0.23,1.03,0.0,-1.89,0.06,4.08

0,1
Concordance,0.80
Partial AIC,41.16
log-likelihood ratio test,9.16 on 2 df
-log2(p) of ll-ratio test,6.61


# Ventana de Tiempo 2007Q4 a 2010Q2

In [86]:
# Reload df_cox from IPython's %store, replacing the frame currently in memory.
# NOTE(review): the restored value is whatever an earlier run persisted with
# `%store df_cox` (hidden state); df_cox is rebuilt from `records` further
# down, so confirm this restore is actually needed.
%store -r df_cox

In [87]:
# Display the df_cox frame that was just restored from %store.
df_cox

Unnamed: 0,id,duration,event,GDP,Personal_Income,TasaDesempleo
0,Alabama,10,0,12.196037,11.964237,8.009091
1,Alaska,10,0,10.863448,10.404198,7.351515
2,Arizona,10,0,12.574947,12.289487,7.875758
3,Arkansas,1,1,11.629167,11.427889,6.681818
4,California,10,0,14.582742,14.266541,9.578788
5,Colorado,9,0,12.538412,12.228448,6.684848
6,Connecticut,9,0,12.509942,12.270401,7.20303
7,Delaware,9,0,11.120425,10.504921,6.612121
8,Florida,10,0,13.672976,13.472931,8.463636
9,Georgia,10,0,13.068082,12.713747,8.266667


In [88]:
# Observation window ends at 2010Q2. Each record is [state, duration, event]:
# duration is measured in quarters, event is 1 when the recession end was
# observed on or before the cutoff and 0 when the spell is right-censored.
fecha_corte = pd.Timestamp("2010-06-30")
records = []

for state, group in base_cox.groupby('Estado'):
    # Start/end pairing is only meaningful in chronological order, so do not
    # rely on the row order of base_cox.
    group = group.sort_values('Fecha', kind='stable').reset_index(drop=True)
    start_date = None

    for i, row in group.iterrows():
        # Recession start detected
        if row['recession_start']:
            # A recession that begins after the cutoff is outside the window;
            # recording it would yield a negative duration.
            if row['Fecha'] <= fecha_corte:
                start_date = row['Fecha']

        # Recession end detected for an open spell
        elif row['recession_end'] and start_date is not None:
            end_date = row['Fecha']
            if end_date > fecha_corte:
                # The end falls outside the window: censor at the cutoff.
                end_date = fecha_corte
                event = 0
            else:
                event = 1
            # Duration in quarters (not months): 4 per year plus the quarter offset.
            duration = (end_date.year - start_date.year) * 4 + (end_date.quarter - start_date.quarter)
            records.append([state, duration, event])
            start_date = None

    # State still in recession at the end of the data -> right-censored
    if start_date is not None:
        end_date = min(group['Fecha'].max(), fecha_corte)
        duration = (end_date.year - start_date.year) * 4 + (end_date.quarter - start_date.quarter)
        records.append([state, duration, 0])

In [89]:
# Column layout of the spell records and the covariates attached to them.
spell_columns = ['id', 'duration', 'event']
covariate_columns = ['GDP', 'Personal_Income', 'TasaDesempleo']

# Per-state covariates: mean of each column over all of the state's rows in
# base_cox.
covariables = (
    base_cox
    .groupby('Estado')[covariate_columns]
    .mean()
    .reset_index()
)

# Attach the covariates to each spell, then collapse to one row per state.
# GroupBy.first() returns the first non-null value of each column per group.
df_cox = (
    pd.DataFrame(records, columns=spell_columns)
    .merge(covariables, left_on='id', right_on='Estado', how='left')
    .drop(columns='Estado')
    .groupby('id', as_index=False)
    .first()
)
df_cox

Unnamed: 0,id,duration,event,GDP,Personal_Income,TasaDesempleo
0,Alabama,10,0,12.196037,11.964237,8.009091
1,Alaska,10,0,10.863448,10.404198,7.351515
2,Arizona,10,0,12.574947,12.289487,7.875758
3,Arkansas,1,1,11.629167,11.427889,6.681818
4,California,10,0,14.582742,14.266541,9.578788
5,Colorado,9,0,12.538412,12.228448,6.684848
6,Connecticut,9,0,12.509942,12.270401,7.20303
7,Delaware,9,0,11.120425,10.504921,6.612121
8,Florida,10,0,13.672976,13.472931,8.463636
9,Georgia,10,0,13.068082,12.713747,8.266667


In [90]:
# NOTE(review): this cell rebuilds df_cox from `records` with the same steps as
# the cell right before it; one of the two looks redundant — confirm.
spells = pd.DataFrame(records, columns=['id', 'duration', 'event'])

# Per-state mean of each covariate over the state's rows in base_cox.
covariables = (
    base_cox.groupby('Estado')[['GDP', 'Personal_Income', 'TasaDesempleo']]
    .mean()
    .reset_index()
)

# Left-join the covariates on the state name and drop the duplicated key.
merged = spells.merge(covariables, left_on='id', right_on='Estado', how='left')

# One row per state: first non-null value of each column within the group.
df_cox = merged.drop(columns='Estado').groupby('id', as_index=False).first()

df_cox

Unnamed: 0,id,duration,event,GDP,Personal_Income,TasaDesempleo
0,Alabama,10,0,12.196037,11.964237,8.009091
1,Alaska,10,0,10.863448,10.404198,7.351515
2,Arizona,10,0,12.574947,12.289487,7.875758
3,Arkansas,1,1,11.629167,11.427889,6.681818
4,California,10,0,14.582742,14.266541,9.578788
5,Colorado,9,0,12.538412,12.228448,6.684848
6,Connecticut,9,0,12.509942,12.270401,7.20303
7,Delaware,9,0,11.120425,10.504921,6.612121
8,Florida,10,0,13.672976,13.472931,8.463636
9,Georgia,10,0,13.068082,12.713747,8.266667


In [91]:
# Persist df_cox with IPython's %store magic so it can be reloaded later with
# `%store -r df_cox`.
%store df_cox

Stored 'df_cox' (DataFrame)


In [92]:
# Parametric model: exponential fit on the spell durations, with `event`
# marking which durations were observed (1) rather than censored (0).
exp_model = ExponentialFitter().fit(
    durations=df_cox['duration'],
    event_observed=df_cox['event'],
)
summary = exp_model.summary
exp_model.print_summary()


0,1
model,lifelines.ExponentialFitter
number of observations,50
number of events observed,10
log-likelihood,-47.16
hypothesis,lambda_ != 0

Unnamed: 0,coef,se(coef),coef lower 95%,coef upper 95%,cmp to,z,p,-log2(p)
lambda_,41.1,13.0,15.63,66.57,0.0,3.16,<0.005,9.32

0,1
AIC,96.32


In [93]:
# Weibull fit on the same durations / event indicator as the exponential model.
weibull_model = WeibullFitter().fit(
    durations=df_cox['duration'],
    event_observed=df_cox['event'],
)
summary = weibull_model.summary
weibull_model.print_summary()

0,1
model,lifelines.WeibullFitter
number of observations,50
number of events observed,10
log-likelihood,-47.06
hypothesis,"lambda_ != 1, rho_ != 1"

Unnamed: 0,coef,se(coef),coef lower 95%,coef upper 95%,cmp to,z,p,-log2(p)
lambda_,51.01,33.07,-13.81,115.83,1.0,1.51,0.13,2.94
rho_,0.88,0.27,0.35,1.4,1.0,-0.46,0.65,0.63

0,1
AIC,98.13


In [94]:
# Cox proportional-hazards model with GDP as the only covariate.
cox = CoxPHFitter().fit(
    df_cox,
    duration_col='duration',
    event_col='event',
    formula="GDP",
)
summary = cox.summary
cox.print_summary()

0,1
model,lifelines.CoxPHFitter
duration col,'duration'
event col,'event'
baseline estimation,breslow
number of observations,50
number of events observed,10
partial log-likelihood,-33.87
time fit was run,2025-11-10 14:47:00 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
GDP,-0.68,0.51,0.34,-1.35,-0.0,0.26,1.0,0.0,-1.97,0.05,4.36

0,1
Concordance,0.73
Partial AIC,69.73
log-likelihood ratio test,4.38 on 1 df
-log2(p) of ll-ratio test,4.78


In [95]:
# Cox proportional-hazards model with GDP and Personal_Income as covariates.
# NOTE(review): the recorded output shows very wide confidence intervals for
# both coefficients, which may point to collinearity between them — verify.
cox = CoxPHFitter().fit(
    df_cox,
    duration_col='duration',
    event_col='event',
    formula="GDP+Personal_Income",
)
summary = cox.summary
cox.print_summary()

0,1
model,lifelines.CoxPHFitter
duration col,'duration'
event col,'event'
baseline estimation,breslow
number of observations,50
number of events observed,10
partial log-likelihood,-33.27
time fit was run,2025-11-10 14:47:01 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
GDP,-4.6,0.01,3.93,-12.31,3.11,0.0,22.44,0.0,-1.17,0.24,2.04
Personal_Income,3.98,53.34,4.0,-3.86,11.81,0.02,135000.0,0.0,0.99,0.32,1.64

0,1
Concordance,0.77
Partial AIC,70.54
log-likelihood ratio test,5.56 on 2 df
-log2(p) of ll-ratio test,4.01


In [96]:
# Cox proportional-hazards model with all three covariates:
# GDP, Personal_Income and TasaDesempleo.
cox = CoxPHFitter().fit(
    df_cox,
    duration_col='duration',
    event_col='event',
    formula="GDP+Personal_Income+TasaDesempleo",
)
summary = cox.summary
cox.print_summary()

0,1
model,lifelines.CoxPHFitter
duration col,'duration'
event col,'event'
baseline estimation,breslow
number of observations,50
number of events observed,10
partial log-likelihood,-28.49
time fit was run,2025-11-10 14:47:01 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
GDP,-4.76,0.01,4.16,-12.92,3.39,0.0,29.73,0.0,-1.14,0.25,1.99
Personal_Income,4.82,123.58,4.22,-3.45,13.09,0.03,482000.0,0.0,1.14,0.25,1.98
TasaDesempleo,-0.91,0.4,0.34,-1.58,-0.24,0.21,0.79,0.0,-2.67,0.01,7.04

0,1
Concordance,0.83
Partial AIC,62.97
log-likelihood ratio test,15.13 on 3 df
-log2(p) of ll-ratio test,9.19


In [97]:
# Cox proportional-hazards model with GDP and TasaDesempleo (Personal_Income
# left out).
cox = CoxPHFitter().fit(
    df_cox,
    duration_col='duration',
    event_col='event',
    formula="GDP+TasaDesempleo",
)
summary = cox.summary
cox.print_summary()

0,1
model,lifelines.CoxPHFitter
duration col,'duration'
event col,'event'
baseline estimation,breslow
number of observations,50
number of events observed,10
partial log-likelihood,-29.21
time fit was run,2025-11-10 14:47:01 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
GDP,-0.0,1.0,0.42,-0.82,0.81,0.44,2.26,0.0,-0.01,1.0,0.01
TasaDesempleo,-0.88,0.41,0.33,-1.54,-0.23,0.22,0.79,0.0,-2.66,0.01,7.02

0,1
Concordance,0.80
Partial AIC,62.42
log-likelihood ratio test,13.69 on 2 df
-log2(p) of ll-ratio test,9.87
