In [1]:
# Ver repaso primeros minutos.

In [8]:
import pandas as pd
pd.set_option('display.max_columns', None)
import pyreadstat as st
import numpy as np
import matplotlib.pyplot as plt


path = r"C:\Users\HP\OneDrive\Escritorio\David Guzzi\DiTella\MEC\Materias\2025\2025 1T\[MT09] Econometría de Datos de Panel\Clases prácticas\PS 3-20250531\data\mod_abdata.dta"

df, meta = st.read_dta(path)
df['year'] = df['year'].astype('int')
df.head(1)

Unnamed: 0,c1,ind,year,emp,wage,cap,indoutpt,n,w,k,ys,rec,yearm1,id,nL1,nL2,wL1,kL1,kL2,ysL1,ysL2,yr1977,yr1978,yr1979,yr1980,yr1981,yr1982
0,1-1,7.0,1977,5.041,13.1516,0.5894,95.707199,1.617604,2.576543,-0.52865,4.561294,1.0,1977.0,1.0,,,,,,,,1,0,0,0,0,0


In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 828 entries, 0 to 827
Data columns (total 27 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   c1        828 non-null    object 
 1   ind       828 non-null    float64
 2   year      828 non-null    int64  
 3   emp       828 non-null    float64
 4   wage      828 non-null    float64
 5   cap       828 non-null    float64
 6   indoutpt  828 non-null    float64
 7   n         828 non-null    float64
 8   w         828 non-null    float64
 9   k         828 non-null    float64
 10  ys        828 non-null    float64
 11  rec       828 non-null    float64
 12  yearm1    828 non-null    float64
 13  id        828 non-null    float64
 14  nL1       770 non-null    float64
 15  nL2       632 non-null    float64
 16  wL1       770 non-null    float64
 17  kL1       770 non-null    float64
 18  kL2       632 non-null    float64
 19  ysL1      770 non-null    float64
 20  ysL2      632 non-null    float6

#### **a) POLS con errores estándar robustos.**

In [31]:
import statsmodels.api as sm
from linearmodels.panel import PooledOLS


df_1 = df.set_index(['id', 'year'])

# X: todas las explicativas, incluyendo las yr*
X_vars = ['nL1', 'nL2', 'w', 'wL1', 'k', 'kL1', 'kL2', 'ys', 'ysL1', 'ysL2']
yr_vars = sorted([col for col in df_1.columns if col.startswith('yr')])
yr_vars = yr_vars[1:-1]  # eliminar la primera y última dummy automáticamente
X = df_1[X_vars + yr_vars]

# Agregar constante
X = sm.add_constant(X)

# Variable dependiente
y = df_1['n']

model = PooledOLS(y, X)
results = model.fit(cov_type='clustered', cluster_entity=True)

print(results.summary)

                          PooledOLS Estimation Summary                          
Dep. Variable:                      n   R-squared:                        0.9948
Estimator:                  PooledOLS   R-squared (Between):              0.9989
No. Observations:                 632   R-squared (Within):               0.7224
Date:                Sat, Jun 28 2025   R-squared (Overall):              0.9948
Time:                        19:44:36   Log-likelihood                    573.37
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      8456.1
Entities:                         138   P-value                           0.0000
Avg Obs:                       4.5797   Distribution:                  F(14,617)
Min Obs:                       4.0000                                           
Max Obs:                       5.0000   F-statistic (robust):          1.513e+04
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


#### **b) FE con errores estándar robustos.**

In [None]:
from linearmodels.panel import PanelOLS

# Asegurarse de tener panel indexado
df_1 = df.set_index(['id', 'year'])

# Variables explicativas (sin colinearidad perfecta)
X_vars = ['nL1', 'nL2', 'w', 'wL1', 'k', 'kL1', 'kL2', 'ys', 'ysL1', 'ysL2']
yr_vars = sorted([col for col in df_1.columns if col.startswith('yr')])
yr_vars = yr_vars[1:-1]  # omitir la primera y última dummy de año
X = df_1[X_vars + yr_vars]

# Variable dependiente
y = df_1['n']

# No agregar constante: PanelOLS con entity_effects=True ya incluye efectos fijos
model = PanelOLS(y, X, entity_effects=True)
results = model.fit(cov_type='clustered', cluster_entity=True)

print(results.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:                      n   R-squared:                        0.7708
Estimator:                   PanelOLS   R-squared (Between):              0.4470
No. Observations:                 632   R-squared (Within):               0.7708
Date:                Sat, Jun 28 2025   R-squared (Overall):              0.4530
Time:                        19:48:07   Log-likelihood                    709.13
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      115.31
Entities:                         138   P-value                           0.0000
Avg Obs:                       4.5797   Distribution:                  F(14,480)
Min Obs:                       4.0000                                           
Max Obs:                       5.0000   F-statistic (robust):             128.97
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


#### **c) A-H.**

In [33]:
from linearmodels.iv import IV2SLS

# 1. Crear primeras diferencias
df_diff = df.sort_values(['id', 'year']).copy()

vars_diff = ['n', 'nL1', 'nL2', 'w', 'wL1', 'k', 'kL1', 'kL2', 'ys', 'ysL1', 'ysL2']
for var in vars_diff:
    df_diff[f'D_{var}'] = df_diff.groupby('id')[var].diff()

# dummies de año también en diferencias
yr_vars = sorted([col for col in df.columns if col.startswith('yr')])
yr_vars = yr_vars[1:-1]  # excluir una al inicio y fin
for col in yr_vars:
    df_diff[f'D_{col}'] = df_diff.groupby('id')[col].diff()

# Eliminar filas con NaN (producto de las diferencias)
df_diff = df_diff.dropna(subset=[f'D_{var}' for var in ['n', 'nL1', 'nL2']])

# 2. Definir variables
y = df_diff['D_n']
endog = df_diff['D_nL1']
instrument = df_diff['nL2']  # sin D, como en Stata
exog_vars = ['D_nL2', 'D_w', 'D_wL1', 'D_k', 'D_kL1', 'D_kL2', 'D_ys', 'D_ysL1', 'D_ysL2'] + [f'D_{col}' for col in yr_vars]
exog = df_diff[exog_vars]

# 3. Ajustar modelo IV
iv_model = IV2SLS(dependent=y, exog=exog, endog=endog, instruments=instrument)
iv_results = iv_model.fit(cov_type='robust')  # también podrías usar 'clustered' si quisieras

# 4. Mostrar resumen
print(iv_results.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                    D_n   R-squared:                     -13.610
Estimator:                    IV-2SLS   Adj. R-squared:                -14.006
No. Observations:                 494   F-statistic:                    23.516
Date:                Sat, Jun 28 2025   P-value (F-stat)                0.0524
Time:                        19:51:46   Distribution:                 chi2(14)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
D_nL2         -0.9847     2.0197    -0.4875     0.6259     -4.9432      2.9738
D_w           -0.3323     0.5334    -0.6230     0.53

#### **d) A-B one-step GMM.**

In [17]:
# Ordenar por panel
df_sorted = df.sort_values(['id', 'year']).copy()

# Eliminar filas con cualquier valor faltante en las variables necesarias
cols_needed = ['n', 'w', 'k', 'ys']
for col in cols_needed:
    for l in range(3):  # lag 0, 1, 2
        lag_col = f'{col}L{l}' if l > 0 else col
        if lag_col not in df_sorted.columns:
            continue
        df_sorted = df_sorted[df_sorted[lag_col].notna()]

In [18]:
from pydynpd import regression

cmd = 'n nL1 nL2 w wL1 k kL1 kL2 ys ysL1 ysL2 yr1977 yr1978 yr1979 yr1980 yr1981 yr1982 | gmm(n, 2:5) | onestep nolevel'

In [20]:
from pydynpd import regression

# Ordenar y limpiar
df_sorted = df.sort_values(['id', 'year']).copy()

# Generar lags si no existen
df_sorted['nL1'] = df_sorted.groupby('id')['n'].shift(1)
df_sorted['nL2'] = df_sorted.groupby('id')['n'].shift(2)
df_sorted['wL1'] = df_sorted.groupby('id')['w'].shift(1)
df_sorted['kL1'] = df_sorted.groupby('id')['k'].shift(1)
df_sorted['kL2'] = df_sorted.groupby('id')['k'].shift(2)
df_sorted['ysL1'] = df_sorted.groupby('id')['ys'].shift(1)
df_sorted['ysL2'] = df_sorted.groupby('id')['ys'].shift(2)

# Eliminar NAs generados por los lags
df_clean = df_sorted.dropna(subset=[
    'n', 'nL1', 'nL2', 'w', 'wL1', 'k', 'kL1', 'kL2', 'ys', 'ysL1', 'ysL2'
])

# Modelo
cmd = 'n nL1 nL2 w wL1 k kL1 kL2 ys ysL1 ysL2 yr1977 yr1978 yr1979 yr1980 yr1981 yr1982 | gmm(n, 2:5) | onestep nolevel'
dpd = regression.abond(cmd, df_clean, ['id', 'year'])

# Mostrar resultados
print(dpd.models[0].regression_table)


lagged dependent variable should be included


SystemExit: 

In [16]:
import pandas as pd
from pydynpd import regression

# Ordenar y limpiar
df_sorted = df.sort_values(['id', 'year']).copy()
df_sorted = df_sorted.dropna(subset=['n'])

# Chequear que cada grupo tenga suficientes años
obs_por_id = df_sorted.groupby('id')['year'].count()
df_sorted = df_sorted[df_sorted['id'].isin(obs_por_id[obs_por_id >= 6].index)]

# Especificación más conservadora
cmd = 'n L1.n | gmm(n, 2:3) pred(w k ys) | onestep nolevel'

# Estimar
dpd = regression.abond(cmd, df_sorted, ['id', 'year'])

# Resultados
print(dpd.models[0].regression_table)


OverflowError: Python int too large to convert to C long

In [14]:
import pandas as pd
from pydynpd import regression

# Sort and clean your data
df_sorted = df.sort_values(['id', 'year']).copy()
df_sorted = df_sorted.dropna(subset=['n'])

# Correct command string based on your variables
command_str = 'n L1.n L2.n w k ys yr1977 yr1978 yr1979 yr1980 yr1981 yr1982 | gmm(n, 2:5) pred(w k ys) | onestep nolevel'

# Run the estimation
dpd = regression.abond(command_str, df_sorted, ['id', 'year'])

# Show results
print(dpd.models[0].regression_table)

OverflowError: Python int too large to convert to C long

In [4]:
from pydynpd import regression

# Lista de columnas con lags que queremos borrar
cols_to_drop = [col for col in df.columns if any(lag in col for lag in ['L1', 'L2'])]

# Eliminar esas columnas
df_clean = df.drop(columns=cols_to_drop)

# Paso 1: asegurarse de que el DataFrame esté ordenado por id y year
df_sorted = df.sort_values(by=['id', 'year']).copy()

# Paso 2: definir el comando de regresión
# Dependiente: n
# Regressors en diferencias: L.n L2.n w L.w L(0/2).(k ys)
# Instrumentos GMM: L.n (con lags 2 a 5)
# Instrumentos IV: w, L.w, L.k, L.ys, yr*
# Solo Difference GMM: nolevel
command_str = 'n L(1:2).n w L.w L(0/2).(k ys) yr* | gmm(n, 2:5) | timedumm nolevel'

# Paso 3: ejecutar la estimación tipo Arellano-Bond (one-step Difference GMM)
model = regression.abond(command_str, df_sorted, ['id', 'year'])

# Paso 4: mostrar los resultados
print(model.summary_table)

n L(1:2).n w L.w L(0/2).(k ys) yr* :  variable L.w does not exist


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [4]:
from pydynpd.panel_data import panel_data
from pydynpd.variable import regular_variable
from pydynpd.info import options_info
from pydynpd.dynamic_panel_model import dynamic_panel_model

In [5]:
# Asumimos que ya tenés el DataFrame 'df' cargado
# Debe tener columnas: id, year, n, w, k, ys, yr1977, ..., yr1982

df_sorted = df.sort_values(['id', 'year']).copy()

# Variables dependiente + regresores
dep_indep = [
    regular_variable('n', 0),    # dependiente
    regular_variable('n', 1),    # L.n
    regular_variable('n', 2),    # L2.n
    regular_variable('w', 0),
    regular_variable('w', 1),
    regular_variable('k', 0),
    regular_variable('k', 1),
    regular_variable('k', 2),
    regular_variable('ys', 0),
    regular_variable('ys', 1),
    regular_variable('ys', 2),
]

# Dummies anuales
yr_dummies = [regular_variable(col, 0) for col in ['yr1977', 'yr1978', 'yr1979', 'yr1980', 'yr1981', 'yr1982']]
dep_indep.extend(yr_dummies)

# GMM instruments: L(2/5).n
gmm_var = regular_variable('n', 1)
gmm_var.min_lag = 2
gmm_var.max_lag = 5
Dgmm = [gmm_var]

# IV estándar
iv = [
    regular_variable('n', 2),
    regular_variable('w', 0), regular_variable('w', 1),
    regular_variable('k', 0), regular_variable('k', 1), regular_variable('k', 2),
    regular_variable('ys', 0), regular_variable('ys', 1), regular_variable('ys', 2),
] + yr_dummies

# Diccionario de variables
variables = {'dep_indep': dep_indep, 'Dgmm': Dgmm, 'Lgmm': [], 'iv': iv}

In [6]:
options = options_info()
options.level = False               # difference GMM (no system)
options.transformation = 'fd'       # first-differences
options.steps = 1                   # one-step
options.timedumm = True             # incluye dummies de tiempo

ids = df_sorted['id'].values
time = df_sorted['year'].values

pdata = panel_data(
    df=df_sorted,
    identifiers=('id', 'year'),  # <- corregido: nombres de columnas, no arrays
    variables=variables,
    options=options
)

In [13]:
model = dynamic_panel_model(
    pdata=pdata,
    variables=variables,
    options=options,
    command_str='n L.n L2.n w L.w L(0/2).(k ys) yr*',
    part_2='gmm(L.n)',
    part_3='iv(w L.w L(0/2).(k ys) yr*)'
)

In [18]:
import numpy as np
from scipy.linalg import inv

# === Estimador one-step robust (definido manualmente)
def one_step(model):
    Cy = model.final_xy_tables['Cy']
    Cx = model.final_xy_tables['Cx']
    Z = model._z_t_list  # Ya tiene la forma (N_obs, L)

    # Matriz de proyección: Pz = Z(Z'Z)^(-1)Z'
    ZZ_inv = inv(Z.T @ Z)
    PZ = Z @ ZZ_inv @ Z.T

    # Matriz GMM: (X'PZ X)^(-1) X'PZ y
    beta = inv(Cx.T @ PZ @ Cx) @ (Cx.T @ PZ @ Cy)

    # Predicciones y residuos
    y_hat = Cx @ beta
    resid = Cy - y_hat

    # Matriz de varianza robusta tipo White: (X'PZ X)^(-1) X'PZ Ω PZ X (X'PZ X)^(-1)
    Omega_hat = np.zeros((Z.shape[1], Z.shape[1]))
    for i in range(model.N):
        zi = Z[i * model.z_information.diff_width:(i + 1) * model.z_information.diff_width, :]
        ui = resid[i * model.z_information.diff_width:(i + 1) * model.z_information.diff_width, :]
        Omega_hat += (zi.T @ ui) @ (ui.T @ zi)

    Omega_hat = Omega_hat / model.N
    cov_beta = inv(Cx.T @ PZ @ Cx) @ (Cx.T @ Z @ ZZ_inv @ Omega_hat @ ZZ_inv @ Z.T @ Cx) @ inv(Cx.T @ PZ @ Cx)
    std_err = np.sqrt(np.diag(cov_beta)).reshape(-1, 1)

    # Resultado como objeto simple
    class Result:
        def __init__(self, beta, std_err):
            self.beta = beta
            self.std_err = std_err

    return Result(beta=beta, std_err=std_err)

# === Estimar y almacenar resultados
result = one_step(model)
model.step_results = [result]
model.form_regression_table()

# === Mostrar resultados tipo xtabond2
print(model.regression_table)

LinAlgError: singular matrix

In [23]:
# Variables dependiente y explicativas (como en xtabond2)
dep_indep = [
    regular_variable('n', 0),     # n
    regular_variable('n', 1),     # L.n
    regular_variable('n', 2),     # L2.n
    regular_variable('w', 0),
    regular_variable('w', 1),
    regular_variable('k', 0),
    regular_variable('k', 1),
    regular_variable('k', 2),
    regular_variable('ys', 0),
    regular_variable('ys', 1),
    regular_variable('ys', 2),
]

# Dummies de año
yr_cols = ['yr1977', 'yr1978', 'yr1979', 'yr1980', 'yr1981', 'yr1982']
yr_dummies = [regular_variable(col, 0) for col in yr_cols]
dep_indep.extend(yr_dummies)

# Instrumentos GMM (solo L.n → con lags 2 a 5)
Dgmm = [
    regular_variable('n', 2),
    regular_variable('n', 3),
    regular_variable('n', 4),
    regular_variable('n', 5),
]

# Instrumentos estándar (IV)
iv = [
    regular_variable('n', 2),      # L2.n
    regular_variable('w', 0),
    regular_variable('w', 1),
    regular_variable('k', 0),
    regular_variable('k', 1),
    regular_variable('k', 2),
    regular_variable('ys', 0),
    regular_variable('ys', 1),
    regular_variable('ys', 2),
] + yr_dummies

variables = {
    'dep_indep': dep_indep,
    'Dgmm': Dgmm,
    'Lgmm': [],     # No usamos system-GMM
    'iv': iv
}

In [None]:
# Crear el modelo GMM en diferencias (nivel=False), one-step (steps=1)
model = dynamic_panel_model(
    pdata=pdata,
    variables=variables,
    options=options,
    command_str='n L.n L2.n w L.w L(0/2).(k ys) yr*',
    part_2='gmm(L.n)',
    part_3='iv(w L.w L(0/2).(k ys) yr*)'
)

KeyError: array([  1.,   1.,   1.,   1.,   1.,   1.,   2.,   2.,   2.,   2.,   2.,
         2.,   3.,   3.,   3.,   3.,   3.,   3.,   4.,   4.,   4.,   4.,
         4.,   4.,   5.,   5.,   5.,   5.,   5.,   5.,   6.,   6.,   6.,
         6.,   6.,   6.,   7.,   7.,   7.,   7.,   7.,   7.,   8.,   8.,
         8.,   8.,   8.,   8.,   9.,   9.,   9.,   9.,   9.,   9.,  10.,
        10.,  10.,  10.,  10.,  10.,  11.,  11.,  11.,  11.,  11.,  11.,
        12.,  12.,  12.,  12.,  12.,  12.,  13.,  13.,  13.,  13.,  13.,
        13.,  15.,  15.,  15.,  15.,  15.,  15.,  16.,  16.,  16.,  16.,
        16.,  16.,  17.,  17.,  17.,  17.,  17.,  17.,  18.,  18.,  18.,
        18.,  18.,  18.,  19.,  19.,  19.,  19.,  19.,  19.,  20.,  20.,
        20.,  20.,  20.,  20.,  21.,  21.,  21.,  21.,  21.,  21.,  22.,
        22.,  22.,  22.,  22.,  22.,  23.,  23.,  23.,  23.,  23.,  23.,
        24.,  24.,  24.,  24.,  24.,  24.,  25.,  25.,  25.,  25.,  25.,
        25.,  26.,  26.,  26.,  26.,  26.,  26.,  28.,  28.,  28.,  28.,
        28.,  28.,  29.,  29.,  29.,  29.,  29.,  29.,  30.,  30.,  30.,
        30.,  30.,  30.,  31.,  31.,  31.,  31.,  31.,  31.,  32.,  32.,
        32.,  32.,  32.,  32.,  33.,  33.,  33.,  33.,  33.,  33.,  34.,
        34.,  34.,  34.,  34.,  34.,  35.,  35.,  35.,  35.,  35.,  35.,
        36.,  36.,  36.,  36.,  36.,  36.,  37.,  37.,  37.,  37.,  37.,
        37.,  38.,  38.,  38.,  38.,  38.,  38.,  39.,  39.,  39.,  39.,
        39.,  39.,  40.,  40.,  40.,  40.,  40.,  40.,  41.,  41.,  41.,
        41.,  41.,  41.,  42.,  42.,  42.,  42.,  42.,  42.,  43.,  43.,
        43.,  43.,  43.,  43.,  44.,  44.,  44.,  44.,  44.,  44.,  45.,
        45.,  45.,  45.,  45.,  45.,  46.,  46.,  46.,  46.,  46.,  46.,
        47.,  47.,  47.,  47.,  47.,  47.,  48.,  48.,  48.,  48.,  48.,
        48.,  49.,  49.,  49.,  49.,  49.,  49.,  50.,  50.,  50.,  50.,
        50.,  50.,  51.,  51.,  51.,  51.,  51.,  51.,  52.,  52.,  52.,
        52.,  52.,  52.,  53.,  53.,  53.,  53.,  53.,  53.,  54.,  54.,
        54.,  54.,  54.,  54.,  55.,  55.,  55.,  55.,  55.,  55.,  56.,
        56.,  56.,  56.,  56.,  56.,  57.,  57.,  57.,  57.,  57.,  57.,
        58.,  58.,  58.,  58.,  58.,  58.,  59.,  59.,  59.,  59.,  59.,
        59.,  60.,  60.,  60.,  60.,  60.,  60.,  61.,  61.,  61.,  61.,
        61.,  61.,  62.,  62.,  62.,  62.,  62.,  62.,  63.,  63.,  63.,
        63.,  63.,  63.,  64.,  64.,  64.,  64.,  64.,  64.,  65.,  65.,
        65.,  65.,  65.,  65.,  66.,  66.,  66.,  66.,  66.,  66.,  67.,
        67.,  67.,  67.,  67.,  67.,  68.,  68.,  68.,  68.,  68.,  68.,
        69.,  69.,  69.,  69.,  69.,  69.,  70.,  70.,  70.,  70.,  70.,
        70.,  71.,  71.,  71.,  71.,  71.,  71.,  72.,  72.,  72.,  72.,
        72.,  72.,  73.,  73.,  73.,  73.,  73.,  73.,  74.,  74.,  74.,
        74.,  74.,  74.,  75.,  75.,  75.,  75.,  75.,  75.,  76.,  76.,
        76.,  76.,  76.,  76.,  77.,  77.,  77.,  77.,  77.,  77.,  78.,
        78.,  78.,  78.,  78.,  78.,  79.,  79.,  79.,  79.,  79.,  79.,
        80.,  80.,  80.,  80.,  80.,  80.,  81.,  81.,  81.,  81.,  81.,
        81.,  82.,  82.,  82.,  82.,  82.,  82.,  83.,  83.,  83.,  83.,
        83.,  83.,  84.,  84.,  84.,  84.,  84.,  84.,  85.,  85.,  85.,
        85.,  85.,  85.,  86.,  86.,  86.,  86.,  86.,  86.,  87.,  87.,
        87.,  87.,  87.,  87.,  88.,  88.,  88.,  88.,  88.,  88.,  89.,
        89.,  89.,  89.,  89.,  89.,  90.,  90.,  90.,  90.,  90.,  90.,
        91.,  91.,  91.,  91.,  91.,  91.,  92.,  92.,  92.,  92.,  92.,
        92.,  93.,  93.,  93.,  93.,  93.,  93.,  94.,  94.,  94.,  94.,
        94.,  94.,  95.,  95.,  95.,  95.,  95.,  95.,  96.,  96.,  96.,
        96.,  96.,  96.,  97.,  97.,  97.,  97.,  97.,  97.,  98.,  98.,
        98.,  98.,  98.,  98.,  99.,  99.,  99.,  99.,  99.,  99., 100.,
       100., 100., 100., 100., 100., 101., 101., 101., 101., 101., 101.,
       102., 102., 102., 102., 102., 102., 103., 103., 103., 103., 103.,
       103., 104., 104., 104., 104., 104., 104., 105., 105., 105., 105.,
       105., 105., 106., 106., 106., 106., 106., 106., 107., 107., 107.,
       107., 107., 107., 108., 108., 108., 108., 108., 108., 109., 109.,
       109., 109., 109., 109., 110., 110., 110., 110., 110., 110., 111.,
       111., 111., 111., 111., 111., 112., 112., 112., 112., 112., 112.,
       113., 113., 113., 113., 113., 113., 114., 114., 114., 114., 114.,
       114., 115., 115., 115., 115., 115., 115., 116., 116., 116., 116.,
       116., 116., 117., 117., 117., 117., 117., 117., 118., 118., 118.,
       118., 118., 118., 119., 119., 119., 119., 119., 119., 120., 120.,
       120., 120., 120., 120., 121., 121., 121., 121., 121., 121., 122.,
       122., 122., 122., 122., 122., 123., 123., 123., 123., 123., 123.,
       124., 124., 124., 124., 124., 124., 125., 125., 125., 125., 125.,
       125., 126., 126., 126., 126., 126., 126., 127., 127., 127., 127.,
       127., 127., 128., 128., 128., 128., 128., 128., 129., 129., 129.,
       129., 129., 129., 130., 130., 130., 130., 130., 130., 131., 131.,
       131., 131., 131., 131., 132., 132., 132., 132., 132., 132., 133.,
       133., 133., 133., 133., 133., 134., 134., 134., 134., 134., 134.,
       135., 135., 135., 135., 135., 135., 136., 136., 136., 136., 136.,
       136., 137., 137., 137., 137., 137., 137., 138., 138., 138., 138.,
       138., 138., 139., 139., 139., 139., 139., 139., 140., 140., 140.,
       140., 140., 140.])

In [7]:
# 1. Crear objeto panel_data
# Asegurarse de que el DataFrame esté indexado correctamente
df_indexed = df.set_index(['id', 'year'])

# Crear el objeto panel_data
pdata = panel_data(df_indexed)

# 2. Definir variables
dep_indep = [regular_variable('n', 0),  # dependiente
             regular_variable('n', 1),  # L.n
             regular_variable('n', 2),  # L2.n
             regular_variable('w', 0),
             regular_variable('w', 1),
             regular_variable('k', 0),
             regular_variable('k', 1),
             regular_variable('k', 2),
             regular_variable('ys', 0),
             regular_variable('ys', 1),
             regular_variable('ys', 2)]

# Dummies de año
yr_dummies = [regular_variable(col, 0) for col in ['yr1977', 'yr1978', 'yr1979', 'yr1980', 'yr1981', 'yr1982']]
dep_indep.extend(yr_dummies)

# GMM instruments: solo L.n
Dgmm = [regular_variable('n', 1, min_lag=2, max_lag=5)]  # L(1/5).n

# IVs estándar
iv = [regular_variable('n', 2),  # L2.n
      regular_variable('w', 0), regular_variable('w', 1),
      regular_variable('k', 0), regular_variable('k', 1), regular_variable('k', 2),
      regular_variable('ys', 0), regular_variable('ys', 1), regular_variable('ys', 2)] + yr_dummies

variables = {'dep_indep': dep_indep, 'Dgmm': Dgmm, 'Lgmm': [], 'iv': iv}

TypeError: panel_data.__init__() missing 3 required positional arguments: 'identifiers', 'variables', and 'options'

In [4]:
from pydynpd import DynPanelGMM

# Reordenar y seleccionar variables relevantes
vars_base = ['n', 'w', 'k', 'ys']
yr_dummies = sorted([col for col in df.columns if col.startswith('yr')])
all_vars = vars_base + yr_dummies

# Estimar modelo: Arellano-Bond Difference GMM (one-step, robust)
model = DynPanelGMM(
    data=df,
    depvar='n',
    exog_vars=['w', 'k', 'ys'] + yr_dummies,
    endog_vars=['n'],
    panel_id='id',
    time_var='year',
    lags=1,           # GMM instruments: L1 to L5 internally
    max_instr_lag=5,  # L(1/5).L.n como en xtabond2
    first_diff=True,
    robust=True,      # errores robustos tipo White
    twostep=False,    # One-step
    drop_missing=True # elimina automáticamente NA
)

result = model.fit()
print(result.summary())

ImportError: cannot import name 'DynPanelGMM' from 'pydynpd' (c:\Users\HP\AppData\Local\Programs\Python\Python312\Lib\site-packages\pydynpd\__init__.py)