In [None]:
import pandas as pd 
import numpy as np 
import statsmodels.api as sm 
import scipy.linalg

In [2]:
# True coefficients of the data-generating process y = beta_0 + beta_1 * x + u.
beta_0 = -3
beta_1 = 0.8

# 4x4 diagonal matrix; get_data reads entry (t-1, t-1) as the `scale`
# argument of the period-t error draw.
omega = np.diag([4, 9, 16, 25])

In [3]:
def get_data(N,T):
    """Simulate a balanced panel in long format.

    For every individual i = 1..N and period t = 1..T one row is generated
    with a regressor x drawn from Uniform[1, 30), an error u drawn from a
    normal distribution, and y = beta_0 + beta_1 * x + u.  Rows are ordered
    by individual and, within individual, by period.

    Parameters
    ----------
    N : int
        Number of individuals.
    T : int
        Number of periods per individual; must not exceed the dimension of
        the module-level ``omega`` matrix.

    Returns
    -------
    pandas.DataFrame
        Columns 'i', 't', 'y', 'cte' (constant 1), 'x' and 'u', with N*T rows.

    Raises
    ------
    ValueError
        If T is larger than the number of rows of ``omega``.
    """
    if T > omega.shape[0]:
        raise ValueError(
            "T=%d exceeds the dimension of omega (%d)" % (T, omega.shape[0])
        )
    data = {'i': [], 't': [], 'y': [], 'cte':[] , 'x': [], 'u': []}
    for person in range(1,N+1):
        for time in range(1,T+1):
            # Draw plain floats (not length-1 arrays) so the resulting columns
            # are numeric instead of object columns holding arrays.
            x = float(np.random.uniform(1, 30))
            # np.random.normal takes a standard deviation as its second
            # argument, so the diagonal of omega acts as a std dev here.
            # NOTE(review): if omega is meant to hold variances this should be
            # np.sqrt(omega[time-1, time-1]); left unchanged so that previously
            # recorded simulation results stay reproducible.
            u = float(np.random.normal(0, omega.item(time-1,time-1)))
            data['i'].append(person)
            data['t'].append(time)
            data['cte'].append(1)
            data['x'].append(x)
            data['u'].append(u)
            data['y'].append(beta_0 + beta_1*x + u)
    return pd.DataFrame(data = data)

In [4]:
def get_omega_est(N, T, df):
    """Estimate the within-individual error matrix from pooled OLS residuals.

    Regresses 'y' on 'cte' and 'x' by OLS, arranges the residuals as one row
    per individual, and accumulates the sum over individuals of the outer
    products u_j u_j' into a T x T matrix.  That matrix is then repeated N
    times along the diagonal of an (N*T) x (N*T) block-diagonal matrix.

    The sum is not divided by N.
    NOTE(review): this should only rescale the matrix passed to GLS by a
    constant -- confirm that this is intended.

    Parameters
    ----------
    N : int
        Number of individuals in ``df``.
    T : int
        Number of periods per individual.
    df : pandas.DataFrame
        Panel with columns 'y', 'cte' and 'x'; rows must be ordered by
        individual and then by period, with exactly T rows per individual.

    Returns
    -------
    pandas.DataFrame
        The (N*T) x (N*T) block-diagonal matrix.

    Raises
    ------
    ValueError
        If ``df`` does not contain exactly N*T rows (raised by the reshape).
    """
    y = df['y'].astype(float)
    x = df[['cte', 'x']].astype(float)
    reg_ols = sm.OLS(y,x).fit()
    # Positional reshape: row j holds the T residuals of individual j.  This
    # uses T instead of a hard-coded 4 and does not depend on the index labels.
    resid = np.asarray(reg_ols.resid, dtype=float).reshape(N, T)
    # resid' resid equals the sum over individuals of u_j u_j'.
    omega_est = resid.T.dot(resid)
    long_omega_est = scipy.linalg.block_diag(*([omega_est] * N))
    return pd.DataFrame(long_omega_est)

    

In [5]:
def get_fgls(N, T):
    """Simulate one panel and fit GLS of y on ('cte', 'x') with the estimated
    block-diagonal error matrix.

    Returns the fitted statsmodels results object.
    """
    panel = get_data(N, T)
    sigma_hat = get_omega_est(N, T, panel)
    endog = panel['y'].astype(float)
    exog = panel[['cte', 'x']].astype(float)
    model = sm.GLS(endog, exog, sigma_hat)
    return model.fit()

In [6]:
def get_simulation_report(muestras, N, T):
    """Run a Monte-Carlo study of the FGLS estimator and summarise it.

    The global NumPy seed is reset to 1313 on every call, so each call is
    reproducible on its own.  For each replication a panel is simulated and
    fitted with get_fgls, and three t-tests are evaluated:
    x = 0.8, cte = 0 and x = 0.4.

    Parameters
    ----------
    muestras : int
        Number of simulated samples (replications); must be positive.
    N : int
        Number of individuals per sample.
    T : int
        Number of periods per individual.

    Returns
    -------
    dict
        Rejection frequencies of x = 0.8 at the 1% and 5% levels, rejection
        frequencies of cte = 0 and x = 0.4 at the 1% level, and the mean and
        median of the coefficient estimates plus the mean of their standard
        errors.

    Raises
    ------
    ValueError
        If ``muestras`` is not positive.
    """
    if muestras <= 0:
        raise ValueError("muestras must be a positive integer")
    np.random.seed(1313)
    count_power_b0 = 0
    count_power_b1 = 0
    count_size_1 = 0
    count_size_5 = 0
    coef_list_0 = []
    coef_list_1 = []
    se_list_b0 = []
    se_list_b1 = []
    for _ in range(muestras):
        fgls = get_fgls(N, T)
        # Access by label: params/bse are indexed by the regressor names, and
        # integer keys on a labelled Series are deprecated positional lookups.
        coef_list_0.append(fgls.params['cte'])
        coef_list_1.append(fgls.params['x'])
        se_list_b0.append(fgls.bse['cte'])
        se_list_b1.append(fgls.bse['x'])
        # p-values come back in the order the hypotheses are written.
        test = fgls.t_test('x = 0.8, cte = 0, x = 0.4')
        if test.pvalue[0] <= 0.01:
            count_size_1 += 1
        if test.pvalue[0] <= 0.05:
            count_size_5 += 1
        if test.pvalue[1] <= 0.01:
            count_power_b0 += 1
        if test.pvalue[2] <= 0.01:
            count_power_b1 += 1
    report = {'Tamaño al 1%': count_size_1/muestras,
              'Tamaño al 5%': count_size_5/muestras,
              'Poder de b_0=0 al 1%': count_power_b0/muestras,
              'Poder de b_1=0.4 al 1%': count_power_b1/muestras,
              'Media de b_0': np.mean(coef_list_0),
              'Media de b_1': np.mean(coef_list_1),
              'Mediana de b_0': np.median(coef_list_0),
              'Mediana de B_1': np.median(coef_list_1),
              'SE de b_0': np.mean(se_list_b0),
              'SE de b_1': np.mean(se_list_b1)}
    return report
        

In [7]:
# 5000 replications with N = 5 individuals and T = 4 periods.
get_simulation_report(muestras = 5000, N = 5, T = 4)

{'Tamaño al 1%': 0.3428,
 'Tamaño al 5%': 0.473,
 'Poder de b_0=0 al 1%': 0.3708,
 'Poder de b_1=0.4 al 1%': 0.5676,
 'Media de b_0': -3.005808992728244,
 'Media de b_1': 0.8019037257101465,
 'Mediana de b_0': -3.096550594533539,
 'Mediana de B_1': 0.8070211303833639,
 'SE de b_0': 2.4007660533876587,
 'SE de b_1': 0.13297735760030902}

In [8]:
# 5000 replications with N = 10 individuals and T = 4 periods.
get_simulation_report(muestras = 5000, N = 10, T = 4)

{'Tamaño al 1%': 0.105,
 'Tamaño al 5%': 0.2154,
 'Poder de b_0=0 al 1%': 0.2276,
 'Poder de b_1=0.4 al 1%': 0.6368,
 'Media de b_0': -3.0653208416639584,
 'Media de b_1': 0.8043436028014082,
 'Mediana de b_0': -3.1232909550411487,
 'Mediana de B_1': 0.8071752317221335,
 'SE de b_0': 2.224118229828138,
 'SE de b_1': 0.12519139540979046}

In [9]:
# 5000 replications with N = 30 individuals and T = 4 periods.
get_simulation_report(muestras = 5000, N = 30, T = 4)

{'Tamaño al 1%': 0.0258,
 'Tamaño al 5%': 0.0908,
 'Poder de b_0=0 al 1%': 0.3806,
 'Poder de b_1=0.4 al 1%': 0.9846,
 'Media de b_0': -3.0404172131718687,
 'Media de b_1': 0.8019631355248176,
 'Mediana de b_0': -3.044403930719625,
 'Mediana de B_1': 0.8009299320802632,
 'SE de b_0': 1.355656666149554,
 'SE de b_1': 0.07679403622247184}

In [10]:
# 5000 replications with N = 100 individuals and T = 4 periods.
get_simulation_report(muestras = 5000, N = 100, T = 4)

{'Tamaño al 1%': 0.013,
 'Tamaño al 5%': 0.0608,
 'Poder de b_0=0 al 1%': 0.9178,
 'Poder de b_1=0.4 al 1%': 1.0,
 'Media de b_0': -3.0270782895881743,
 'Media de b_1': 0.8011134132211638,
 'Mediana de b_0': -3.0202700334175825,
 'Mediana de B_1': 0.8013374882246551,
 'SE de b_0': 0.7436734140549827,
 'SE de b_1': 0.04216884018541226}

In [11]:
# 5000 replications with N = 200 individuals and T = 4 periods.
get_simulation_report(muestras = 5000, N = 200, T = 4)

{'Tamaño al 1%': 0.0106,
 'Tamaño al 5%': 0.0522,
 'Poder de b_0=0 al 1%': 0.9992,
 'Poder de b_1=0.4 al 1%': 1.0,
 'Media de b_0': -3.0121833818669512,
 'Media de b_1': 0.8003741800628054,
 'Mediana de b_0': -3.0093871530707856,
 'Mediana de B_1': 0.8001950138388311,
 'SE de b_0': 0.5257704446880094,
 'SE de b_1': 0.029826639492396993}

In [None]:
# 5000 replications with N = 500 individuals and T = 4 periods.
# Each replication builds a 2000 x 2000 block-diagonal matrix (N*T = 2000).
get_simulation_report(muestras = 5000, N = 500, T = 4)

In [8]:
# data = get_data(5,4)
# y = data['y'].astype(float)
# x = data[['cte', 'x']].astype(float)
# reg_ols = sm.OLS(y,x).fit()
# u_hat = {'i': data['i'], 't': data['t'], 'u_jt': reg_ols.resid.tolist()}
# u_hat = pd.DataFrame(data = u_hat)
# omg_est = np.zeros((4,4))
# for l in range(0, 20,4): 
#     u_j = np.array([[u_hat['u_jt'][l+i] for i in range(0,4)]]).T
#     omg_est = omg_est + u_j.dot(u_j.T)
# omg_inv = np.linalg.inv(omg_est)
# x = x.to_numpy()
# y = y.to_numpy()
# x_j = np.asanyarray([[x[i], x[i+1], x[i+2], x[i+3]] for i in range(0,20,4)])
# y_j = np.asanyarray([[y[i], y[i+1], y[i+2], y[i+3]] for i in range(0,20,4)])
# var_est = np.zeros((2,2), float)  # K x K; TODO rename: this is actually the inverse
# xT_omg_y = np.zeros((2,1), float) #kx1
# for i in range(0,5): 
#     temp = x_j[i].T.dot(omg_inv).dot(x_j[i])
#     var_est = var_est + temp
#     temp2 = x_j[i].T.dot(omg_inv).dot(y_j[i])
#     xT_omg_y = xT_omg_y + temp2
# xT_omg_y = xT_omg_y[1]  # Keep a single row: numpy broadcasting turned the (2,1) accumulator into a 2x2 array with identical rows
# beta_fgls = np.linalg.inv(var_est).dot(xT_omg_y)
# beta_fgls  # Not very different from the OLS estimates, which is reassuring