# Classes

En esta sección veremos una aplicación del tema en la estimación de una función de costos translogaritmica 

$$
\begin{aligned} 
lnCT &= \beta_{0}+\beta_{q}lnq+ \beta_{qq}(lnq)^2+\beta_{q1}lnqlnp_1+\beta_{q2}lnqlnp_2+ \beta_{q3}lnqlnp_{3} +\beta_{1}lnp_1+\beta_{2}lnp_2+ \beta_{3}lnp_3 \\
& + \beta_{11}(lnp_{1})^2+ \beta_{22}(lnp_{2})^2+ \beta_{33}(lnp_{3})^2 + \beta_{12}lnp_{1}lnp_{2}+ \beta_{13}lnp_{1}lnp_{3}+\beta_{23}lnp_{2}lnp_{3} 
\end{aligned}
$$


- q: total output
- p1 (PL): wage rate
- p2 (PF) : capital price index
- p3 (PK): fuel price


In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from scipy.stats import t # t - student 
import os 

In [2]:
user = os.getlogin()   # Username
os.chdir(f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2/Lab4")


In [3]:
greene = pd.read_csv(r"../data/christensen_greene_f4.csv")

In [4]:
greene

Unnamed: 0,id,YEAR,COST,Q,PL,SL,PK,SK,PF,SF
0,1,1970,0.2130,8.0,6869.47,0.3291,64.945,0.4197,18.000,0.2512
1,4,1970,3.0427,869.0,8372.96,0.1030,68.227,0.2913,21.067,0.6057
2,5,1970,9.4059,1412.0,7960.90,0.0891,40.692,0.1567,41.530,0.7542
3,14,1970,0.7606,65.0,8971.89,0.2802,41.243,0.1282,28.539,0.5916
4,15,1970,2.2587,295.0,8218.40,0.1772,71.940,0.1623,39.200,0.6606
...,...,...,...,...,...,...,...,...,...,...
153,214,1970,6.8293,946.6,10642.16,0.0883,43.600,0.1914,51.463,0.7203
154,215,1970,3.7605,377.0,7432.24,0.2117,74.120,0.2274,33.436,0.5609
155,216,1970,3.9822,391.0,5826.04,0.1926,78.288,0.0924,44.633,0.7151
156,217,1970,30.1880,5317.0,9586.63,0.0845,78.008,0.2009,41.840,0.7147


In [5]:

ct = greene.COST.values # se convierte a un array 
q = greene.Q.values
p1 = greene.PL.values
p2 = greene.PF.values
p3 = greene.PK.values

In [6]:
ct = greene.COST.values 
ct

array([2.130000e-01, 3.042700e+00, 9.405900e+00, 7.606000e-01,
       2.258700e+00, 1.342200e+00, 6.159000e-01, 4.887000e-01,
       1.147400e+00, 7.549200e+00, 2.053200e+00, 6.363000e-01,
       3.150400e+00, 1.031360e+01, 5.848800e+00, 4.505000e+00,
       5.597100e+00, 3.725500e+00, 6.006500e+00, 1.278300e+01,
       6.621400e+00, 9.642900e+00, 8.685200e+00, 8.637200e+00,
       1.124190e+01, 6.622100e+00, 1.096650e+01, 1.362700e+01,
       7.439500e+00, 9.784300e+00, 2.086710e+01, 1.970920e+01,
       1.404310e+01, 1.019020e+01, 1.617640e+01, 4.225140e+01,
       2.256120e+01, 3.301750e+01, 1.889630e+01, 1.326790e+01,
       2.154540e+01, 3.553030e+01, 2.980110e+01, 3.087730e+01,
       2.435650e+01, 1.748020e+01, 1.990080e+01, 2.878610e+01,
       2.708320e+01, 2.244210e+01, 3.020670e+01, 3.016780e+01,
       3.258400e+01, 4.738640e+01, 2.429030e+01, 3.129220e+01,
       3.198840e+01, 4.190160e+01, 5.811540e+01, 4.052810e+01,
       3.706660e+01, 2.516860e+01, 4.518270e+01, 5.5176

In [7]:
# Generation of data

y = np.log(  ct  ) # lnCT

c = np.ones( len( y ) ) # Constant
xq = np.log( q ) # lnq
xqq = np.log( q )*np.log( q ) # lnq^2
xq1 = np.log( q )*np.log( p1 ) # lnq*lnp_1
xq2 = np.log( q )*np.log( p2 ) # lnq*lnp_2
xq3 = np.log( q )*np.log( p3 ) # lnq*ln_3
x1 = np.log( p1 ) # lnp_1
x2 = np.log( p2 ) # ln_p2
x3 = np.log( p3 ) # ln_p3
x11 = np.log( p1 )*np.log( p1 ) # 0.5ln2p_1
x22 = np.log( p2 )*np.log( p2 ) # 0.5ln2p_2
x33 = np.log( p3 )*np.log( p3 ) # 0.5ln2p_3
x12 = np.log( p1 )*np.log( p2 ) # lnp_1*lnp_2
x13 = np.log( p1 )*np.log( p3 ) # lnp_1*lnp_3
x23 = np.log( p2 )*np.log( p3 ) # lnp_2*lnp_3

columns = [  "ln_cost", "lnq", "(lnq)^2", "(lnq)(lnp1)", "(lnq)(lnp2)",
          "(lnq)(lnp3)", "lnp1", "lnp2", "lnp3", "(lnp1)^2",
          "(lnp2)^2", "(lnp3)^2", "(lnp1)(lnp2)", "(lnp1)(lnp3)", "(lnp2)(lnp3)" ]

data_val = np.asarray( [ y, xq,  xqq, xq1, xq2,
                xq3, x1,  x2,  x3,  x11, 
                x22, x33, x12, x13, x23 ] ).T # se coloca transpuesta para que cada varaible sea una columna 

# np.asarray permite crear una matriz a partir de vectores 

data = pd.DataFrame(  data_val , columns = columns  )

In [8]:
data

Unnamed: 0,ln_cost,lnq,(lnq)^2,(lnq)(lnp1),(lnq)(lnp2),(lnq)(lnp3),lnp1,lnp2,lnp3,(lnp1)^2,(lnp2)^2,(lnp3)^2,(lnp1)(lnp2),(lnp1)(lnp3),(lnp2)(lnp3)
0,-1.546463,2.079442,4.324077,18.371538,6.010359,8.678634,8.834842,2.890372,4.173541,78.054437,8.354249,17.418442,25.535978,36.872574,12.063084
1,1.112745,6.767343,45.796933,61.127805,20.624885,28.577410,9.032763,3.047708,4.222840,81.590803,9.288523,17.832381,27.529222,38.143915,12.869984
2,2.241337,7.252762,52.602563,65.146469,27.026810,26.878966,8.982297,3.726416,3.706032,80.681665,13.886177,13.734670,33.471777,33.288677,13.810215
3,-0.273648,4.174387,17.425509,37.994654,13.989505,15.526556,9.101852,3.351272,3.719481,82.843703,11.231021,13.834542,30.502777,33.854168,12.464992
4,0.814789,5.686975,32.341689,51.263140,20.863674,24.316554,9.014131,3.668677,4.275832,81.254554,13.459189,18.282743,33.069932,38.542913,15.686647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,1.921222,6.852877,46.961918,63.543838,27.006249,25.870001,9.272579,3.940863,3.775057,85.980717,15.530402,14.251056,36.541963,35.004515,14.876983
154,1.324552,5.932245,35.191533,52.877557,20.820004,25.542382,8.913583,3.509633,4.305685,79.451954,12.317525,18.538927,31.283405,38.379082,15.111376
155,1.381834,5.968708,35.625470,51.749248,22.671977,26.025919,8.670093,3.798473,4.360394,75.170509,14.428401,19.013039,32.933118,37.805024,16.562842
156,3.407444,8.578665,73.593485,78.650266,32.031471,37.375623,9.168125,3.733853,4.356811,84.054510,13.941657,18.981805,34.232428,39.943790,16.267692


### Getting filtered data to regressions

In [9]:
# selecting columns
X = data.iloc[ : , 1: ]
y = data.ln_cost

- Generate a new class named `RegClass`. The initial attributes of your class must be `X`, `y`, and `intercept`. `X` (covariables) must be a **pd.DataFrame** and `y` (endog) must be a **pd.Series** and `intercept` can be `True` or `False`, by default `True`. Also, you must specify the type of your class' parameters, and output and It must raise an error if the inputs do not meet the requirements.  This class should have two methods: `reg_OLS` (You can add more if you think it is necessary). These methods should return the same output as the previously defined functions with the same name. The method `reg_OLS` does not need an extra parameter to be executed. After executing the method `reg_OLS`, any user should access the following attributes: `beta_OLS` and `var_OLS`. They should look like `beta_OLS_output` and `var_OLS_output`.

$$
\widehat{\boldsymbol{\varepsilon}} = \mathbf{Y} - \mathbf{X} \widehat{\boldsymbol{\beta}}_{OLS}
$$


$$
\widehat{\sigma}^2_{OLS} = \dfrac{\widehat{\boldsymbol{\varepsilon}}^\top \widehat{\boldsymbol{\varepsilon}}}{N - (k)}
$$


$$
\begin{aligned}
\mathbb{V}{\rm ar} (\widehat{\boldsymbol{\beta}}^{(OLS)}) = 
\sigma^2 \left( \mathbf{X}^\top  \mathbf{X}\right)^{-1}
\end{aligned}
$$

$$
\begin{aligned} 
lnCT &= \beta_{0}+\beta_{q}lnq+ \beta_{qq}(lnq)^2+\beta_{q1}lnqlnp_1+\beta_{q2}lnqlnp_2+ \beta_{q3}lnqlnp_{3} +\beta_{1}lnp_1+\beta_{2}lnp_2+ \beta_{3}lnp_3 \\
& + \beta_{11}(lnp_{1})^2+ \beta_{22}(lnp_{2})^2+ \beta_{33}(lnp_{3})^2 + \beta_{12}lnp_{1}lnp_{2}+ \beta_{13}lnp_{1}lnp_{3}+\beta_{23}lnp_{2}lnp_{3} 
\end{aligned}
$$


In [10]:
class RegClass( object ):
    
    def __init__( self, X : pd.DataFrame , y : pd.Series , intercept = True  ):
    
        if not isinstance( X, pd.DataFrame ):
            raise TypeError( "X must be a pd.DataFrame." )

        if not isinstance( y , pd.Series ):
            raise TypeError( "y must be a pd.Series." )
            
        # asignando atributos de la clase
        
        
        self.X = X
        self.y = y
        self.intercept = intercept
    
        if self.intercept:

            self.X[ 'Intercept' ] = 1
            # colcondo la columna Intercept en la primera columna 
            cols = self.X.columns.tolist() # nombre de varaible a lista 
            new_cols_orders = [cols[ -1 ]] + cols[ 0:-1 ] # juntano listas
            
#             new_cols_orders = [cols[ -1 ]].extend(cols[ 0:-1 ]) # append lista a una lista 
            
            # [cols[ -1 ]] la jala la ultima fila , cols[ 0:-1 ]  primera fila hasta la penultima fila 
            
            self.X = self.X.loc[ : , new_cols_orders ] # usamos .loc que filtra por nombre de filas o columnas 

        else:
            pass
            # No se ejecuta nada, la base de datos no se altera
    
        # creando nuevos atributos 
        
        self.X_np = self.X.values  # Dataframe a multi array
        self.y_np = y.values.reshape( -1 , 1 ) # de objeto serie a array columna 
        self.columns = self.X.columns.tolist() # nombre de la base de datos como objeto lista
    
    
    # reshape( - 1, 1 ), primer -1 hace que sea un array fila, el otro 1 define la cantidad de columnas. En esta caso solo 1
    # reshape( 10, 2 ) se puede usar reshape( -1, 2 )
    
    
    def reg_beta_OLS( self ):
        # X, y en Matrix, y vector columna respectivamente 
        
        X_np = self.X_np
        y_np = self.y_np

        # beta_ols
        beta_ols = np.linalg.inv( X_np.T @ X_np ) @ ( X_np.T @ y_np )

        # columnas de X
        index_names = self.columns
        # Output
        beta_OLS_output = pd.DataFrame( beta_ols , index = index_names , columns = [ 'Coef.' ] )
        
        # Dataframe de coeffientes como atributo 
        
        self.beta_OLS = beta_OLS_output
        
        return beta_OLS_output
    
    def reg_var_OLS( self ):
    
        # Se corre la función reg_beta_OLS que estima el vector de coeficientes
        
        self.reg_beta_OLS()
        
        X_np = self.X_np
        y_np = self.y_np
        
        
        # beta_ols
        beta_OLS = self.beta_OLS.values.reshape( - 1, 1 ) # Dataframe a vector columna 

        # errors
        e = y_np - ( X_np @ beta_OLS )

        # error variance
        N = X.shape[ 0 ]
        total_parameters = X.shape[ 1 ]
        error_var = ( (e.T @ e)[ 0 ] )/( N - total_parameters )

        # Varianza
        var_OLS =  error_var * np.linalg.inv( X_np.T @ X_np )

        # columns names 
        index_names = self.columns
        # output
        var_OLS_output = pd.DataFrame( var_OLS , index = index_names , columns = index_names )
        ## variance output como nuevo atributo del objeto
        self.var_OLS = var_OLS_output

        
    def reg_OLS( self ):
        
        # Se corren las funciones
        self.reg_beta_OLS()
        self.reg_var_OLS()
        X = self.X_np
        
        # var y beta
        beta_OLS = self.beta_OLS.values.reshape( -1, 1 )
        var_OLS = self.var_OLS.values
        
        # standard errors
        beta_se = np.sqrt( np.diag( var_OLS ) )

        # Se calcula el test statistic para cada coeficiente
        t_stat = beta_OLS.ravel() / beta_se.ravel()
        
        
        # .ravel() te multiarray a simple array
        
        # p-value:
        N = X.shape[ 0 ]
        k = beta_OLS.size
        self.nk = N-k
        pvalue = (1 - t.cdf(t_stat, df= N - k) ) * 2

        # Confidence interval
        
        up_bd = beta_OLS.ravel() + 1.96*beta_se
        lw_bd = beta_OLS.ravel() - 1.96*beta_se

        table_data ={  'Coef.'    : beta_OLS.ravel() ,  # .ravel() :: .flatten()
                       "Std.Err." : beta_se.ravel(),
                       "t"        : t_stat.ravel(),
                       "P>|t|"    : pvalue.ravel(), 
                       "[0.025"   : lw_bd.ravel(),
                       "0.975]"   : up_bd.ravel()
                    }
        
        # defining index names
        index_names = self.columns
        
        # defining a pandas dataframe 
        reg_OLS = pd.DataFrame( table_data , index = index_names )

        return reg_OLS
    


In [335]:
A = RegClass( X, y )

# Atributo de las variables 

A.X


Unnamed: 0,Intercept,lnq,(lnq)^2,(lnq)(lnp1),(lnq)(lnp2),(lnq)(lnp3),lnp1,lnp2,lnp3,(lnp1)^2,(lnp2)^2,(lnp3)^2,(lnp1)(lnp2),(lnp1)(lnp3),(lnp2)(lnp3)
0,1,2.079442,4.324077,18.371538,6.010359,8.678634,8.834842,2.890372,4.173541,78.054437,8.354249,17.418442,25.535978,36.872574,12.063084
1,1,6.767343,45.796933,61.127805,20.624885,28.577410,9.032763,3.047708,4.222840,81.590803,9.288523,17.832381,27.529222,38.143915,12.869984
2,1,7.252762,52.602563,65.146469,27.026810,26.878966,8.982297,3.726416,3.706032,80.681665,13.886177,13.734670,33.471777,33.288677,13.810215
3,1,4.174387,17.425509,37.994654,13.989505,15.526556,9.101852,3.351272,3.719481,82.843703,11.231021,13.834542,30.502777,33.854168,12.464992
4,1,5.686975,32.341689,51.263140,20.863674,24.316554,9.014131,3.668677,4.275832,81.254554,13.459189,18.282743,33.069932,38.542913,15.686647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,1,6.852877,46.961918,63.543838,27.006249,25.870001,9.272579,3.940863,3.775057,85.980717,15.530402,14.251056,36.541963,35.004515,14.876983
154,1,5.932245,35.191533,52.877557,20.820004,25.542382,8.913583,3.509633,4.305685,79.451954,12.317525,18.538927,31.283405,38.379082,15.111376
155,1,5.968708,35.625470,51.749248,22.671977,26.025919,8.670093,3.798473,4.360394,75.170509,14.428401,19.013039,32.933118,37.805024,16.562842
156,1,8.578665,73.593485,78.650266,32.031471,37.375623,9.168125,3.733853,4.356811,84.054510,13.941657,18.981805,34.232428,39.943790,16.267692


In [336]:
A.y

0     -1.546463
1      1.112745
2      2.241337
3     -0.273648
4      0.814789
         ...   
153    1.921222
154    1.324552
155    1.381834
156    3.407444
157    4.217391
Name: ln_cost, Length: 158, dtype: float64

In [337]:
A.reg_beta_OLS()  # Resultado del método reg OLS

Unnamed: 0,Coef.
Intercept,-76.259258
lnq,-1.080425
(lnq)^2,0.026489
(lnq)(lnp1),0.131041
(lnq)(lnp2),0.058652
(lnq)(lnp3),0.040144
lnp1,14.718292
lnp2,-0.894733
lnp3,6.380797
(lnp1)^2,-0.769264


In [339]:
A.beta_OLS # El mismo resultado pero como atributo 

Unnamed: 0,Coef.
Intercept,-76.259258
lnq,-1.080425
(lnq)^2,0.026489
(lnq)(lnp1),0.131041
(lnq)(lnp2),0.058652
(lnq)(lnp3),0.040144
lnp1,14.718292
lnp2,-0.894733
lnp3,6.380797
(lnp1)^2,-0.769264


In [342]:
A.var_OLS

Unnamed: 0,Intercept,lnq,(lnq)^2,(lnq)(lnp1),(lnq)(lnp2),(lnq)(lnp3),lnp1,lnp2,lnp3,(lnp1)^2,(lnp2)^2,(lnp3)^2,(lnp1)(lnp2),(lnp1)(lnp3),(lnp2)(lnp3)
Intercept,1465.361105,0.192537,-0.011698,-0.026677,0.015949,0.039538,-262.932564,-8.392679,-128.192045,11.880856,-0.266944,3.540639,0.990253,10.90244,0.197435
lnq,0.192537,0.141034,0.000308,-0.013459,0.000161,-0.006089,-0.186223,0.14832,-0.090714,0.015321,0.000381,-0.000898,-0.015046,0.017546,-0.00357
(lnq)^2,-0.011698,0.000308,6e-06,-3.6e-05,-5e-06,-1.1e-05,0.001491,0.001052,0.000862,-4.3e-05,1.3e-05,-6e-05,-9.9e-05,-1.4e-05,-4.8e-05
(lnq)(lnp1),-0.026677,-0.013459,-3.6e-05,0.001466,-0.000168,0.000348,0.021561,-0.015127,0.00543,-0.001961,1.9e-05,0.000285,0.00185,-0.001141,-8.5e-05
(lnq)(lnp2),0.015949,0.000161,-5e-06,-0.000168,0.000653,-0.000189,-0.008293,0.003306,0.008035,0.000587,-0.00017,-0.000103,-0.00017,-0.000144,-0.001375
(lnq)(lnp3),0.039538,-0.006089,-1.1e-05,0.000348,-0.000189,0.000888,-0.000337,-0.008853,0.000268,0.000211,-3.3e-05,-6.7e-05,0.000102,-0.001537,0.002212
lnp1,-262.932564,-0.186223,0.001491,0.021561,-0.008293,-0.000337,50.194721,-1.253531,19.099542,-2.388986,0.070493,-0.420154,0.038416,-1.788158,0.132157
lnp2,-8.392679,0.14832,0.001052,-0.015127,0.003306,-0.008853,-1.253531,8.078742,-0.214871,0.161643,-0.058647,-0.033147,-0.68505,0.199482,-0.367388
lnp3,-128.192045,-0.090714,0.000862,0.00543,0.008035,0.000268,19.099542,-0.214871,20.513714,-0.705214,0.023665,-0.76305,0.036229,-1.555553,-0.072591
(lnp1)^2,11.880856,0.015321,-4.3e-05,-0.001961,0.000587,0.000211,-2.388986,0.161643,-0.705214,0.119855,-0.003079,0.011353,-0.013413,0.070381,-0.006656


In [341]:
A.reg_var_OLS()

In [66]:
A.reg_OLS()

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Intercept,-76.259258,38.280035,-1.992142,1.95174,-151.288127,-1.230389
lnq,-1.080425,0.375545,-2.876952,1.995369,-1.816494,-0.344357
(lnq)^2,0.026489,0.002357,11.24006,0.0,0.02187,0.031108
(lnq)(lnp1),0.131041,0.038284,3.422854,0.000808,0.056004,0.206077
(lnq)(lnp2),0.058652,0.025549,2.29565,0.02315,0.008576,0.108729
(lnq)(lnp3),0.040144,0.029791,1.347518,0.179945,-0.018247,0.098535
lnp1,14.718292,7.084823,2.07744,0.039549,0.832038,28.604545
lnp2,-0.894733,2.842313,-0.31479,1.24662,-6.465666,4.6762
lnp3,6.380797,4.529207,1.408811,0.161062,-2.496449,15.258042
(lnp1)^2,-0.769264,0.346201,-2.222011,1.972147,-1.447819,-0.090709


In [11]:
# Regresión sin intercepto
X = data.iloc[ : , 1: ]
y = data.ln_cost


Reg_nitc = RegClass( X, y , intercept = False)
Reg_nitc.X



Unnamed: 0,lnq,(lnq)^2,(lnq)(lnp1),(lnq)(lnp2),(lnq)(lnp3),lnp1,lnp2,lnp3,(lnp1)^2,(lnp2)^2,(lnp3)^2,(lnp1)(lnp2),(lnp1)(lnp3),(lnp2)(lnp3)
0,2.079442,4.324077,18.371538,6.010359,8.678634,8.834842,2.890372,4.173541,78.054437,8.354249,17.418442,25.535978,36.872574,12.063084
1,6.767343,45.796933,61.127805,20.624885,28.577410,9.032763,3.047708,4.222840,81.590803,9.288523,17.832381,27.529222,38.143915,12.869984
2,7.252762,52.602563,65.146469,27.026810,26.878966,8.982297,3.726416,3.706032,80.681665,13.886177,13.734670,33.471777,33.288677,13.810215
3,4.174387,17.425509,37.994654,13.989505,15.526556,9.101852,3.351272,3.719481,82.843703,11.231021,13.834542,30.502777,33.854168,12.464992
4,5.686975,32.341689,51.263140,20.863674,24.316554,9.014131,3.668677,4.275832,81.254554,13.459189,18.282743,33.069932,38.542913,15.686647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,6.852877,46.961918,63.543838,27.006249,25.870001,9.272579,3.940863,3.775057,85.980717,15.530402,14.251056,36.541963,35.004515,14.876983
154,5.932245,35.191533,52.877557,20.820004,25.542382,8.913583,3.509633,4.305685,79.451954,12.317525,18.538927,31.283405,38.379082,15.111376
155,5.968708,35.625470,51.749248,22.671977,26.025919,8.670093,3.798473,4.360394,75.170509,14.428401,19.013039,32.933118,37.805024,16.562842
156,8.578665,73.593485,78.650266,32.031471,37.375623,9.168125,3.733853,4.356811,84.054510,13.941657,18.981805,34.232428,39.943790,16.267692


In [13]:
Reg_nitc.reg_OLS()

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
lnq,-1.070405,0.379362,-2.821591,1.994546,-1.813956,-0.326855
(lnq)^2,0.02588,0.002361,10.96282,0.0,0.021253,0.030508
(lnq)(lnp1),0.129652,0.03867,3.352769,0.001022,0.053859,0.205446
(lnq)(lnp2),0.059482,0.025808,2.304809,0.022607,0.008899,0.110066
(lnq)(lnp3),0.042202,0.030079,1.403049,0.162755,-0.016752,0.101156
lnp1,1.034946,1.754539,0.589868,0.556203,-2.403949,4.473842
lnp2,-1.331499,2.862906,-0.465086,1.357428,-6.942794,4.279797
lnp3,-0.290481,3.080744,-0.094289,1.074989,-6.32874,5.747778
(lnp1)^2,-0.150969,0.154961,-0.974238,1.668428,-0.454692,0.152755
(lnp2)^2,-0.050645,0.091683,-0.552394,1.418465,-0.230343,0.129053


In [16]:
Reg_nitc.nk 

100

In [15]:
# Añadiendo nuevos atributos 

Reg_nitc.nk = 100

In [17]:
# Añadiendo nuevas funciones a la clase RegClass

def new_funtion(x): 
    return x**2

Reg_nitc.nk1 = new_funtion

In [18]:
Reg_nitc.nk1(25)

625

In [19]:
from sklearn import linear_model

In [20]:
print(dir(linear_model))

# permite conocer los atributos y métodos de lasliberias en Python

['ARDRegression', 'BayesianRidge', 'ElasticNet', 'ElasticNetCV', 'GammaRegressor', 'Hinge', 'Huber', 'HuberRegressor', 'Lars', 'LarsCV', 'Lasso', 'LassoCV', 'LassoLars', 'LassoLarsCV', 'LassoLarsIC', 'LinearRegression', 'Log', 'LogisticRegression', 'LogisticRegressionCV', 'ModifiedHuber', 'MultiTaskElasticNet', 'MultiTaskElasticNetCV', 'MultiTaskLasso', 'MultiTaskLassoCV', 'OrthogonalMatchingPursuit', 'OrthogonalMatchingPursuitCV', 'PassiveAggressiveClassifier', 'PassiveAggressiveRegressor', 'Perceptron', 'PoissonRegressor', 'QuantileRegressor', 'RANSACRegressor', 'Ridge', 'RidgeCV', 'RidgeClassifier', 'RidgeClassifierCV', 'SGDClassifier', 'SGDOneClassSVM', 'SGDRegressor', 'SquaredLoss', 'TheilSenRegressor', 'TweedieRegressor', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_base', '_bayes', '_cd_fast', '_coordinate_descent', '_glm', '_huber', '_least_angle', '_logistic', '_omp', '_passive_aggressive', '

## <a id='2.1'>2.1. Inheritance</a>
La Inheritance es la capacidad de una clase para derivar o heredar las propiedades de otra clase. Los beneficios de la herencia son:

- Representa bien las relaciones del mundo real.
- Proporciona reutilización de un código. No tenemos que escribir el mismo código una y otra vez. Además, nos permite agregar más características a una clase sin modificarla.
- Es de naturaleza transitiva, lo que significa que si la clase B hereda de otra clase A, entonces todas las subclases de B heredarán automáticamente de la clase A.
- Te ayudará a comprender cómo funciona Sklearn Class (libreria de modelos estadísticos).

El modelo base se llama clase Base o Super o Padre.

Diferentes formas de Inheritance:
1. Herencia única: cuando una clase secundaria hereda solo de una clase principal, se denomina herencia única. 
2. Herencia múltiple: cuando una clase secundaria hereda de varias clases principales, se denomina herencia múltiple.
A diferencia de Java y como C++, Python admite la herencia múltiple. Especificamos todas las clases principales como una lista separada por comas entre paréntesis.

### <a id='2.2.'> 2.2 Single Inheritance </a> 
In single inheritance, la clase derivada usa las características o miembros de la clase base única
![alt text](../_images/inh1.png)

In [289]:
class RegClass( object ):
    
    def __init__( self, X : pd.DataFrame , y : pd.Series ):
    
        if not isinstance( X, pd.DataFrame ):
            raise TypeError( "X must be a pd.DataFrame." )

        if not isinstance( y , pd.Series ):
            raise TypeError( "y must be a pd.Series." )
            
        # asignando atributos de la clase
        
        
        self.X = X
        self.y = y
    
        # creando nuevos atributos 
        
        self.X_np = self.X.values  # Dataframe a multi array
        self.y_np = y.values.reshape( -1 , 1 ) # de objeto serie a array columna 
        self.columns = self.X.columns.tolist() # nombre de la base de datos como objeto lista
    
    
    # reshape( - 1, 1 ), primer -1 hace que sea un array fila, el otro 1 define la cantidad de columnas. En esta caso solo 1
    # reshape( 10, 2 ) se puede usar reshape( -1, 2 )
    
    
    def reg_beta_OLS( self, M ):
        # X, y en Matrix, y vector columna respectivamente 
        
        M = self.X_np
        y_np = self.y_np

        # beta_ols
        beta_ols = np.linalg.inv( M.T @ M ) @ ( M.T @ y_np )
        
        return beta_ols

    

In [256]:
  
    
class reg_intercepto_OLS(RegClass):
    
        def __init__( self,  X : pd.DataFrame , y : pd.Series , intercept = True ):
    
            self.inter = intercept
    
    
            RegClass.__init__(self, X, y) 
            RegClass2.__init__(self,W)
            RegClass2.__init__(self, Z) 

            # Se corre la función reg_beta_OLS que estima el vector de coeficientes

            if self.inter:

                self.X[ 'Intercept' ] = 1
                cols = self.X.columns.tolist()
                new_cols_orders = [cols[ -1 ]] + cols[ 0:-1 ]
                self.X = self.X.loc[ : , new_cols_orders ] 

            else:
                pass

        
        def OLS_intercepto(self):
            
            X_int = self.X
            
            beta_int =  self.reg_beta_OLS(X_int)
            index_names = self.columns
            beta_OLS_int_output = pd.DataFrame( beta_int , index = index_names , columns = [ 'Coef. with intercept' ] )
        
            return beta_OLS_int_output


In [257]:
A = reg_intercepto_OLS( X, y , intercept = True)

In [260]:
A.OLS_intercepto()

Unnamed: 0,Coef. with intercept
lnq,-1.080426
(lnq)^2,0.026489
(lnq)(lnp1),0.131041
(lnq)(lnp2),0.058652
(lnq)(lnp3),0.040144
lnp1,14.718292
lnp2,-0.894733
lnp3,6.380797
(lnp1)^2,-0.769264
(lnp2)^2,-0.036753


In [259]:
A.inter

True

In [294]:
Out = reg_intercepto_Var_OLS(X,y, intercept = True)

In [296]:
Out.OLS_residual()

Unnamed: 0,0
0,799.356502
1,562.617623
2,658.942020
3,728.191255
4,842.027939
...,...
153,756.789791
154,779.780193
155,882.364753
156,689.737900


### <a id='2.3'> 2.3 Multiple Inheritance </a> 

Multiple inheritance es aquella en la que la clase derivada adquiere dos o más clases base. En la herencia múltiple, la clase derivada puede usar las características conjuntas de las clases base heredadas. _**Sklearn** usa herencia múltiple._

 ![alt text](../_images/inh2.png)

## <a id='2.4'>2.4 Private Variables and methods </a> 

Las variables privadas nos permiten especificar variables que no queremos que los usuarios o socios del proyecto cambien.


Las variables de instancia "privadas" son variables a las que no se puede acceder excepto desde dentro de un objeto. Sin embargo, no existe la existencia de variables “Privadas”. En python, la mayoría de los códigos y codificadores de Python siguen una convención, es decir, un nombre con el prefijo de un guión bajo, por ej. \__diploma debe tratarse como una parte no pública de la API o cualquier código de Python, ya sea una función, un método o un miembro de datos.

In [278]:
dir(np)

['ALLOW_THREADS',
 'AxisError',
 'BUFSIZE',
 'Bytes0',
 'CLIP',
 'DataSource',
 'Datetime64',
 'ERR_CALL',
 'ERR_DEFAULT',
 'ERR_IGNORE',
 'ERR_LOG',
 'ERR_PRINT',
 'ERR_RAISE',
 'ERR_WARN',
 'FLOATING_POINT_SUPPORT',
 'FPE_DIVIDEBYZERO',
 'FPE_INVALID',
 'FPE_OVERFLOW',
 'FPE_UNDERFLOW',
 'False_',
 'Inf',
 'Infinity',
 'MAXDIMS',
 'MAY_SHARE_BOUNDS',
 'MAY_SHARE_EXACT',
 'MachAr',
 'NAN',
 'NINF',
 'NZERO',
 'NaN',
 'PINF',
 'PZERO',
 'RAISE',
 'SHIFT_DIVIDEBYZERO',
 'SHIFT_INVALID',
 'SHIFT_OVERFLOW',
 'SHIFT_UNDERFLOW',
 'ScalarType',
 'Str0',
 'Tester',
 'TooHardError',
 'True_',
 'UFUNC_BUFSIZE_DEFAULT',
 'UFUNC_PYVALS_NAME',
 'Uint64',
 'WRAP',
 '_NoValue',
 '_UFUNC_API',
 '__NUMPY_SETUP__',
 '__all__',
 '__builtins__',
 '__cached__',
 '__config__',
 '__deprecated_attrs__',
 '__dir__',
 '__doc__',
 '__expired_functions__',
 '__file__',
 '__getattr__',
 '__git_version__',
 '__loader__',
 '__mkl_version__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_

In [280]:
class RegClass( object ):
    
    def __init__( self, X : pd.DataFrame , y : pd.Series , intercept = True  ):
    
        if not isinstance( X, pd.DataFrame ):
            raise TypeError( "X must be a pd.DataFrame." )

        if not isinstance( y , pd.Series ):
            raise TypeError( "y must be a pd.Series." )
            
        # asignando atributos de la clase
        
        
        self.__X = X  # privatización del atributo, luego debe ajustarse en el resto de self.__X
        self.y = y
        self.intercept = intercept
    
        if self.intercept:

            self.__X[ 'Intercept' ] = 1 # X privatizado 
            # colcondo la columna Intercept en la primera columna 
            cols = self.__X.columns.tolist()
            new_cols_orders = [cols[ -1 ]] + cols[ 0:-1 ] # juntano listas
#             new_cols_orders = [cols[ -1 ]].extend(cols[ 0:-1 ]) # append lista a una lista 
            
            # [cols[ -1 ]] la jala la ultima fila , cols[ 0:-1 ]  primera fila hasta la penultima fila 
            
            self.X = self.__X.loc[ : , new_cols_orders ] # usamos .loc que filtra por nombre de filas o columnas 

        else:
            pass
            # No se ejecuta nada, la base de datos no se altera
    
        # creando nuevos atributos 
        
        self.X_np = self.__X.values  # Dataframe a multi array
        self.y_np = y.values.reshape( -1 , 1 ) # de objeto serie a array columna 
        self.columns = self.__X.columns.tolist() # nombre de la base de datos como objeto lista
    
    
    # reshape( - 1, 1 ), primer -1 hace que sea un array fila, el otro 1 define la cantidad de columnas. En esta caso solo 1
    # reshape( 10, 2 ) se puede usar reshape( -1, 2 )
    
    
    def _reg_beta_OLS( self ): # privatización del método 
        # X, y en Matrix, y vector columna respectivamente 
        
        X_np = self.X_np
        y_np = self.y_np

        # beta_ols
        beta_ols = np.linalg.inv( X_np.T @ X_np ) @ ( X_np.T @ y_np )

        # columnas de X
        index_names = self.columns
        # Output
        beta_OLS_output = pd.DataFrame( beta_ols , index = index_names , columns = [ 'Coef.' ] )
        
        # Dataframe de coeffientes como atributo 
        
        self.beta_OLS = beta_OLS_output
        
        return beta_OLS_output
    
    def reg_var_OLS( self ):
    
        # Se corre la función reg_beta_OLS que estima el vector de coeficientes
        
        self.reg_beta_OLS()
        
        X_np = self.X_np
        y_np = self.y_np
        
        
        # beta_ols
        beta_OLS = self.beta_OLS.values.reshape( - 1, 1 ) # Dataframe a vector columna 

        # errors
        e = y_np - ( X_np @ beta_OLS )

        # error variance
        N = X.shape[ 0 ]
        total_parameters = X.shape[ 1 ]
        error_var = ( (e.T @ e)[ 0 ] )/( N - total_parameters )

        # Varianza
        var_OLS =  error_var * np.linalg.inv( X_np.T @ X_np )

        # columns names 
        index_names = self.columns
        # output
        var_OLS_output = pd.DataFrame( var_OLS , index = index_names , columns = index_names )
        ## variance output como nuevo atributo del objeto
        self.var_OLS = var_OLS_output

        
    def reg_OLS( self ):
        
        # Se corren las funciones
        self.reg_beta_OLS()
        self.reg_var_OLS()
        X = self.X_np
        
        # var y beta
        beta_OLS = self.beta_OLS.values.reshape( -1, 1 )
        var_OLS = self.var_OLS.values
        
        # standard errors
        beta_se = np.sqrt( np.diag( var_OLS ) )

        # Se calcula el test statistic para cada coeficiente
        t_stat = beta_OLS.ravel() / beta_se.ravel()
        
        
        # .ravel() te multiarray a simple array

        # p-value:
        N = X.shape[ 0 ]
        k = beta_OLS.size
        pvalue = (1 - t.cdf(t_stat, df= N - k) ) * 2

        # Confidence interval
        
        up_bd = beta_OLS.ravel() + 1.96*beta_se
        lw_bd = beta_OLS.ravel() - 1.96*beta_se

        table_data ={  'Coef.'    : beta_OLS.ravel() ,
                       "Std.Err." : beta_se.ravel(),
                       "t"        : t_stat.ravel(),
                       "P>|t|"    : pvalue.ravel(), 
                       "[0.025"   : lw_bd.ravel(),
                       "0.975]"   : up_bd.ravel()
                    }
        
        # defining index names
        index_names = self.columns
        
        # defining a pandas dataframe 
        reg_OLS = pd.DataFrame( table_data , index = index_names )

        return reg_OLS
    


In [276]:
A = RegClass( X, y )

In [277]:
A.reg_beta_OLS()

AttributeError: 'RegClass' object has no attribute 'reg_beta_OLS'

In [274]:
A.__X

AttributeError: 'RegClass' object has no attribute '__X'

In [281]:
dir(RegClass)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_reg_beta_OLS',
 'reg_OLS',
 'reg_var_OLS']