In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from IPython.display import display, Markdown

## Preparación y lectura de datos

In [3]:
df = pd.read_excel('Regresión Lineal.xlsx', sheet_name = 1)
df['B0'] = 1
df['est+e'] = df['Estatura'] + np.random.normal(0,0.01,size = len(df['Estatura']))
for i in range(2, 10):
    df[f'x^{i}'] = df['Estatura'] ** i
df['C'] = 4
df.head()

Unnamed: 0,Estatura,Peso,B0,est+e,x^2,x^3,x^4,x^5,x^6,x^7,x^8,x^9,C
0,1.62,63,1,1.623493,2.6244,4.251528,6.887475,11.15771,18.07549,29.282294,47.437317,76.848453,4
1,1.83,71,1,1.836797,3.3489,6.128487,11.215131,20.52369,37.558353,68.731786,125.779168,230.175878,4
2,1.72,64,1,1.716168,2.9584,5.088448,8.752131,15.053665,25.892303,44.534761,76.599789,131.751638,4
3,1.71,67,1,1.713505,2.9241,5.000211,8.550361,14.621117,25.00211,42.753608,73.10867,125.015826,4
4,1.84,85,1,1.838548,3.3856,6.229504,11.462287,21.090609,38.80672,71.404365,131.384032,241.746618,4


## Explicación de matrices

- $ \beta = (X^TX)^{-1}X^TY $
- $ c = X^TX $
- $ d = X^TY $
- $ c_{inv} = (X^TX)^{-1} $
- $ beta = c_{inv}d $

Para que esto funcione $X^TX$ tiene que ser invertible.

Si c ($X^TT$) tiene un valor propio que es 0, no se puede invertir y su determinante no puede ser 0.

## Calculo de betas

In [7]:
variables = ['B0', 'Estatura','x^2','x^3','x^4']
def regresion(df):
    x = np.array(df[variables])
    y = np.array(df[['Peso']])

    c = np.dot(x.T,x) # X^T X
    d = np.dot(x.T, y) # X^T y
    valores_propios, vectores_propios = np.linalg.eig(c)
    det = np.linalg.det(c)
    
    if det > 0:
        c_inv = np.linalg.inv(c) # (X^T X)^-1
        beta = np.dot(c_inv, d)
        betas = []
        for i in range(len(beta)):
            betas.append(beta[i][0])
        for i, b in enumerate(betas):
            print(f"B{i} = {b}")
    else:
        print('X^T T no es invertible')

In [8]:
regresion(df)

B0 = -482881.4871146482
B1 = 1159755.177067822
B2 = -1042813.5571411513
B3 = 416076.12811319623
B4 = -62150.97092024267


## Análisis de ruido

In [10]:
x_new = df[['Estatura','est+e']]
display(Markdown("**Matriz de Covarianza**"))
display(x_new.cov())
display(Markdown("**Matriz de Correlación**"))
display(x_new.corr())

**Matriz de Covarianza**

Unnamed: 0,Estatura,est+e
Estatura,0.009135,0.009326
est+e,0.009326,0.009566


**Matriz de Correlación**

Unnamed: 0,Estatura,est+e
Estatura,1.0,0.997717
est+e,0.997717,1.0


In [11]:
np.linalg.det(x_new.cov())

3.985434229222063e-07

## Uso de $\lambda I$

In [13]:
variables = ['B0', 'Estatura','C']

x = np.array(df[variables])
y = np.array(df[['Peso']])

c = np.dot(x.T,x) # X^T X
d = np.dot(x.T, y) # X^T y
#valores_propios, vectores_propios = np.linalg.eig(c)
#det = np.linalg.det(c)

### $X^TX$ y valores propios

In [15]:
c

array([[ 21.    ,  35.83  ,  84.    ],
       [ 35.83  ,  61.3155, 143.32  ],
       [ 84.    , 143.32  , 336.    ]])

In [16]:
valores_propios, vectores_propios = np.linalg.eig(c)
valores_propios

array([ 4.18159526e+02, -8.07683128e-15,  1.55974445e-01])

### $X^TX + \lambda I$ y valores propios

In [18]:
lambda_I = 1 * np.identity(3)
c_i = c+lambda_I
c_i

array([[ 22.    ,  35.83  ,  84.    ],
       [ 35.83  ,  62.3155, 143.32  ],
       [ 84.    , 143.32  , 337.    ]])

In [19]:
valores_propios, vectores_propios = np.linalg.eig(c_i)
valores_propios

array([419.15952555,   1.        ,   1.15597445])

### $\beta 's$ para la regresión

In [21]:
c_inv = np.linalg.inv(c_i) # (X^T X)^-1 + lambda I
beta = np.dot(c_inv, d)
betas = []

for i in range(len(beta)):
    betas.append(beta[i][0])
for i, b in enumerate(betas):
    print(f"B{i} = {b}")

B0 = 2.1100426241337766
B1 = 17.432451592518543
B2 = 8.440170496537144
