In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
# mtcars dataset, read from a CSV file hosted on GitHub.
URL_MTCARS = 'https://raw.githubusercontent.com/Cayan-Portela/ceub/main/dados/mtcars.csv'
dados = pd.read_csv(URL_MTCARS)
dados.head()

Unnamed: 0,car,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


$\text{mpg}_i = \beta_0 + \beta_1 \text{wt}_i + e_i$

In [3]:
# Predictor kept as a one-column DataFrame (double brackets); response as a Series.
X, y = dados[['wt']], dados['mpg']

In [4]:
# Fit mpg on wt with scikit-learn; the result is the reference for the
# hand-made estimates computed further down.
regr = LinearRegression()
regr.fit(X=X, y=y)

In [5]:
# Intercept and (single) slope estimated by scikit-learn.
b0, b1 = regr.intercept_, regr.coef_[0]

print("LinearRegression()")
print(f"b0: {np.round(b0, 4)}\nb1: {np.round(b1, 4)}")

LinearRegression()
b0: 37.2851
b1: -5.3445


---

Calculando $\hat{\beta}_0$ e $\hat{\beta}_1$ "na mão"

$\hat{\beta}_0 = \bar{y} - \hat{\beta}_1 \bar{x}$

In [6]:
def beta_zero(x, y, b1):
    """Estimate the intercept of a simple linear regression.

    Implements ``b0 = mean(y) - b1 * mean(x)``.

    Parameters
    ----------
    x : array-like
        Values of the explanatory variable.
    y : array-like
        Values of the response variable.
    b1 : float
        Slope estimate for the same data.

    Returns
    -------
    float
        The intercept estimate.
    """
    media_y = np.mean(y)
    media_x = np.mean(x)
    return media_y - b1 * media_x

$\hat{\beta}_1 = \frac{\sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})}{\sum_{i=1}^{n}(x_i - \bar{x})^2}$

In [7]:
def beta_um(x, y):
    """Estimate the slope of a simple linear regression by least squares.

    Implements
    ``b1 = sum((x_i - mean(x)) * (y_i - mean(y))) / sum((x_i - mean(x))**2)``.

    Parameters
    ----------
    x : array-like
        Values of the explanatory variable. Lists, tuples, NumPy arrays,
        pandas Series and single-column DataFrames are accepted; the input
        is flattened to one dimension.
    y : array-like
        Values of the response variable, with the same number of
        observations as ``x``. Observations are paired by position.

    Returns
    -------
    float
        The slope estimate. If every ``x`` value is identical the
        denominator is zero and the result is not finite. Missing values
        (NaN) propagate to the result.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same number of observations.
    """
    # Convert up front: plain lists/tuples have neither element-wise
    # arithmetic nor ``.sum()``, and a one-column DataFrame would otherwise
    # not line up with a 1-D response.
    x_arr = np.asarray(x, dtype=float).ravel()
    y_arr = np.asarray(y, dtype=float).ravel()

    if x_arr.size != y_arr.size:
        raise ValueError(
            f"x and y must have the same number of observations "
            f"(got {x_arr.size} and {y_arr.size})"
        )

    # Deviations from the mean are reused in numerator and denominator.
    desvio_x = x_arr - x_arr.mean()
    desvio_y = y_arr - y_arr.mean()

    b1_cima = (desvio_x * desvio_y).sum()
    b1_baixo = (desvio_x ** 2).sum()

    return b1_cima / b1_baixo

In [8]:
# Definindo x e y
x_ = dados['wt']
y_ = dados['mpg']

In [9]:
# The slope comes first because the intercept formula takes it as an input.
b1_ = beta_um(x_, y_)
b0_ = beta_zero(x_, y_, b1_)

In [10]:
# Side-by-side comparison: scikit-learn estimates first, then the manual ones.
print("LinearRegression()")
print(f"b0: {np.round(b0, 4)}\nb1: {np.round(b1, 4)}")

print("\nNossos betas")
print(f"b0: {np.round(b0_, 4)}\nb1: {np.round(b1_, 4)}")

LinearRegression()
b0: 37.2851
b1: -5.3445

Nossos betas
b0: 37.2851
b1: -5.3445


---

Matricialmente

In [11]:
from numpy.linalg import inv

$\hat{\beta} = (X'X)^{-1}X'Y$

In [12]:
# Number of observations and a matching vector of ones (the intercept column).
# NOTE(review): matriz_x recomputes both of these as locals, so these
# module-level names look unused in the cells shown; confirm before removing.
n_ = len(dados)
col_1 = np.ones(n_)

In [13]:
def matriz_x(colunas, dados):
    """Build a design matrix with a leading column of ones.

    Parameters
    ----------
    colunas : list of str
        Labels used to select the regressor columns via ``dados[colunas]``.
    dados : pandas.DataFrame
        Table holding the regressors; its row count sets the matrix height.

    Returns
    -------
    numpy.ndarray
        Array whose first column is all ones (the intercept term), followed
        by the selected columns in the order given.
    """
    n_linhas = dados.shape[0]
    regressores = np.array(dados[colunas])
    intercepto = np.ones(n_linhas)

    return np.column_stack((intercepto, regressores))

In [14]:
# Design matrix for the model mpg ~ 1 + wt.
X_mat = matriz_x(["wt"], dados)

Criando $X'$ e $X'X$ (a inversa $(X'X)^{-1}$ é calculada mais abaixo, junto com $\hat{\beta}$)

In [15]:
# X' (transpose of the design matrix) and the cross-product X'X.
Xl = X_mat.T
Xlx = Xl @ X_mat

$\hat{\beta} = (X'X)^{-1}X'Y$

In [16]:
# beta_hat = (X'X)^{-1} X'Y, evaluated left to right as in the formula.
# The explicit inverse is kept on purpose to mirror the textbook expression.
Xlx_inv = inv(Xlx)
beta_mat = Xlx_inv @ Xl @ y_
beta_mat

array([37.28512617, -5.34447157])

In [17]:
# Report the coefficients obtained from the matrix formula
# (index 0 = intercept, index 1 = slope for wt).
print("Betas matricialmente")
print(f"b0: {np.round(beta_mat[0], 4)}\nb1: {np.round(beta_mat[1], 4)}")

Betas matricialmente
b0: 37.2851
b1: -5.3445
