In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import BayesianRidge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

In [6]:
# Load the data from the Excel workbook.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact location exists.
consumo_xlsx = "/Users/fabicampanari/Desktop/class_12- Bayesian-KNN Regression-Model Persistence/Consumo.xlsx"
df_Construcao = pd.read_excel(consumo_xlsx)
df_Construcao

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
387,27.0,4,140.0,86,2790,15.6,82,1,ford mustang gl
388,44.0,4,97.0,52,2130,24.6,82,2,vw pickup
389,32.0,4,135.0,84,2295,11.6,82,1,dodge rampage
390,28.0,4,120.0,79,2625,18.6,82,1,ford ranger


In [8]:
# Split the frame into predictors (X) and target (y)
feature_cols = ['mpg', 'cylinders', 'displacement', 'horsepower']
target_col = 'weight'
X = df_Construcao[feature_cols].values
# The target is kept as a column vector of shape (n_samples, 1)
y = df_Construcao[target_col].values.reshape(-1, 1)

In [9]:
# Standardization: features and target each get their own fitted scaler
scaler_X, scaler_y = StandardScaler(), StandardScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

# Manually prepend a column of ones so the first weight acts as the intercept
X_scaled_b = np.hstack([np.ones((X_scaled.shape[0], 1)), X_scaled])

In [10]:
# Batch gradient descent
def gradient_descent(X, y, lr, n_iter, random_state=None):
    """Fit linear-regression weights by full-batch gradient descent on the MSE.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix. No intercept is added here; include a column of ones
        in ``X`` if a bias term is wanted.
    y : array-like of shape (m,) or (m, 1)
        Targets. They are reshaped to a column vector so that a 1-D ``y``
        cannot broadcast against the (m, 1) predictions.
    lr : float
        Learning rate (step size) applied to every update.
    n_iter : int
        Number of full-batch updates to perform.
    random_state : int or None, default None
        Seed for the random initial weights. ``None`` keeps the previous
        behaviour of drawing from NumPy's global RNG.

    Returns
    -------
    numpy.ndarray of shape (n, 1)
        The weight vector after ``n_iter`` updates.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.
    """
    X = np.asarray(X)
    y = np.asarray(y).reshape(-1, 1)
    m = len(y)
    if X.shape[0] != m:
        raise ValueError(
            f"X has {X.shape[0]} rows but y has {m} values; they must match."
        )

    # The np.random module and a RandomState instance expose the same `randn`,
    # so the unseeded path consumes the global stream exactly as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    theta = rng.randn(X.shape[1], 1)

    for _ in range(n_iter):
        # Gradient of the mean squared error with respect to theta.
        gradients = 2 / m * X.T @ (X @ theta - y)
        theta -= lr * gradients
    return theta

# Stochastic gradient descent
def stochastic_gradient_descent(X, y, lr, n_iter, random_state=None):
    """Fit linear-regression weights by single-sample stochastic gradient descent.

    Each of the ``n_iter`` epochs performs ``m`` updates; every update uses one
    sample drawn uniformly at random (with replacement) from the data.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix. No intercept is added here; include a column of ones
        in ``X`` if a bias term is wanted.
    y : array-like of shape (m,) or (m, 1)
        Targets; reshaped internally to a column vector.
    lr : float
        Learning rate (step size), constant across all updates.
    n_iter : int
        Number of epochs (each epoch is ``m`` single-sample updates).
    random_state : int or None, default None
        Seed for both the initial weights and the sample draws. ``None`` keeps
        the previous behaviour of drawing from NumPy's global RNG.

    Returns
    -------
    numpy.ndarray of shape (n, 1)
        The weight vector after the last update.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.
    """
    X = np.asarray(X)
    y = np.asarray(y).reshape(-1, 1)
    m = len(y)
    if X.shape[0] != m:
        # Without this check, an index past the end of X yields an empty slice
        # and a silent zero update instead of an error.
        raise ValueError(
            f"X has {X.shape[0]} rows but y has {m} values; they must match."
        )

    # The np.random module and a RandomState instance expose the same
    # `randn`/`randint`, so the unseeded path behaves exactly as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    theta = rng.randn(X.shape[1], 1)

    for _epoch in range(n_iter):
        for _ in range(m):
            rand_index = rng.randint(m)
            # Slices (not scalar indexing) keep xi as (1, n) and yi as (1, 1).
            xi = X[rand_index:rand_index + 1]
            yi = y[rand_index:rand_index + 1]
            # Gradient of the squared error on this single sample.
            gradients = 2 * xi.T @ (xi @ theta - yi)
            theta -= lr * gradients
    return theta

In [11]:
# Evaluate every (learning rate, iteration count) combination with both optimizers
SEED = 42
# Both optimizers draw from NumPy's global RNG (the initial theta, and SGD's
# sample indices). Seeding at the top of the cell makes the results table
# identical every time the cell is re-run.
np.random.seed(SEED)

results = []

for lr in [0.01, 0.02, 0.05]:
    for iters in [1000, 10000]:
        # Batch gradient descent. The MSE is computed on the same standardized
        # data the weights were fit on (in-sample, standardized units).
        theta_gd = gradient_descent(X_scaled_b, y_scaled, lr, iters)
        y_pred_gd = X_scaled_b @ theta_gd
        mse_gd = mean_squared_error(y_scaled, y_pred_gd)

        # Stochastic gradient descent with the same hyper-parameters
        theta_sgd = stochastic_gradient_descent(X_scaled_b, y_scaled, lr, iters)
        y_pred_sgd = X_scaled_b @ theta_sgd
        mse_sgd = mean_squared_error(y_scaled, y_pred_sgd)

        results.append({
            'Method': 'GD',
            'Learning Rate': lr,
            'Iterations': iters,
            'MSE': mse_gd
        })
        results.append({
            'Method': 'SGD',
            'Learning Rate': lr,
            'Iterations': iters,
            'MSE': mse_sgd
        })

# Collect the results in a DataFrame, best (lowest MSE) first
df_results = pd.DataFrame(results)
print(df_results.sort_values(by='MSE'))

   Method  Learning Rate  Iterations       MSE
6      GD           0.02       10000  0.108936
10     GD           0.05       10000  0.108936
2      GD           0.01       10000  0.108936
8      GD           0.05        1000  0.108944
4      GD           0.02        1000  0.110826
1     SGD           0.01        1000  0.110965
0      GD           0.01        1000  0.111062
7     SGD           0.02       10000  0.113757
5     SGD           0.02        1000  0.117009
3     SGD           0.01       10000  0.118778
9     SGD           0.05        1000  0.131393
11    SGD           0.05       10000  0.166738


In [12]:
# Best scenario: the row of the results table with the lowest MSE
best_idx = df_results['MSE'].idxmin()
best = df_results.loc[best_idx]
print("\n🔥 Melhor resultado:")
print(best)


🔥 Melhor resultado:
Method                 GD
Learning Rate        0.02
Iterations          10000
MSE              0.108936
Name: 6, dtype: object


### 🇺🇸 Analysis - Exercise 3: Sales study of the store chain `CONSTRUCAO`

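**Dependent variable:**
- `qt_venda` (quantity sold) — **note:** the code above actually fits the `weight` column of the data loaded from `Consumo.xlsx`.

**Independent variables:**
- `gast_prop`, `n_cont`, `n_marc`, `n_loj` — **note:** the code above actually uses `mpg`, `cylinders`, `displacement`, and `horsepower`.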

**Applied models:**
- Custom Gradient Descent with:
    - Iterations: 1000 and 10000
    - Learning rates: 0.01, 0.02, 0.05
- Stochastic Gradient Descent (SGD) with the same parameters

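**Evaluation:**
- All 12 scenarios (2 methods × 3 learning rates × 2 iteration counts) were tested.
- The code compares the mean squared error (MSE) across the scenarios and identifies the best one: batch GD with 10000 iterations reached the lowest MSE (≈ 0.1089, measured on the standardized target).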

🛸๋*ੈ✩* 🔭✮☾𖤓.☘︎ ݁˖⁷⁷⁷ㅤ✮ ⋆ ˚｡𖦹 ⋆｡°✩ ✮ ⋆ ˚｡𖦹 ⋆｡°✩  *ੈ✩‧₊🛸๋*ੈ✩* 🔭✮☾𖤓.☘︎ ݁˖⁷⁷⁷ㅤ✮ ⋆ ˚｡𖦹 ⋆｡°✩ ✮ ⋆ ˚｡𖦹 ⋆｡°✩  *ੈ✩‧₊

### 🇧🇷 Análise - Exercício 3: Estudo de vendas da rede de lojas `CONSTRUCAO`

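**Variável dependente:**
- `qt_venda` (quantidade vendida) — **nota:** o código acima ajusta, na prática, a coluna `weight` dos dados carregados de `Consumo.xlsx`.

**Variáveis independentes:**
- `gast_prop`, `n_cont`, `n_marc`, `n_loj` — **nota:** o código acima usa, na prática, `mpg`, `cylinders`, `displacement` e `horsepower`.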

**Modelos aplicados:**
- Gradiente Descendente customizado com:
  - Iterações: 1000 e 10000
  - Learning rates: 0.01, 0.02, 0.05
- Gradiente Descendente Estocástico (SGD) com os mesmos parâmetros

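**Avaliação:**
- Os 12 cenários (2 métodos × 3 learning rates × 2 quantidades de iterações) foram testados.
- O código compara os erros quadráticos médios (MSE) entre os cenários e identifica o de melhor desempenho: o GD em lote com 10000 iterações obteve o menor MSE (≈ 0,1089, medido sobre a variável-alvo padronizada).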