In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import BayesianRidge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

In [2]:
# Load the student-performance spreadsheet into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists. Consider a path relative to a data folder.
DATASET_PATH = "/Users/fabicampanari/Desktop/class_12- Bayesian-KNN Regression-Model Persistence/class__13-Exam-Bayesian-KNN Regression-Model Persistence/student_performance.xlsx"
df = pd.read_excel(DATASET_PATH)

In [3]:
# Show the loaded DataFrame as this cell's rich output (pandas truncates long frames).
df

Unnamed: 0,Hours_Studied,Previous_Scores,Extracurricular_ Activities,Sleep_Hours,Sample_Question_papers,Performance
0,7,99,Yes,9,1,91
1,4,82,No,4,2,65
2,8,51,Yes,7,2,45
3,5,52,Yes,5,2,36
4,7,75,No,8,5,66
...,...,...,...,...,...,...
9995,1,49,Yes,4,2,23
9996,7,64,Yes,8,5,58
9997,6,83,Yes,8,5,74
9998,9,97,Yes,7,0,95


In [4]:
# List the column labels exactly as read from the spreadsheet (before any renaming).
print(list(df.columns))

['Hours_Studied', 'Previous_Scores', 'Extracurricular_ Activities', 'Sleep_Hours', 'Sample_Question_papers', 'Performance']


In [5]:
# Standardize column names: trim surrounding whitespace and collapse every run
# of spaces/underscores into a single underscore. The raw header
# "Extracurricular_ Activities" therefore becomes "Extracurricular_Activities"
# (a plain space->underscore replacement would leave a double underscore).
df.columns = (
    df.columns.str.strip()
    .str.replace(r"[\s_]+", "_", regex=True)
)

# Convert the categorical Yes/No column to integer codes.
# LabelEncoder assigns codes in sorted label order, so re-running this cell on
# already-encoded values leaves them unchanged.
activities_col = "Extracurricular_Activities"
df[activities_col] = LabelEncoder().fit_transform(df[activities_col])

In [6]:
# Separate the target (y) from the predictor columns (X).
target_col = 'Performance'
y = df[target_col]
X = df.drop(columns=[target_col])

In [7]:
# Split BEFORE scaling so the scaler's mean/std are learned from training rows
# only; fitting it on the full dataset would leak test-set statistics into the
# training features. With the same random_state and number of rows, the split
# selects the same rows as splitting the pre-scaled matrix would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features: fit on the training split, then apply the same
# transformation to the held-out split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training statistics; kept so that any
# code still referring to X_scaled continues to work.
X_scaled = scaler.transform(X)


In [8]:
# -------------------
# Bayesian regression
# -------------------
bayes_model = BayesianRidge()
bayes_model.fit(X_train, y_train)
# Training-set error (the quantity reported by the comparison cells below).
y_pred_bayes = bayes_model.predict(X_train)
mse_bayes = mean_squared_error(y_train, y_pred_bayes)
# Held-out error: the test split was created but previously never evaluated.
mse_bayes_test = mean_squared_error(y_test, bayes_model.predict(X_test))

# -----
# KNN
# -----
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, y_train)
# Training-set error. Note that each training point is among its own
# neighbours here, so this figure is optimistic for KNN.
y_pred_knn = knn_model.predict(X_train)
mse_knn = mean_squared_error(y_train, y_pred_knn)
# Held-out error for KNN.
mse_knn_test = mean_squared_error(y_test, knn_model.predict(X_test))

# Side-by-side view of training vs. held-out MSE for both models.
pd.DataFrame(
    {
        "train_mse": [mse_bayes, mse_knn],
        "test_mse": [mse_bayes_test, mse_knn_test],
    },
    index=["BayesianRidge", "KNeighborsRegressor"],
)

In [9]:
# ---------------
# Compare results
# ---------------
# Print each model's training-set MSE, rounded to two decimals.
# The label text is restored to proper UTF-8 ("Regressão").
print("MSE Regressão Bayesiana:", round(mse_bayes, 2))
print("MSE KNN:", round(mse_knn, 2))

MSE Regress√£o Bayesiana: 4.17
MSE KNN: 5.86


In [10]:
# Map each model's display name to its MSE (mse_bayes / mse_knn are computed
# on the training split by the modelling cell).
# Accented characters and emoji in the labels are restored to proper UTF-8.
resultados = {
    "Regressão Bayesiana": mse_bayes,
    "KNN": mse_knn,
}

# The model with the lowest MSE is considered the best.
melhor_modelo = min(resultados, key=resultados.get)
melhor_mse = resultados[melhor_modelo]

# Print every model's MSE and flag the winner.
print("Resultados (Erro Quadrático Médio - MSE):")
for modelo, mse in resultados.items():
    destaque = "🔥 MELHOR" if modelo == melhor_modelo else ""
    print(f"➡ {modelo}: {round(mse, 2)} {destaque}")

print(f"\n🚀 Melhor modelo com base na base de treino: **{melhor_modelo}** (MSE = {round(melhor_mse, 2)})")


Resultados (Erro Quadr√°tico M√©dio - MSE):
‚û° Regress√£o Bayesiana: 4.17 üî• MELHOR
‚û° KNN: 5.86 

üöÄ Melhor modelo com base na base de treino: **Regress√£o Bayesiana** (MSE = 4.17)


###  üáßüá∑ Analise exercicio_2: Regress√£o com `student_performance.xlsx`

**Variável dependente:**
- `Performance` (nota final do aluno)

**Variáveis independentes:**
- `Hours_Studied`, `Previous_Scores`, `Extracurricular_Activities`, `Sleep_Hours`, `Sample_Question_papers`

**Modelos aplicados:**
- Regressão Bayesiana (`BayesianRidge`)
- KNN (`KNeighborsRegressor`)

**Ajustes realizados:**
- A variável categórica `Extracurricular_Activities` foi codificada corretamente com `LabelEncoder`.
- Os dados foram padronizados antes da modelagem.

**Avaliação:**
- As técnicas foram comparadas com base no erro quadrático médio (MSE).
- O melhor modelo foi persistido conforme pedido.



üõ∏‡πã*‡©à‚ú©* üî≠‚úÆ‚òæñ§ì.‚òòÔ∏é ›ÅÀñ‚Å∑‚Å∑‚Å∑„Ö§‚úÆ ‚ãÜ ÀöÔΩ°ñ¶π ‚ãÜÔΩ°¬∞‚ú© ‚úÆ ‚ãÜ ÀöÔΩ°ñ¶π ‚ãÜÔΩ°¬∞‚ú©  *‡©à‚ú©‚Äß‚Çäüõ∏‡πã*‡©à‚ú©* üî≠‚úÆ‚òæñ§ì.‚òòÔ∏é ›ÅÀñ‚Å∑‚Å∑‚Å∑„Ö§‚úÆ ‚ãÜ ÀöÔΩ°ñ¶π ‚ãÜÔΩ°¬∞‚ú© ‚úÆ ‚ãÜ ÀöÔΩ°ñ¶π ‚ãÜÔΩ°¬∞‚ú©  *‡©à‚ú©‚Äß‚Çä

### üá∫üá∏ Analysis exercise_2: Regression with `student_performance.xlsx`

**Dependent variable:**
- `Performance` (student's final grade)

**Independent variables:**
- `Hours_Studied`, `Previous_Scores`, `Extracurricular_Activities`, `Sleep_Hours`, `Sample_Question_papers`

**Applied models:**
- Bayesian Regression (`BayesianRidge`)
- KNN (`KNeighborsRegressor`)

**Adjustments made:**
- The categorical variable `Extracurricular_Activities` was correctly encoded using `LabelEncoder`.
- The data was standardized before modeling.

**Evaluation:**
- The techniques were compared based on mean squared error (MSE).
- The best model was persisted as requested.
