In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# End-to-end pipeline: load the SDSS table, encode the target, split, scale,
# train a histogram-based gradient boosting classifier and report metrics.

# 1. Load the data.
# skiprows=1 drops the first physical line of the file, so the column names
# are read from the second line.
# NOTE(review): presumably the export starts with a non-header line — confirm
# against the actual sdss.csv.
df = pd.read_csv('sdss.csv', skiprows=1)

# 2. Map the class labels to integer codes.
class_mapping = {'STAR': 0, 'GALAXY': 1, 'QSO': 2}
# Series.map turns every value that is not a key of the dict into NaN, which
# would silently corrupt the target (and make it float). Fail early with a
# message that names the offending labels instead.
unknown_labels = set(df['class'].unique()) - set(class_mapping)
if unknown_labels:
    raise ValueError(
        "Unexpected values in 'class' column: "
        f"{sorted(map(str, unknown_labels))}"
    )
df['class'] = df['class'].map(class_mapping)

# 3. Select the features and the target.
features = ['redshift', 'i', 'u', 'g', 'r', 'z']
X = df[features]
y = df['class']

# 4. Split into train and test sets (80/20). stratify=y keeps the class
# proportions the same in both splits; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1, stratify=y
)

# 5. Scale the features to the [0, 1] range.
# The scaler is fitted on the training split only and then applied to the test
# split, so no statistic of the test data leaks into the transformation.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 6. Train the gradient boosting model on the selected features.
modelo_gb_selecionado = HistGradientBoostingClassifier(random_state=1)
modelo_gb_selecionado.fit(X_train, y_train)

# 7. Predict hard labels and per-class probabilities for the test set.
y_pred = modelo_gb_selecionado.predict(X_test)
y_proba = modelo_gb_selecionado.predict_proba(X_test)

# 8. Compute the metrics.
acuracia = accuracy_score(y_test, y_pred)
# average='weighted': per-class F1 averaged with class support as weights.
f1 = f1_score(y_test, y_pred, average='weighted')
# Multiclass ROC-AUC needs the probability matrix, not the hard labels;
# multi_class='ovr' scores each class against the rest.
roc_auc = roc_auc_score(y_test, y_proba, multi_class='ovr')

# 9. Print the metrics.
print(f"Acurácia: {acuracia:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"ROC-AUC: {roc_auc:.4f}")
