In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score

# === 1. Load the data ===
df = pd.read_csv('df_2024.csv')

# === 2. Create the target variable ===
# 1 when the recorded result is 'H', 0 for any other value.
# NOTE(review): 'H' is taken to mean "home win" — confirm against the data source.
df['HomeWin'] = (df['Res'] == 'H').astype(int)

# === 3. Select the relevant features ===
# NOTE(review): these look like home/draw/away odds columns from three
# sources (PSC*, MaxC*, AvgC*) — confirm against the data dictionary.
feature_cols = [
    'PSCH', 'PSCD', 'PSCA',
    'MaxCH', 'MaxCD', 'MaxCA',
    'AvgCH', 'AvgCD', 'AvgCA',
]

# === 4. Filter the dataset for modelling ===
# A missing 'Res' compares unequal to 'H', so such a row carries HomeWin == 0
# and, because HomeWin itself is never NaN, dropna() alone would keep it as a
# genuine "no home win" example. Exclude rows without a recorded result first,
# then drop rows with any missing feature value.
df_model = df.loc[df['Res'].notna(), feature_cols + ['HomeWin']].dropna()

# Feature matrix and binary target used by the steps below.
X = df_model.drop(columns=['HomeWin'])
y = df_model['HomeWin']

# === 5. Train/test split ===
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# === 6. Feature standardisation ===
# The scaler is fitted on the training rows only and the same fitted
# statistics are then applied to both partitions, so the test rows do not
# influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === 7. Model definitions ===
# Candidate classifiers keyed by the display name used in the report.
# Every estimator receives the same seed. SVC is created with
# probability=True because predict_proba is called on each model later on.
_SEED = 42
models = {
    "Logistic Regression": LogisticRegression(random_state=_SEED),
    "Random Forest": RandomForestClassifier(random_state=_SEED),
    "SVM": SVC(random_state=_SEED, probability=True),
}

# Metrics collected per model, keyed by the model's display name.
results = {}

# === 8. Training and evaluation ===
for name, model in models.items():
    # Fit on the scaled training matrix, then score the scaled held-out matrix.
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    # Keep only the second probability column (index 1), which is used as the
    # score for ROC AUC.
    y_proba = model.predict_proba(X_test_scaled)[:, 1]

    # Accuracy and the confusion matrix use the hard predictions; ROC AUC uses
    # the probability scores.
    results[name] = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Confusion Matrix": confusion_matrix(y_test, y_pred),
        "ROC AUC": roc_auc_score(y_test, y_proba),
    }

# === 9. Show the results ===
# One report per model: a blank line, the model name, accuracy and ROC AUC
# with four decimals, then the confusion matrix on the following lines.
for name, metrics in results.items():
    print(
        f"\nModelo: {name}",
        f"Accuracy: {metrics['Accuracy']:.4f}",
        f"ROC AUC: {metrics['ROC AUC']:.4f}",
        "Confusion Matrix:",
        metrics['Confusion Matrix'],
        sep="\n",
    )



Modelo: Logistic Regression
Accuracy: 0.6184
ROC AUC: 0.7507
Confusion Matrix:
[[26 22]
 [ 7 21]]

Modelo: Random Forest
Accuracy: 0.6184
ROC AUC: 0.6916
Confusion Matrix:
[[29 19]
 [10 18]]

Modelo: SVM
Accuracy: 0.6711
ROC AUC: 0.7894
Confusion Matrix:
[[30 18]
 [ 7 21]]
