In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, f1_score, roc_auc_score

def logistic_regression():
    # Read the data
    df = pd.read_csv('pca_transformed_data.csv')

    # Split into X (features) and y (result)
    X = df.drop('result', axis=1)  
    y = df['result']

    # Train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

    # Make the model
    model = LogisticRegression()
    # Fit training data to model
    model.fit(X_train, y_train)

    # Make model predictions
    y_pred = model.predict(X_test)

    # Calculate metrics
    l_loss = log_loss(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred)
    

    # Print results
    print("Model Performance:")
    print(f"Log Loss: {l_loss:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"ROC AUC Score: {roc_auc:.4f}")
    

    # Look at which components matter most
    importance = pd.DataFrame({
        'Component': X.columns,
        'Coefficient': abs(model.coef_[0])
    })
    importance = importance.sort_values('Coefficient', ascending=False)
    print("\nComponent Importance:")
    print(importance)



logistic_regression()

Model Performance:
Log Loss: 1.9515
F1 Score: 0.9439
ROC AUC Score: 0.9459

Component Importance:
                               Component  Coefficient
3                       Grubs_and_Herald     1.300627
0              Gold_Advantage_and_Towers     1.179765
7  Early_to_Late_Game_Objective_Sequence     0.432457
6   Laning_Phase_and_Early_Baron_Control     0.369337
2                      Teamfight_Metrics     0.262815
8             Late_Game_Elder_Teamfights     0.234132
4                         Herald_Control     0.217521
5              Monster_Objective_Control     0.187883
1                         Vision_Control     0.015455
