In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import log_loss, f1_score, roc_auc_score

def gradient_boosted_decision_tree():
    # Read the data
    df = pd.read_csv('pca_transformed_data.csv')

    # Split into X (features) and y (result)
    X = df.drop('result', axis=1)  
    y = df['result']

    # Train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

    # Make the model
    model = GradientBoostingClassifier(random_state=10)
    # Fit training data to model
    model.fit(X_train, y_train)

    # Make model predictions
    y_pred = model.predict(X_test)
    
    # Parameter grid for grid search
    param_grid = {
        'n_estimators': [100, 150, 200],
        'learning_rate': [0.01, 0.1, 0.3],
        'max_depth': [3, 5, 7],
        'subsample': [0.75, 1.0]
    }
    
    # Grid search object
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=5,
        scoring='f1'
    )
    
    # Perform grid search
    grid_search.fit(X_train, y_train)
    
    # Print best parameters
    print("\nBest parameters:")
    print(grid_search.best_params_)
    
    # Get the best model
    best_model = grid_search.best_estimator_
    
    # Make predictions with the best model
    y_pred = best_model.predict(X_test)
    
    # Calculate metrics
    l_loss = log_loss(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred)
    

    # Print results
    print("Model Performance:")
    print(f"Log Loss: {l_loss:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"ROC AUC Score: {roc_auc:.4f}")

    # See what are the most important features
    feature_importances = model.feature_importances_
    
    # Look at which components matter most
    importance = pd.DataFrame({
        'Component': X.columns,
        'Feature_Importance': feature_importances
    })
    importance = importance.sort_values('Feature_Importance', ascending=False)
    
    print("\nComponent Importance")
    print(importance)



gradient_boosted_decision_tree()


Best parameters:
{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 150, 'subsample': 0.75}
Model Performance:
Log Loss: 1.8976
F1 Score: 0.9456
ROC AUC Score: 0.9475

Component Importance
                               Component  Feature_Importance
0              Gold_Advantage_and_Towers            0.850305
3                       Grubs_and_Herald            0.124914
2                      Teamfight_Metrics            0.009135
1                         Vision_Control            0.004327
6   Laning_Phase_and_Early_Baron_Control            0.003700
8             Late_Game_Elder_Teamfights            0.003066
7  Early_to_Late_Game_Objective_Sequence            0.001788
5              Monster_Objective_Control            0.001474
4                         Herald_Control            0.001291
