In [68]:
import pandas as pd
import xgboost as xgb
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score



In [78]:
def train_best_model(data_path='sample_data/data_model.csv', max_iterations=200,
                     min_accuracy=0.60, device='cuda'):
    """Train XGBoost classifiers over many train/test splits and keep the best one.

    The CSV at ``data_path`` must provide the feature columns ``W``, ``D`` and
    ``L`` and a ``RESULTS`` column holding the labels ``'d'``, ``'l'`` or
    ``'w'``, which are encoded as 0, 1 and 2 respectively.  For every random
    state in ``1..max_iterations`` the data is split 80/20 (stratified on the
    label), the features are standardised with a scaler fitted on the training
    part only, and an ``XGBClassifier`` is trained with early stopping.

    NOTE(review): the held-out split is used both for early stopping and for
    choosing the winning random state, so the printed accuracy is an
    optimistic estimate.  Use a separate validation split if an unbiased
    number is needed.

    Args:
        data_path: Location of the training CSV (read as ISO-8859-1).
        max_iterations: Number of random states to try; must be >= 1.
        min_accuracy: Accuracy, as a fraction in [0, 1], that the best model is
            expected to reach.  Only affects the final message.
        device: XGBoost device string, e.g. ``'cuda'`` or ``'cpu'``.

    Returns:
        A ``(best_model, best_scaler)`` tuple: the fitted classifier with the
        highest test accuracy and the ``StandardScaler`` fitted on that
        model's training split.

    Raises:
        ValueError: If ``max_iterations`` is below 1 or ``RESULTS`` contains a
            value that is not one of the known labels.
    """
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    # Load the data
    data = pd.read_csv(data_path, encoding='ISO-8859-1')

    # Map actual labels to class labels
    class_mapping = {'d': 0, 'l': 1, 'w': 2}
    target = data['RESULTS'].map(class_mapping)

    # .map() silently turns unknown (or missing) labels into NaN; fail loudly
    # here instead of letting the split or the classifier choke on them later.
    unmapped = target.isna()
    if unmapped.any():
        unknown_labels = sorted(data.loc[unmapped, 'RESULTS'].astype(str).unique())
        raise ValueError(
            f"RESULTS contains labels outside {sorted(class_mapping)}: {unknown_labels}"
        )
    target = target.astype(int)

    # Preprocess the data
    features = data[['W', 'D', 'L']]

    # Track the winner; the model/scaler start as None so they are always bound.
    best_accuracy = 0.0
    best_random_state = None
    best_model = None
    best_scaler = None

    for random_state in range(1, max_iterations + 1):
        # Split the data into training and testing sets with a different random state each time
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.2, random_state=random_state, stratify=target
        )

        # Standardize features (statistics come from the training part only)
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Define the XGBoost model
        model = xgb.XGBClassifier(
            objective='multi:softmax',
            num_class=len(class_mapping),  # one class per entry in class_mapping
            early_stopping_rounds=15,
            reg_lambda=0.2,
            learning_rate=0.1,
            n_estimators=1000,
            tree_method='hist',
            device=device,
            eval_metric='merror'
        )

        # Train the model; verbose=False keeps the per-round eval log from
        # burying the one-line-per-iteration summary printed below.
        model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)

        # Make predictions on the test set
        y_pred = model.predict(X_test)

        # Evaluate the model (accuracy_score returns a fraction in [0, 1])
        current_accuracy = accuracy_score(y_test, y_pred)
        print(f"Iteration {random_state}: Accuracy = {current_accuracy * 100:.2f}%")

        # Keep the first model unconditionally, then only strictly better ones
        if best_model is None or current_accuracy > best_accuracy:
            best_accuracy = current_accuracy
            best_random_state = random_state
            best_model = model
            best_scaler = scaler

    # Print the final result; both sides of the comparison are fractions
    if best_accuracy >= min_accuracy:
        print(f"Highest Accuracy: {best_accuracy * 100:.2f}% (Random State: {best_random_state})")
    else:
        print(f"No iteration achieved {min_accuracy * 100:.0f}% accuracy. Best Accuracy: {best_accuracy * 100:.2f}% (Random State: {best_random_state})")

    return best_model, best_scaler

In [None]:
# Run the random-state search in train_best_model() and keep the classifier
# with the highest test accuracy together with the scaler fitted on its
# training split. Both are needed to score new data.
best_model, best_scaler = train_best_model()


In [89]:
# Score a single new match with the best model and its matching scaler.
# The label encoding is local to train_best_model(), so it is restated here;
# it must stay identical to the mapping used for training.
class_mapping = {'d': 0, 'l': 1, 'w': 2}
label_by_class = {code: label for label, code in class_mapping.items()}

new_match = pd.DataFrame({'W': [2.00], 'D': [3.44], 'L': [3.92]})
new_match_standardized = best_scaler.transform(new_match)
new_prediction = best_model.predict(new_match_standardized)

# predict() returns an array with one entry per input row; take the single
# entry as a plain int instead of comparing ints against the whole array.
predicted_outcome = label_by_class[int(new_prediction[0])]
print("Predicted Outcome:", predicted_outcome)

Predicted Outcome: w
