In [1]:
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score



In [3]:
# Load the match data from CSV.
data = pd.read_csv('sample_data/data_model.csv', encoding='ISO-8859-1')

# Integer class code for each result label
# (presumably 'd' = draw, 'l' = loss, 'w' = win -- confirm against the data source).
class_mapping = {'d': 0, 'l': 1, 'w': 2}

# Series.map() turns every value that is not a key of class_mapping into NaN.
# Check for that here and fail with a clear message instead of passing NaN
# targets on to the model.
encoded_results = data['RESULTS'].map(class_mapping)
unmapped_labels = data.loc[encoded_results.isna(), 'RESULTS']
if not unmapped_labels.empty:
    raise ValueError(
        "RESULTS contains labels missing from class_mapping: "
        f"{sorted(unmapped_labels.astype(str).unique())}"
    )
data['RESULTS'] = encoded_results

# Model inputs are the three columns 'W', 'D', 'L'; the target is the encoded result.
features = data[['W', 'D', 'L']]
target = data['RESULTS']


In [4]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Build the 3-class XGBoost classifier and train it on the training rows.
# fit() returns the fitted estimator, so construction and training are chained.
model = xgb.XGBClassifier(objective='multi:softmax', num_class=3).fit(X_train, y_train)

# Predicted class for every held-out row.
y_pred = model.predict(X_test)



In [5]:
# Score the held-out predictions against the true classes.
accuracy = accuracy_score(y_test, y_pred)

# Report the score as a percentage with two decimals.
accuracy_percent = accuracy * 100.0
print("Accuracy: %.2f%%" % accuracy_percent)



Accuracy: 46.71%


In [15]:
# Feature values for the match to score, using the same column names and order
# as the training features ('W', 'D', 'L'). Replace with the actual values for
# the new match.
new_match = pd.DataFrame({'W': [1.37], 'D': [5.00], 'L': [27.70]})

# Use the trained model to predict the outcome; predict() returns an array with
# one class code per input row.
new_prediction = model.predict(new_match)

# Invert the label -> code mapping so a code can be looked up directly.
# Comparing a scalar code against the whole prediction array (as a filter
# condition) only works when the array has exactly one element, so the first
# row's prediction is selected explicitly instead.
code_to_label = {code: label for label, code in class_mapping.items()}
predicted_outcome = code_to_label[int(new_prediction[0])]

# Print the predicted outcome
print("Predicted Outcome:", predicted_outcome)

Predicted Outcome: d


In [None]:

# data['RESULTS'] holds integer class codes once the label-encoding step has
# run. Invert class_mapping so axes and titles show the original labels; any
# value without a known label is displayed unchanged.
code_to_label = {code: label for label, code in class_mapping.items()}

# Visualize the distribution of outcomes
outcome_counts = data['RESULTS'].value_counts()
# String tick labels give one categorical bar per outcome rather than bars
# placed on a continuous numeric axis.
outcome_names = [str(code_to_label.get(code, code)) for code in outcome_counts.index]
fig, ax = plt.subplots(figsize=(8, 8))
ax.bar(outcome_names, outcome_counts.values)
ax.set_title('Distribution of Outcomes')
ax.set_xlabel('Outcome')
ax.set_ylabel('Count')
plt.show()

# Visualize the betting odds for each outcome
# (column name, legend label) pairs drawn as overlaid histograms on one figure.
odds_columns = [('W', 'Win Odds'), ('D', 'Draw Odds'), ('L', 'Lose Odds')]
outcomes = data['RESULTS'].unique()
for outcome in outcomes:
    subset = data[data['RESULTS'] == outcome]
    outcome_name = code_to_label.get(outcome, outcome)
    fig, ax = plt.subplots(figsize=(10, 10))
    for column, legend_label in odds_columns:
        ax.hist(subset[column], bins=20, alpha=0.5, label=legend_label)
    ax.set_title(f'Betting Odds Distribution for {outcome_name}')
    ax.set_xlabel('Betting Odds')
    ax.set_ylabel('Frequency')
    ax.legend()
    plt.show()
