In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
import pandas as pd

In [34]:
df = pd.read_csv('export.csv')

In [35]:
df.rename(columns=lambda col: col.replace('winner', 'own_team').replace('loser', 'enemy_team'), inplace=True)
df['win'] = True
# Create a duplicate DataFrame for the swapped data
swapped_df = df.copy()

# Swap my_team and enemy_team columns
swapped_columns = {col: col.replace('own_team', 'temp').replace('enemy_team', 'own_team').replace('temp', 'enemy_team') 
                   for col in df.columns if 'own_team' in col or 'enemy_team' in col}
swapped_df.rename(columns=swapped_columns, inplace=True)

# Set the 'win' column to False for the swapped data
swapped_df['win'] = False

# Append the swapped data to the original DataFrame
df = pd.concat([df, swapped_df], ignore_index=True)

# Display the resulting DataFrame
print(df.head())
print(df.tail())  # Show some of the appended rows



           matchId own_team_BOTTOM own_team_JUNGLE own_team_MIDDLE  \
0  EUW1_7183889725           Kaisa           Shaco           Yasuo   
1  EUW1_7183871808            Jhin           Shaco            Hwei   
2  EUW1_7183851435            Zeri           Elise             Vex   
3  EUW1_7183844492            Jinx          Rengar           Diana   
4  EUW1_7183522220         Caitlyn           Neeko           Sylas   

  own_team_TOP own_team_UTILITY enemy_team_BOTTOM enemy_team_JUNGLE  \
0      DrMundo             Sona          Tristana            Khazix   
1      DrMundo           Thresh       MissFortune          Nocturne   
2      DrMundo            Karma              Jhin               Zac   
3        Garen              Lux              Zeri           Ambessa   
4       Darius            Milio             Swain           Skarner   

  enemy_team_MIDDLE enemy_team_TOP enemy_team_UTILITY   win  
0            Veigar        Warwick               Lulu  True  
1             Galio        S

In [36]:
df.to_csv('test.csv', index=False)

In [None]:

categorical_columns = [col for col in df.columns if col not in ['matchId', 'win']]

X = df[categorical_columns]
y = df['win']

encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
X_encoded = encoder.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Train a RandomForestClassifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy:.2f}")

feature_importances = model.feature_importances_
important_features = sorted(zip(encoder.get_feature_names_out(), feature_importances), key=lambda x: -x[1])

print("Top 10 Important Features:")
for feature, importance in important_features[:10]:
    print(f"{feature}: {importance:.4f}")

Model Accuracy: 0.53
Top 10 Important Features:
enemy_team_BOTTOM_Caitlyn: 0.0064
own_team_BOTTOM_Caitlyn: 0.0063
enemy_team_BOTTOM_Jinx: 0.0056
own_team_BOTTOM_Jinx: 0.0055
own_team_BOTTOM_Jhin: 0.0051
enemy_team_BOTTOM_Jhin: 0.0048
enemy_team_BOTTOM_Ashe: 0.0047
own_team_BOTTOM_Ashe: 0.0047
own_team_UTILITY_Lux: 0.0044
enemy_team_UTILITY_Lux: 0.0044


In [None]:
import numpy as np


def predict_win_probability(input_data, model, encoder, categorical_columns):
    """
    Predict the probability of a win given input data.

    Parameters:
        input_data (dict): A dictionary where keys are column names and values are the input values.
        model (sklearn model): The trained model.
        encoder (OneHotEncoder): The encoder used to preprocess the data.
        categorical_columns (list): List of categorical column names.

    Returns:
        float: Probability of winning.
    """
    # Convert input_data to a DataFrame
    input_df = pd.DataFrame([input_data])

    # Ensure the input columns match the training data
    input_df = input_df[categorical_columns]

    # One-hot encode the input data
    input_encoded = encoder.transform(input_df)

    # Predict the probability of a win
    win_probability = model.predict_proba(input_encoded)[0][1]  # Probability of the positive class (win)
    return win_probability

# Example usage
# Replace these with real input values
example_input = {
    'own_team_BOTTOM': 'Kaisa',
    'own_team_JUNGLE': 'Graves',
    'own_team_MIDDLE': 'Tristana',
    'own_team_TOP': 'Renekton',
    'own_team_UTILITY': 'Veigar',
    'enemy_team_BOTTOM': 'MissFortune',
    'enemy_team_JUNGLE': 'Lillia',
    'enemy_team_MIDDLE': 'Lux',
    'enemy_team_TOP': 'Kled',
    'enemy_team_UTILITY': 'Shaco',
}

# Predict the probability
probability = predict_win_probability(example_input, model, encoder, categorical_columns)
print(f"Win Probability: {probability:.2f}")

Win Probability: 0.53


In [None]:
def evaluate_top_pick_win_probabilities(all_champions, model, encoder, categorical_columns, example_input):
    """
    Evaluate win probabilities for every possible champion as the 'my_team_TOP' pick.

    Parameters:
        all_champions (list): List of all possible champion names.
        model (sklearn model): The trained model.
        encoder (OneHotEncoder): The encoder used to preprocess the data.
        categorical_columns (list): List of categorical column names.
        example_input (dict): A dictionary with the current input values (excluding 'my_team_TOP').

    Returns:
        None
    """
    win_probabilities = []

    for champion in all_champions:
        # Create a copy of the input data and update 'my_team_TOP' with the current champion
        input_data = example_input.copy()
        input_data['own_team_TOP'] = champion

        # Predict the probability of winning for this configuration
        win_probability = predict_win_probability(input_data, model, encoder, categorical_columns)

        # Store the result
        win_probabilities.append((champion, win_probability))

    # Sort by win probability in descending order
    sorted_win_probabilities = sorted(win_probabilities, key=lambda x: x[1], reverse=True)

    # Print the results
    print("Top Pick Win Probabilities (sorted by highest probability):")
    for champion, probability in sorted_win_probabilities:
        print(f"{champion}: {probability:.2f}")




Top Pick Win Probabilities (sorted by highest probability):
Garen: 0.55
Jayce: 0.54
Galio: 0.51
Teemo: 0.50
Ahri: 0.49
TahmKench: 0.49
Vladimir: 0.49
Kayle: 0.49
Jax: 0.48
MissFortune: 0.47
Jinx: 0.47
Morgana: 0.47
Orianna: 0.47
Jhin: 0.47
Gwen: 0.47
Kaisa: 0.46
Tristana: 0.46
Renekton: 0.46
Veigar: 0.46
Lux: 0.46
Kled: 0.46
Quinn: 0.46
Aphelios: 0.46
FiddleSticks: 0.46
Lucian: 0.46
Briar: 0.46
Draven: 0.46
Vayne: 0.46
Warwick: 0.46
Swain: 0.46
Mordekaiser: 0.46
Xayah: 0.46
LeeSin: 0.46
Varus: 0.46
Volibear: 0.46
Xerath: 0.46
Yone: 0.46
Malphite: 0.46
Graves: 0.45
Shaco: 0.45
Brand: 0.45
Azir: 0.45
Yasuo: 0.45
Nasus: 0.45
Nami: 0.45
Zed: 0.45
Sett: 0.45
Qiyana: 0.45
Belveth: 0.45
Fizz: 0.45
Blitzcrank: 0.45
Renata: 0.45
Lulu: 0.45
RekSai: 0.45
Kassadin: 0.45
Seraphine: 0.45
Kindred: 0.45
Janna: 0.45
Ivern: 0.44
Nocturne: 0.44
Ekko: 0.44
MonkeyKing: 0.44
Caitlyn: 0.44
Zeri: 0.44
Akshan: 0.44
Lillia: 0.43


In [None]:
all_champions = ['Kaisa', 'Graves', 'Tristana', 'Renekton', 'Veigar', 'MissFortune', 'Lillia', 
                 'Lux', 'Kled', 'Shaco', 'Jinx', 'Ivern', 'Ahri', 'Quinn', 'Brand', 'Aphelios', 
                 'FiddleSticks', 'Azir', 'Garen', 'Morgana', 'Lucian', 'Briar', 'Yasuo', 'Nasus', 
                 'Nami', 'Draven', 'Nocturne', 'Zed', 'Sett', 'Ekko', 'Jayce', 'Teemo', 'Vayne', 
                 'Warwick', 'Qiyana', 'Orianna', 'TahmKench', 'Jhin', 'MonkeyKing', 'Swain', 'Caitlyn', 
                 'Belveth', 'Fizz', 'Galio', 'Mordekaiser', 'Blitzcrank', 'Xayah', 'Vladimir', 'Renata', 
                 'Zeri', 'LeeSin', 'Kayle', 'Lulu', 'Varus', 'RekSai', 'Kassadin', 'Volibear', 'Xerath', 
                 'Yone', 'Gwen', 'Seraphine', 'Kindred', 'Akshan', 'Jax', 'Malphite', 'Janna']

example_input = {
    'own_team_BOTTOM': 'Kaisa',
    'own_team_JUNGLE': 'Graves',
    'own_team_MIDDLE': 'Tristana',
    'own_team_UTILITY': 'Veigar',
    'enemy_team_BOTTOM': 'MissFortune',
    'enemy_team_JUNGLE': 'Lillia',
    'enemy_team_MIDDLE': 'Lux',
    'enemy_team_TOP': 'Renekton',
    'enemy_team_UTILITY': 'Shaco'
}

evaluate_top_pick_win_probabilities(all_champions, model, encoder, categorical_columns, example_input)