In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


# Load the raw match results.
data = pd.read_csv("results.csv")

# Lowercase every text column so team names can be matched case-insensitively
# later on (the simulated fixtures use lowercase team names).
data = data.apply(lambda x: x.str.lower() if x.dtype == "object" else x)

# Unparseable dates become NaT; NaT never satisfies the year filter, so those
# rows are dropped together with everything before 2010.
data['date'] = pd.to_datetime(data['date'], errors='coerce')
data = data[data['date'].dt.year >= 2010]

# Rows without a score (e.g. fixtures that have not been played) cannot be
# labelled. Without this step both comparisons below are False for NaN and
# such rows would silently be counted as draws.
data = data.dropna(subset=['home_score', 'away_score']).copy()

# Label each match from the home side's point of view (vectorized instead of
# a row-wise apply): start from 'draw' and overwrite the decided matches.
goal_difference = data['home_score'] - data['away_score']
data['match_outcome'] = 'draw'
data.loc[goal_difference > 0, 'match_outcome'] = 'home_win'
data.loc[goal_difference < 0, 'match_outcome'] = 'away_win'
print(data['match_outcome'].value_counts())

In [None]:
# One-hot encode both team columns. drop_first=True omits the first category
# of each column, so that team is represented by an all-zero row.
# NOTE: this cell replaces `data` and removes the original team columns, so it
# cannot be re-run without re-running the loading cell first.
team_columns = ['home_team', 'away_team']
data = pd.get_dummies(data, columns=team_columns, drop_first=True)
data.head(2)

In [None]:
# Everything that is not a model input: the date, the scores and the label
# itself, plus the tournament/city/country text columns.
non_feature_columns = ['date', 'home_score', 'away_score', 'match_outcome', 'tournament', 'city', 'country']

# Numeric target encoding, seen from the home team's side.
outcome_codes = {'home_win': 1, 'draw': 0, 'away_win': -1}

X = data.drop(columns=non_feature_columns)
y = data['match_outcome'].map(outcome_codes)

# 70/30 split with a fixed seed so the reported accuracy is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# fit() returns the estimator itself, so construction and training can be chained.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

y_pred = rf_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

In [None]:
# Group-stage draw: group label -> the four teams in that group.
# Team names are written in lowercase, matching the lowercased text columns
# of the results data.
groups = {
    "Group A": [
        "germany", "hungary", "scotland", "switzerland",
    ],
    "Group B": [
        "albania", "croatia", "italy", "spain",
    ],
    "Group C": [
        "denmark", "england", "serbia", "slovenia",
    ],
    "Group D": [
        "austria", "france", "netherlands", "poland",
    ],
    "Group E": [
        "belgium", "romania", "slovakia", "ukraine",
    ],
    "Group F": [
        "czech republic", "portugal", "turkey", "georgia",
    ],
}

In [None]:
from itertools import combinations

def simulate_group_matches(group_teams, model, data_columns):
    """Simulate a single round-robin within one group and rank the teams.

    Every pair of teams meets once. For each pair, the team that appears
    earlier in ``group_teams`` is encoded as the home side.

    Parameters
    ----------
    group_teams : sequence of str
        Team names, written exactly as they appear in the one-hot column
        names (``home_team_<name>`` / ``away_team_<name>``).
    model : classifier
        Fitted model whose ``predict`` returns 1 for a home win, -1 for an
        away win and anything else (0) for a draw.
    data_columns : iterable of str
        Feature column labels the model was trained on, in training order.

    Returns
    -------
    list of (str, int)
        ``(team, points)`` pairs sorted by points, highest first. Teams with
        equal points keep their order from ``group_teams``.
    """
    points = {team: 0 for team in group_teams}
    feature_names = list(data_columns)

    for home_team, away_team in combinations(group_teams, 2):
        # Match column names exactly. A substring test would also switch on
        # columns of longer team names that merely start with this one
        # (e.g. 'home_team_netherlands antilles' for 'netherlands').
        active = {f'home_team_{home_team}', f'away_team_{away_team}'}

        # Build the single feature row in one go so it gets a numeric dtype;
        # every feature that is not one of the two team flags is set to 0.
        match_data = pd.DataFrame(
            [[1 if name in active else 0 for name in feature_names]],
            columns=feature_names,
        )

        # predict() returns one label per row; we only passed one row.
        outcome = model.predict(match_data)[0]

        if outcome == 1:
            points[home_team] += 3
        elif outcome == -1:
            points[away_team] += 3
        else:
            points[home_team] += 1
            points[away_team] += 1

    # sorted() is stable, so ties keep the original group order.
    ranked_teams = sorted(points.items(), key=lambda item: item[1], reverse=True)
    return ranked_teams

In [None]:
# Simulate every group with the trained model and keep the ranked
# (team, points) list per group label.
group_rankings = {
    group_name: simulate_group_matches(group_teams, rf_model, X.columns)
    for group_name, group_teams in groups.items()
}
group_rankings

In [None]:
# Round-of-16 pairings as (home, away) tuples. Demo only: the bracket is
# hard-coded here rather than derived from `group_rankings`.
# The position of each pairing matters, because the next round picks the
# winners by index.
achtelfinale_matches = [
    ('spain', 'poland'),              # 0
    ('switzerland', 'england'),       # 1
    ('turkey', 'hungary'),            # 2
    ('netherlands', 'slovakia'),      # 3
    ('belgium', 'croatia'),           # 4
    ('france', 'portugal'),           # 5
    ('denmark', 'czech republic'),    # 6
    ('germany', 'italy'),             # 7
]

In [None]:
def simulate_knockout_match(home_team, away_team, model, data_columns):
    """Predict which team advances from a single knockout match.

    Parameters
    ----------
    home_team, away_team : str
        Team names, written exactly as they appear in the one-hot column
        names (``home_team_<name>`` / ``away_team_<name>``).
    model : classifier
        Fitted model whose ``predict`` returns 1 for a home win, -1 for an
        away win and anything else (0) for a draw. It must also provide
        ``predict_proba`` and ``classes_``.
    data_columns : iterable of str
        Feature column labels the model was trained on, in training order.

    Returns
    -------
    str
        ``home_team`` or ``away_team``. A knockout match needs a winner, so
        when a draw is predicted the side with the higher predicted win
        probability advances; the away team advances if the two are equal.
    """
    feature_names = list(data_columns)

    # Match column names exactly. A substring test would also switch on
    # columns of longer team names that merely start with this one.
    active = {f'home_team_{home_team}', f'away_team_{away_team}'}
    match_data = pd.DataFrame(
        [[1 if name in active else 0 for name in feature_names]],
        columns=feature_names,
    )

    # predict() returns one label per row; we only passed one row.
    outcome = model.predict(match_data)[0]
    if outcome == 1:
        return home_team
    if outcome == -1:
        return away_team

    # Draw predicted: break the tie on win probabilities. predict_proba
    # columns follow model.classes_, so look the classes up by label instead
    # of by position (positions 1 and 2 of classes [-1, 0, 1] would be
    # "draw" and "home win", not "home win" and "away win").
    class_probability = dict(zip(model.classes_, model.predict_proba(match_data)[0]))
    home_win_probability = class_probability.get(1, 0.0)
    away_win_probability = class_probability.get(-1, 0.0)
    return home_team if home_win_probability > away_win_probability else away_team


# Play the round of 16. Results are keyed by a "<home> vs <away>" label and
# stored in fixture order, which the next round relies on.
print("Achtelfinale Results:")
achtelfinale_results = {}
for first_team, second_team in achtelfinale_matches:
    fixture = f"{first_team} vs {second_team}"
    advancing = simulate_knockout_match(first_team, second_team, rf_model, X.columns)
    achtelfinale_results[fixture] = advancing
    print(f"{fixture} -> Winner: {advancing}")

print("\nAchtelfinale Results Dictionary:")
print(achtelfinale_results)

In [None]:
# Winners of the round of 16, in the order the fixtures were played.
achtelfinale_winners = list(achtelfinale_results.values())

# Quarter-final bracket, expressed as (home, away) positions in the
# round-of-16 winners list.
viertelfinale_matches = [
    (achtelfinale_winners[home_idx], achtelfinale_winners[away_idx])
    for home_idx, away_idx in ((3, 1), (5, 4), (6, 7), (2, 0))
]


print("\nViertelfinale Results:")
viertelfinale_results = {}
for first_team, second_team in viertelfinale_matches:
    fixture = f"{first_team} vs {second_team}"
    advancing = simulate_knockout_match(first_team, second_team, rf_model, X.columns)
    viertelfinale_results[fixture] = advancing
    print(f"{fixture} -> Winner: {advancing}")

In [None]:
# Winners of the quarter-finals, in the order the fixtures were played.
viertelfinale_winners = list(viertelfinale_results.values())

# Semi-final bracket, expressed as (home, away) positions in the
# quarter-final winners list.
halbfinale_matches = [
    (viertelfinale_winners[home_idx], viertelfinale_winners[away_idx])
    for home_idx, away_idx in ((0, 1), (3, 2))
]


print("\nHalbfinale Results:")
halbfinale_results = {}
for first_team, second_team in halbfinale_matches:
    fixture = f"{first_team} vs {second_team}"
    advancing = simulate_knockout_match(first_team, second_team, rf_model, X.columns)
    halbfinale_results[fixture] = advancing
    print(f"{fixture} -> Winner: {advancing}")

In [None]:
# The two semi-final winners meet in the final; the winner of the first
# semi-final is passed as the home side.
halbfinale_winners = list(halbfinale_results.values())
first_finalist, second_finalist = halbfinale_winners[0], halbfinale_winners[1]
finale_match = (first_finalist, second_finalist)

print("\nFinale Result:")
finale_winner = simulate_knockout_match(first_finalist, second_finalist, rf_model, X.columns)
print(f"{first_finalist} vs {second_finalist} -> Winner: {finale_winner}")

# Same two output lines as two separate print calls.
print("\nFinale Winner:", finale_winner, sep="\n")