In [63]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
import pickle
from tabulate import tabulate
import random

# Load the data
historical_results = pd.read_csv('cleaned_euro_results.csv')
fixtures = pd.read_csv('cleaned_euro_fixture.csv')
group_tables = pickle.load(open('dict_table','rb'))

In [64]:
#Prepared data
def prepare_data(df):
    X = df[['Home Team', 'Away Team']]
    Y = np.where(df['Home Goals'] > df['Away Goals'], 'Home Win',
                 np.where(df['Home Goals'] < df['Away Goals'], 'Away Win', 'Draw'))
    return X, Y

In [65]:
#Got goal data ready
def prepare_goal_data(df):
    X = df[['Home Team', 'Away Team']]
    Y_home = df['Home Goals']
    Y_away = df['Away Goals']
    return X, Y_home, Y_away

X, Y = prepare_data(historical_results)
X_goals, Y_home_goals, Y_away_goals = prepare_goal_data(historical_results)

In [66]:
# Get all unique teams
all_teams = set(X['Home Team']) | set(X['Away Team'])
for group in group_tables.values():
    all_teams |= set(group['Team'])

# Encode team names
le = LabelEncoder()
le.fit(list(all_teams))

# Create clear copies
X = X[['Home Team', 'Away Team']].copy()
X_goals = X_goals[['Home Team', 'Away Team']].copy()

In [67]:
# Replace 'Germany (H)' with 'Germany since Germany Hosted'
X.loc[:, 'Home Team'] = X['Home Team'].replace('Germany (H)', 'Germany')
X.loc[:, 'Away Team'] = X['Away Team'].replace('Germany (H)', 'Germany')
X_goals.loc[:, 'Home Team'] = X_goals['Home Team'].replace('Germany (H)', 'Germany')
X_goals.loc[:, 'Away Team'] = X_goals['Away Team'].replace('Germany (H)', 'Germany')

# Do needed transformation
X.loc[:, 'Home Team'] = le.transform(X['Home Team'])
X.loc[:, 'Away Team'] = le.transform(X['Away Team'])
X_goals.loc[:, 'Home Team'] = le.transform(X_goals['Home Team'])
X_goals.loc[:, 'Away Team'] = le.transform(X_goals['Away Team'])

# Define models as global variables to reduce error
model = None
model_home_goals = None
model_away_goals = None

In [68]:
def split_train():
    global model, model_home_goals, model_away_goals
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
    X_goals_train, X_goals_test, Y_home_goals_train, Y_home_goals_test, Y_away_goals_train, Y_away_goals_test = train_test_split(
        X_goals, Y_home_goals, Y_away_goals, test_size=0.2, random_state=42)

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, Y_train)

    model_home_goals = RandomForestRegressor(n_estimators=100, random_state=42)
    model_home_goals.fit(X_goals_train, Y_home_goals_train)

    model_away_goals = RandomForestRegressor(n_estimators=100, random_state=42)
    model_away_goals.fit(X_goals_train, Y_away_goals_train)

split_train()
#Train model on data

In [69]:
#Uses ml algorithms based on trained model to stimulate matches
def predict_match(home_team, away_team):
    home_team = 'Germany' if home_team == 'Germany (H)' else home_team
    away_team = 'Germany' if away_team == 'Germany (H)' else away_team
    try:
        home_encoded = le.transform([home_team])[0]
        away_encoded = le.transform([away_team])[0]

        match_features = pd.DataFrame({'Home Team': [home_encoded], 'Away Team': [away_encoded]})

        probs = model.predict_proba(match_features)[0]

        home_goals = model_home_goals.predict(match_features)[0]
        away_goals = model_away_goals.predict(match_features)[0]

        home_goals *= (probs[0] + probs[2] / 2)  # Home win + half of draw prob
        away_goals *= (probs[1] + probs[2] / 2)  # Away win + half of draw prob

        home_goals += np.random.poisson(1)
        away_goals += np.random.poisson(1)

        home_goals = max(0, home_goals)
        away_goals = max(0, away_goals)

        home_goals = round(home_goals)
        away_goals = round(away_goals)

        if home_goals > away_goals:
            winner = home_team
        elif home_goals < away_goals:
            winner = away_team
        else:
            winner = np.random.choice([home_team, away_team])

        return home_goals, away_goals, winner

    except ValueError:
        home_goals = np.random.randint(0, 5)
        away_goals = np.random.randint(0, 5)
        winner = home_team if home_goals > away_goals else away_team if away_goals > home_goals else np.random.choice(
            [home_team, away_team])
        return home_goals, away_goals, winner

In [70]:
#Setup group tables
def initialize_group_tables(group_tables):
    for group, table in group_tables.items():
        for _, row in table.iterrows():
            row['Pld'] = 0
            row['W'] = 0
            row['D'] = 0
            row['L'] = 0
            row['GF'] = 0
            row['GA'] = 0
            row['GD'] = 0
            row['Pts'] = 0
    return group_tables

group_tables = initialize_group_tables(group_tables)

In [71]:
#Update based on results of matches
def update_group_table(group, home_team, away_team, home_goals, away_goals):
    table = group_tables[group]

    for team, goals_for, goals_against in [(home_team, home_goals, away_goals), (away_team, away_goals, home_goals)]:
        team_index = table.index[table['Team'] == team]

        table.loc[team_index, 'Pld'] += 1
        table.loc[team_index, 'GF'] += goals_for
        table.loc[team_index, 'GA'] += goals_against
        table.loc[team_index, 'GD'] = table.loc[team_index, 'GF'] - table.loc[team_index, 'GA']

        if goals_for > goals_against:
            table.loc[team_index, 'W'] += 1
            table.loc[team_index, 'Pts'] += 3
        elif goals_for < goals_against:
            table.loc[team_index, 'L'] += 1
        else:
            table.loc[team_index, 'D'] += 1
            table.loc[team_index, 'Pts'] += 1

    table = table.sort_values(by=['Pts', 'GD', 'GF'], ascending=False)
    table['Pos'] = range(1, len(table) + 1)
    group_tables[group] = table

In [72]:
#Display the results
def display_group_table(group):
    table = group_tables[group]
    print(f"\nGroup {group} Standings:")
    print(tabulate(table, headers='keys', tablefmt='pretty', showindex=False))

In [73]:
print("Group Stage Results:")
for group, table in group_tables.items():
    print(f"\nGroup {group}:")
    teams = table['Team'].tolist()
    for i in range(len(teams)):
        for j in range(i + 1, len(teams)):
            home_team, away_team = teams[i], teams[j]
            home_goals, away_goals, _ = predict_match(home_team, away_team)
            update_group_table(group, home_team, away_team, home_goals, away_goals)
            print(f"{home_team} {home_goals} - {away_goals} {away_team}")

    display_group_table(group)

knockout_teams = {}
best_third_place = []

Group Stage Results:

Group Group A:
Germany (H) 0 - 1 Scotland
Germany (H) 2 - 3 Hungary
Germany (H) 2 - 1 Switzerland
Scotland 3 - 0 Hungary
Scotland 2 - 0 Switzerland
Hungary 2 - 1 Switzerland

Group Group A Standings:
+-----+-------------+-----+---+---+---+----+----+----+-----+
| Pos |    Team     | Pld | W | D | L | GF | GA | GD | Pts |
+-----+-------------+-----+---+---+---+----+----+----+-----+
|  1  |  Scotland   |  3  | 3 | 0 | 0 | 6  | 0  | 6  |  9  |
|  2  |   Hungary   |  3  | 2 | 0 | 1 | 5  | 6  | -1 |  6  |
|  3  | Germany (H) |  3  | 1 | 0 | 2 | 4  | 5  | -1 |  3  |
|  4  | Switzerland |  3  | 0 | 0 | 3 | 2  | 6  | -4 |  0  |
+-----+-------------+-----+---+---+---+----+----+----+-----+

Group Group B:
Spain 0 - 2 Croatia
Spain 1 - 3 Italy
Spain 1 - 1 Albania
Croatia 2 - 2 Italy
Croatia 1 - 1 Albania
Italy 2 - 1 Albania

Group Group B Standings:
+-----+---------+-----+---+---+---+----+----+----+-----+
| Pos |  Team   | Pld | W | D | L | GF | GA | GD | Pts |
+-----+-------

In [82]:
#Finds the positon of group stage teams
def find_knockout_teams():
    global knockout_teams, best_third_place
    knockout_teams = {}
    third_place_teams = []

    for group, table in group_tables.items():
        knockout_teams[f"Winner {group}"] = table.iloc[0]['Team']
        knockout_teams[f"Runner-up {group}"] = table.iloc[1]['Team']
        third_place_teams.append(table.iloc[2]['Team'])

    print("\nTeams advancing to knockout stage:")
    for key, value in knockout_teams.items():
        print(f"{key}: {value}")

In [83]:

def find_knockout_team():
    global knockout_teams, third_place_assignments
    knockout_teams = {}
    third_place_teams = []
        
    for group, table in group_tables.items():
        knockout_teams[f"Winner {group}"] = table.iloc[0]['Team']
        knockout_teams[f"Runner-up {group}"] = table.iloc[1]['Team']
        third_place_teams.append((group, table.iloc[2]['Team'], table.iloc[2]['Pts'], table.iloc[2]['GD'], table.iloc[2]['GF']))

    # Selects the top 3 third place teams to advance
    third_place_teams.sort(key=lambda x: (x[2], x[3], x[4]), reverse=True)
    best_third_place = third_place_teams[:4]

    # Assign 3rd place teams to specific slots
    third_place_assignments = {
        '3rd Group A/B/C': best_third_place[0][1],
        '3rd Group A/B/C/D': best_third_place[1][1],
        '3rd Group D/E/F': best_third_place[2][1],
        '3rd Group A/D/E/F': best_third_place[3][1]
    }

    knockout_teams.update(third_place_assignments)

    print("\nTeams advancing to knockout stage:")
    team_data = [(key, value) for key, value in knockout_teams.items() if not key.startswith('Match')]
    print(tabulate(team_data, headers=["Position", "Team"], tablefmt="pretty"))


In [84]:
def get_knockout_team(team_description):
    if team_description in knockout_teams:
        return knockout_teams[team_description]
    elif 'Winner Match' in team_description:
        match_number = team_description.split()[-1]
        return knockout_teams.get(f"Match {match_number}", team_description)
    else:
        return team_description #Responsible for updating team names

In [90]:

#Function to predict the knockout stages
def predict_knockout():
    print("\n=== Knockout Stage Results ===\n")
    total_matches = len(fixtures)
    knockout_matches = fixtures.iloc[-15:]  #We will assume the knockout stage lasts 15 matches
    match_counter = total_matches - 14  # Start counter for knockout stage
    current_round = ""
    
    for index, row in knockout_matches.iterrows():
        home_team = get_knockout_team(row['Home Team'])
        away_team = get_knockout_team(row['Away Team'])
        
        if home_team == away_team:
            continue
        
        home_goals, away_goals, winner = predict_match(home_team, away_team)
        #determining rounds
        if match_counter == total_matches:
            round_name = "Final"
        elif match_counter >= total_matches - 2:
            round_name = "Semi-finals"
        elif match_counter >= total_matches - 6:
            round_name = "Quarter-finals"
        else:
            round_name = "Round of 16"
        
        if round_name != current_round:
            print(f"\n{'-'*20} {round_name} {'-'*20}")
            current_round = round_name
        
        result = f"{home_team} {home_goals} - {away_goals} {away_team}"
        print(f"{result:^50}")
        if home_goals == away_goals:
            print(f"{'* ' + winner + ' wins on penalties':^50}")
        else:
            print(f"{'Winner: ' + winner:^50}")
        print('-' * 50)
        
        knockout_teams[f"Match {match_counter}"] = winner
        match_counter += 1

    return knockout_teams[f"Match {total_matches}"]

# Run the functions
find_knockout_team()
tournament_winner = predict_knockout()

print("\n" + "=" * 50)
print(f"{'Tournament Winner':^50}")
print("=" * 50)
print(f"{tournament_winner:^50}")
print("=" * 50)


Teams advancing to knockout stage:
+-------------------+----------------+
|     Position      |      Team      |
+-------------------+----------------+
|  Winner Group A   |    Scotland    |
| Runner-up Group A |    Hungary     |
|  Winner Group B   |     Italy      |
| Runner-up Group B |    Croatia     |
|  Winner Group C   |    Denmark     |
| Runner-up Group C |    England     |
|  Winner Group D   |  Netherlands   |
| Runner-up Group D |     France     |
|  Winner Group E   |    Belgium     |
| Runner-up Group E |    Ukraine     |
|  Winner Group F   |    Portugal    |
| Runner-up Group F |    Georgia     |
|  3rd Group A/B/C  |    Romania     |
| 3rd Group A/B/C/D |    Austria     |
|  3rd Group D/E/F  | Czech Republic |
| 3rd Group A/D/E/F |  Germany (H)   |
+-------------------+----------------+

=== Knockout Stage Results ===


-------------------- Round of 16 --------------------
              Hungary 3 - 2 Croatia               
                 Winner: Hungary             