In [67]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
import pickle
from tabulate import tabulate 

# Load the data
historical_results = pd.read_csv('cleaned_euro_results.csv')
fixtures = pd.read_csv('cleaned_euro_fixture.csv')
group_tables = pickle.load(open('dict_table','rb'))

In [68]:
# Prepare the data for machine learning
def prepare_data(df):
    X = df[['Home Team', 'Away Team']]
    Y = np.where(df['Home Goals'] > df['Away Goals'], 'Home Win',
                 np.where(df['Home Goals'] < df['Away Goals'], 'Away Win', 'Draw'))
    return X, y

In [69]:
def prepare_goal_data(df):
    X = df[['Home Team', 'Away Team']]
    Y_home = df['Home Goals']
    Y_away = df['Away Goals']
    return X, Y_home, Y_away

X, Y = prepare_data(historical_results)
X_goals, Y_home_goals, Y_away_goals = prepare_goal_data(historical_results)

In [70]:
# Get all unique teams
all_teams = set(X['Home Team']) | set(X['Away Team'])
for group in group_tables.values():
    all_teams |= set(group['Team'])

In [71]:
# Encode team names
le = LabelEncoder()
le.fit(list(all_teams))

# Create explicit copies
X = X[['Home Team', 'Away Team']].copy()
X_goals = X_goals[['Home Team', 'Away Team']].copy()

# Replace 'Germany (H)' with 'Germany'
X.loc[:, 'Home Team'] = X['Home Team'].replace('Germany (H)', 'Germany')
X.loc[:, 'Away Team'] = X['Away Team'].replace('Germany (H)', 'Germany')
X_goals.loc[:, 'Home Team'] = X_goals['Home Team'].replace('Germany (H)', 'Germany')
X_goals.loc[:, 'Away Team'] = X_goals['Away Team'].replace('Germany (H)', 'Germany')

# Transform team names
X.loc[:, 'Home Team'] = le.transform(X['Home Team'])
X.loc[:, 'Away Team'] = le.transform(X['Away Team'])
X_goals.loc[:, 'Home Team'] = le.transform(X_goals['Home Team'])
X_goals.loc[:, 'Away Team'] = le.transform(X_goals['Away Team'])

In [72]:
    # Split the data and train models
def split_train():
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
        X_goals_train, X_goals_test, Y_home_goals_train, Y_home_goals_test, Y_away_goals_train, Y_away_goals_test = train_test_split(X_goals, Y_home_goals, Y_away_goals, test_size=0.2, random_state=42)
    
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)
        
        model_home_goals = RandomForestRegressor(n_estimators=100, random_state=42)
        model_home_goals.fit(X_goals_train, y_home_goals_train)
        
        model_away_goals = RandomForestRegressor(n_estimators=100, random_state=42)
        model_away_goals.fit(X_goals_train, y_away_goals_train)
split_train()



In [73]:
def predict_match(home_team, away_team):
    home_team = 'Germany' if home_team == 'Germany (H)' else home_team
    away_team = 'Germany' if away_team == 'Germany (H)' else away_team
    try:
        home_encoded = le.transform([home_team])[0]
        away_encoded = le.transform([away_team])[0]

        # Create a DataFrame with named features
        match_features = pd.DataFrame({'Home Team': [home_encoded], 'Away Team': [away_encoded]})

        # Predict outcome probabilities
        probs = model.predict_proba(match_features)[0]

        # Predict goals
        home_goals = model_home_goals.predict(match_features)[0]
        away_goals = model_away_goals.predict(match_features)[0]

        # Adjust goals based on outcome probabilities
        home_goals *= (probs[0] + probs[2] / 2)  # Home win + half of draw prob
        away_goals *= (probs[1] + probs[2] / 2)  # Away win + half of draw prob

        # Add some randomness to increase variety
        home_goals += np.random.poisson(1)
        away_goals += np.random.poisson(1)

        # Ensure goals are non-negative
        home_goals = max(0, home_goals)
        away_goals = max(0, away_goals)

        # Round goals to nearest integer
        home_goals = round(home_goals)
        away_goals = round(away_goals)

        # Determine winner
        if home_goals > away_goals:
            winner = home_team
        elif home_goals < away_goals:
            winner = away_team
        else:
            winner = np.random.choice([home_team, away_team])

        return home_goals, away_goals, winner

    except ValueError:
        # If a team is not in the training data, use a random prediction
        home_goals = np.random.randint(0, 5)
        away_goals = np.random.randint(0, 5)
        winner = home_team if home_goals > away_goals else away_team if away_goals > home_goals else np.random.choice(
            [home_team, away_team])
        return home_goals, away_goals, winner

In [74]:
# Initialize group tables
def initialize_group_tables(group_tables):
    for group, table in group_tables.items():
        for _, row in table.iterrows():
            row['Pld'] = 0
            row['W'] = 0
            row['D'] = 0
            row['L'] = 0
            row['GF'] = 0
            row['GA'] = 0
            row['GD'] = 0
            row['Pts'] = 0
    return group_tables

group_tables = initialize_group_tables(group_tables)

In [75]:
def update_group_table(group, home_team, away_team, home_goals, away_goals):
    table = group_tables[group]

    for team, goals_for, goals_against in [(home_team, home_goals, away_goals), (away_team, away_goals, home_goals)]:
        team_index = table.index[table['Team'] == team]
        
        table.loc[team_index, 'Pld'] += 1
        table.loc[team_index, 'GF'] += goals_for
        table.loc[team_index, 'GA'] += goals_against
        table.loc[team_index, 'GD'] = table.loc[team_index, 'GF'] - table.loc[team_index, 'GA']

        if goals_for > goals_against:
            table.loc[team_index, 'W'] += 1
            table.loc[team_index, 'Pts'] += 3
        elif goals_for < goals_against:
            table.loc[team_index, 'L'] += 1
        else:
            table.loc[team_index, 'D'] += 1
            table.loc[team_index, 'Pts'] += 1

    table = table.sort_values(by=['Pts', 'GD', 'GF'], ascending=False)
    table['Pos'] = range(1, len(table) + 1)
    group_tables[group] = table

In [76]:
# Function to display group table
def display_group_table(group):
    table = group_tables[group]
    print(f"\nGroup {group} Standings:")
    print(tabulate(table, headers='keys', tablefmt='pretty', showindex=False))

In [77]:
# Simulate group stage
print("Group Stage Results:")
for group, table in group_tables.items():
    print(f"\nGroup {group}:")
    teams = table['Team'].tolist()
    for i in range(len(teams)):
        for j in range(i+1, len(teams)):
            home_team, away_team = teams[i], teams[j]
            home_goals, away_goals, _ = predict_match(home_team, away_team)
            update_group_table(group, home_team, away_team, home_goals, away_goals)
            print(f"{home_team} {home_goals} - {away_goals} {away_team}")
    
    display_group_table(group)

Group Stage Results:

Group Group A:
Germany (H) 0 - 3 Scotland
Germany (H) 2 - 4 Hungary
Germany (H) 2 - 1 Switzerland
Scotland 2 - 3 Hungary
Scotland 1 - 3 Switzerland
Hungary 1 - 1 Switzerland

Group Group A Standings:
+-----+-------------+-----+---+---+---+----+----+----+-----+
| Pos |    Team     | Pld | W | D | L | GF | GA | GD | Pts |
+-----+-------------+-----+---+---+---+----+----+----+-----+
|  1  |   Hungary   |  3  | 2 | 1 | 0 | 8  | 5  | 3  |  7  |
|  2  | Switzerland |  3  | 1 | 1 | 1 | 5  | 4  | 1  |  4  |
|  3  |  Scotland   |  3  | 1 | 0 | 2 | 6  | 6  | 0  |  3  |
|  4  | Germany (H) |  3  | 1 | 0 | 2 | 4  | 8  | -4 |  3  |
+-----+-------------+-----+---+---+---+----+----+----+-----+

Group Group B:
Spain 0 - 0 Croatia
Spain 1 - 0 Italy
Spain 1 - 0 Albania
Croatia 2 - 1 Italy
Croatia 2 - 2 Albania
Italy 2 - 0 Albania

Group Group B Standings:
+-----+---------+-----+---+---+---+----+----+----+-----+
| Pos |  Team   | Pld | W | D | L | GF | GA | GD | Pts |
+-----+-------

In [78]:
def find_knockout_team():
        knockout_teams = {}
        for group, table in group_tables.items():
            knockout_teams[f"Winner {group}"] = table.iloc[0]['Team']
            knockout_teams[f"Runner-up {group}"] = table.iloc[1]['Team']
        
        # Determine four best third-placed teams
        third_place_teams = []
        for group, table in group_tables.items():
            third_place_teams.append(table.iloc[2])
        third_place_teams.sort(key=lambda x: (x['Pts'], x['GD'], x['GF']), reverse=True)
        best_third_place = [team['Team'] for team in third_place_teams[:4]]
        
        knockout_teams.update({
            '3rd Place 1': best_third_place[0],
            '3rd Place 2': best_third_place[1],
            '3rd Place 3': best_third_place[2],
            '3rd Place 4': best_third_place[3]
        })
find_knockout_team()


In [79]:
def get_knockout_team(team_description):
    if team_description in knockout_teams:
        return knockout_teams[team_description]
    elif 'Winner Match' in team_description:
        match_number = team_description.split()[-1]
        return knockout_teams.get(f"Match {match_number}", team_description)
    elif '3rd Group' in team_description:
        return np.random.choice(best_third_place)
    else:
        return team_description

In [87]:


def predict_knockout():
    # Simulate knockout stage
    print("\n=== Knockout Stage Results ===\n")
    total_matches = len(fixtures)
    knockout_matches = fixtures.iloc[-15:]  # Assuming last 15 matches are knockout stage
    match_counter = total_matches - 14  # Start counter for knockout stage
    current_round = ""
    
    for index, row in knockout_matches.iterrows():
        home_team = get_knockout_team(row['Home Team'])
        away_team = get_knockout_team(row['Away Team'])
        
        if home_team == away_team:
            continue
        
        home_goals, away_goals, winner = predict_match(home_team, away_team)
        
        if match_counter == total_matches:
            round_name = "Final"
        elif match_counter >= total_matches - 2:
            round_name = "Semi-finals"
        elif match_counter >= total_matches - 6:
            round_name = "Quarter-finals"
        else:
            round_name = "Round of 16"
        
        if round_name != current_round:
            print(f"\n--- {round_name} ---")
            current_round = round_name
        
        print(f"{home_team} {home_goals} - {away_goals} {away_team}")
        if home_goals == away_goals:
            print(f"  * {winner} wins on penalties")
        else:
            print(f"  Winner: {winner}")
        
        knockout_teams[f"Match {match_counter}"] = winner
        match_counter += 1
predict_knockout()

print("\n=== Tournament Winner ===")
print(knockout_teams[f"Match {total_matches}"])


=== Knockout Stage Results ===


--- Round of 16 ---
Scotland 1 - 1 Spain
  * Spain wins on penalties
Switzerland 1 - 2 Denmark
  Winner: Denmark
England 2 - 1 Poland
  Winner: England
Albania 2 - 1 Slovakia
  Winner: Albania
Netherlands 3 - 2 Romania
  Winner: Netherlands
Georgia 2 - 3 Slovakia
  Winner: Slovakia
Belgium 2 - 0 Poland
  Winner: Belgium
Austria 4 - 1 Turkey
  Winner: Austria

--- Quarter-finals ---
England 5 - 3 Spain
  Winner: England
Netherlands 3 - 2 Slovakia
  Winner: Netherlands
Albania 2 - 1 Denmark
  Winner: Albania
Belgium 1 - 2 Austria
  Winner: Austria

--- Semi-finals ---
England 3 - 0 Netherlands
  Winner: England
Albania 1 - 1 Austria
  * Austria wins on penalties

--- Final ---
England 4 - 0 Austria
  Winner: England

=== Tournament Winner ===
England
