In [113]:
import pandas as pd
import numpy as np
import random


In [114]:
# Source CSV and the subset of columns used by the rest of the notebook.
file_path = 'football_stats/players_data-2024_2025.csv'
columns_needed = ['Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'MP', 'Gls', 'Ast', 'Tkl', 'Int', 'Cmp%', 'Save%']

# Load the file, keep only those columns and discard rows without a nation.
df = pd.read_csv(file_path)
df = df[columns_needed].dropna(subset=['Nation'])

# Aggregate per nation (sums for counting stats, means for percentages),
# drop nations for which any aggregate is missing, and keep the six nations
# with the highest goal totals.
top_teams = (
    df.groupby('Nation')
    .agg({
        'Gls': 'sum',
        'Ast': 'sum',
        'Tkl': 'sum',
        'Cmp%': 'mean',
        'Save%': 'mean',
    })
    .dropna()
    .sort_values(by='Gls', ascending=False)
    .head(6)
)
top_teams_nations = list(top_teams.index)


In [121]:
def compute_best_xi(team_df):
    """Select a line-up of up to 1 GK, 4 DF, 3 MF and 3 FW from ``team_df``.

    Position groups are filled in the order GK, DF, MF, FW. A row belongs to
    a group when its ``Pos`` value contains the group's tag (case-insensitive),
    and candidates are ranked by the group's stat in descending order:
    ``Save%`` for GK, ``Tkl`` for DF, ``Ast`` for MF and ``Gls`` for FW.

    Each row of ``team_df`` is selected at most once, so a multi-position
    player (e.g. ``"FW,MF"``) taken as a midfielder is not picked again as a
    forward.

    Parameters
    ----------
    team_df : pandas.DataFrame
        Player rows; must provide the ``Pos`` column and the four ranking
        columns listed above.

    Returns
    -------
    list of dict
        One record (column name -> value) per selected player, goalkeeper
        first. The list is shorter than eleven when a group has too few
        candidates.
    """
    # (position tag, ranking column, number of slots), in filling order.
    slots = (
        ('GK', 'Save%', 1),
        ('DF', 'Tkl', 4),
        ('MF', 'Ast', 3),
        ('FW', 'Gls', 3),
    )

    # Work on a positional index so "already picked" can be tracked by row,
    # independent of whatever index labels the caller's frame carries.
    pool = team_df.reset_index(drop=True)
    picked_rows = []
    best_xi = []

    for tag, stat, count in slots:
        in_position = pool['Pos'].str.contains(tag, case=False, na=False)
        still_free = ~pool.index.isin(picked_rows)
        candidates = pool[in_position & still_free]
        selected = candidates.sort_values(by=stat, ascending=False).head(count)
        picked_rows.extend(selected.index)
        # Every player, including the goalkeeper, is stored as a plain dict
        # so the returned list has a single element type.
        best_xi.extend(selected.to_dict('records'))

    return best_xi

# For every selected nation: pick its best XI, derive attack/defense ratings
# from that XI, and initialise the state used by the match simulation.
teams_info = {}
for nation in top_teams_nations:
    team_df = df[df['Nation'] == nation]
    best_xi = compute_best_xi(team_df)

    attack_rating = 0
    defense_rating = 0
    for player in best_xi:
        pos = player['Pos'].upper()
        is_forward = 'FW' in pos

        # Attack: a forward contributes all goals; a midfielder who is not
        # also listed as a forward contributes half.
        if is_forward or 'MF' in pos:
            goal_share = player['Gls'] if is_forward else 0.5 * player['Gls']
            attack_rating += goal_share

        # Defense: defenders add their tackles, goalkeepers half their Save%.
        if 'DF' in pos:
            defense_rating += player['Tkl']
        if 'GK' in pos:
            defense_rating += player['Save%'] / 2

    teams_info[nation] = {
        'best_xi': best_xi,
        'Attack': attack_rating,
        'Defense': defense_rating,
        'Fitness': 100,
        'Morale': 100,
        'Points': 0
    }

# Show the chosen line-up of each nation.
for nation, data in teams_info.items():
    print(f"Country: {nation}")
    for player in data['best_xi']:
        print(f"Player: {player['Player']} - Position: {player['Pos']}")
    print("\n")

Country: fr FRA
Player: Grégoire Coudert - Position: GK
Player: Valentin Rosier - Position: DF
Player: Dennis Appiah - Position: DF
Player: Gautier Lloris - Position: DF
Player: Jules Koundé - Position: DF
Player: Michael Olise - Position: FW,MF
Player: Rayan Cherki - Position: FW,MF
Player: Gaëtan Perrin - Position: MF,FW
Player: Ousmane Dembélé - Position: FW
Player: Kylian Mbappé - Position: FW
Player: Marcus Thuram - Position: FW


Country: es ESP
Player: David Soria - Position: GK
Player: Iglesias - Position: DF
Player: Carmona - Position: DF
Player: Diego Rico - Position: DF
Player: Sergio Gómez - Position: MF,DF
Player: Alex Baena - Position: MF,FW
Player: Álex Berenguer - Position: FW,MF
Player: Saúl Ñíguez - Position: MF
Player: Ayoze Pérez - Position: FW
Player: Kiké - Position: FW
Player: Javi Puado - Position: FW,MF


Country: de GER
Player: Finn Dahmen - Position: GK
Player: Benedikt Gimber - Position: DF
Player: Robin Gosens - Position: DF
Player: Dominik Kohr - Position:

In [122]:
def scoring_potential(player):
    """Return the weight used when drawing this player as a goal scorer.

    A player whose ``Pos`` contains ``FW`` is weighted by ``Gls``; otherwise a
    player whose ``Pos`` contains ``MF`` is weighted by half of ``Gls``; every
    other player gets a flat 0.1. The position match is case-insensitive.
    """
    role = player['Pos'].upper()
    if 'FW' in role:
        return player['Gls']
    if 'MF' in role:
        return 0.5 * player['Gls']
    return 0.1


In [123]:
def compute_player_performance(player, goal_bonus=0):
    """Return a noisy match rating for ``player``.

    The rating is a position-dependent weighted sum of two stats, plus one
    draw from ``np.random.normal(0, 5)``, plus ``goal_bonus``. The first tag
    found in ``Pos`` (case-insensitive), checked in the order GK, DF, MF, FW,
    decides which formula applies; with no matching tag the stat part is 0.
    """
    # (position tag, first stat, its weight, second stat, its weight)
    formulas = (
        ('GK', 'Save%', 0.5, 'Cmp%', 0.2),
        ('DF', 'Tkl', 0.3, 'Cmp%', 0.1),
        ('MF', 'Ast', 0.4, 'Gls', 0.2),
        ('FW', 'Gls', 0.5, 'Ast', 0.2),
    )
    role = player['Pos'].upper()
    stat_score = 0
    for tag, first, first_weight, second, second_weight in formulas:
        if tag in role:
            stat_score = (player[first] * first_weight) + (player[second] * second_weight)
            break
    return stat_score + np.random.normal(0, 5) + goal_bonus

def get_player_breakdown(player):
    """Return a short text summary of the stats relevant to the player's position.

    The first tag found in ``Pos`` (case-insensitive), checked in the order
    GK, DF, MF, FW, selects the summary; an empty string is returned when no
    tag matches.
    """
    role = player['Pos'].upper()
    if 'GK' in role:
        return f"Save%: {player['Save%']:.1f}"
    if 'DF' in role:
        return f"Tkl: {player['Tkl']}"
    if 'MF' in role:
        return f"Ast: {player['Ast']}, Gls: {player['Gls']}"
    if 'FW' in role:
        return f"Gls: {player['Gls']}, Ast: {player['Ast']}"
    return ""

def adjust_team_stats(team, result):
    """Update ``team['Morale']`` and ``team['Fitness']`` in place after a match.

    * ``'win'``  : morale +5 (capped at 100), fitness -5 (floored at 50)
    * ``'draw'`` : morale +2 (capped at 100), fitness untouched
    * anything else is treated as a loss: morale -5 (floored at 0),
      fitness -10 (floored at 50)
    """
    if result == 'draw':
        team['Morale'] = min(team['Morale'] + 2, 100)
        return

    won = result == 'win'
    if won:
        team['Morale'] = min(team['Morale'] + 5, 100)
    else:
        team['Morale'] = max(team['Morale'] - 5, 0)

    fitness_cost = 5 if won else 10
    team['Fitness'] = max(team['Fitness'] - fitness_cost, 50)


In [124]:
def _draw_scorers(best_xi, goals):
    """Distribute ``goals`` goals among ``best_xi``; return ``{player name: goals scored}``."""
    tally = {}
    if goals <= 0 or not best_xi:
        return tally
    potentials = [scoring_potential(p) for p in best_xi]
    # random.choices rejects weights that sum to zero, so fall back to a uniform draw.
    weights = potentials if sum(potentials) != 0 else [1] * len(potentials)
    for scorer in random.choices(best_xi, weights=weights, k=int(goals)):
        scorer_name = scorer['Player']
        tally[scorer_name] = tally.get(scorer_name, 0) + 1
    return tally


def simulate_match(teamA, teamB):
    """Simulate one match between two team dicts and update them in place.

    Each team's ``Attack`` and ``Defense`` are scaled by ``Fitness / 100`` and
    ``Morale / 100``. A team's goal count is a Poisson draw whose rate is
    ``(own attack - opponent defense) / 10``, floored at 0.1. Goals are then
    attributed to players of the scoring team, weighted by
    ``scoring_potential``. Points (3 / 1 / 0) are added and
    ``adjust_team_stats`` is applied to both teams. Finally every player is
    rated with ``compute_player_performance`` (10 bonus points per goal
    scored) and the highest-rated one is Man of the Match.

    Goal counts come from ``np.random`` and scorers from the stdlib
    ``random`` module, so both generators must be seeded for a reproducible
    run.

    Parameters
    ----------
    teamA, teamB : dict
        Team records with the keys ``best_xi``, ``Attack``, ``Defense``,
        ``Fitness``, ``Morale`` and ``Points``. ``Points``, ``Morale`` and
        ``Fitness`` are modified.

    Returns
    -------
    tuple
        ``(goalsA, goalsB, scorersA, scorersB, motm_str)`` where the scorer
        dicts map player name to goals scored and ``motm_str`` describes the
        Man of the Match (``"N/A"`` if neither team has any player).
    """
    teamA_attack = teamA['Attack'] * (teamA['Fitness'] / 100) * (teamA['Morale'] / 100)
    teamB_attack = teamB['Attack'] * (teamB['Fitness'] / 100) * (teamB['Morale'] / 100)
    teamA_defense = teamA['Defense'] * (teamA['Fitness'] / 100) * (teamA['Morale'] / 100)
    teamB_defense = teamB['Defense'] * (teamB['Fitness'] / 100) * (teamB['Morale'] / 100)

    # NOTE(review): whenever a team's attack does not exceed the opponent's
    # defense, the rate below sits at the 0.1 floor and the match is almost
    # always goalless. If the ratings are on different scales (e.g. goals vs.
    # tackles) this will be the common case -- confirm the ratings are
    # comparable or normalise them where they are computed.
    scaling_factor = 10.0
    lambdaA = max(0.1, (teamA_attack - teamB_defense) / scaling_factor)
    lambdaB = max(0.1, (teamB_attack - teamA_defense) / scaling_factor)

    goalsA = np.random.poisson(lambdaA)
    goalsB = np.random.poisson(lambdaB)

    scorersA = _draw_scorers(teamA['best_xi'], goalsA)
    scorersB = _draw_scorers(teamB['best_xi'], goalsB)

    if goalsA > goalsB:
        teamA['Points'] += 3
        adjust_team_stats(teamA, 'win')
        adjust_team_stats(teamB, 'loss')
    elif goalsB > goalsA:
        teamB['Points'] += 3
        adjust_team_stats(teamA, 'loss')
        adjust_team_stats(teamB, 'win')
    else:
        teamA['Points'] += 1
        teamB['Points'] += 1
        adjust_team_stats(teamA, 'draw')
        adjust_team_stats(teamB, 'draw')

    # Rate every player. The goal bonus is looked up only in the player's own
    # team tally, and ratings are kept in a list rather than a name-keyed
    # dict, so a same-named player on the other side can neither receive
    # someone else's bonus nor overwrite another player's rating.
    ratings = []
    for squad, tally in ((teamA['best_xi'], scorersA), (teamB['best_xi'], scorersB)):
        for player in squad:
            bonus = 10 * tally.get(player['Player'], 0)
            score = compute_player_performance(player, goal_bonus=bonus)
            ratings.append((score, get_player_breakdown(player), player))

    if ratings:
        motm_score, breakdown, motm_player = max(ratings, key=lambda entry: entry[0])
        motm_player_name = motm_player['Player']
        motm_str = f"{motm_player_name} ({motm_player['Pos']}, {motm_player['Nation']}) Score: {motm_score:.1f} ({breakdown})"
    else:
        motm_str = "N/A"

    return goalsA, goalsB, scorersA, scorersB, motm_str



In [125]:
def simulate_group_stage(teams_info):
    """Play a single round-robin between all teams and print the outcome.

    Every unordered pair of teams in ``teams_info`` meets once, in dictionary
    order. For each match the score line, the scorers of each side (if any)
    and the Man of the Match are printed, followed by a separator. After the
    last match the standings table is printed. ``teams_info`` is updated in
    place by ``simulate_match``; nothing is returned.
    """
    team_names = list(teams_info.keys())
    print("\nMATCH RESULTS\n" + "="*40)

    for position, teamA_name in enumerate(team_names):
        # Only pair with teams listed later so each fixture is played once.
        for teamB_name in team_names[position + 1:]:
            goalsA, goalsB, scorersA, scorersB, motm = simulate_match(teams_info[teamA_name], teams_info[teamB_name])

            result_lines = [f"{teamA_name} {goalsA} - {goalsB} {teamB_name}"]
            if scorersA:
                scorers_str = ", ".join([f"{p} ({cnt})" for p, cnt in scorersA.items()])
                result_lines.append(f"  {teamA_name} scorers: {scorers_str}")
            if scorersB:
                scorers_str = ", ".join([f"{p} ({cnt})" for p, cnt in scorersB.items()])
                result_lines.append(f"  {teamB_name} scorers: {scorers_str}")
            result_lines.append(f"  MOTM: {motm}")

            print("\n".join(result_lines))
            print("-"*40)

    print("\nFINAL STANDINGS\n" + "="*40)
    print_standings_table(teams_info)

def print_standings_table(teams_info):
    """Print one row per team, ordered by points from highest to lowest.

    The table shows the team name followed by its ``Points``, ``Attack``,
    ``Defense``, ``Morale`` and ``Fitness`` values taken from ``teams_info``.
    """
    columns = ['Team', 'Points', 'Attack', 'Defense', 'Morale', 'Fitness']
    rows = [
        [team_name] + [stats[key] for key in columns[1:]]
        for team_name, stats in teams_info.items()
    ]
    df_standings = pd.DataFrame(rows, columns=columns).sort_values(by='Points', ascending=False)
    print(df_standings)


In [126]:
# Play the round-robin between the selected nations and print each result and the standings.
# NOTE: simulate_match updates Points, Morale and Fitness inside teams_info in place, so
# re-running this cell without rebuilding teams_info continues from the previous totals.
simulate_group_stage(teams_info)


MATCH RESULTS
fr FRA 1 - 0 es ESP
  fr FRA scorers: Ousmane Dembélé (1)
  MOTM: David Soria (GK, es ESP) Score: 56.0 (Save%: 79.1)
----------------------------------------
fr FRA 0 - 0 de GER
  MOTM: Grégoire Coudert (GK, fr FRA) Score: 57.3 (Save%: 85.7)
----------------------------------------
fr FRA 0 - 0 eng ENG
  MOTM: Grégoire Coudert (GK, fr FRA) Score: 54.5 (Save%: 85.7)
----------------------------------------
fr FRA 0 - 0 it ITA
  MOTM: Grégoire Coudert (GK, fr FRA) Score: 65.4 (Save%: 85.7)
----------------------------------------
fr FRA 0 - 0 br BRA
  MOTM: Grégoire Coudert (GK, fr FRA) Score: 59.6 (Save%: 85.7)
----------------------------------------
es ESP 0 - 0 de GER
  MOTM: David Soria (GK, es ESP) Score: 53.2 (Save%: 79.1)
----------------------------------------
es ESP 0 - 0 eng ENG
  MOTM: Jason Steele (GK, eng ENG) Score: 54.5 (Save%: 75.0)
----------------------------------------
es ESP 0 - 0 it ITA
  MOTM: Elia Caprile (GK, it ITA) Score: 60.6 (Save%: 87.5)
---