In [20]:

def process_goals_data(goals_df):
    """
    Split goal-level data into pre-last-day match results and last-day goals.

    A 'men' column (1 when the row's year is in the module-level
    ``men_years``, otherwise 0) is written onto ``goals_df`` itself. For each
    (year, stage, men) combination the largest ``short_date`` is treated as
    the last match day.

    Parameters
    ----------
    goals_df : pandas.DataFrame
        One row per goal. Columns read here: 'year', 'stage', 'short_date',
        'goal_minute', 'home_team', 'away_team', 'scorer_nationality',
        'local_time' and 'score'.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``agg_goals_before_last_day``: one row per
        (year, stage, home_team, away_team) found before the last match day,
        with the goals counted per side, the score text of the first goal row,
        whether that text matches the counted score, and 'won'
        (1 home win, -1 away win, 0 draw).
        ``goals_last_day_sorted``: the goal rows of the last match day, sorted
        by 'short_date' and 'goal_minute', with exact duplicate rows removed.

    Notes
    -----
    Matches are rebuilt from goal rows, so a match without any goal row does
    not appear in ``agg_goals_before_last_day``.
    """
    # NOTE: this intentionally keeps writing 'men' onto the caller's frame,
    # because code outside this function may rely on the column being there.
    goals_df['men'] = goals_df['year'].apply(lambda x: 1 if x in men_years else 0)

    # Step 1: last match date for each tournament and group
    last_dates = goals_df.groupby(['year', 'stage', 'men'])['short_date'].max().reset_index()

    # Step 2: goals scored before the last match day (anti-join on the last dates)
    flagged = goals_df.merge(
        last_dates, on=['year', 'stage', 'short_date', 'men'], how='left', indicator=True
    )
    goals_before_last_day = flagged[flagged['_merge'] == 'left_only'].drop(columns=['_merge'])

    # Step 3: goals scored on the last match day
    goals_last_day = goals_df.merge(last_dates, on=['men', 'year', 'stage', 'short_date'], how='inner')

    # Step 4: chronological order, without exact duplicate rows
    goals_last_day_sorted = goals_last_day.sort_values(
        by=['short_date', 'goal_minute'], ascending=True
    ).drop_duplicates()

    # Step 5: rebuild one result row per match played before the last day
    result_columns = [
        'year', 'stage', 'home_team', 'away_team', 'local_time', 'short_date',
        'goals_home', 'goals_away', 'original_score', 'calculated_score',
        'score_match', 'men',
    ]
    results = []
    for match_id, group in goals_before_last_day.groupby(['year', 'stage', 'home_team', 'away_team']):
        first_goal = group.iloc[0]
        local_time = first_goal['local_time']
        score = first_goal['score']
        short_date = pd.to_datetime(local_time).date()

        # A goal counts for the home side when the scorer's nationality is the
        # home team; otherwise for the away side when it is the away team.
        by_home = group['home_team'] == group['scorer_nationality']
        by_away = ~by_home & (group['away_team'] == group['scorer_nationality'])
        goals_home = int(by_home.sum())
        goals_away = int(by_away.sum())

        calculated_score = f"{goals_home}-{goals_away}"
        if isinstance(score, str):
            # The score text may use an en or em dash instead of a hyphen.
            normalized_score = score.replace("–", "-").replace("—", "-")
            score_match = normalized_score == calculated_score
        else:
            # Missing / non-text score: nothing to compare against.
            score_match = False

        results.append({
            'year': match_id[0],
            'stage': match_id[1],
            'home_team': match_id[2],
            'away_team': match_id[3],
            'local_time': local_time,
            'short_date': short_date,
            'goals_home': goals_home,
            'goals_away': goals_away,
            'original_score': score,
            'calculated_score': calculated_score,
            'score_match': score_match,
            'men': 1 if match_id[0] in men_years else 0
        })

    # Explicit columns keep the frame well-formed even when there are no matches.
    agg_goals_before_last_day = pd.DataFrame(results, columns=result_columns)

    # Step 6: 'won' is the sign of the goal margin (1 home win, -1 away win, 0 draw)
    margin = agg_goals_before_last_day['goals_home'] - agg_goals_before_last_day['goals_away']
    agg_goals_before_last_day['won'] = (margin > 0).astype('int64') - (margin < 0).astype('int64')

    return agg_goals_before_last_day, goals_last_day_sorted


In [21]:
def calculate_points(results, years, win_result):
    """
    Sum the points earned over a sequence of match outcomes.

    ``results`` and ``years`` are walked in lockstep. An outcome equal to
    ``win_result`` (1 when scoring the home side, -1 for the away side) is a
    win worth 2 points for years up to and including 1992 and 3 points
    afterwards; an outcome of 0 is a draw worth 1 point; anything else is a
    loss worth 0 points.
    """
    def _points_for(outcome, season):
        if outcome == win_result:
            return 2 if season <= 1992 else 3
        return 1 if outcome == 0 else 0

    return sum(_points_for(outcome, season) for outcome, season in zip(results, years))

In [22]:

def aggregate_home_away_points(agg_goals_before_last_day):
    """
    Summarise each team's home record and away record separately.

    For every (year, stage, team, men) combination the goals scored, goals
    conceded, points (via ``calculate_points``, which applies the
    year-dependent value of a win) and number of matches are computed, once
    with the team as home side and once as away side.

    Returns a tuple ``(home_games, away_games)`` of DataFrames.
    """
    matches = agg_goals_before_last_day

    def _points_when(win_flag):
        # Build the reducer applied to each group's 'won' values; the years
        # are looked up in the full frame through the group's index labels.
        def _reduce(outcomes):
            seasons = matches.loc[outcomes.index, 'year']
            return calculate_points(outcomes, seasons, win_flag)
        return _reduce

    # Home perspective: a 'won' value of 1 is a win for the grouped team
    by_home_team = matches.groupby(['year', 'stage', 'home_team', 'men'])
    home_games = by_home_team.agg(
        goals_scored=('goals_home', 'sum'),
        goals_conceded=('goals_away', 'sum'),
        points_home=('won', _points_when(1)),
        match_count_home=('home_team', 'count'),
    ).reset_index()

    # Away perspective: a 'won' value of -1 is a win for the grouped team
    by_away_team = matches.groupby(['year', 'stage', 'away_team', 'men'])
    away_games = by_away_team.agg(
        goals_scored=('goals_away', 'sum'),
        goals_conceded=('goals_home', 'sum'),
        points_away=('won', _points_when(-1)),
        match_count_away=('away_team', 'count'),
    ).reset_index()

    return home_games, away_games


In [23]:
def uefa_before_last(home_games, away_games, agg_goals_before_last_day):
    """
    Build the per-team group table for the matches played before the last match day.

    Parameters
    ----------
    home_games, away_games : pandas.DataFrame
        Per-team home and away summaries with the columns produced by
        ``aggregate_home_away_points`` ('goals_scored', 'goals_conceded',
        'points_home'/'points_away', 'match_count_home'/'match_count_away',
        keyed by 'year', 'stage', 'home_team'/'away_team', 'men').
    agg_goals_before_last_day : pandas.DataFrame
        One row per match with 'year', 'stage', 'men', 'home_team',
        'away_team' and 'won' (1 home win, -1 away win, 0 draw); used for the
        head-to-head lookup.

    Returns
    -------
    pandas.DataFrame
        One row per team, ordered by year, stage, men and descending points,
        with columns 'year', 'stage', 'team', 'goals_scored', 'goals_conceded',
        'points', 'goals_difference', 'total_matches', 'men', 'tiebreaker'
        and 'standing'.

    Notes
    -----
    * A team with exactly one recorded match receives one extra point. The
      match table is rebuilt from goals, so the missing match is assumed to
      have been a goalless draw.
    * 'tiebreaker' is informational: for two neighbouring teams of the same
      group with equal points it names the favoured team (head-to-head winner,
      else goal difference, else goals scored); otherwise it is 'no need'.
    * 'standing' ranks teams inside each (year, stage, men) group by points,
      goal difference and goals scored, all descending.
    """
    group_keys = ['year', 'stage', 'men']

    # Step 1: one row per team, combining its home and away summaries
    all_games_before_last = pd.merge(
        home_games,
        away_games,
        left_on=['year', 'stage', 'home_team', 'men'],
        right_on=['year', 'stage', 'away_team', 'men'],
        how='outer',
        suffixes=('_home', '_away')
    )

    # Step 2: the team name comes from whichever side of the merge has it
    all_games_before_last['team'] = all_games_before_last['home_team'].fillna(all_games_before_last['away_team'])

    # Step 3: home + away totals; a missing side contributes 0
    for total, home_part, away_part in (
        ('goals_scored', 'goals_scored_home', 'goals_scored_away'),
        ('goals_conceded', 'goals_conceded_home', 'goals_conceded_away'),
        ('points', 'points_home', 'points_away'),
        ('total_matches', 'match_count_home', 'match_count_away'),
    ):
        all_games_before_last[total] = (
            all_games_before_last[home_part].fillna(0) + all_games_before_last[away_part].fillna(0)
        )
    all_games_before_last['goals_difference'] = (
        all_games_before_last['goals_scored'] - all_games_before_last['goals_conceded']
    )

    # Step 4: Adjust points if total_matches is 1 (indicating a likely 0-0 draw)
    all_games_before_last.loc[all_games_before_last['total_matches'] == 1, 'points'] += 1

    # Step 5: keep only the table columns
    all_games_before_last = all_games_before_last[['year', 'stage', 'team', 'goals_scored', 'goals_conceded', 'points', 'goals_difference', 'total_matches', 'men']]

    # Step 6: order rows by group, best points first; the fresh RangeIndex is
    # what the positional/label writes below rely on
    all_games_before_last = all_games_before_last.sort_values(
        by=['year', 'stage', 'men', 'points'],
        ascending=[True, True, True, False]
    ).reset_index(drop=True)

    all_games_before_last['tiebreaker'] = 'no need'

    def check_tiebreaker(row1, row2, agg_data):
        """
        Return the head-to-head winner between the two teams inside their own
        group, 'tie' for a drawn match, or 'no result' when they have not met.
        """
        same_group = (
            (agg_data['year'] == row1['year'])
            & (agg_data['stage'] == row1['stage'])
            & (agg_data['men'] == row1['men'])
        )
        paired = (
            ((agg_data['home_team'] == row1['team']) & (agg_data['away_team'] == row2['team']))
            | ((agg_data['home_team'] == row2['team']) & (agg_data['away_team'] == row1['team']))
        )
        match = agg_data[same_group & paired]
        if match.empty:
            return 'no result'

        played = match.iloc[0]
        if played['won'] == 1:
            return played['home_team']
        if played['won'] == -1:
            return played['away_team']
        return 'tie'

    # Step 7: compare neighbouring rows; only teams of the same group can tie
    for i in range(len(all_games_before_last) - 1):
        row1 = all_games_before_last.iloc[i]
        row2 = all_games_before_last.iloc[i + 1]

        same_group = all(row1[key] == row2[key] for key in group_keys)
        if not same_group or row1['points'] != row2['points']:
            continue

        year = row1['year']
        stage = row1['stage']
        men = row1['men']
        print(f"Tie in {year}, Stage: {stage}, Men: {men}, between {row1['team']} and {row2['team']} (Points: {row1['points']})")

        tiebreak_result = check_tiebreaker(row1, row2, agg_goals_before_last_day)

        if tiebreak_result != 'tie' and tiebreak_result != 'no result':
            print(f"Tie resolved by head-to-head: {tiebreak_result} won in {year}, Stage: {stage}")
            favored = tiebreak_result
        elif row1['goals_difference'] > row2['goals_difference']:
            favored = row1['team']
            print(f"Tie resolved by goals difference: {favored} favored in {year}, Stage: {stage}")
        elif row1['goals_difference'] < row2['goals_difference']:
            favored = row2['team']
            print(f"Tie resolved by goals difference: {favored} favored in {year}, Stage: {stage}")
        elif row1['goals_scored'] > row2['goals_scored']:
            favored = row1['team']
            print(f"Tie resolved by goals scored: {favored} favored in {year}, Stage: {stage}")
        else:
            favored = row2['team']
            print(f"Tie resolved by goals scored: {favored} favored in {year}, Stage: {stage}")

        all_games_before_last.loc[[i, i + 1], 'tiebreaker'] = favored

    # Step 8: rank inside each group on a fully sorted copy; assigning the
    # result back aligns on the index, so every row gets its own rank
    ranked = all_games_before_last.sort_values(
        by=group_keys + ['points', 'goals_difference', 'goals_scored'],
        ascending=[True, True, True, False, False, False]
    )
    all_games_before_last['standing'] = ranked.groupby(group_keys).cumcount() + 1

    # The home/away sums are floats after the NaN filling; store them as integers
    count_columns = ['goals_scored', 'goals_conceded', 'points', 'goals_difference', 'total_matches']
    all_games_before_last[count_columns] = all_games_before_last[count_columns].astype(int)

    return all_games_before_last


In [24]:
def uefa_final_euro(year, stage, men, all_games_before_last, goals_last_day_sorted, agg_goals_before_last_day):
    """
    Replay the last match day of one group goal by goal and track the live table.

    Parameters
    ----------
    year, stage, men
        Select the group in all three input frames.
    all_games_before_last : pandas.DataFrame
        Table before the last match day; columns read: 'year', 'stage', 'men',
        'team', 'goals_scored', 'goals_conceded', 'points', 'standing'.
    goals_last_day_sorted : pandas.DataFrame
        Goals of the last match day; columns read: 'year', 'stage', 'men',
        'goal_minute', 'home_team', 'away_team', 'scorer_nationality'.
    agg_goals_before_last_day : pandas.DataFrame
        Earlier match results; columns read: 'year', 'stage', 'men',
        'home_team', 'away_team', 'won' (1 home win, -1 away win, 0 draw).

    Returns
    -------
    pandas.DataFrame
        The group's rows with the before/last/total goal and point columns,
        'last_game_standing', the position counters '1st'..'4th', 'changes'
        (their sum), 'tied' and 'tied_won', as they stand after the final goal.

    Notes
    -----
    * Every team starts the last day on a 0-0 draw, i.e. with one last-game point.
    * A win is worth 2 points when ``year <= 1992`` and 3 points otherwise.
    * After each goal the teams are ordered by total points, then 'tied_won',
      then goal difference, then goals scored. 'tied_won' is 1 for a team that
      beat, earlier in this same group, another team currently on the same
      number of points.
    * A position counter is incremented whenever a team's standing after a
      goal differs from its standing after the previous goal.
    * The table after every goal is printed.
    """
    # Step 1: restrict every input to the requested group
    group_goals_tracking = all_games_before_last[
        (all_games_before_last['year'] == year) &
        (all_games_before_last['stage'] == stage) &
        (all_games_before_last['men'] == men)
    ].copy()

    group_goals_last_day = goals_last_day_sorted[
        (goals_last_day_sorted['year'] == year) &
        (goals_last_day_sorted['stage'] == stage) &
        (goals_last_day_sorted['men'] == men)
    ]

    # Head-to-head results only count when they come from this very group
    group_matches = agg_goals_before_last_day[
        (agg_goals_before_last_day['year'] == year) &
        (agg_goals_before_last_day['stage'] == stage) &
        (agg_goals_before_last_day['men'] == men)
    ]

    win_points = 2 if year <= 1992 else 3
    position_columns = {1: '1st', 2: '2nd', 3: '3rd', 4: '4th'}

    # Step 2: snapshot of the table before the last day
    group_goals_tracking['before_last_game_goals_scored'] = group_goals_tracking['goals_scored']
    group_goals_tracking['before_last_game_goals_conceded'] = group_goals_tracking['goals_conceded']
    group_goals_tracking['before_last_game_standing'] = group_goals_tracking['standing']
    group_goals_tracking['before_last_game_points'] = group_goals_tracking['points']

    group_goals_tracking['last_game_goals_scored'] = 0
    group_goals_tracking['last_game_goals_conceded'] = 0
    group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored']
    group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded']
    group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']

    # Every last-day match starts at 0-0, which is a draw worth one point
    group_goals_tracking['last_game_points'] = 1
    group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points'] + group_goals_tracking['last_game_points']

    group_goals_tracking['last_game_standing'] = group_goals_tracking['before_last_game_standing']

    # Position counters start at 1 for the position held before the last day
    for position, column in position_columns.items():
        group_goals_tracking[column] = (group_goals_tracking['before_last_game_standing'] == position).astype(int)

    group_goals_tracking['changes'] = 0  # recomputed after each goal as the sum of the counters
    group_goals_tracking['tied'] = False
    group_goals_tracking['tied_won'] = 0

    # Step 3: replay the goals in chronological order
    group_goals_last_day = group_goals_last_day.sort_values(by='goal_minute', ascending=True)

    print(f"\n=== Initial Standings for Year {year}, {stage}, Men: {men} Before Last Match Goals ===\n")
    display_columns = ['team', 'total_points', 'total_goals_scored', 'total_goals_conceded',
                       'total_goal_difference', 'before_last_game_points', 'before_last_game_standing']
    print(group_goals_tracking[display_columns].to_string(index=False))
    print("\n====================================================\n")

    # Standing of each team after the previous goal, keyed by team name
    previous_standing = dict(zip(group_goals_tracking['team'], group_goals_tracking['last_game_standing']))

    for _, goal in group_goals_last_day.iterrows():
        home_team = goal['home_team']
        away_team = goal['away_team']
        player_team = goal['scorer_nationality']

        print(f"Analyzing goal: {goal['goal_minute']} minute, Player team: {player_team}, Home: {home_team}, Away: {away_team}")

        # Step 4: credit the goal to the scorer's side and debit the opponent
        is_home = group_goals_tracking['team'] == home_team
        is_away = group_goals_tracking['team'] == away_team
        if player_team == home_team:
            group_goals_tracking.loc[is_home, 'last_game_goals_scored'] += 1
            group_goals_tracking.loc[is_away, 'last_game_goals_conceded'] += 1
        elif player_team == away_team:
            group_goals_tracking.loc[is_away, 'last_game_goals_scored'] += 1
            group_goals_tracking.loc[is_home, 'last_game_goals_conceded'] += 1

        # Step 5: refresh goal totals
        group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored'] + group_goals_tracking['last_game_goals_scored']
        group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded'] + group_goals_tracking['last_game_goals_conceded']
        group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']

        # Step 6: points from the current last-day scoreline
        scored = group_goals_tracking['last_game_goals_scored']
        conceded = group_goals_tracking['last_game_goals_conceded']
        group_goals_tracking['last_game_points'] = (
            (scored > conceded).astype(int) * win_points + (scored == conceded).astype(int)
        )
        group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points'] + group_goals_tracking['last_game_points']

        # Step 7: flag teams sharing a points total and settle them head-to-head
        group_goals_tracking['tied'] = group_goals_tracking.duplicated(subset=['total_points'], keep=False)
        tied_rows = group_goals_tracking[group_goals_tracking['tied']]
        tied_points = dict(zip(tied_rows['team'], tied_rows['total_points']))

        head_to_head_winners = set()
        seen_pairs = set()
        for _, played in group_matches.iterrows():
            home, away = played['home_team'], played['away_team']
            pair = frozenset((home, away))
            if pair in seen_pairs:
                continue  # only the first recorded meeting of a pair counts
            seen_pairs.add(pair)
            if home not in tied_points or away not in tied_points:
                continue
            if tied_points[home] != tied_points[away]:
                continue  # both are tied, but not with each other
            if played['won'] == 1:
                head_to_head_winners.add(home)
            elif played['won'] == -1:
                head_to_head_winners.add(away)

        group_goals_tracking['tied_won'] = group_goals_tracking['team'].isin(head_to_head_winners).astype(int)

        # Step 8: order the table and number the positions
        group_goals_tracking = group_goals_tracking.sort_values(by=['total_points', 'tied_won', 'total_goal_difference', 'total_goals_scored'],
                                                                ascending=[False, False, False, False])
        group_goals_tracking['last_game_standing'] = list(range(1, len(group_goals_tracking) + 1))

        # Step 9: count a visit to a position whenever a team's standing moved
        earlier = group_goals_tracking['team'].map(previous_standing)
        moved = group_goals_tracking['last_game_standing'] != earlier
        for position, column in position_columns.items():
            landed = moved & (group_goals_tracking['last_game_standing'] == position)
            group_goals_tracking.loc[landed, column] += 1

        previous_standing = dict(zip(group_goals_tracking['team'], group_goals_tracking['last_game_standing']))

        # Step 10: Calculate changes as the sum of 1st, 2nd, 3rd, and 4th
        group_goals_tracking['changes'] = group_goals_tracking[['1st', '2nd', '3rd', '4th']].sum(axis=1)

        # Step 11: show the table after this goal
        print("\n=== Updated Standings After This Goal ===\n")
        display_columns = ['team', 'total_points', 'total_goals_scored', 'total_goals_conceded',
                           'total_goal_difference', 'last_game_points', 'last_game_standing',
                           'changes', '1st', '2nd', '3rd', '4th', 'tied', 'tied_won']
        print(group_goals_tracking[display_columns].to_string(index=False))
        print("\n========================================\n")

    return group_goals_tracking


In [25]:
def track_composition_changes(year, stage, men, all_games_before_last, goals_last_day_sorted, agg_goals_before_last_day):
    """
    Record every change in the set of top-placed teams during a group's last match day.

    Parameters
    ----------
    year, stage, men
        Select the group in all three input frames.
    all_games_before_last : pandas.DataFrame
        Table before the last match day; columns read: 'year', 'stage', 'men',
        'team', 'goals_scored', 'goals_conceded', 'points', 'standing'.
    goals_last_day_sorted : pandas.DataFrame
        Goals of the last match day; columns read: 'year', 'stage', 'men',
        'goal_minute', 'home_team', 'away_team', 'scorer_nationality'.
    agg_goals_before_last_day : pandas.DataFrame
        Earlier match results; columns read: 'year', 'stage', 'men',
        'home_team', 'away_team', 'won' (1 home win, -1 away win, 0 draw).

    Returns
    -------
    pandas.DataFrame
        One row for the initial composition (``change_num`` 0, ``goal_time``
        'initial') plus one row per goal after which the set of top teams
        differs from the previous set. Each row holds the goal's minute and
        teams, 'new_top_teams' (best first) and the teams in 1st/2nd/3rd place.

    Notes
    -----
    * The top set has 3 teams when ``year <= 1994`` and 2 teams otherwise.
      NOTE(review): presumably the number of places that matter for
      qualification; confirm the cut-off year against the tournament rules.
    * A win is worth 2 points when ``year <= 1992`` and 3 points otherwise,
      the same rule the rest of this notebook applies.
    * Ordering after each goal: total points, then 'tied_won', then goal
      difference, then goals scored. 'tied_won' is 1 for a team that beat,
      earlier in this same group, another team currently level on points.
    * The initial standings are printed.
    """
    # Step 1: restrict every input to the requested group
    group_goals_tracking = all_games_before_last[
        (all_games_before_last['year'] == year) &
        (all_games_before_last['stage'] == stage) &
        (all_games_before_last['men'] == men)
    ].copy()

    group_goals_last_day = goals_last_day_sorted[
        (goals_last_day_sorted['year'] == year) &
        (goals_last_day_sorted['stage'] == stage) &
        (goals_last_day_sorted['men'] == men)
    ]

    # Head-to-head results only count when they come from this very group
    group_matches = agg_goals_before_last_day[
        (agg_goals_before_last_day['year'] == year) &
        (agg_goals_before_last_day['stage'] == stage) &
        (agg_goals_before_last_day['men'] == men)
    ]

    win_points = 2 if year <= 1992 else 3

    def _podium(ordered_teams):
        # Teams in places 1-3 of an ordered team list; None where the list is shorter.
        padded = list(ordered_teams[:3]) + [None] * 3
        return {'1st': padded[0], '2nd': padded[1], '3rd': padded[2]}

    # Snapshot of the table before the last day
    group_goals_tracking['before_last_game_goals_scored'] = group_goals_tracking['goals_scored']
    group_goals_tracking['before_last_game_goals_conceded'] = group_goals_tracking['goals_conceded']
    group_goals_tracking['before_last_game_standing'] = group_goals_tracking['standing']
    group_goals_tracking['before_last_game_points'] = group_goals_tracking['points']
    group_goals_tracking['last_game_goals_scored'] = 0
    group_goals_tracking['last_game_goals_conceded'] = 0
    group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored']
    group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded']
    group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']
    group_goals_tracking['last_game_points'] = 0
    group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points']
    group_goals_tracking['tied_won'] = 0

    # Add one point to each team for a 0-0 starting score
    group_goals_tracking['total_points'] += 1

    print(f"\n=== Initial Standings for {stage}, {year} (with 0-0 points added) ===")
    print(group_goals_tracking[['team', 'total_points', 'total_goals_scored',
                                'total_goals_conceded', 'total_goal_difference', 'before_last_game_standing']].to_string(index=False))
    print("\n====================================================\n")

    # Define top standings limit based on the year
    top_standings_limit = 3 if year <= 1994 else 2

    # Step 2: initial composition (change_num = 0)
    sorted_initial = group_goals_tracking.sort_values(by=['total_points', 'total_goal_difference', 'total_goals_scored'], ascending=[False, False, False])
    initial_top = sorted_initial.nsmallest(top_standings_limit, 'before_last_game_standing')['team'].tolist()
    first_entry = {
        'year': year,
        'stage': stage,
        'men': men,
        'change_num': 0,
        'goal_time': 'initial',
        'home_team': None,
        'away_team': None,
        'scorer_team': None,
        'new_top_teams': initial_top,
    }
    first_entry.update(_podium(sorted_initial['team'].tolist()))
    composition_changes = [first_entry]

    # The composition every later goal is compared against; always a fresh
    # set, never one that is handed out or mutated afterwards
    reference_top_teams = set(initial_top)
    change_counter = 0

    # Step 3: replay the goals in chronological order
    group_goals_last_day = group_goals_last_day.sort_values(by=['goal_minute'])

    for _, goal in group_goals_last_day.iterrows():
        home_team = goal['home_team']
        away_team = goal['away_team']
        scorer_team = goal['scorer_nationality']

        # Step 4: credit the goal to the scorer's side and debit the opponent
        is_home = group_goals_tracking['team'] == home_team
        is_away = group_goals_tracking['team'] == away_team
        if scorer_team == home_team:
            group_goals_tracking.loc[is_home, 'last_game_goals_scored'] += 1
            group_goals_tracking.loc[is_away, 'last_game_goals_conceded'] += 1
        elif scorer_team == away_team:
            group_goals_tracking.loc[is_away, 'last_game_goals_scored'] += 1
            group_goals_tracking.loc[is_home, 'last_game_goals_conceded'] += 1

        group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored'] + group_goals_tracking['last_game_goals_scored']
        group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded'] + group_goals_tracking['last_game_goals_conceded']
        group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']

        # Step 5: points from the current last-day scoreline
        scored = group_goals_tracking['last_game_goals_scored']
        conceded = group_goals_tracking['last_game_goals_conceded']
        group_goals_tracking['last_game_points'] = (
            (scored > conceded).astype(int) * win_points + (scored == conceded).astype(int)
        )
        group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points'] + group_goals_tracking['last_game_points']

        # Step 6: flag teams sharing a points total and settle them head-to-head
        group_goals_tracking['tied'] = group_goals_tracking.duplicated(subset=['total_points'], keep=False)
        tied_rows = group_goals_tracking[group_goals_tracking['tied']]
        tied_points = dict(zip(tied_rows['team'], tied_rows['total_points']))

        head_to_head_winners = set()
        seen_pairs = set()
        for _, played in group_matches.iterrows():
            home, away = played['home_team'], played['away_team']
            pair = frozenset((home, away))
            if pair in seen_pairs:
                continue  # only the first recorded meeting of a pair counts
            seen_pairs.add(pair)
            if home not in tied_points or away not in tied_points:
                continue
            if tied_points[home] != tied_points[away]:
                continue  # both are tied, but not with each other
            if played['won'] == 1:
                head_to_head_winners.add(home)
            elif played['won'] == -1:
                head_to_head_winners.add(away)

        group_goals_tracking['tied_won'] = group_goals_tracking['team'].isin(head_to_head_winners).astype(int)

        # Step 7: order the table and number the positions
        group_goals_tracking = group_goals_tracking.sort_values(
            by=['total_points', 'tied_won', 'total_goal_difference', 'total_goals_scored'],
            ascending=[False, False, False, False]
        )
        group_goals_tracking['last_game_standing'] = list(range(1, len(group_goals_tracking) + 1))

        # Step 8: record the goal when it changed who is in the top places
        ordered_teams = group_goals_tracking['team'].tolist()
        current_top = ordered_teams[:top_standings_limit]
        if set(current_top) != reference_top_teams:
            change_counter += 1
            reference_top_teams = set(current_top)
            entry = {
                'year': year,
                'stage': stage,
                'men': men,
                'change_num': change_counter,
                'goal_time': goal['goal_minute'],
                'home_team': home_team,
                'away_team': away_team,
                'scorer_team': scorer_team,
                'new_top_teams': current_top,
            }
            entry.update(_podium(ordered_teams))
            composition_changes.append(entry)

    return pd.DataFrame(composition_changes)
