# League standings and points before the last match day

In [2]:
def process_goals_data(goals_df):
    """
    Split goal records into pre-final-day match results and final-day goals.

    The last match date is the maximum 'short_date' within each
    ('year', 'stage') pair.  Goals from earlier dates are aggregated into one
    row per ('year', 'stage', 'home_team', 'away_team') match; goals from the
    last date are returned row by row.

    Args:
        goals_df: DataFrame with one row per goal.  Columns read here:
            'year', 'stage', 'short_date', 'home_team', 'away_team',
            'scorer_nationality', 'local_time', 'score' and 'goal_minute'.

    Returns:
        Tuple ``(agg_goals_before_last_day, goals_last_day_sorted)``.

        ``agg_goals_before_last_day`` has one row per match played before the
        last date, with columns 'year', 'stage', 'home_team', 'away_team',
        'local_time', 'short_date', 'goals_home', 'goals_away',
        'original_score', 'calculated_score', 'score_match' and 'won'
        (1 = home win, -1 = away win, 0 = draw).  The columns are present
        even when there are no such matches.

        ``goals_last_day_sorted`` holds the de-duplicated goal rows of the
        last date, sorted by 'short_date' and 'goal_minute'.

    Note:
        Matches are reconstructed from goal rows only, so a match without any
        goal row produces no result row.
    """
    date_keys = ['year', 'stage', 'short_date']
    match_keys = ['year', 'stage', 'home_team', 'away_team']
    result_columns = [
        'year', 'stage', 'home_team', 'away_team', 'local_time', 'short_date',
        'goals_home', 'goals_away', 'original_score', 'calculated_score',
        'score_match', 'won',
    ]

    # Step 1: Find the last match date for each tournament and stage
    last_dates = goals_df.groupby(['year', 'stage'])['short_date'].max().reset_index()

    # Step 2: Goals from every date except the last one.  The merge indicator
    # marks rows whose date has no counterpart in last_dates as 'left_only'.
    flagged = goals_df.merge(last_dates, on=date_keys, how='left', indicator=True)
    goals_before_last_day = flagged[flagged['_merge'] == 'left_only'].drop(columns=['_merge'])

    # Steps 3-4: Goals of the last date only, in chronological order, de-duplicated
    goals_last_day_sorted = (
        goals_df.merge(last_dates, on=date_keys, how='inner')
        .sort_values(by=['short_date', 'goal_minute'], ascending=True)
        .drop_duplicates()
    )

    # Aggregate the earlier goals into one result row per match
    results = []
    for match_id, group in goals_before_last_day.groupby(match_keys):
        first_goal = group.iloc[0]
        local_time = first_goal['local_time']
        score = first_goal['score']

        # Extract short_date from local_time (convert to date)
        short_date = pd.to_datetime(local_time).date()

        # A goal is credited to the side whose name equals the scorer's
        # nationality; rows matching neither side are not counted.
        # NOTE(review): an own goal would be credited to the scorer's own
        # team unless the data already accounts for it -- confirm.
        scored_by_home = group['scorer_nationality'] == group['home_team']
        scored_by_away = (group['scorer_nationality'] == group['away_team']) & ~scored_by_home
        goals_home = int(scored_by_home.sum())
        goals_away = int(scored_by_away.sum())

        # Compare the recorded score with the recomputed one; en/em dashes are
        # normalised to a hyphen first.  A missing or non-string score simply
        # does not match instead of raising.
        calculated_score = f"{goals_home}-{goals_away}"
        if isinstance(score, str):
            normalized_score = score.replace("–", "-").replace("—", "-")
        else:
            normalized_score = None
        score_match = normalized_score == calculated_score

        results.append({
            'year': match_id[0],
            'stage': match_id[1],
            'home_team': match_id[2],
            'away_team': match_id[3],
            'local_time': local_time,
            'short_date': short_date,
            'goals_home': goals_home,
            'goals_away': goals_away,
            'original_score': score,
            'calculated_score': calculated_score,
            'score_match': score_match,
            # 1 = home win, -1 = away win, 0 = draw
            'won': int(goals_home > goals_away) - int(goals_home < goals_away),
        })

    # Passing the column list keeps the schema stable when `results` is empty
    agg_goals_before_last_day = pd.DataFrame(results, columns=result_columns)

    return agg_goals_before_last_day, goals_last_day_sorted


In [3]:
def calculate_points(results, years, win_result):
    """
    Sum the league points earned over a sequence of match results.

    Args:
        results: Iterable of match outcomes (1, -1 or 0 as used by callers).
        years: Iterable of tournament years, paired with `results` by position.
        win_result: The outcome value that counts as a win for the side being
            scored (1 for the home side, -1 for the away side).

    Returns:
        Total points: a win is worth 2 points when the year is 1992 or
        earlier and 3 points otherwise, an outcome of 0 (draw) is worth
        1 point, and anything else is worth nothing.
    """
    total = 0
    for outcome, season in zip(results, years):
        if outcome == win_result:
            # Win: value depends on the points system in force that year
            total += 3 if season > 1992 else 2
        elif outcome == 0:
            # Draw
            total += 1
        # Any other outcome is a loss and adds nothing
    return total

In [4]:
def aggregate_home_away_points(agg_goals_before_last_day):
    """
    Aggregate per-team home and away records from match results.

    Each ('year', 'stage', 'home_team', 'away_team') combination is counted
    once: duplicate rows for the same combination are dropped (first kept)
    before aggregating.  Points are computed with `calculate_points`, which
    applies the year-dependent value of a win.

    Args:
        agg_goals_before_last_day: DataFrame with one row per match and the
            columns 'year', 'stage', 'home_team', 'away_team', 'goals_home',
            'goals_away' and 'won' (1 = home win, -1 = away win, 0 = draw).

    Returns:
        Tuple ``(home_games, away_games)``.  `home_games` has one row per
        ('year', 'stage', 'home_team') with 'goals_scored', 'goals_conceded',
        'points_home' and 'match_count_home'; `away_games` is the mirror
        image keyed by 'away_team' with 'points_away' and 'match_count_away'.
    """
    # The de-duplicated frame is the one that must be aggregated; the original
    # computed it but then grouped the raw input instead.
    unique_matches = agg_goals_before_last_day.drop_duplicates(
        subset=['year', 'stage', 'home_team', 'away_team']
    )

    def points_for(win_result):
        """Build an aggregator that scores a group's 'won' values for one side."""
        # drop_duplicates keeps the original index labels, so the years for a
        # group can be looked up by that group's index.
        return lambda won: calculate_points(won, unique_matches.loc[won.index, 'year'], win_result)

    # Step 1: Home games -- goals for are 'goals_home', a win is won == 1
    home_games = unique_matches.groupby(['year', 'stage', 'home_team']).agg(
        goals_scored=('goals_home', 'sum'),
        goals_conceded=('goals_away', 'sum'),
        points_home=('won', points_for(1)),
        match_count_home=('home_team', 'count')
    ).reset_index()

    # Step 2: Away games -- goals for are 'goals_away', a win is won == -1
    away_games = unique_matches.groupby(['year', 'stage', 'away_team']).agg(
        goals_scored=('goals_away', 'sum'),
        goals_conceded=('goals_home', 'sum'),
        points_away=('won', points_for(-1)),
        match_count_away=('away_team', 'count')
    ).reset_index()

    return home_games, away_games


In [5]:
def uefa_before_last(home_games, away_games, agg_goals_before_last_day, team_counts):
    """
    Build the group standings as they stood before the last match day.

    Home and away records are combined per team, every team listed in
    `team_counts` is guaranteed a row, teams with fewer than two recorded
    matches are topped up with assumed 0-0 draws, and teams level on points
    are separated by head-to-head result, then goal difference, then goals
    scored.

    Args:
        home_games: Per-team home records with columns 'year', 'stage',
            'home_team', 'goals_scored', 'goals_conceded', 'points_home'
            and 'match_count_home'.
        away_games: Per-team away records with columns 'year', 'stage',
            'away_team', 'goals_scored', 'goals_conceded', 'points_away'
            and 'match_count_away'.
        agg_goals_before_last_day: One row per match with 'year', 'stage',
            'home_team', 'away_team' and 'won' (1 = home win, -1 = away win,
            0 = draw); used for head-to-head tie-breaks.
        team_counts: DataFrame with 'year', 'stage' and a list-valued
            'team_list' column naming every team of the group.

    Returns:
        DataFrame sorted by year, stage and standing with the columns 'year',
        'stage', 'team', 'goals_scored', 'goals_conceded', 'points',
        'goals_difference', 'total_matches', 'tiebreaker', 'tie_won' and
        'standing'.
    """
    numeric_columns = ['goals_scored', 'goals_conceded', 'points', 'goals_difference', 'total_matches']

    # Step 1: Put each team's home record next to its away record
    all_games_before_last = pd.merge(
        home_games,
        away_games,
        left_on=['year', 'stage', 'home_team'],
        right_on=['year', 'stage', 'away_team'],
        how='outer',
        suffixes=('_home', '_away')
    )

    # Step 2: One 'team' column plus home+away totals.  A team that only
    # played at home (or only away) has NaN on the other side, hence fillna.
    all_games_before_last['team'] = all_games_before_last['home_team'].fillna(all_games_before_last['away_team'])
    combined_columns = {
        'goals_scored': ('goals_scored_home', 'goals_scored_away'),
        'goals_conceded': ('goals_conceded_home', 'goals_conceded_away'),
        'points': ('points_home', 'points_away'),
        'total_matches': ('match_count_home', 'match_count_away'),
    }
    for combined, (home_column, away_column) in combined_columns.items():
        all_games_before_last[combined] = (
            all_games_before_last[home_column].fillna(0) + all_games_before_last[away_column].fillna(0)
        )

    # Step 3: Ensure all teams from team_counts (using team_list) are included
    team_counts = team_counts.explode('team_list').rename(columns={'team_list': 'team'})
    all_teams = team_counts[['year', 'stage', 'team']].drop_duplicates()
    all_games_before_last = all_teams.merge(
        all_games_before_last,
        on=['year', 'stage', 'team'],
        how='left'
    )

    # Step 4: Teams without any recorded match start from zero
    all_games_before_last = all_games_before_last.fillna(
        {'goals_scored': 0, 'goals_conceded': 0, 'points': 0, 'total_matches': 0}
    )

    # Step 5: Calculate goal difference
    all_games_before_last['goals_difference'] = all_games_before_last['goals_scored'] - all_games_before_last['goals_conceded']

    # Step 6: Keep only the standings columns (copy so the .loc writes below
    # operate on an independent frame)
    all_games_before_last = all_games_before_last[['year', 'stage', 'team'] + numeric_columns].copy()

    # Step 7a: Teams with one recorded match are assumed to have drawn the
    # other one 0-0.  This must run before 7b: it sets total_matches to 2,
    # so these rows are not matched again by the zero-match rule.
    one_match = all_games_before_last['total_matches'] == 1
    all_games_before_last.loc[one_match, 'points'] += 1
    all_games_before_last.loc[one_match, 'total_matches'] = 2

    # Step 7b: Teams with no recorded match are assumed to have drawn twice 0-0
    no_match = all_games_before_last['total_matches'] == 0
    all_games_before_last.loc[no_match, 'points'] += 2
    all_games_before_last.loc[no_match, 'total_matches'] = 2

    # Step 8: Initial sorting by points
    all_games_before_last = all_games_before_last.sort_values(
        by=['year', 'stage', 'points'],
        ascending=[True, True, False]
    ).reset_index(drop=True)

    # Step 9: Define a function to apply the tie-breaker criteria
    def apply_tiebreaker(row1, row2, agg_data):
        """
        Resolve a tie between two standings rows of the same year and stage.

        Returns ``(winner, tie_won_row1, tie_won_row2)`` where `winner` is the
        winning team's name, or 'tie' with both flags 0 when no criterion
        separates the teams.
        """
        # Look for the head-to-head match within the same year and stage
        match = agg_data[
            (agg_data['year'] == row1['year']) & (agg_data['stage'] == row1['stage']) &
            (((agg_data['home_team'] == row1['team']) & (agg_data['away_team'] == row2['team'])) |
             ((agg_data['home_team'] == row2['team']) & (agg_data['away_team'] == row1['team'])))
        ]

        # First criterion: head-to-head result.  'won' is expressed from the
        # home side's point of view, so it has to be mapped to a team name
        # before deciding which of the two rows it favours.
        if not match.empty:
            head_to_head = match.iloc[0]  # There should only be one relevant match
            if head_to_head['won'] == 1:
                winner = head_to_head['home_team']
            elif head_to_head['won'] == -1:
                winner = head_to_head['away_team']
            else:
                winner = None  # Drawn meeting: fall through to the next criteria
            if winner == row1['team']:
                return row1['team'], 1, 0
            if winner == row2['team']:
                return row2['team'], 0, 1

        # Second criterion: Goal difference
        if row1['goals_difference'] > row2['goals_difference']:
            return row1['team'], 1, 0
        elif row1['goals_difference'] < row2['goals_difference']:
            return row2['team'], 0, 1

        # Third criterion: Goals scored
        if row1['goals_scored'] > row2['goals_scored']:
            return row1['team'], 1, 0
        elif row1['goals_scored'] < row2['goals_scored']:
            return row2['team'], 0, 1

        # If all criteria are still tied, mark as a tie
        return 'tie', 0, 0

    # Step 10: Apply tie-breaking to each pair of adjacent rows level on points.
    # NOTE: only neighbouring rows are compared, so with three or more teams
    # level on points a later pair overwrites the flags set by an earlier one.
    all_games_before_last['tiebreaker'] = 'no need'
    all_games_before_last['tie_won'] = 0

    for i in range(len(all_games_before_last) - 1):
        row1 = all_games_before_last.iloc[i]
        row2 = all_games_before_last.iloc[i + 1]

        same_group = row1['year'] == row2['year'] and row1['stage'] == row2['stage']
        if not (same_group and row1['points'] == row2['points']):
            continue

        tiebreak_result, tie_won_row1, tie_won_row2 = apply_tiebreaker(row1, row2, agg_goals_before_last_day)
        if tiebreak_result != 'tie':
            # The index was reset in step 8, so labels equal positions here
            all_games_before_last.at[i, 'tiebreaker'] = tiebreak_result
            all_games_before_last.at[i, 'tie_won'] = tie_won_row1
            all_games_before_last.at[i + 1, 'tiebreaker'] = tiebreak_result
            all_games_before_last.at[i + 1, 'tie_won'] = tie_won_row2

    # Step 11: Determine final standing based on points, tie_won, and other criteria
    all_games_before_last = all_games_before_last.sort_values(
        by=['year', 'stage', 'points', 'tie_won', 'goals_difference', 'goals_scored'],
        ascending=[True, True, False, False, False, False]
    ).reset_index(drop=True)

    # Step 12: Assign standings within each group
    all_games_before_last['standing'] = all_games_before_last.groupby(['year', 'stage']).cumcount() + 1

    # Step 13: The merges and fillna calls leave floats; store whole numbers as ints
    all_games_before_last[numeric_columns] = all_games_before_last[numeric_columns].astype(int)

    return all_games_before_last


# Last match day: league standings and changes

In [6]:
def uefa_final_euro(year, stage, all_games_before_last, goals_last_day_sorted, agg_goals_before_last_day):
    """
    Replay the last match day of one group goal by goal and track the table.

    Starting from the standings before the last match day (every last-day
    match standing at 0-0, i.e. one point each), each goal of the last day
    is applied in order of 'goal_minute'.  After every goal the live points,
    goal totals and standings are recomputed, the standings are printed, and
    each team's '1st'..'4th' counter is incremented whenever the team moves
    into that position.

    Teams level on points are ordered by head-to-head result (taken from
    `agg_goals_before_last_day`, restricted to this year and stage), then
    goal difference, then goals scored.

    Args:
        year: Tournament year; a win is worth 2 points when year <= 1992 and
            3 points otherwise.
        stage: Stage (group) label to process.
        all_games_before_last: Standings before the last match day with the
            columns 'year', 'stage', 'team', 'goals_scored', 'goals_conceded',
            'points' and 'standing'.
        goals_last_day_sorted: One row per last-day goal with 'year', 'stage',
            'home_team', 'away_team', 'scorer_nationality' and 'goal_minute'.
        agg_goals_before_last_day: One row per earlier match with 'year',
            'stage', 'home_team', 'away_team' and 'won' (1 = home win,
            -1 = away win, 0 = draw).

    Returns:
        The tracking DataFrame for the group, sorted by final live standing,
        including 'total_points', 'last_game_standing', the position counters
        '1st'..'4th', their sum 'changes', and the 'tied' / 'tied_won' flags.
    """
    position_columns = {1: '1st', 2: '2nd', 3: '3rd', 4: '4th'}
    win_points = 2 if year <= 1992 else 3

    # Step 1: Filter the data for the specific year and stage
    group_goals_tracking = all_games_before_last[
        (all_games_before_last['year'] == year) &
        (all_games_before_last['stage'] == stage)
    ].copy()

    group_goals_last_day = goals_last_day_sorted[
        (goals_last_day_sorted['year'] == year) &
        (goals_last_day_sorted['stage'] == stage)
    ]

    # Head-to-head results must come from this group only; without this
    # filter a meeting of the same two teams in another tournament could be
    # picked up and decide the tie.
    head_to_head = agg_goals_before_last_day[
        (agg_goals_before_last_day['year'] == year) &
        (agg_goals_before_last_day['stage'] == stage)
    ]

    def head_to_head_winner(team1, team2):
        """Return the winner of the earlier team1/team2 meeting, or None."""
        meetings = head_to_head[
            ((head_to_head['home_team'] == team1) & (head_to_head['away_team'] == team2)) |
            ((head_to_head['home_team'] == team2) & (head_to_head['away_team'] == team1))
        ]
        if meetings.empty:
            return None
        meeting = meetings.iloc[0]
        # 'won' holds 1 for a home win, -1 for an away win, 0 for a draw
        if meeting['won'] == 1:
            return meeting['home_team']
        if meeting['won'] == -1:
            return meeting['away_team']
        return None

    # Step 2: Initialize columns for tracking team performance
    group_goals_tracking['before_last_game_goals_scored'] = group_goals_tracking['goals_scored']
    group_goals_tracking['before_last_game_goals_conceded'] = group_goals_tracking['goals_conceded']
    group_goals_tracking['before_last_game_standing'] = group_goals_tracking['standing']
    group_goals_tracking['before_last_game_points'] = group_goals_tracking['points']

    group_goals_tracking['last_game_goals_scored'] = 0
    group_goals_tracking['last_game_goals_conceded'] = 0
    group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored']
    group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded']
    group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']
    group_goals_tracking['last_game_points'] = 0
    group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points']

    # Initialize last_game_standing to the initial standings
    group_goals_tracking['last_game_standing'] = group_goals_tracking['before_last_game_standing']

    # Add one point to each team for a 0-0 starting score
    group_goals_tracking['total_points'] += 1

    # Each team starts with one visit to the position it held beforehand
    for position, column in position_columns.items():
        group_goals_tracking[column] = (group_goals_tracking['before_last_game_standing'] == position).astype(int)

    group_goals_tracking['changes'] = 0  # Recomputed after each goal as the sum of 1st..4th
    group_goals_tracking['tied'] = False  # Flags teams sharing their points total with another team
    group_goals_tracking['tied_won'] = 0  # 1 when the team won a head-to-head against a level team

    # Step 3: Sort group_goals_last_day by goal_minute in ascending order
    group_goals_last_day = group_goals_last_day.sort_values(by='goal_minute', ascending=True)

    # Print the year, stage, and standings before starting the loop for last match goals
    print(f"\n=== Initial Standings for Year {year}, {stage} Before Last Match Goals ===\n")
    display_columns = ['team', 'total_points', 'total_goals_scored', 'total_goals_conceded',
                       'total_goal_difference', 'before_last_game_points', 'before_last_game_standing']
    print(group_goals_tracking[display_columns].to_string(index=False))
    print("\n====================================================\n")

    # Step 4: Apply the last-day goals one at a time
    previous_standings = group_goals_tracking['last_game_standing'].copy()

    for _, goal in group_goals_last_day.iterrows():
        home_team = goal['home_team']
        away_team = goal['away_team']
        player_team = goal['scorer_nationality']

        # Print goal information for each goal
        print(f"Analyzing goal: {goal['goal_minute']} minute, Player team: {player_team}, Home: {home_team}, Away: {away_team}")

        # Credit the goal to the side matching the scorer's nationality;
        # a goal matching neither side changes no score.
        if player_team == home_team:
            scoring_team, conceding_team = home_team, away_team
        elif player_team == away_team:
            scoring_team, conceding_team = away_team, home_team
        else:
            scoring_team = conceding_team = None

        if scoring_team is not None:
            group_goals_tracking.loc[group_goals_tracking['team'] == scoring_team, 'last_game_goals_scored'] += 1
            group_goals_tracking.loc[group_goals_tracking['team'] == conceding_team, 'last_game_goals_conceded'] += 1

        # Step 5: Update total_goals_scored, total_goals_conceded, and total_goal_difference
        group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored'] + group_goals_tracking['last_game_goals_scored']
        group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded'] + group_goals_tracking['last_game_goals_conceded']
        group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']

        # Step 6: Live points of the last game from its current score
        scored = group_goals_tracking['last_game_goals_scored']
        conceded = group_goals_tracking['last_game_goals_conceded']
        group_goals_tracking['last_game_points'] = 0                              # Loss
        group_goals_tracking.loc[scored == conceded, 'last_game_points'] = 1      # Draw
        group_goals_tracking.loc[scored > conceded, 'last_game_points'] = win_points  # Win

        # Step 7: Update total points
        group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points'] + group_goals_tracking['last_game_points']

        # Step 8: Mark teams that are tied
        group_goals_tracking['tied'] = group_goals_tracking.duplicated(subset=['total_points'], keep=False)

        # Reset `tied_won` to 0 for all teams
        group_goals_tracking['tied_won'] = 0

        # Step 8b: Head-to-head only applies between teams on the SAME points
        # total, so tied teams are compared within each points level.
        tied_teams = group_goals_tracking[group_goals_tracking['tied']]
        for _, level_teams in tied_teams.groupby('total_points'):
            names = list(level_teams['team'])
            for position, team1 in enumerate(names):
                for team2 in names[position + 1:]:
                    winner = head_to_head_winner(team1, team2)
                    if winner is not None:
                        group_goals_tracking.loc[group_goals_tracking['team'] == winner, 'tied_won'] = 1

        # Step 8c: Sort teams by total points, tied_won, goal difference, and goals scored
        group_goals_tracking = group_goals_tracking.sort_values(
            by=['total_points', 'tied_won', 'total_goal_difference', 'total_goals_scored'],
            ascending=[False, False, False, False]
        )

        # Step 9: Standings follow the sorted row order
        group_goals_tracking['last_game_standing'] = range(1, len(group_goals_tracking) + 1)

        # Step 10: Count a visit for every team whose position changed with
        # this goal.  Before any change has happened the previous standings
        # equal the before-last-day standings, so one comparison covers both.
        for row_label, row in group_goals_tracking.iterrows():
            new_standing = row['last_game_standing']
            if new_standing == previous_standings.loc[row_label]:
                continue
            column = position_columns.get(int(new_standing))
            if column is not None:
                group_goals_tracking.loc[group_goals_tracking['team'] == row['team'], column] += 1

        # Update previous standings after each goal
        previous_standings = group_goals_tracking['last_game_standing'].copy()

        # Step 11: Calculate changes as the sum of 1st, 2nd, 3rd, and 4th
        group_goals_tracking['changes'] = group_goals_tracking[['1st', '2nd', '3rd', '4th']].sum(axis=1)

        # Step 12: Print the updated group_goals_tracking after processing each goal
        print("\n=== Updated Standings After This Goal ===\n")
        display_columns = ['team', 'total_points', 'total_goals_scored', 'total_goals_conceded',
                           'total_goal_difference', 'last_game_points', 'last_game_standing',
                           'changes', '1st', '2nd', '3rd', '4th', 'tied', 'tied_won']
        print(group_goals_tracking[display_columns].to_string(index=False))
        print("\n========================================\n")

    # Step 13: Return the final DataFrame
    return group_goals_tracking


In [7]:
def track_composition_changes(year, stage, all_games_before_last, goals_last_day_sorted,
                              agg_goals_before_last_day, top_standings_limit=None):
    """
    Record every change in the set of top-placed teams during the last match day.

    Starting from the standings before the last match day (every last-day
    match standing at 0-0, i.e. one point each), each last-day goal of the
    group is applied in order of 'goal_minute'.  After every goal the live
    table is recomputed and printed; whenever the set of teams occupying the
    top `top_standings_limit` places differs from the previously recorded
    set, a new record is appended.

    Teams level on points are ordered by head-to-head result (taken from
    `agg_goals_before_last_day`, restricted to this year and stage), then
    goal difference, then goals scored.

    Args:
        year: Tournament year; a win is worth 2 points when year <= 1992 and
            3 points otherwise.
        stage: Stage (group) label to process.
        all_games_before_last: Standings before the last match day with the
            columns 'year', 'stage', 'team', 'goals_scored', 'goals_conceded',
            'points' and 'standing'.
        goals_last_day_sorted: One row per last-day goal with 'year', 'stage',
            'home_team', 'away_team', 'scorer_nationality' and 'goal_minute'.
        agg_goals_before_last_day: One row per earlier match with 'year',
            'stage', 'home_team', 'away_team' and 'won' (1 = home win,
            -1 = away win, 0 = draw).
        top_standings_limit: Number of top places whose composition is
            tracked.  When None (default) the European Championship rule is
            used: 3 for year >= 2016, otherwise 2.  Pass an explicit value
            for other competitions.

    Returns:
        DataFrame with one row for the initial composition (change_num 0,
        goal_time 'initial') followed by one row per composition change.
        NOTE(review): the initial row stores the scorer under the key
        'scorer_team' while change rows use 'scorer_nationality'; both keys
        are kept as they were so that the output columns do not change.
    """
    win_points = 2 if year <= 1992 else 3

    # Step 1: Filter the data for the specific year and stage
    group_goals_tracking = all_games_before_last[
        (all_games_before_last['year'] == year) &
        (all_games_before_last['stage'] == stage)
    ].copy()

    group_goals_last_day = goals_last_day_sorted[
        (goals_last_day_sorted['year'] == year) &
        (goals_last_day_sorted['stage'] == stage)
    ]

    # Head-to-head results must come from this group only; without this
    # filter a meeting of the same two teams in another tournament could be
    # picked up and decide the tie.
    head_to_head = agg_goals_before_last_day[
        (agg_goals_before_last_day['year'] == year) &
        (agg_goals_before_last_day['stage'] == stage)
    ]

    def head_to_head_winner(team1, team2):
        """Return the winner of the earlier team1/team2 meeting, or None."""
        meetings = head_to_head[
            ((head_to_head['home_team'] == team1) & (head_to_head['away_team'] == team2)) |
            ((head_to_head['home_team'] == team2) & (head_to_head['away_team'] == team1))
        ]
        if meetings.empty:
            return None
        meeting = meetings.iloc[0]
        # 'won' holds 1 for a home win, -1 for an away win, 0 for a draw
        if meeting['won'] == 1:
            return meeting['home_team']
        if meeting['won'] == -1:
            return meeting['away_team']
        return None

    # Initialize columns for team performance and standings
    group_goals_tracking['before_last_game_goals_scored'] = group_goals_tracking['goals_scored']
    group_goals_tracking['before_last_game_goals_conceded'] = group_goals_tracking['goals_conceded']
    group_goals_tracking['before_last_game_standing'] = group_goals_tracking['standing']
    group_goals_tracking['before_last_game_points'] = group_goals_tracking['points']
    group_goals_tracking['last_game_goals_scored'] = 0
    group_goals_tracking['last_game_goals_conceded'] = 0
    group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored']
    group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded']
    group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']
    group_goals_tracking['last_game_points'] = 0
    group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points']
    group_goals_tracking['tied_won'] = 0  # Initialize tied_won for tiebreak resolution

    # Add one point to each team for a 0-0 starting score
    group_goals_tracking['total_points'] += 1

    # Print initial standings after adding the 0-0 points
    print(f"\n=== Initial Standings for {stage}, {year} (with 0-0 points added) ===")
    print(group_goals_tracking[['team', 'total_points', 'total_goals_scored',
                                'total_goals_conceded', 'total_goal_difference', 'before_last_game_standing']].to_string(index=False))
    print("\n====================================================\n")

    # Default limit follows the European Championship format
    # (3 for 2016 and later, 2 for earlier editions).
    if top_standings_limit is None:
        top_standings_limit = 3 if year >= 2016 else 2

    # Step 2: Initial composition (change_num = 0).  Both the top-team set and
    # the 1st/2nd/3rd names are read from the before-last-day standing, so
    # the record cannot contradict itself when that standing was decided by
    # a head-to-head tie-break.
    sorted_initial = group_goals_tracking.sort_values(by='before_last_game_standing')
    initial_top_teams = set(sorted_initial.nsmallest(top_standings_limit, 'before_last_game_standing')['team'])
    composition_changes = [{
        'year': year,
        'stage': stage,
        'change_num': 0,
        'goal_time': 'initial',
        'home_team': None,
        'away_team': None,
        'scorer_team': None,
        'new_top_teams': list(initial_top_teams),
        '1st': sorted_initial.iloc[0]['team'] if len(sorted_initial) > 0 else None,
        '2nd': sorted_initial.iloc[1]['team'] if len(sorted_initial) > 1 else None,
        '3rd': sorted_initial.iloc[2]['team'] if len(sorted_initial) > 2 else None
    }]
    change_counter = 0  # Counter for the number of composition changes

    # Step 3: Sort goals by minute
    group_goals_last_day = group_goals_last_day.sort_values(by=['goal_minute'])

    # Step 4: Iterate through each goal and track changes in composition
    for _, goal in group_goals_last_day.iterrows():
        home_team = goal['home_team']
        away_team = goal['away_team']
        scorer_team = goal['scorer_nationality']

        # Credit the goal to the side matching the scorer's nationality;
        # a goal matching neither side changes no score.
        if scorer_team == home_team:
            scoring_team, conceding_team = home_team, away_team
        elif scorer_team == away_team:
            scoring_team, conceding_team = away_team, home_team
        else:
            scoring_team = conceding_team = None

        if scoring_team is not None:
            group_goals_tracking.loc[group_goals_tracking['team'] == scoring_team, 'last_game_goals_scored'] += 1
            group_goals_tracking.loc[group_goals_tracking['team'] == conceding_team, 'last_game_goals_conceded'] += 1

        # Totals are the pre-last-day figures plus the live last-game score
        group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored'] + group_goals_tracking['last_game_goals_scored']
        group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded'] + group_goals_tracking['last_game_goals_conceded']
        group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']

        # Step 5: Live points of the last game from its current score, using
        # the year-dependent value of a win
        scored = group_goals_tracking['last_game_goals_scored']
        conceded = group_goals_tracking['last_game_goals_conceded']
        group_goals_tracking['last_game_points'] = 0                                  # Loss
        group_goals_tracking.loc[scored == conceded, 'last_game_points'] = 1          # Draw
        group_goals_tracking.loc[scored > conceded, 'last_game_points'] = win_points  # Win

        # Calculate total points by adding last game points to before_last_game_points
        group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points'] + group_goals_tracking['last_game_points']

        # Step 6: Resolve ties based on head-to-head results
        group_goals_tracking['tied'] = group_goals_tracking.duplicated(subset=['total_points'], keep=False)
        group_goals_tracking['tied_won'] = 0  # Reset tied_won for all teams

        # Head-to-head only applies between teams on the SAME points total,
        # so tied teams are compared within each points level.
        tied_teams = group_goals_tracking[group_goals_tracking['tied']]
        for _, level_teams in tied_teams.groupby('total_points'):
            names = list(level_teams['team'])
            for position, team1 in enumerate(names):
                for team2 in names[position + 1:]:
                    winner = head_to_head_winner(team1, team2)
                    if winner is not None:
                        group_goals_tracking.loc[group_goals_tracking['team'] == winner, 'tied_won'] = 1

        # Step 7: Sort teams by updated points and tie-breaking criteria
        group_goals_tracking = group_goals_tracking.sort_values(
            by=['total_points', 'tied_won', 'total_goal_difference', 'total_goals_scored'],
            ascending=[False, False, False, False]
        )
        group_goals_tracking['last_game_standing'] = range(1, len(group_goals_tracking) + 1)

        # Print standings after each goal
        print(f"\n=== Standings after goal at minute {goal['goal_minute']} in {stage}, edition {year} ===")
        print(group_goals_tracking[['team', 'total_points', 'total_goals_scored', 'total_goals_conceded', 'total_goal_difference', 'last_game_standing', 'tied_won']].to_string(index=False))
        print("\n========================================\n")

        # Names of the current top three (rows are already in standing order)
        top_teams = group_goals_tracking[group_goals_tracking['last_game_standing'] <= 3]
        first_place_team = top_teams.iloc[0]['team'] if len(top_teams) > 0 else None
        second_place_team = top_teams.iloc[1]['team'] if len(top_teams) > 1 else None
        third_place_team = top_teams.iloc[2]['team'] if len(top_teams) > 2 else None

        # Record a change whenever the tracked top set differs from the last
        # recorded one (order inside the set does not matter)
        current_top_teams = set(group_goals_tracking.nsmallest(top_standings_limit, 'last_game_standing')['team'])
        if current_top_teams != initial_top_teams:
            change_counter += 1
            initial_top_teams = current_top_teams
            composition_changes.append({
                'year': year,
                'stage': stage,
                'change_num': change_counter,
                'goal_time': goal['goal_minute'],
                'home_team': home_team,
                'away_team': away_team,
                'scorer_nationality': scorer_team,
                'new_top_teams': list(current_top_teams),
                '1st': first_place_team,
                '2nd': second_place_team,
                '3rd': third_place_team
            })

    return pd.DataFrame(composition_changes)


# Best four third-placed teams

## European Championship

### men

In [8]:
def best_four_third_placed_eu_men(goals_last_day_sorted, all_games_before_last, agg_goals_before_last_day):
    """
    Follow the third-placed teams goal by goal on the last match day (European Championship, men).

    Only editions with ``year >= 2016`` are processed. For every last-match-day goal the
    standings of every stage are recomputed, the team in third position of each stage is
    recorded, and the third-placed teams are ranked against each other by total points,
    goal difference and goals scored; the first four of that ranking count as "top 4".

    Parameters:
        goals_last_day_sorted: DataFrame with one row per goal, processed in its row order.
            Columns read: 'year', 'stage', 'short_date', 'goal_minute',
            'scorer_nationality', 'home_team', 'away_team'.
        all_games_before_last: DataFrame with the standings before the last match day.
            Columns read: 'year', 'stage', 'team', 'goals_scored', 'goals_conceded', 'points'.
            Expected to hold one row per team and stage (TODO confirm against caller).
        agg_goals_before_last_day: not used; kept so existing callers keep working.

    Returns:
        DataFrame with columns 'team', 'year', 'third_place_count' (number of goals after
        which the team was third in its stage) and 'top4_third_place_count' (number of goals
        after which it was among the four best third-placed teams). Empty, but with these
        columns, when no goal was processed.
    """
    ranking_columns = ['total_points', 'total_goal_difference', 'total_goals_scored']
    result_columns = ['team', 'year', 'third_place_count', 'top4_third_place_count']

    # Apply filter to process only years >= 2016
    all_games_before_last = all_games_before_last[all_games_before_last['year'] >= 2016].copy()

    # Ensure 'year' column is integer in case it has been stored as float or other format
    all_games_before_last['year'] = all_games_before_last['year'].astype(int)

    # One dict per (team, year); turned into the final DataFrame at the end
    all_years_data = []

    # Group by the column name (not a one-element list) so the key is a plain scalar
    # regardless of the installed pandas version.
    for year, year_data in all_games_before_last.groupby('year'):
        year = int(year)
        print(f"\n--- Processing Year: {year} ---")
        year_data = year_data.copy()

        # Per-edition counters: times third in the group / times among the best four thirds
        third_teams_count = {}
        top4_count = {}

        # Step 1: Initialize columns for tracking team performance before and after goals
        year_data['before_last_game_goals_scored'] = year_data['goals_scored']
        year_data['before_last_game_goals_conceded'] = year_data['goals_conceded']
        year_data['before_last_game_points'] = year_data['points']

        year_data['last_game_goals_scored'] = 0
        year_data['last_game_goals_conceded'] = 0
        year_data['last_game_points'] = 0
        year_data['total_goals_scored'] = year_data['before_last_game_goals_scored'] + year_data['last_game_goals_scored']
        year_data['total_goals_conceded'] = year_data['before_last_game_goals_conceded'] + year_data['last_game_goals_conceded']
        year_data['total_goal_difference'] = year_data['total_goals_scored'] - year_data['total_goals_conceded']

        # Print the standings before any goal is processed
        print(f"\n=== Initial Standings for Year {year} Before Processing Any Goals ===\n")
        display_columns = ['team', 'stage', 'before_last_game_points', 'before_last_game_goals_scored', 'before_last_game_goals_conceded', 'total_goal_difference']
        initial_standings = year_data[display_columns].sort_values(
            by=['stage', 'before_last_game_points', 'total_goal_difference', 'before_last_game_goals_scored'],
            ascending=[True, False, False, False]
        )
        print(initial_standings.to_string(index=False))
        print("\n===============================================================\n")

        # Process each goal from the sorted last-day goals for the current year
        goals_last_day_year = goals_last_day_sorted[goals_last_day_sorted['year'] == year]

        # Number the goals with a running counter; the index label is arbitrary after sorting.
        for goal_number, (_, goal) in enumerate(goals_last_day_year.iterrows(), start=1):
            print(f"\n--- Analyzing Goal {goal_number} in {goal['stage']}: On date {goal['short_date']}, Minute {goal['goal_minute']} (Player Team: {goal['scorer_nationality']}, Home: {goal['home_team']}, Away: {goal['away_team']}) ---")

            home_team = goal['home_team']
            away_team = goal['away_team']
            player_team = goal['scorer_nationality']

            # The conceding side is whichever of the two teams is not credited with the goal
            opponent_team = home_team if player_team == away_team else away_team

            # NOTE(review): teams are matched by name across every stage of the edition;
            # confirm a team never appears in more than one stage row.
            year_data.loc[year_data['team'] == player_team, 'last_game_goals_scored'] += 1
            year_data.loc[year_data['team'] == opponent_team, 'last_game_goals_conceded'] += 1

            # Update total goals scored, goals conceded, and goal difference
            year_data['total_goals_scored'] = year_data['before_last_game_goals_scored'] + year_data['last_game_goals_scored']
            year_data['total_goals_conceded'] = year_data['before_last_game_goals_conceded'] + year_data['last_game_goals_conceded']
            year_data['total_goal_difference'] = year_data['total_goals_scored'] - year_data['total_goals_conceded']

            # Live last-game points: 3 when currently ahead, 1 when level, 0 when behind
            ahead = year_data['last_game_goals_scored'] > year_data['last_game_goals_conceded']
            level = year_data['last_game_goals_scored'] == year_data['last_game_goals_conceded']
            year_data['last_game_points'] = ahead.astype(int) * 3 + level.astype(int)

            # Update total points
            year_data['total_points'] = year_data['before_last_game_points'] + year_data['last_game_points']

            # Third-placed team of every stage after this goal
            third_teams_per_year = []
            for _stage, stage_rows in year_data.groupby('stage'):
                sorted_standings = stage_rows.sort_values(by=ranking_columns, ascending=False)
                if len(sorted_standings) >= 3:
                    third_teams_per_year.append(sorted_standings.iloc[2]['team'])

            # Update the count for third-placed teams after each goal
            for team in third_teams_per_year:
                third_teams_count[team] = third_teams_count.get(team, 0) + 1

            # Create DataFrame of third-placed teams with additional stats
            third_teams_df = year_data[year_data['team'].isin(third_teams_per_year)][
                ['team', 'total_points', 'last_game_points', 'total_goal_difference', 'total_goals_scored']
            ]

            # Rank the third-placed teams against each other and keep the best four.
            # (Counting occurrences of the team name cannot rank them: each name occurs once.)
            ranked_thirds = third_teams_df.sort_values(by=ranking_columns, ascending=False)
            top4_teams = ranked_thirds['team'].drop_duplicates().head(4).tolist()

            # Print third-placed teams' additional statistics after each goal
            print(f"Top 4 third-placed teams after this goal: {top4_teams}")
            print(f"Third-placed teams DataFrame after this goal:\n{third_teams_df}\n")

            # Update the count for top 4 third-placed teams after each goal
            for team in top4_teams:
                top4_count[team] = top4_count.get(team, 0) + 1

        # After processing all goals for the year, store the results in the final list
        for team, third_count in third_teams_count.items():
            all_years_data.append({
                'team': team,
                'year': year,
                'third_place_count': third_count,
                'top4_third_place_count': top4_count.get(team, 0)
            })

    # Passing the column list keeps the schema stable even when nothing was recorded
    return pd.DataFrame(all_years_data, columns=result_columns)


### women

In [9]:
def best_two_third_placed_eu_women(goals_last_day_sorted, all_games_before_last, agg_goals_before_last_day):
    """
    Follow the third-placed teams goal by goal on the last match day (European Championship, women).

    Only the editions 2009 and 2013 are processed. For every last-match-day goal the
    standings of every stage are recomputed, the team in third position of each stage is
    recorded, and the third-placed teams are ranked against each other by total points,
    goal difference and goals scored; the first two of that ranking count as "top 2".

    Parameters:
        goals_last_day_sorted: DataFrame with one row per goal, processed in its row order.
            Columns read: 'year', 'stage', 'short_date', 'goal_minute',
            'scorer_nationality', 'home_team', 'away_team'.
        all_games_before_last: DataFrame with the standings before the last match day.
            Columns read: 'year', 'stage', 'team', 'goals_scored', 'goals_conceded', 'points'.
            Expected to hold one row per team and stage (TODO confirm against caller).
        agg_goals_before_last_day: not used; kept so existing callers keep working.

    Returns:
        DataFrame with columns 'team', 'year', 'third_place_count' (number of goals after
        which the team was third in its stage) and 'top2_third_place_count' (number of goals
        after which it was among the two best third-placed teams). Empty, but with these
        columns, when no goal was processed.
    """
    ranking_columns = ['total_points', 'total_goal_difference', 'total_goals_scored']
    result_columns = ['team', 'year', 'third_place_count', 'top2_third_place_count']

    # Apply filter to process only years 2009 and 2013
    all_games_before_last = all_games_before_last[all_games_before_last['year'].isin([2009, 2013])].copy()

    # Ensure 'year' column is integer in case it has been stored as float or other format
    all_games_before_last['year'] = all_games_before_last['year'].astype(int)

    # One dict per (team, year); turned into the final DataFrame at the end
    all_years_data = []

    # Group by the column name (not a one-element list) so the key is a plain scalar
    # regardless of the installed pandas version.
    for year, year_data in all_games_before_last.groupby('year'):
        year = int(year)
        print(f"\n--- Processing Year: {year} ---")
        year_data = year_data.copy()

        # Per-edition counters: times third in the group / times among the best two thirds
        third_teams_count = {}
        top2_count = {}

        # Step 1: Initialize columns for tracking team performance before and after goals
        year_data['before_last_game_goals_scored'] = year_data['goals_scored']
        year_data['before_last_game_goals_conceded'] = year_data['goals_conceded']
        year_data['before_last_game_points'] = year_data['points']

        year_data['last_game_goals_scored'] = 0
        year_data['last_game_goals_conceded'] = 0
        year_data['last_game_points'] = 0
        year_data['total_goals_scored'] = year_data['before_last_game_goals_scored'] + year_data['last_game_goals_scored']
        year_data['total_goals_conceded'] = year_data['before_last_game_goals_conceded'] + year_data['last_game_goals_conceded']
        year_data['total_goal_difference'] = year_data['total_goals_scored'] - year_data['total_goals_conceded']

        # Print the standings before any goal is processed
        print(f"\n=== Initial Standings for Year {year} Before Processing Any Goals ===\n")
        display_columns = ['team', 'stage', 'before_last_game_points', 'before_last_game_goals_scored', 'before_last_game_goals_conceded', 'total_goal_difference']
        initial_standings = year_data[display_columns].sort_values(
            by=['stage', 'before_last_game_points', 'total_goal_difference', 'before_last_game_goals_scored'],
            ascending=[True, False, False, False]
        )
        print(initial_standings.to_string(index=False))
        print("\n===============================================================\n")

        # Process each goal from the sorted last-day goals for the current year
        goals_last_day_year = goals_last_day_sorted[goals_last_day_sorted['year'] == year]

        # Number the goals with a running counter; the index label is arbitrary after sorting.
        for goal_number, (_, goal) in enumerate(goals_last_day_year.iterrows(), start=1):
            print(f"\n--- Analyzing Goal {goal_number} in {goal['stage']}: On date {goal['short_date']}, Minute {goal['goal_minute']} (Player Team: {goal['scorer_nationality']}, Home: {goal['home_team']}, Away: {goal['away_team']}) ---")

            home_team = goal['home_team']
            away_team = goal['away_team']
            player_team = goal['scorer_nationality']

            # The conceding side is whichever of the two teams is not credited with the goal
            opponent_team = home_team if player_team == away_team else away_team

            # NOTE(review): teams are matched by name across every stage of the edition;
            # confirm a team never appears in more than one stage row.
            year_data.loc[year_data['team'] == player_team, 'last_game_goals_scored'] += 1
            year_data.loc[year_data['team'] == opponent_team, 'last_game_goals_conceded'] += 1

            # Update total goals scored, goals conceded, and goal difference
            year_data['total_goals_scored'] = year_data['before_last_game_goals_scored'] + year_data['last_game_goals_scored']
            year_data['total_goals_conceded'] = year_data['before_last_game_goals_conceded'] + year_data['last_game_goals_conceded']
            year_data['total_goal_difference'] = year_data['total_goals_scored'] - year_data['total_goals_conceded']

            # Live last-game points: 3 when currently ahead, 1 when level, 0 when behind
            ahead = year_data['last_game_goals_scored'] > year_data['last_game_goals_conceded']
            level = year_data['last_game_goals_scored'] == year_data['last_game_goals_conceded']
            year_data['last_game_points'] = ahead.astype(int) * 3 + level.astype(int)

            # Update total points
            year_data['total_points'] = year_data['before_last_game_points'] + year_data['last_game_points']

            # Third-placed team of every stage after this goal
            third_teams_per_year = []
            for _stage, stage_rows in year_data.groupby('stage'):
                sorted_standings = stage_rows.sort_values(by=ranking_columns, ascending=False)
                if len(sorted_standings) >= 3:
                    third_teams_per_year.append(sorted_standings.iloc[2]['team'])

            # Update the count for third-placed teams after each goal
            for team in third_teams_per_year:
                third_teams_count[team] = third_teams_count.get(team, 0) + 1

            # Create DataFrame of third-placed teams with additional stats
            third_teams_df = year_data[year_data['team'].isin(third_teams_per_year)][
                ['team', 'total_points', 'last_game_points', 'total_goal_difference', 'total_goals_scored']
            ]

            # Rank the third-placed teams against each other and keep the best two.
            # (Counting occurrences of the team name cannot rank them: each name occurs once.)
            ranked_thirds = third_teams_df.sort_values(by=ranking_columns, ascending=False)
            top2_teams = ranked_thirds['team'].drop_duplicates().head(2).tolist()

            # Print third-placed teams' additional statistics after each goal
            print(f"Top 2 third-placed teams after this goal: {top2_teams}")
            print(f"Third-placed teams DataFrame after this goal:\n{third_teams_df}\n")

            # Update the count for top 2 third-placed teams after each goal
            for team in top2_teams:
                top2_count[team] = top2_count.get(team, 0) + 1

        # After processing all goals for the year, store the results in the final list
        for team, third_count in third_teams_count.items():
            all_years_data.append({
                'team': team,
                'year': year,
                'third_place_count': third_count,
                'top2_third_place_count': top2_count.get(team, 0)
            })

    # Passing the column list keeps the schema stable even when nothing was recorded
    return pd.DataFrame(all_years_data, columns=result_columns)


## World Cup

### men

In [10]:
def best_four_third_placed_wc_men(goals_last_day_sorted, all_games_before_last, agg_goals_before_last_day):
    """
    Follow the third-placed teams goal by goal on the last match day (World Cup, men).

    Only editions with ``year <= 1994`` are processed. For every last-match-day goal the
    standings of every stage are recomputed, the team in third position of each stage is
    recorded, and the third-placed teams are ranked against each other by total points,
    goal difference and goals scored; the first four of that ranking count as "top 4".

    Parameters:
        goals_last_day_sorted: DataFrame with one row per goal, processed in its row order.
            Columns read: 'year', 'stage', 'short_date', 'goal_minute',
            'scorer_nationality', 'home_team', 'away_team'.
        all_games_before_last: DataFrame with the standings before the last match day.
            Columns read: 'year', 'stage', 'team', 'goals_scored', 'goals_conceded', 'points'.
            Expected to hold one row per team and stage (TODO confirm against caller).
        agg_goals_before_last_day: not used; kept so existing callers keep working.

    Returns:
        DataFrame with columns 'team', 'year', 'third_place_count' (number of goals after
        which the team was third in its stage) and 'top4_third_place_count' (number of goals
        after which it was among the four best third-placed teams). Empty, but with these
        columns, when no goal was processed.
    """
    ranking_columns = ['total_points', 'total_goal_difference', 'total_goals_scored']
    result_columns = ['team', 'year', 'third_place_count', 'top4_third_place_count']

    # Apply filter to process only years <= 1994
    all_games_before_last = all_games_before_last[all_games_before_last['year'] <= 1994].copy()

    # Ensure 'year' column is integer in case it has been stored as float or other format
    all_games_before_last['year'] = all_games_before_last['year'].astype(int)

    # One dict per (team, year); turned into the final DataFrame at the end
    all_years_data = []

    # Group by the column name (not a one-element list) so the key is a plain scalar
    # regardless of the installed pandas version.
    for year, year_data in all_games_before_last.groupby('year'):
        year = int(year)
        print(f"\n--- Processing Year: {year} ---")
        year_data = year_data.copy()

        # Per-edition counters: times third in the group / times among the best four thirds
        third_teams_count = {}
        top4_count = {}

        # Step 1: Initialize columns for tracking team performance before and after goals
        year_data['before_last_game_goals_scored'] = year_data['goals_scored']
        year_data['before_last_game_goals_conceded'] = year_data['goals_conceded']
        year_data['before_last_game_points'] = year_data['points']

        year_data['last_game_goals_scored'] = 0
        year_data['last_game_goals_conceded'] = 0
        year_data['last_game_points'] = 0
        year_data['total_goals_scored'] = year_data['before_last_game_goals_scored'] + year_data['last_game_goals_scored']
        year_data['total_goals_conceded'] = year_data['before_last_game_goals_conceded'] + year_data['last_game_goals_conceded']
        year_data['total_goal_difference'] = year_data['total_goals_scored'] - year_data['total_goals_conceded']

        # Print the standings before any goal is processed
        print(f"\n=== Initial Standings for Year {year} Before Processing Any Goals ===\n")
        display_columns = ['team', 'stage', 'before_last_game_points', 'before_last_game_goals_scored', 'before_last_game_goals_conceded', 'total_goal_difference']
        initial_standings = year_data[display_columns].sort_values(
            by=['stage', 'before_last_game_points', 'total_goal_difference', 'before_last_game_goals_scored'],
            ascending=[True, False, False, False]
        )
        print(initial_standings.to_string(index=False))
        print("\n===============================================================\n")

        # Process each goal from the sorted last-day goals for the current year
        goals_last_day_year = goals_last_day_sorted[goals_last_day_sorted['year'] == year]

        # Number the goals with a running counter; the index label is arbitrary after sorting.
        for goal_number, (_, goal) in enumerate(goals_last_day_year.iterrows(), start=1):
            print(f"\n--- Analyzing Goal {goal_number} in {goal['stage']}: On date {goal['short_date']}, Minute {goal['goal_minute']} (Player Team: {goal['scorer_nationality']}, Home: {goal['home_team']}, Away: {goal['away_team']}) ---")

            home_team = goal['home_team']
            away_team = goal['away_team']
            player_team = goal['scorer_nationality']

            # The conceding side is whichever of the two teams is not credited with the goal
            opponent_team = home_team if player_team == away_team else away_team

            # NOTE(review): teams are matched by name across every stage of the edition;
            # confirm a team never appears in more than one stage row.
            year_data.loc[year_data['team'] == player_team, 'last_game_goals_scored'] += 1
            year_data.loc[year_data['team'] == opponent_team, 'last_game_goals_conceded'] += 1

            # Update total goals scored, goals conceded, and goal difference
            year_data['total_goals_scored'] = year_data['before_last_game_goals_scored'] + year_data['last_game_goals_scored']
            year_data['total_goals_conceded'] = year_data['before_last_game_goals_conceded'] + year_data['last_game_goals_conceded']
            year_data['total_goal_difference'] = year_data['total_goals_scored'] - year_data['total_goals_conceded']

            # Live last-game points: 3 when currently ahead, 1 when level, 0 when behind
            ahead = year_data['last_game_goals_scored'] > year_data['last_game_goals_conceded']
            level = year_data['last_game_goals_scored'] == year_data['last_game_goals_conceded']
            year_data['last_game_points'] = ahead.astype(int) * 3 + level.astype(int)

            # Update total points
            year_data['total_points'] = year_data['before_last_game_points'] + year_data['last_game_points']

            # Third-placed team of every stage after this goal
            third_teams_per_year = []
            for _stage, stage_rows in year_data.groupby('stage'):
                sorted_standings = stage_rows.sort_values(by=ranking_columns, ascending=False)
                if len(sorted_standings) >= 3:
                    third_teams_per_year.append(sorted_standings.iloc[2]['team'])

            # Update the count for third-placed teams after each goal
            for team in third_teams_per_year:
                third_teams_count[team] = third_teams_count.get(team, 0) + 1

            # Create DataFrame of third-placed teams with additional stats
            third_teams_df = year_data[year_data['team'].isin(third_teams_per_year)][
                ['team', 'total_points', 'last_game_points', 'total_goal_difference', 'total_goals_scored']
            ]

            # Rank the third-placed teams against each other and keep the best four.
            # (Counting occurrences of the team name cannot rank them: each name occurs once.)
            ranked_thirds = third_teams_df.sort_values(by=ranking_columns, ascending=False)
            top4_teams = ranked_thirds['team'].drop_duplicates().head(4).tolist()

            # Print third-placed teams' additional statistics after each goal
            print(f"Top 4 third-placed teams after this goal: {top4_teams}")
            print(f"Third-placed teams DataFrame after this goal:\n{third_teams_df}\n")

            # Update the count for top 4 third-placed teams after each goal
            for team in top4_teams:
                top4_count[team] = top4_count.get(team, 0) + 1

        # After processing all goals for the year, store the results in the final list
        for team, third_count in third_teams_count.items():
            all_years_data.append({
                'team': team,
                'year': year,
                'third_place_count': third_count,
                'top4_third_place_count': top4_count.get(team, 0)
            })

    # Passing the column list keeps the schema stable even when nothing was recorded
    return pd.DataFrame(all_years_data, columns=result_columns)


### women

In [11]:
import pandas as pd

def best_two_third_placed_wc_women(goals_last_day_sorted, all_games_before_last, agg_goals_before_last_day):
    """
    Follow the third-placed teams goal by goal on the last match day (World Cup, women).

    Only the 1991 edition is processed. For every last-match-day goal the standings of
    every stage are recomputed, the team in third position of each stage is recorded, and
    the third-placed teams are ranked against each other by total points, goal difference
    and goals scored; the first two of that ranking count as "top 2".

    Parameters:
        goals_last_day_sorted: DataFrame with one row per goal, processed in its row order.
            Columns read: 'year', 'stage', 'short_date', 'goal_minute',
            'scorer_nationality', 'home_team', 'away_team'.
        all_games_before_last: DataFrame with the standings before the last match day.
            Columns read: 'year', 'stage', 'team', 'goals_scored', 'goals_conceded', 'points'.
            Expected to hold one row per team and stage (TODO confirm against caller).
        agg_goals_before_last_day: not used; kept so existing callers keep working.

    Returns:
        DataFrame with columns 'team', 'year', 'third_place_count' (number of goals after
        which the team was third in its stage) and 'top2_third_place_count' (number of goals
        after which it was among the two best third-placed teams). Empty, but with these
        columns, when no goal was processed.
    """
    ranking_columns = ['total_points', 'total_goal_difference', 'total_goals_scored']
    result_columns = ['team', 'year', 'third_place_count', 'top2_third_place_count']

    # Apply filter to process only years == 1991
    all_games_before_last = all_games_before_last[all_games_before_last['year'] == 1991].copy()

    # Ensure 'year' column is integer in case it has been stored as float or other format
    all_games_before_last['year'] = all_games_before_last['year'].astype(int)

    # One dict per (team, year); turned into the final DataFrame at the end
    all_years_data = []

    # Group by the column name (not a one-element list) so the key is a plain scalar
    # regardless of the installed pandas version.
    for year, year_data in all_games_before_last.groupby('year'):
        year = int(year)
        print(f"\n--- Processing Year: {year} ---")
        year_data = year_data.copy()

        # Per-edition counters: times third in the group / times among the best two thirds
        third_teams_count = {}
        top2_count = {}

        # Step 1: Initialize columns for tracking team performance before and after goals
        year_data['before_last_game_goals_scored'] = year_data['goals_scored']
        year_data['before_last_game_goals_conceded'] = year_data['goals_conceded']
        year_data['before_last_game_points'] = year_data['points']

        year_data['last_game_goals_scored'] = 0
        year_data['last_game_goals_conceded'] = 0
        year_data['last_game_points'] = 0
        year_data['total_goals_scored'] = year_data['before_last_game_goals_scored'] + year_data['last_game_goals_scored']
        year_data['total_goals_conceded'] = year_data['before_last_game_goals_conceded'] + year_data['last_game_goals_conceded']
        year_data['total_goal_difference'] = year_data['total_goals_scored'] - year_data['total_goals_conceded']

        # Print the standings before any goal is processed
        print(f"\n=== Initial Standings for Year {year} Before Processing Any Goals ===\n")
        display_columns = ['team', 'stage', 'before_last_game_points', 'before_last_game_goals_scored', 'before_last_game_goals_conceded', 'total_goal_difference']
        initial_standings = year_data[display_columns].sort_values(
            by=['stage', 'before_last_game_points', 'total_goal_difference', 'before_last_game_goals_scored'],
            ascending=[True, False, False, False]
        )
        print(initial_standings.to_string(index=False))
        print("\n===============================================================\n")

        # Process each goal from the sorted last-day goals for the current year
        goals_last_day_year = goals_last_day_sorted[goals_last_day_sorted['year'] == year]

        # Number the goals with a running counter; the index label is arbitrary after sorting.
        for goal_number, (_, goal) in enumerate(goals_last_day_year.iterrows(), start=1):
            print(f"\n--- Analyzing Goal {goal_number} in {goal['stage']}: On date {goal['short_date']}, Minute {goal['goal_minute']} (Player Team: {goal['scorer_nationality']}, Home: {goal['home_team']}, Away: {goal['away_team']}) ---")

            home_team = goal['home_team']
            away_team = goal['away_team']
            player_team = goal['scorer_nationality']

            # The conceding side is whichever of the two teams is not credited with the goal
            opponent_team = home_team if player_team == away_team else away_team

            # NOTE(review): teams are matched by name across every stage of the edition;
            # confirm a team never appears in more than one stage row.
            year_data.loc[year_data['team'] == player_team, 'last_game_goals_scored'] += 1
            year_data.loc[year_data['team'] == opponent_team, 'last_game_goals_conceded'] += 1

            # Update total goals scored, goals conceded, and goal difference
            year_data['total_goals_scored'] = year_data['before_last_game_goals_scored'] + year_data['last_game_goals_scored']
            year_data['total_goals_conceded'] = year_data['before_last_game_goals_conceded'] + year_data['last_game_goals_conceded']
            year_data['total_goal_difference'] = year_data['total_goals_scored'] - year_data['total_goals_conceded']

            # Live last-game points: 3 when currently ahead, 1 when level, 0 when behind
            ahead = year_data['last_game_goals_scored'] > year_data['last_game_goals_conceded']
            level = year_data['last_game_goals_scored'] == year_data['last_game_goals_conceded']
            year_data['last_game_points'] = ahead.astype(int) * 3 + level.astype(int)

            # Update total points
            year_data['total_points'] = year_data['before_last_game_points'] + year_data['last_game_points']

            # Third-placed team of every stage after this goal
            third_teams_per_year = []
            for _stage, stage_rows in year_data.groupby('stage'):
                sorted_standings = stage_rows.sort_values(by=ranking_columns, ascending=False)
                if len(sorted_standings) >= 3:
                    third_teams_per_year.append(sorted_standings.iloc[2]['team'])

            # Update the count for third-placed teams after each goal
            for team in third_teams_per_year:
                third_teams_count[team] = third_teams_count.get(team, 0) + 1

            # Create DataFrame of third-placed teams with additional stats
            third_teams_df = year_data[year_data['team'].isin(third_teams_per_year)][
                ['team', 'total_points', 'last_game_points', 'total_goal_difference', 'total_goals_scored']
            ]

            # Rank the third-placed teams against each other and keep the best two.
            # (Counting occurrences of the team name cannot rank them: each name occurs once.)
            ranked_thirds = third_teams_df.sort_values(by=ranking_columns, ascending=False)
            top2_teams = ranked_thirds['team'].drop_duplicates().head(2).tolist()

            # Print third-placed teams' additional statistics after each goal
            print(f"Top 2 third-placed teams after this goal: {top2_teams}")
            print(f"Third-placed teams DataFrame after this goal:\n{third_teams_df}\n")

            # Update the count for top 2 third-placed teams after each goal
            for team in top2_teams:
                top2_count[team] = top2_count.get(team, 0) + 1

        # After processing all goals for the year, store the results in the final list
        for team, third_count in third_teams_count.items():
            all_years_data.append({
                'team': team,
                'year': year,
                'third_place_count': third_count,
                'top2_third_place_count': top2_count.get(team, 0)
            })

    # Passing the column list keeps the schema stable even when nothing was recorded
    return pd.DataFrame(all_years_data, columns=result_columns)


# suspense

In [12]:
def _match_status(goals_for, goals_against):
    """Describe a team's live last-match state from its current last-day score."""
    if goals_for > goals_against:
        return "winning"
    if goals_for == goals_against:
        return "drawing"
    return "losing"


def _won_head_to_head(fixtures, team, opponent):
    """Return True if `team` won the first fixture in `fixtures` played against `opponent`."""
    pairing = fixtures[
        ((fixtures['home_team'] == team) & (fixtures['away_team'] == opponent)) |
        ((fixtures['home_team'] == opponent) & (fixtures['away_team'] == team))
    ]
    if pairing.empty:
        return False
    game = pairing.iloc[0]
    # 'won' is read from the home side's point of view: 1 = home win, -1 = away win.
    if game['won'] == 1:
        return bool(game['home_team'] == team)
    if game['won'] == -1:
        return bool(game['away_team'] == team)
    return False


def active_suspense(all_games_before_last, goals_last_day_sorted, agg_goals_before_last_day):
    """
    Replay the last match day goal by goal and report "active suspense" events.

    For every (year, stage) found in `all_games_before_last`, the goals of the last
    match day are applied one at a time (ordered by 'goal_minute'). After each goal the
    live table is rebuilt and the team just outside the watched position (3rd place for
    years before 2016, 4th place from 2016 on) is compared with the team directly above.
    Suspense is "met" when the team above leads by at most one point and the chasing
    team's goal difference exceeds the other's by at most one.

    Parameters:
        all_games_before_last: DataFrame with columns 'year', 'stage', 'team',
            'goals_scored', 'goals_conceded' and 'points' (standings before the last day).
        goals_last_day_sorted: DataFrame with one row per last-day goal and columns
            'year', 'stage', 'home_team', 'away_team', 'scorer_nationality', 'goal_minute'.
        agg_goals_before_last_day: DataFrame of earlier fixtures with columns 'home_team',
            'away_team' and 'won' (1 = home win, -1 = away win). If it also carries
            'year' / 'stage' columns they are used to restrict head-to-head lookups to
            the group being processed.

    Returns:
        DataFrame with columns 'team', 'stage', 'year', 'goal_minute',
        'active_suspense_count' (1 when the condition is met, else 0) and 'reason'.
        One row is produced per processed goal for groups large enough to have a
        chasing team; the columns are present even when no rows are produced.

    Groups without any last-day goal are skipped with a printed notice.
    """
    result_columns = ['team', 'stage', 'year', 'goal_minute', 'active_suspense_count', 'reason']
    tie_key = ['total_points', 'total_goal_difference', 'total_goals_scored']
    results = []

    unique_groups = all_games_before_last[['year', 'stage']].drop_duplicates()
    # itertuples keeps each column's own dtype (iterrows would coerce a row to one dtype)
    for year, stage in unique_groups.itertuples(index=False, name=None):
        table = all_games_before_last[
            (all_games_before_last['year'] == year) &
            (all_games_before_last['stage'] == stage)
        ].copy()

        group_goals = goals_last_day_sorted[
            (goals_last_day_sorted['year'] == year) &
            (goals_last_day_sorted['stage'] == stage)
        ]

        if group_goals.empty:
            print(f"No goals recorded for stage {stage} in year {year}. Skipping...")
            continue

        # Head-to-head results must come from this tournament group only; without
        # this filter a fixture between the same teams in another year could be used.
        fixtures = agg_goals_before_last_day
        if 'year' in fixtures.columns:
            fixtures = fixtures[fixtures['year'] == year]
        if 'stage' in fixtures.columns:
            fixtures = fixtures[fixtures['stage'] == stage]

        # Snapshot the pre-last-day figures and start the live last-day counters at zero
        table['before_last_game_goals_scored'] = table['goals_scored']
        table['before_last_game_goals_conceded'] = table['goals_conceded']
        table['before_last_game_points'] = table['points']
        table['last_game_goals_scored'] = 0
        table['last_game_goals_conceded'] = 0
        table['total_goals_scored'] = table['before_last_game_goals_scored']
        table['total_goals_conceded'] = table['before_last_game_goals_conceded']
        table['total_goal_difference'] = table['total_goals_scored'] - table['total_goals_conceded']
        table['last_game_points'] = 0
        table['total_points'] = table['before_last_game_points']

        win_points = 3 if year > 1994 else 2
        # Position of the chasing team and a label for the place it is chasing
        chaser_position = 3 if year < 2016 else 4
        target_label = "2nd" if year < 2016 else "3rd"

        # Stable sort: goals sharing a minute keep the order the caller supplied
        group_goals = group_goals.sort_values(by=['goal_minute'], kind='stable')

        for _, goal in group_goals.iterrows():
            home_team = goal['home_team']
            away_team = goal['away_team']
            scorer_team = goal['scorer_nationality']
            goal_minute = goal['goal_minute']

            # Credit the goal; a scorer matching neither side leaves the score untouched
            if scorer_team == home_team:
                table.loc[table['team'] == home_team, 'last_game_goals_scored'] += 1
                table.loc[table['team'] == away_team, 'last_game_goals_conceded'] += 1
            elif scorer_team == away_team:
                table.loc[table['team'] == away_team, 'last_game_goals_scored'] += 1
                table.loc[table['team'] == home_team, 'last_game_goals_conceded'] += 1

            scored = table['last_game_goals_scored']
            conceded = table['last_game_goals_conceded']
            table['total_goals_scored'] = table['before_last_game_goals_scored'] + scored
            table['total_goals_conceded'] = table['before_last_game_goals_conceded'] + conceded
            table['total_goal_difference'] = table['total_goals_scored'] - table['total_goals_conceded']

            # Live points from the current last-day score (every team, including 0-0 draws)
            live_points = pd.Series(0, index=table.index)
            live_points[scored == conceded] = 1
            live_points[scored > conceded] = win_points
            table['last_game_points'] = live_points
            table['total_points'] = table['before_last_game_points'] + table['last_game_points']

            # Teams level on points, goal difference and goals scored are separated
            # by head-to-head wins against the teams they are actually level with.
            table['tied'] = table.duplicated(subset=tie_key, keep=False)
            head_to_head_wins = dict.fromkeys(table['team'], 0)
            tied_rows = table[table['tied']]
            for idx, row in tied_rows.iterrows():
                for other_idx, other in tied_rows.iterrows():
                    if other_idx == idx:
                        continue
                    if not all(row[col] == other[col] for col in tie_key):
                        continue  # tied with someone else, not with this team
                    if _won_head_to_head(fixtures, row['team'], other['team']):
                        head_to_head_wins[row['team']] += 1
            table['tied_won'] = table['team'].map(head_to_head_wins)

            table = table.sort_values(
                by=tie_key + ['tied_won'], ascending=False
            ).reset_index(drop=True)
            table['last_game_standing'] = table.index + 1

            if len(table) < chaser_position:
                continue  # group too small to have a chasing team

            chaser = table.iloc[chaser_position - 1]
            target = table.iloc[chaser_position - 2]
            chaser_status = _match_status(
                chaser['last_game_goals_scored'], chaser['last_game_goals_conceded']
            )
            target_status = _match_status(
                target['last_game_goals_scored'], target['last_game_goals_conceded']
            )

            within_reach = (
                target['total_points'] - chaser['total_points'] <= 1 and
                chaser['total_goal_difference'] - target['total_goal_difference'] <= 1
            )
            if within_reach:
                suspense_flag = 1
                reason = f"Active suspense met ({chaser_status} vs {target_status})"
            else:
                suspense_flag = 0
                reason = (
                    f"Condition for moving to {target_label} not met "
                    f"({chaser_status} vs {target_status})"
                )

            results.append({
                'team': chaser['team'],
                'stage': stage,
                'year': year,
                'goal_minute': goal_minute,
                'active_suspense_count': suspense_flag,
                'reason': reason
            })

    # Explicit columns keep the schema stable even when no event was recorded
    return pd.DataFrame(results, columns=result_columns)
