# Libraries

In [282]:
import pandas as pd
from getpass import getuser
from collections import defaultdict

# Load and inspect dataset

In [283]:
# Get the current user's name (interpolated into the dataset path below)
user = getuser()

# Path to the dataset
# NOTE(review): this is an absolute Windows path under the user's Documents
# folder, so the notebook only runs where the repository is cloned to exactly
# this location — consider a path relative to the notebook.
data_path = rf'C:\Users\{user}\Documents\GitHub\tiebreak_wc\fifa.csv'

# Read the dataset as ISO-8859-1 (Latin-1) rather than pandas' default UTF-8
# NOTE(review): presumably required by non-ASCII names in the file — confirm.
df = pd.read_csv(data_path, encoding='ISO-8859-1')

# Extract relevant columns

In [284]:
# Extract the four-digit year from 'tournament_id' (pattern 'WC-YYYY') into a new 'year' column.
# expand=False makes str.extract return a Series, which is what a single-column assignment expects.
df['year'] = df['tournament_id'].str.extract(r'WC-(\d{4})', expand=False).astype(int)

# Keep only tournaments held after 2020
filtered_df = df[df['year'] > 2020]

# Keep the columns needed for goal events and match results, and drop all
# observations where group_name is "not applicable".
# The explicit .copy() makes goals_df an independent frame, so the column
# assignment below does not raise SettingWithCopyWarning.
goals_df = filtered_df.loc[
    filtered_df['group_name'] != 'not applicable',
    ['tournament_name', 'group_name', 'match_name', 'match_id', 'player_team_name', 'match_date',
     'minute_regulation', 'minute_stoppage', 'team_id', 'own_goal'],
].copy()

# Convert 'match_date' to datetime format (the file stores dates as month/day/year)
goals_df['match_date'] = pd.to_datetime(goals_df['match_date'], format='%m/%d/%Y')

# Sort the dataset by 'match_date' in ascending order (oldest first) and 'minute_regulation'
goals_df = goals_df.sort_values(by=['match_date', 'minute_regulation'], ascending=[True, True])

# Display the first few rows to confirm the sorting
goals_df.head()


Unnamed: 0,tournament_name,group_name,match_name,match_id,player_team_name,match_date,minute_regulation,minute_stoppage,team_id,own_goal
2548,2022 FIFA World Cup,Group A,Qatar v Ecuador,M-2022-01,Ecuador,2022-11-20,16,0,T-24,0
2549,2022 FIFA World Cup,Group A,Qatar v Ecuador,M-2022-01,Ecuador,2022-11-20,31,0,T-24,0
2550,2022 FIFA World Cup,Group B,England v Iran,M-2022-02,England,2022-11-21,35,0,T-27,0
2560,2022 FIFA World Cup,Group B,United States v Wales,M-2022-04,United States,2022-11-21,36,0,T-80,0
2551,2022 FIFA World Cup,Group B,England v Iran,M-2022-02,England,2022-11-21,43,0,T-27,0


# Recreate League Table after first two matchdays

### Filter out the matches from the last match day in the goals_df DataFrame.

In [285]:
# Step 1: Latest match date per (tournament, group) — this is the "last match day"
last_dates = (
    goals_df
    .groupby(['tournament_name', 'group_name'])['match_date']
    .max()
    .reset_index()
)

# Step 2: Goals scored before the last match day.
# The outer merge with indicator=True tags every goal row; rows tagged
# 'left_only' have no matching (tournament, group, last date) entry.
merge_keys = ['tournament_name', 'group_name', 'match_date']
tagged_goals = goals_df.merge(last_dates, on=merge_keys, how='outer', indicator=True)
goals_before_last_day = tagged_goals[tagged_goals['_merge'] == 'left_only'].drop(columns=['_merge'])

# Step 3: Goals scored on the last match day (inner merge keeps only matching rows)
goals_last_day = goals_df.merge(last_dates, on=merge_keys)


# Step 4: Replace 'match_name' ("<home> v <away>") by separate 'home' and 'away' columns
def _split_home_away(goals):
    """Return a copy of `goals` with 'match_name' split on ' v ' into 'home' and 'away'."""
    goals = goals.copy()
    goals[['home', 'away']] = goals['match_name'].str.split(' v ', expand=True)
    return goals.drop(columns=['match_name'])


goals_before_last_day = _split_home_away(goals_before_last_day)
goals_last_day = _split_home_away(goals_last_day)


## Create a DataFrame of aggregate data for each match

In [286]:
# Build one row per match (before the last match day) with the goals credited
# to the home and away side.
#
# 'player_team_name' is the team of the player who scored. A normal goal
# (own_goal == 0) is credited to that team; an own goal (own_goal == 1) is
# credited to the opponent. Own goals used to be dropped entirely here, which
# disagreed with the goal-by-goal replay of the last match day further below.
player_is_home = goals_before_last_day['player_team_name'] == goals_before_last_day['home']
player_is_away = goals_before_last_day['player_team_name'] == goals_before_last_day['away']
is_regular_goal = goals_before_last_day['own_goal'] == 0
is_own_goal = goals_before_last_day['own_goal'] == 1

credited_to_home = (player_is_home & is_regular_goal) | (player_is_away & is_own_goal)
credited_to_away = (player_is_away & is_regular_goal) | (player_is_home & is_own_goal)

# Sum the per-goal flags for each match; groupby sorts by the key columns,
# so the row order matches iterating over the groups one by one.
agg_goals_before_last_day = (
    goals_before_last_day
    .assign(goals_home=credited_to_home.astype('int64'),
            goals_away=credited_to_away.astype('int64'))
    .groupby(['tournament_name', 'group_name', 'match_id', 'home', 'away'])[['goals_home', 'goals_away']]
    .sum()
    .reset_index()
)

# Add a new column 'won': 1 = home win, -1 = away win, 0 = draw
agg_goals_before_last_day['won'] = (
    (agg_goals_before_last_day['goals_home'] > agg_goals_before_last_day['goals_away']).astype('int64')
    - (agg_goals_before_last_day['goals_home'] < agg_goals_before_last_day['goals_away']).astype('int64')
)

# Display the updated DataFrame (use print if outside of Jupyter)
display(agg_goals_before_last_day)



Unnamed: 0,tournament_name,group_name,match_id,home,away,goals_home,goals_away,won
0,2022 FIFA World Cup,Group A,M-2022-01,Qatar,Ecuador,0,2,-1
1,2022 FIFA World Cup,Group A,M-2022-03,Senegal,Netherlands,0,2,-1
2,2022 FIFA World Cup,Group A,M-2022-18,Qatar,Senegal,1,3,-1
3,2022 FIFA World Cup,Group A,M-2022-19,Netherlands,Ecuador,1,1,0
4,2022 FIFA World Cup,Group B,M-2022-02,England,Iran,6,2,1
5,2022 FIFA World Cup,Group B,M-2022-04,United States,Wales,1,1,0
6,2022 FIFA World Cup,Group B,M-2022-17,Wales,Iran,0,2,-1
7,2022 FIFA World Cup,Group C,M-2022-05,Argentina,Saudi Arabia,1,2,-1
8,2022 FIFA World Cup,Group C,M-2022-22,Poland,Saudi Arabia,2,0,1
9,2022 FIFA World Cup,Group C,M-2022-24,Argentina,Mexico,2,0,1


## aggregate data for home and away games

In [287]:
# 'won' encodes the match outcome from the home side's point of view:
# 1 = home win, -1 = away win, 0 = draw. A win is worth 3 points, a draw 1.

# Step 1: Per-team totals over the matches played as the home side
home_games = (
    agg_goals_before_last_day
    .groupby(['tournament_name', 'group_name', 'home'])
    .agg(
        goals_scored=('goals_home', 'sum'),
        goals_conceded=('goals_away', 'sum'),
        # The home side wins when 'won' is 1
        points_home=('won', lambda outcomes: sum({1: 3, 0: 1}.get(outcome, 0) for outcome in outcomes)),
        # Number of matches (with at least one recorded goal) played at home
        match_count_home=('match_id', 'count'),
    )
    .reset_index()
)

# Step 2: Per-team totals over the matches played as the away side
away_games = (
    agg_goals_before_last_day
    .groupby(['tournament_name', 'group_name', 'away'])
    .agg(
        goals_scored=('goals_away', 'sum'),
        goals_conceded=('goals_home', 'sum'),
        # The away side wins when 'won' is -1
        points_away=('won', lambda outcomes: sum({-1: 3, 0: 1}.get(outcome, 0) for outcome in outcomes)),
        # Number of matches (with at least one recorded goal) played away
        match_count_away=('match_id', 'count'),
    )
    .reset_index()
)


## aggregate data after first two matches

In [288]:
# Steps 1-5: Outer-join the home and away aggregates per team and combine them.
# A team that only appears on one side has NaN on the other, hence the fillna(0)
# before every addition; 'team' takes the name from whichever side is present.
all_games_before_last = (
    home_games
    .merge(
        away_games,
        how='outer',
        left_on=['tournament_name', 'group_name', 'home'],
        right_on=['tournament_name', 'group_name', 'away'],
        suffixes=('_home', '_away'),
    )
    .assign(
        team=lambda t: t['home'].fillna(t['away']),
        goals_scored=lambda t: t['goals_scored_home'].fillna(0) + t['goals_scored_away'].fillna(0),
        goals_conceded=lambda t: t['goals_conceded_home'].fillna(0) + t['goals_conceded_away'].fillna(0),
        points=lambda t: t['points_home'].fillna(0) + t['points_away'].fillna(0),
        goals_difference=lambda t: t['goals_scored'] - t['goals_conceded'],
        total_matches=lambda t: t['match_count_home'].fillna(0) + t['match_count_away'].fillna(0),
    )
)

# Step 6: A team with only one match in the goal data is assumed to have played
# its other match to a 0-0 draw (goalless matches have no goal rows), so it
# receives one extra point.
played_once = all_games_before_last['total_matches'] == 1
all_games_before_last['points'] = all_games_before_last['points'] + played_once.astype('int64')

# Steps 7-9: Keep the table columns, order each group by points, goal difference
# and goals scored (all descending), and add the tie-break column with its
# default value 'no need'.
all_games_before_last = (
    all_games_before_last
    .loc[:, ['tournament_name', 'group_name', 'team', 'goals_scored', 'goals_conceded',
             'points', 'goals_difference', 'total_matches']]
    .sort_values(
        by=['tournament_name', 'group_name', 'points', 'goals_difference', 'goals_scored'],
        ascending=[True, True, False, False, False],
    )
    .reset_index(drop=True)
    .assign(tiebreaker='no need')
)

# Step 10: Identify rows with tied points, goals_difference, and goals_scored and apply the tie-breaker
def check_tiebreaker(row1, row2, agg_data):
    """
    Resolve a tie between two teams from their head-to-head match.

    Parameters
    ----------
    row1, row2 : mapping or pandas.Series
        Table rows of the two tied teams. 'team' is required; when
        'tournament_name' is present (and agg_data has that column) the
        lookup is restricted to that tournament.
    agg_data : pandas.DataFrame
        One row per match with columns 'home', 'away' and 'won'
        (1 = home win, -1 = away win, 0 = draw).

    Returns
    -------
    str
        Name of the team that won the head-to-head match, 'tie' if it was
        drawn, or 'no result' if no such match is found in agg_data.
    """
    team_a = row1['team']
    team_b = row2['team']

    # The two teams may have met with either one as the home side
    is_head_to_head = (((agg_data['home'] == team_a) & (agg_data['away'] == team_b)) |
                       ((agg_data['home'] == team_b) & (agg_data['away'] == team_a)))

    # The same pairing can occur in several tournaments; only the current one counts
    if 'tournament_name' in agg_data.columns and 'tournament_name' in row1:
        is_head_to_head = is_head_to_head & (agg_data['tournament_name'] == row1['tournament_name'])

    match = agg_data[is_head_to_head]
    if match.empty:
        return 'no result'  # No match found

    head_to_head = match.iloc[0]
    # Report the actual home/away side of the match: row1 is not necessarily the home team
    if head_to_head['won'] == 1:
        return head_to_head['home']  # Home team won
    if head_to_head['won'] == -1:
        return head_to_head['away']  # Away team won
    return 'tie'  # It's a draw

# Step 11: Loop through the sorted dataframe and apply the tie-breaker if needed.
# Only neighbouring rows of the SAME tournament and group can be tied with each
# other; without that check the last team of one group could be matched against
# the first team of the next group.
row_labels = all_games_before_last.index
for i in range(len(all_games_before_last) - 1):
    row1 = all_games_before_last.iloc[i]
    row2 = all_games_before_last.iloc[i + 1]

    same_group = (row1['tournament_name'] == row2['tournament_name'] and
                  row1['group_name'] == row2['group_name'])

    # Check if the two rows have identical values for points, goal difference, and goals scored
    if (same_group and
        row1['points'] == row2['points'] and
        row1['goals_difference'] == row2['goals_difference'] and
        row1['goals_scored'] == row2['goals_scored']):

        # Apply the tie-breaker by checking the head-to-head match result
        tiebreak_result = check_tiebreaker(row1, row2, agg_goals_before_last_day)

        # Store the tie-breaker result for both teams involved
        # (write by label, since iloc above addressed the rows by position)
        all_games_before_last.at[row_labels[i], 'tiebreaker'] = tiebreak_result
        all_games_before_last.at[row_labels[i + 1], 'tiebreaker'] = tiebreak_result

# Step 12: Add group standing by ranking teams within each group based on points, goal difference, and goals scored.
# cumcount() numbers the rows of each group in their sorted order; the result keeps
# the original row labels, so the assignment aligns it back to the right teams.
all_games_before_last['standing'] = (
    all_games_before_last
    .sort_values(
        by=['tournament_name', 'group_name', 'points', 'goals_difference', 'goals_scored'],
        ascending=[True, True, False, False, False],
    )
    .groupby(['tournament_name', 'group_name'])
    .cumcount() + 1
)

# Convert goals_scored, goals_conceded, points, goals_difference, and total_matches to integers
# (they become floats when the outer merge introduces NaN values)
all_games_before_last[['goals_scored', 'goals_conceded', 'points', 'goals_difference', 'total_matches']] = all_games_before_last[
    ['goals_scored', 'goals_conceded', 'points', 'goals_difference', 'total_matches']].astype(int)


In [289]:
# Show the standings table built from the matches before the last match day
display(all_games_before_last)

Unnamed: 0,tournament_name,group_name,team,goals_scored,goals_conceded,points,goals_difference,total_matches,tiebreaker,standing
0,2022 FIFA World Cup,Group A,Netherlands,3,1,4,2,2,tie,1
1,2022 FIFA World Cup,Group A,Ecuador,3,1,4,2,2,tie,2
2,2022 FIFA World Cup,Group A,Senegal,3,3,3,0,2,no need,3
3,2022 FIFA World Cup,Group A,Qatar,1,5,0,-4,2,no need,4
4,2022 FIFA World Cup,Group B,England,6,2,4,4,1,no need,1
5,2022 FIFA World Cup,Group B,Iran,4,6,3,-2,2,no need,2
6,2022 FIFA World Cup,Group B,United States,1,1,2,0,1,no need,3
7,2022 FIFA World Cup,Group B,Wales,1,3,1,-2,2,no need,4
8,2022 FIFA World Cup,Group C,Poland,2,0,4,2,1,no need,1
9,2022 FIFA World Cup,Group C,Argentina,3,2,3,1,2,no need,2


# Recreate league table after last match day

# test

In [290]:
def process_goals_for_group_specific(tournament_name, group_name, all_games_before_last, goals_last_day_sorted):
    """
    Replay the last match day of one group goal by goal and track how the table moves.

    After every goal the running score of both last-day matches is turned into
    points (3 for leading, 1 for level, 0 for trailing), the table is re-ranked
    by total points, goal difference and goals scored, and each team whose rank
    differs from its rank before that goal gets the counter of its new rank
    ('1st'..'4th') incremented. For the first goal, "before" is the standing
    prior to the last match day.

    Parameters
    ----------
    tournament_name : str
        Tournament to select.
    group_name : str
        Group to select.
    all_games_before_last : pandas.DataFrame
        Table before the last match day. Columns read: 'tournament_name',
        'group_name', 'team', 'goals_scored', 'goals_conceded', 'points',
        'standing'. The frame is not modified.
    goals_last_day_sorted : pandas.DataFrame
        One row per last-day goal. Columns read: 'tournament_name',
        'group_name', 'home', 'away', 'player_team_name', 'own_goal',
        'minute_regulation', 'minute_stoppage'. It is re-sorted by minute here.

    Returns
    -------
    pandas.DataFrame
        The group's rows with the tracking columns added, ordered by the
        standing after the last processed goal. If the group has no last-day
        goals the rows keep their input order and 'last_game_standing' stays 0.

    Notes
    -----
    Prints the goal and the updated table after every processed goal.
    Only ranks 1 to 4 have a counter column.
    """
    position_columns = {1: '1st', 2: '2nd', 3: '3rd', 4: '4th'}

    # Step 1: Filter the data for the specific tournament_name and group_name
    group_goals_tracking = all_games_before_last[
        (all_games_before_last['tournament_name'] == tournament_name) &
        (all_games_before_last['group_name'] == group_name)
    ].copy()

    group_goals_last_day = goals_last_day_sorted[
        (goals_last_day_sorted['tournament_name'] == tournament_name) &
        (goals_last_day_sorted['group_name'] == group_name)
    ]

    # Step 2: Initialize columns for tracking team performance
    group_goals_tracking['before_last_game_goals_scored'] = group_goals_tracking['goals_scored']
    group_goals_tracking['before_last_game_goals_conceded'] = group_goals_tracking['goals_conceded']
    group_goals_tracking['before_last_game_standing'] = group_goals_tracking['standing']
    group_goals_tracking['before_last_game_points'] = group_goals_tracking['points']

    group_goals_tracking['last_game_goals_scored'] = 0
    group_goals_tracking['last_game_goals_conceded'] = 0
    group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored']
    group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded']
    group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']
    group_goals_tracking['last_game_points'] = 0
    group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points']
    group_goals_tracking['last_game_standing'] = 0
    for column in position_columns.values():
        group_goals_tracking[column] = 0
    group_goals_tracking['changes'] = 0  # Recomputed after every goal as the sum of 1st, 2nd, 3rd, 4th

    # Step 3: Sort group_goals_last_day by minute_regulation (then stoppage time) in ascending order
    group_goals_last_day = group_goals_last_day.sort_values(by=['minute_regulation', 'minute_stoppage'], ascending=[True, True])

    # Step 4: Iterate through the sorted and filtered last match goals and update the goals_tracking table.
    # The first goal is compared against the standing before the last match day,
    # so every team is checked against a real previous rank from the start.
    previous_standings = group_goals_tracking['before_last_game_standing'].copy()

    for _, goal in group_goals_last_day.iterrows():
        home_team = goal['home']
        away_team = goal['away']
        player_team = goal['player_team_name']
        own_goal = goal['own_goal']

        # Print goal information for each goal
        print(f"Analyzing goal: {goal['minute_regulation']} minute, {goal['minute_stoppage']} extra time, Player team: {player_team}, Home: {home_team}, Away: {away_team}, Own goal: {own_goal}")

        # Work out which side the scoring player belongs to; a goal whose
        # player team is neither side of the match is ignored.
        if player_team == home_team:
            players_side, opponent = home_team, away_team
        elif player_team == away_team:
            players_side, opponent = away_team, home_team
        else:
            players_side = opponent = None

        # A normal goal counts for the player's team, an own goal for the opponent
        if players_side is not None and own_goal in (0, 1):
            scorer, conceder = (players_side, opponent) if own_goal == 0 else (opponent, players_side)
            group_goals_tracking.loc[group_goals_tracking['team'] == scorer, 'last_game_goals_scored'] += 1
            group_goals_tracking.loc[group_goals_tracking['team'] == conceder, 'last_game_goals_conceded'] += 1

        # Step 5: Update total_goals_scored, total_goals_conceded, and total_goal_difference
        group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored'] + group_goals_tracking['last_game_goals_scored']
        group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded'] + group_goals_tracking['last_game_goals_conceded']
        group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']

        # Step 6: Points for the last game from the running score: 3 = leading, 1 = level, 0 = trailing
        scored = group_goals_tracking['last_game_goals_scored']
        conceded = group_goals_tracking['last_game_goals_conceded']
        group_goals_tracking['last_game_points'] = (scored > conceded) * 3 + (scored == conceded) * 1

        # Step 7: Update total points
        group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points'] + group_goals_tracking['last_game_points']

        # Step 8: Sort teams by total points, goal difference, and goals scored
        group_goals_tracking = group_goals_tracking.sort_values(by=['total_points', 'total_goal_difference', 'total_goals_scored'],
                                                                ascending=[False, False, False])

        # Step 9: Assign standings based on the sorting
        group_goals_tracking['last_game_standing'] = list(range(1, len(group_goals_tracking) + 1))

        # Step 10: Count a position change for every team whose rank differs from its
        # rank before this goal. previous_standings is re-ordered to the current row
        # order first so both Series carry identical labels.
        current_standings = group_goals_tracking['last_game_standing']
        moved = current_standings != previous_standings.reindex(group_goals_tracking.index)
        for position, column in position_columns.items():
            group_goals_tracking.loc[moved & (current_standings == position), column] += 1

        # Update previous standings after each goal
        previous_standings = group_goals_tracking['last_game_standing'].copy()

        # Step 11: Calculate changes as the sum of 1st, 2nd, 3rd, and 4th
        group_goals_tracking['changes'] = group_goals_tracking[['1st', '2nd', '3rd', '4th']].sum(axis=1)

        # Step 12: Print the updated group_goals_tracking after processing each goal
        print("\n=== Updated Standings After This Goal ===\n")
        display_columns = ['team', 'total_points', 'total_goals_scored', 'total_goals_conceded',
                           'total_goal_difference', 'last_game_points', 'last_game_standing',
                           'changes', '1st', '2nd', '3rd', '4th']
        print(group_goals_tracking[display_columns].to_string(index=False))
        print("\n========================================\n")

    # Step 13: Return the final DataFrame
    return group_goals_tracking


In [291]:
# Call the function for "2022 FIFA World Cup" and "Group E"
tournament_name = "2022 FIFA World Cup"
group_name = "Group E"

# Build the minute-ordered last-day goals here so the cell does not depend on a
# variable left over in the kernel from a cell that no longer exists.
goals_last_day_sorted = goals_last_day.sort_values(by=['minute_regulation', 'minute_stoppage'], ascending=[True, True])

result = process_goals_for_group_specific(tournament_name, group_name, all_games_before_last, goals_last_day_sorted)

# Display the final tracking table for the group
print(f"Final results for {tournament_name} - {group_name}:")
display(result)


Analyzing goal: 10 minute, 0 extra time, Player team: Germany, Home: Costa Rica, Away: Germany, Own goal: 0

=== Updated Standings After This Goal ===

      team  total_points  total_goals_scored  total_goals_conceded  total_goal_difference  last_game_points  last_game_standing  changes  1st  2nd  3rd  4th
     Spain             5                   8                     1                      7                 1                   1        0    0    0    0    0
   Germany             4                   3                     3                      0                 3                   2        1    0    1    0    0
     Japan             4                   2                     2                      0                 1                   3        1    0    0    1    0
Costa Rica             3                   1                     8                     -7                 0                   4        1    0    0    0    1


Analyzing goal: 11 minute, 0 extra time, Player team: Spain, 

Unnamed: 0,tournament_name,group_name,team,goals_scored,goals_conceded,points,goals_difference,total_matches,tiebreaker,standing,...,total_goals_conceded,total_goal_difference,last_game_points,total_points,last_game_standing,1st,2nd,3rd,4th,changes
17,2022 FIFA World Cup,Group E,Japan,2,2,3,0,2,no need,2,...,3,1,3,6,1,1,0,1,0,2
16,2022 FIFA World Cup,Group E,Spain,8,1,4,7,2,no need,1,...,3,6,0,4,2,0,2,1,0,3
19,2022 FIFA World Cup,Group E,Germany,2,3,1,-1,2,no need,4,...,5,1,3,4,3,0,1,2,1,4
18,2022 FIFA World Cup,Group E,Costa Rica,1,7,3,-6,2,no need,3,...,11,-8,0,3,4,0,1,2,2,5


In [292]:
def process_goals_for_group_specific(tournament_name, group_name, all_games_before_last, goals_last_day_sorted, agg_goals_before_last_day):
    # Step 1: Filter the data for the specific tournament_name and group_name
    group_goals_tracking = all_games_before_last[
        (all_games_before_last['tournament_name'] == tournament_name) & 
        (all_games_before_last['group_name'] == group_name)
    ].copy()

    group_goals_last_day = goals_last_day_sorted[
        (goals_last_day_sorted['tournament_name'] == tournament_name) & 
        (goals_last_day_sorted['group_name'] == group_name)
    ]

    # Step 2: Initialize columns for tracking team performance
    group_goals_tracking['before_last_game_goals_scored'] = group_goals_tracking['goals_scored']
    group_goals_tracking['before_last_game_goals_conceded'] = group_goals_tracking['goals_conceded']
    group_goals_tracking['before_last_game_standing'] = group_goals_tracking['standing']
    group_goals_tracking['before_last_game_points'] = group_goals_tracking['points']

    group_goals_tracking['last_game_goals_scored'] = 0
    group_goals_tracking['last_game_goals_conceded'] = 0
    group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored']
    group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded']
    group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']
    group_goals_tracking['last_game_points'] = 0
    group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points']
    group_goals_tracking['last_game_standing'] = 0
    group_goals_tracking['1st'] = 0
    group_goals_tracking['2nd'] = 0
    group_goals_tracking['3rd'] = 0
    group_goals_tracking['4th'] = 0
    group_goals_tracking['changes'] = 0  # Initialize this but we will redefine it later as the sum of 1st, 2nd, 3rd, 4th
    group_goals_tracking['tied'] = False  # Initialize a flag to track tied teams
    group_goals_tracking['tied_won'] = 0  # Initialize to track if the team won a tie-breaker match

    # Step 3: Sort group_goals_last_day by minute_regulation in ascending order
    group_goals_last_day = group_goals_last_day.sort_values(by=['minute_regulation', 'minute_stoppage'], ascending=[True, True])

    # Step 4: Iterate through the sorted and filtered last match goals and update the goals_tracking table
    previous_standings = group_goals_tracking['last_game_standing'].copy()
    first_iteration = True  # Variable to track the first iteration

    for _, goal in group_goals_last_day.iterrows():
        home_team = goal['home']
        away_team = goal['away']
        player_team = goal['player_team_name']
        own_goal = goal['own_goal']

        # Print goal information for each goal
        print(f"Analyzing goal: {goal['minute_regulation']} minute, {goal['minute_stoppage']} extra time, Player team: {player_team}, Home: {home_team}, Away: {away_team}, Own goal: {own_goal}")

        # Update the goals based on who scored the goal (own goal or normal goal)
        if own_goal == 0:  # Normal goal
            if player_team == home_team:
                # Home team scored, update home scored and away conceded
                group_goals_tracking.loc[group_goals_tracking['team'] == home_team, 'last_game_goals_scored'] += 1
                group_goals_tracking.loc[group_goals_tracking['team'] == away_team, 'last_game_goals_conceded'] += 1
            elif player_team == away_team:
                # Away team scored, update away scored and home conceded
                group_goals_tracking.loc[group_goals_tracking['team'] == away_team, 'last_game_goals_scored'] += 1
                group_goals_tracking.loc[group_goals_tracking['team'] == home_team, 'last_game_goals_conceded'] += 1
        elif own_goal == 1:  # Own goal
            if player_team == home_team:
                # Own goal by home team, away team scores
                group_goals_tracking.loc[group_goals_tracking['team'] == away_team, 'last_game_goals_scored'] += 1
                group_goals_tracking.loc[group_goals_tracking['team'] == home_team, 'last_game_goals_conceded'] += 1
            elif player_team == away_team:
                # Own goal by away team, home team scores
                group_goals_tracking.loc[group_goals_tracking['team'] == home_team, 'last_game_goals_scored'] += 1
                group_goals_tracking.loc[group_goals_tracking['team'] == away_team, 'last_game_goals_conceded'] += 1

        # Step 5: Update total_goals_scored, total_goals_conceded, and total_goal_difference
        group_goals_tracking['total_goals_scored'] = group_goals_tracking['before_last_game_goals_scored'] + group_goals_tracking['last_game_goals_scored']
        group_goals_tracking['total_goals_conceded'] = group_goals_tracking['before_last_game_goals_conceded'] + group_goals_tracking['last_game_goals_conceded']
        group_goals_tracking['total_goal_difference'] = group_goals_tracking['total_goals_scored'] - group_goals_tracking['total_goals_conceded']

        # Step 6: Assign points for the last game dynamically after each goal
        for i, row in group_goals_tracking.iterrows():
            if row['last_game_goals_scored'] > row['last_game_goals_conceded']:
                group_goals_tracking.loc[i, 'last_game_points'] = 3  # Win
            elif row['last_game_goals_scored'] == row['last_game_goals_conceded']:
                group_goals_tracking.loc[i, 'last_game_points'] = 1  # Draw
            else:
                group_goals_tracking.loc[i, 'last_game_points'] = 0  # Loss

        # Step 7: Update total points
        group_goals_tracking['total_points'] = group_goals_tracking['before_last_game_points'] + group_goals_tracking['last_game_points']

        # Step 8: Mark teams that are tied
        group_goals_tracking['tied'] = group_goals_tracking.duplicated(subset=['total_points', 'total_goal_difference', 'total_goals_scored'], keep=False)

        # Reset `tied_won` to 0 for all teams
        group_goals_tracking['tied_won'] = 0

        # Step 8b: Calculate tied_won only for tied teams
        tied_teams = group_goals_tracking[group_goals_tracking['tied']]

        if not tied_teams.empty:
            # Iterate over tied teams to resolve standings using head-to-head results
            for index, row in tied_teams.iterrows():
                team1 = row['team']
                team1_index = index

                # Look for other teams tied with this team
                for other_index, other_row in tied_teams[tied_teams.index != index].iterrows():
                    team2 = other_row['team']
                    team2_index = other_index

                    # Check if these two teams played against each other in agg_goals_before_last_day
                    match = agg_goals_before_last_day[
                        ((agg_goals_before_last_day['home'] == team1) & (agg_goals_before_last_day['away'] == team2)) |
                        ((agg_goals_before_last_day['home'] == team2) & (agg_goals_before_last_day['away'] == team1))
                    ]

                    if not match.empty:
                        match_result = match.iloc[0]['won']  # Assume the 'won' column holds 1 for home win, -1 for away win, 0 for draw

                        # Resolve the tie using the match result
                        if match_result == 1:
                            # Home team won
                            if match.iloc[0]['home'] == team1:
                                group_goals_tracking.loc[group_goals_tracking['team'] == team1, 'tied_won'] = 1
                            else:
                                group_goals_tracking.loc[group_goals_tracking['team'] == team2, 'tied_won'] = 1
                        elif match_result == -1:
                            # Away team won
                            if match.iloc[0]['away'] == team1:
                                group_goals_tracking.loc[group_goals_tracking['team'] == team1, 'tied_won'] = 1
                            else:
                                group_goals_tracking.loc[group_goals_tracking['team'] == team2, 'tied_won'] = 1
                        elif match_result == 0:
                            # Draw, both teams get the same standing
                            group_goals_tracking.loc[group_goals_tracking['team'] == team1, 'tied_won'] = 0
                            group_goals_tracking.loc[group_goals_tracking['team'] == team2, 'tied_won'] = 0

        # Step 8c: Sort teams by total points, goal difference, goals scored, and tied_won
        group_goals_tracking = group_goals_tracking.sort_values(by=['total_points', 'total_goal_difference', 'total_goals_scored', 'tied_won'],
                                                                ascending=[False, False, False, False])

        # Step 9: Assign standings based on the sorting and tie resolution
        group_goals_tracking['last_game_standing'] = group_goals_tracking.reset_index(drop=True).index + 1

        # Step 10: Track changes and update standing positions after each goal is processed
        for i, row in group_goals_tracking.iterrows():
            team = row['team']

            if first_iteration:
                # Skip the update if the standing did not change
                if row['before_last_game_standing'] == row['last_game_standing']:
                    continue  # Skip updating the counters
                else:
                    # Update the position counters since standing has changed
                    if row['last_game_standing'] == 1:
                        group_goals_tracking.loc[group_goals_tracking['team'] == team, '1st'] += 1
                    elif row['last_game_standing'] == 2:
                        group_goals_tracking.loc[group_goals_tracking['team'] == team, '2nd'] += 1
                    elif row['last_game_standing'] == 3:
                        group_goals_tracking.loc[group_goals_tracking['team'] == team, '3rd'] += 1
                    elif row['last_game_standing'] == 4:
                        group_goals_tracking.loc[group_goals_tracking['team'] == team, '4th'] += 1

                # Disable first iteration flag after the first goal
                first_iteration = False
            else:
                # Track the standing position counters only if the position differs from the previous state
                if row['last_game_standing'] != previous_standings[i]:  
                    if row['last_game_standing'] == 1:
                        group_goals_tracking.loc[group_goals_tracking['team'] == team, '1st'] += 1
                    elif row['last_game_standing'] == 2:
                        group_goals_tracking.loc[group_goals_tracking['team'] == team, '2nd'] += 1
                    elif row['last_game_standing'] == 3:
                        group_goals_tracking.loc[group_goals_tracking['team'] == team, '3rd'] += 1
                    elif row['last_game_standing'] == 4:
                        group_goals_tracking.loc[group_goals_tracking['team'] == team, '4th'] += 1

        # Update previous standings after each goal
        previous_standings = group_goals_tracking['last_game_standing'].copy()

        # Step 11: Calculate changes as the sum of 1st, 2nd, 3rd, and 4th
        group_goals_tracking['changes'] = group_goals_tracking[['1st', '2nd', '3rd', '4th']].sum(axis=1)

        # Step 12: Print the updated group_goals_tracking after processing each goal
        print("\n=== Updated Standings After This Goal ===\n")
        display_columns = ['team', 'total_points', 'total_goals_scored', 'total_goals_conceded', 
                           'total_goal_difference', 'last_game_points', 'last_game_standing', 
                           'changes', '1st', '2nd', '3rd', '4th', 'tied', 'tied_won']
        print(group_goals_tracking[display_columns].to_string(index=False))
        print("\n========================================\n")

    # Step 13: Return the final DataFrame
    return group_goals_tracking


In [293]:
# Group to replay goal by goal
tournament_name = "2022 FIFA World Cup"
group_name = "Group E"

# Run the goal-by-goal processing for the selected group. The function prints
# the standings table after every goal it processes and returns the final
# tracking frame. The last argument (agg_goals_before_last_day) is required:
# the function looks up earlier matches between tied teams in it.
result = process_goals_for_group_specific(
    tournament_name,
    group_name,
    all_games_before_last,
    goals_last_day_sorted,
    agg_goals_before_last_day,
)

# Compact summary: keep only the first two and the last four columns of the
# returned frame, re-joined on the shared index.
leading_columns = result.iloc[:, :2]
trailing_columns = result.iloc[:, -4:]

print(f"Final results for {tournament_name} - {group_name}:")
print(leading_columns.join(trailing_columns))


Analyzing goal: 10 minute, 0 extra time, Player team: Germany, Home: Costa Rica, Away: Germany, Own goal: 0

=== Updated Standings After This Goal ===

      team  total_points  total_goals_scored  total_goals_conceded  total_goal_difference  last_game_points  last_game_standing  changes  1st  2nd  3rd  4th  tied  tied_won
     Spain             5                   8                     1                      7                 1                   1        0    0    0    0    0 False         0
   Germany             4                   3                     3                      0                 3                   2        1    0    1    0    0 False         0
     Japan             4                   2                     2                      0                 1                   3        1    0    0    1    0 False         0
Costa Rica             3                   1                     8                     -7                 0                   4        1    0    0    0    1