# Libraries

In [58]:
import pandas as pd
from getpass import getuser

# Load and inspect dataset

In [59]:
# Get the current user's name
user = getuser()

# Path to the dataset
data_path = rf'C:\Users\{user}\Documents\GitHub\tiebreak_wc\data.csv'

# Read the dataset with a different encoding
df = pd.read_csv(data_path, encoding='ISO-8859-1')




# Extract relevant columns

In [60]:
# Extract relevant columns for goal events and match results
goals_df = df[['tournament_name', 'group_name','match_name', 'match_id', 'player_team_name','match_date', 'minute_regulation', 
               'team_id', 'own_goal']]

# Display the first few rows to see what we extracted
goals_df.head()



Unnamed: 0,tournament_name,group_name,match_name,match_id,player_team_name,match_date,minute_regulation,team_id,own_goal
0,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,7/13/1930,19,T-28,0
1,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,7/13/1930,40,T-28,0
2,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,7/13/1930,43,T-28,0
3,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,Mexico,7/13/1930,70,T-44,0
4,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,7/13/1930,87,T-28,0


In [61]:
# Convert 'match_date' to datetime format
goals_df['match_date'] = pd.to_datetime(goals_df['match_date'], format='%m/%d/%Y')

# Sort the dataset by 'match_date' in ascending order (oldest first) and 'minute_regulation'
goals_df = goals_df.sort_values(by=['match_date', 'minute_regulation'], ascending=[True, True])

# Display the first few rows to confirm the sorting
goals_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  goals_df['match_date'] = pd.to_datetime(goals_df['match_date'], format='%m/%d/%Y')


Unnamed: 0,tournament_name,group_name,match_name,match_id,player_team_name,match_date,minute_regulation,team_id,own_goal
0,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,1930-07-13,19,T-28,0
5,1930 FIFA World Cup,Group 4,United States v Belgium,M-1930-02,United States,1930-07-13,23,T-80,0
1,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,1930-07-13,40,T-28,0
2,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,1930-07-13,43,T-28,0
6,1930 FIFA World Cup,Group 4,United States v Belgium,M-1930-02,United States,1930-07-13,45,T-80,0


# Calculate dynamically match outcomes after each goal

## Recreate final league table

In [62]:
# Initialize match results ensuring both teams are accounted for
match_results = {}

# Loop through the sorted goal data and update match results
for index, row in goals_df.iterrows():
    match_id = row['match_id']
    group_name = row['group_name']
    match_name = row['match_name']  # This contains the two teams' names
    team_name = row['player_team_name']
    own_goal = row['own_goal']  # Check if it's an own goal
    
    # Extract the two teams playing from the match_name (e.g., "France v Mexico")
    team1, team2 = match_name.split(' v ')
    
    # Initialize the match entry if it's not already present
    if match_id not in match_results:
        match_results[match_id] = {
            'teams': {team1: 0, team2: 0},  # Initialize both teams with 0 goals
            'group_name': group_name
        }
    
    # Determine which team gets the goal
    if own_goal == 1:
        # If it's an own goal, assign the goal to the opponent team
        if team_name == team1:
            match_results[match_id]['teams'][team2] += 1
        else:
            match_results[match_id]['teams'][team1] += 1
    else:
        # Otherwise, assign the goal to the team that scored
        match_results[match_id]['teams'][team_name] += 1

# Display the structure of match results to ensure both teams are included
print(match_results)


{'M-1930-01': {'teams': {'France': 4, 'Mexico': 1}, 'group_name': 'Group 1'}, 'M-1930-02': {'teams': {'United States': 3, 'Belgium': 0}, 'group_name': 'Group 4'}, 'M-1930-04': {'teams': {'Romania': 3, 'Peru': 1}, 'group_name': 'Group 3'}, 'M-1930-03': {'teams': {'Yugoslavia': 2, 'Brazil': 1}, 'group_name': 'Group 2'}, 'M-1930-05': {'teams': {'Argentina': 1, 'France': 0}, 'group_name': 'Group 1'}, 'M-1930-06': {'teams': {'Chile': 3, 'Mexico': 0}, 'group_name': 'Group 1'}, 'M-1930-08': {'teams': {'United States': 3, 'Paraguay': 0}, 'group_name': 'Group 4'}, 'M-1930-07': {'teams': {'Yugoslavia': 4, 'Bolivia': 0}, 'group_name': 'Group 2'}, 'M-1930-09': {'teams': {'Uruguay': 1, 'Peru': 0}, 'group_name': 'Group 3'}, 'M-1930-11': {'teams': {'Argentina': 6, 'Mexico': 3}, 'group_name': 'Group 1'}, 'M-1930-10': {'teams': {'Chile': 1, 'France': 0}, 'group_name': 'Group 1'}, 'M-1930-12': {'teams': {'Brazil': 4, 'Bolivia': 0}, 'group_name': 'Group 2'}, 'M-1930-13': {'teams': {'Paraguay': 1, 'Belgiu

## Calculate and update league table after each goal

In [63]:
from collections import defaultdict

# Initialize a dictionary to store league tables per group and tournament_name
# For each group in a tournament, it holds a dictionary of teams with their statistics.
group_tables = defaultdict(lambda: defaultdict(lambda: {'points': 0, 'goals_scored': 0, 'goals_conceded': 0, 'goal_difference': 0}))