# Libraries

In [70]:
import pandas as pd
from getpass import getuser

# Load and inspect dataset

In [71]:
# Get the current user's name
user = getuser()

# Path to the dataset
data_path = rf'C:\Users\{user}\Documents\GitHub\tiebreak_wc\data.csv'

# Read the dataset with a different encoding
df = pd.read_csv(data_path, encoding='ISO-8859-1')




# Extract relevant columns

In [72]:
# Extract relevant columns for goal events and match results
goals_df = df[['tournament_name', 'group_name','match_name', 'match_id', 'player_team_name','match_date', 'minute_regulation', 
               'team_id', 'own_goal']]

# Display the first few rows to see what we extracted
goals_df.head()



Unnamed: 0,tournament_name,group_name,match_name,match_id,player_team_name,match_date,minute_regulation,team_id,own_goal
0,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,7/13/1930,19,T-28,0
1,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,7/13/1930,40,T-28,0
2,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,7/13/1930,43,T-28,0
3,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,Mexico,7/13/1930,70,T-44,0
4,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,7/13/1930,87,T-28,0


In [73]:
# Convert 'match_date' to datetime format
goals_df['match_date'] = pd.to_datetime(goals_df['match_date'], format='%m/%d/%Y')

# Sort the dataset by 'match_date' in ascending order (oldest first) and 'minute_regulation'
goals_df = goals_df.sort_values(by=['match_date', 'minute_regulation'], ascending=[True, True])

# Display the first few rows to confirm the sorting
goals_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  goals_df['match_date'] = pd.to_datetime(goals_df['match_date'], format='%m/%d/%Y')


Unnamed: 0,tournament_name,group_name,match_name,match_id,player_team_name,match_date,minute_regulation,team_id,own_goal
0,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,1930-07-13,19,T-28,0
5,1930 FIFA World Cup,Group 4,United States v Belgium,M-1930-02,United States,1930-07-13,23,T-80,0
1,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,1930-07-13,40,T-28,0
2,1930 FIFA World Cup,Group 1,France v Mexico,M-1930-01,France,1930-07-13,43,T-28,0
6,1930 FIFA World Cup,Group 4,United States v Belgium,M-1930-02,United States,1930-07-13,45,T-80,0


# Calculate dynamically match outcomes after each goal

In [74]:
from collections import defaultdict

# Initialize group tables for each tournament and group
group_tables = defaultdict(lambda: defaultdict(lambda: {
    'points': 0,
    'goals_scored': 0,
    'goals_conceded': 0,
    'goal_difference': 0
}))

# Initialize dictionary to track the number of table changes per date, group, and tournament
table_changes = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

# Store the previous ranking to compare with
previous_ranking = defaultdict(lambda: defaultdict(list))


def update_group_table(group_tables, tournament_name, group_name, match_name, match_results):
    # Extract the match result from match_results using tournament_name and group_name
    teams = match_results[tournament_name][group_name][match_name]  # Get the teams and their scores from the match results
    team1, team2 = list(teams.keys())  # Extract the team names
    goals_team1 = teams[team1]  # Goals scored by team1
    goals_team2 = teams[team2]  # Goals scored by team2
    
    # Ensure both teams are initialized in the group_tables
    if team1 not in group_tables[tournament_name][group_name]:
        group_tables[tournament_name][group_name][team1] = {'points': 0, 'goals_scored': 0, 'goals_conceded': 0, 'goal_difference': 0}
    
    if team2 not in group_tables[tournament_name][group_name]:
        group_tables[tournament_name][group_name][team2] = {'points': 0, 'goals_scored': 0, 'goals_conceded': 0, 'goal_difference': 0}
    
    # Ensure that no unwanted keys like 'points', 'goals_scored', etc. exist as teams
    if 'points' in group_tables[tournament_name][group_name]:
        del group_tables[tournament_name][group_name]['points']
    if 'goals_scored' in group_tables[tournament_name][group_name]:
        del group_tables[tournament_name][group_name]['goals_scored']
    if 'goals_conceded' in group_tables[tournament_name][group_name]:
        del group_tables[tournament_name][group_name]['goals_conceded']
    if 'goal_difference' in group_tables[tournament_name][group_name]:
        del group_tables[tournament_name][group_name]['goal_difference']
    
    # Reset goals scored and conceded
    group_tables[tournament_name][group_name][team1]['goals_scored'] = goals_team1
    group_tables[tournament_name][group_name][team1]['goals_conceded'] = goals_team2
    group_tables[tournament_name][group_name][team2]['goals_scored'] = goals_team2
    group_tables[tournament_name][group_name][team2]['goals_conceded'] = goals_team1

    # Update goal difference for both teams
    group_tables[tournament_name][group_name][team1]['goal_difference'] = goals_team1 - goals_team2
    group_tables[tournament_name][group_name][team2]['goal_difference'] = goals_team2 - goals_team1
    
    # Update points based on the match result
    if goals_team1 > goals_team2:
        # Team 1 wins, 3 points for team 1
        group_tables[tournament_name][group_name][team1]['points'] = 3
        group_tables[tournament_name][group_name][team2]['points'] = 0
    elif goals_team1 < goals_team2:
        # Team 2 wins, 3 points for team 2
        group_tables[tournament_name][group_name][team2]['points'] = 3
        group_tables[tournament_name][group_name][team1]['points'] = 0
    else:
        # It's a draw, 1 point for each team
        group_tables[tournament_name][group_name][team1]['points'] = 1
        group_tables[tournament_name][group_name][team2]['points'] = 1

# Function to update the group table and count changes with added debug information
def update_group_table_and_count_changes(group_tables, table_changes, previous_ranking, match_results, tournament_name, group_name, match_date):
    teams = group_tables[tournament_name][group_name].keys()
    
    # Debug: Print team and its corresponding values to check the structure
    print(f"Checking teams for {tournament_name}, {group_name}:")
    for team in teams:
        print(f"Team: {team}, Data: {group_tables[tournament_name][group_name][team]}")
    
    # Sort teams by points, goal difference, and goals scored (World Cup tie-breaking rules)
    current_ranking = sorted(teams, key=lambda team: (
        group_tables[tournament_name][group_name][team]['points'],
        group_tables[tournament_name][group_name][team]['goal_difference'],
        group_tables[tournament_name][group_name][team]['goals_scored']
    ), reverse=True)
    
    # Initialize a variable to track the number of changes
    changes = 0
    
    # Compare the current ranking with the previous one to detect changes
    if previous_ranking[tournament_name][group_name] != current_ranking:
        # Increment the number of changes for this date, group, and tournament
        table_changes[tournament_name][group_name][match_date] += 1
        # Count how many teams' positions have changed
        changes = sum(1 for prev, curr in zip(previous_ranking[tournament_name][group_name], current_ranking) if prev != curr)
        # Update the previous ranking to the current one
        previous_ranking[tournament_name][group_name] = current_ranking
    
    return changes



# Main loop to process the goals
match_results = defaultdict(lambda: defaultdict(lambda: {}))

# Loop through each goal in the sorted dataset
for index, row in goals_df.iterrows():
    match_id = row['match_id']
    group_name = row['group_name']
    tournament_name = row['tournament_name']
    match_date = row['match_date']  # Track changes per date
    
    team_name = row['player_team_name']
    own_goal = row['own_goal']
    
    # Extract the two teams from the match_name (e.g., "France v Mexico")
    match_name = row['match_name']
    team1, team2 = match_name.split(' v ')
    
    # Ensure the tournament and group are initialized in match_results
    if tournament_name not in match_results:
        match_results[tournament_name] = defaultdict(lambda: {})

    if group_name not in match_results[tournament_name]:
        match_results[tournament_name][group_name] = {}

    # Initialize the match result if not already present
    if match_name not in match_results[tournament_name][group_name]:
        match_results[tournament_name][group_name][match_name] = {team1: 0, team2: 0}
    
    # Assign the goal to the correct team
    if own_goal == 1:
        if team_name == team1:
            match_results[tournament_name][group_name][match_name][team2] += 1
        else:
            match_results[tournament_name][group_name][match_name][team1] += 1
    else:
        match_results[tournament_name][group_name][match_name][team_name] += 1
    
    # Update group table for the match results
    update_group_table(group_tables, tournament_name, group_name, match_name, match_results)
    
    # Track changes in the league table for the current match date and store in "changes"
    changes = update_group_table_and_count_changes(
        group_tables, table_changes, previous_ranking, match_results, tournament_name, group_name, match_date
    )
    
    # Print or store the value of "changes" for analysis
    print(f"Date: {match_date}, Tournament: {tournament_name}, Group: {group_name}, Changes: {changes}")


Checking teams for 1930 FIFA World Cup, Group 1:
Team: France, Data: {'points': 3, 'goals_scored': 1, 'goals_conceded': 0, 'goal_difference': 1}
Team: Mexico, Data: {'points': 0, 'goals_scored': 0, 'goals_conceded': 1, 'goal_difference': -1}
Date: 1930-07-13 00:00:00, Tournament: 1930 FIFA World Cup, Group: Group 1, Changes: 0
Checking teams for 1930 FIFA World Cup, Group 4:
Team: United States, Data: {'points': 3, 'goals_scored': 1, 'goals_conceded': 0, 'goal_difference': 1}
Team: Belgium, Data: {'points': 0, 'goals_scored': 0, 'goals_conceded': 1, 'goal_difference': -1}
Date: 1930-07-13 00:00:00, Tournament: 1930 FIFA World Cup, Group: Group 4, Changes: 0
Checking teams for 1930 FIFA World Cup, Group 1:
Team: France, Data: {'points': 3, 'goals_scored': 2, 'goals_conceded': 0, 'goal_difference': 2}
Team: Mexico, Data: {'points': 0, 'goals_scored': 0, 'goals_conceded': 2, 'goal_difference': -2}
Date: 1930-07-13 00:00:00, Tournament: 1930 FIFA World Cup, Group: Group 1, Changes: 0
Check

In [75]:
# Convert table_changes from a nested dictionary to a list of rows for DataFrame
table_changes_list = []

for tournament, groups in table_changes.items():
    for group, dates in groups.items():
        for date, changes in dates.items():
            table_changes_list.append({
                'Tournament': tournament,
                'Group': group,
                'Date': date,
                'Changes': changes
            })

# Convert the list to a DataFrame
table_changes_df = pd.DataFrame(table_changes_list)


In [76]:
# Remove rows where 'Group' is 'not applicable'
table_changes_df = table_changes_df[table_changes_df['Group'] != 'not applicable']

In [77]:
output_path = rf'C:\Users\{user}\Documents\GitHub\tiebreak_wc\table_changes.xlsx'
# Save the DataFrame as an Excel file
table_changes_df.to_excel(output_path, index=False)

print(f"File successfully saved at {output_path}")


File successfully saved at C:\Users\aldi\Documents\GitHub\tiebreak_wc\table_changes.xlsx
