In [121]:
import json
import matplotlib.pyplot as plt
import pandas as pd


In [2]:
# Load the raw WhoScored match export. JSON is UTF-8 by specification, so the
# encoding is pinned instead of relying on the platform default.
with open('../../data/whoscored/data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)


In [4]:
# Shallow copy of the match-centre event records, followed by their count.
events = list(data['matchCentreData']['events'])
len(events)

1653

In [330]:
def calculate_ball_control(df, half, start=None, end=None):
    """Estimate each team's share of ball control within one period.

    The period's events are walked in row order (the frame is assumed to be
    sorted by ``tot_seconds``; the loop stops at the first row past the window
    end). A "control" event by a team starts or takes over that team's control
    clock. A successful Goal, Foul, Card or CornerAwarded event closes the
    current spell and leaves nobody in control until the next control event.

    Diagnostic values (period bounds, window bounds in seconds, raw seconds
    per team) are printed along the way.

    Args:
        df: Events frame with the columns ``period_displayName``, ``minute``,
            ``expandedMinute``, ``tot_seconds``, ``type_displayName``,
            ``outcomeType_displayName`` and ``team_name``.
        half: Value of ``period_displayName`` to analyse, e.g. ``'FirstHalf'``
            or ``'SecondHalf'``.
        start: Optional window start, in minutes on the ``minute`` scale.
            Defaults to the first event of the period.
        end: Optional window end, in minutes on the ``minute`` scale.
            Defaults to the last event of the period.

    Returns:
        ``{'Barcelona': pct, 'Real Madrid': pct}`` with rounded percentages of
        the measured control time (both 0 when no control time was measured),
        or ``None`` when the period has no events or the window is invalid.
    """
    control_events = {'Pass', 'TakeOn', 'KeeperPickup', 'ShieldBallOpp', 'Dispossessed', 'Save',
                      'Smother', 'BallRecovery', 'ShotOnPost', 'SavedShot', 'BlockedPass',
                      'BallTouch', 'MissedShots'}
    contested_events = {'Challenge', 'Aerial'}  # count only when successful
    reset_events = {'Goal', 'Foul', 'Card', 'CornerAwarded'}

    df_half = df[df['period_displayName'] == half]
    if df_half.empty:
        # Nothing to measure; avoids an IndexError on iloc[0] below.
        print(f"No events found for period {half}")
        return

    start_half_time = df_half.iloc[0].minute
    end_half_time = df_half.iloc[-1].minute

    print(start_half_time, end_half_time)

    # The second half restarts at minute 45 while expandedMinute keeps counting
    # through first-half added time; this offset converts minutes to that scale.
    expanded_time = 0
    if half == 'SecondHalf':
        expanded_time = df_half.iloc[0].expandedMinute - 45

    if start is not None:
        print(start, start_half_time)
        if start < start_half_time or start > end_half_time:
            print(f"Invalid start time {start}. Half time: ({start_half_time} - {end_half_time})")
            return

        start_time_sec_exp = (start + expanded_time) * 60
    else:
        start_time_sec_exp = df_half.iloc[0].tot_seconds

    if end is not None:
        # Only compare against start when one was supplied (None is not orderable).
        if end < start_half_time or end > end_half_time or (start is not None and end < start):
            print(f"Invalid end time {end}. Half time: ({start_half_time} - {end_half_time})")
            return

        end_time_sec_exp = (end + expanded_time) * 60
    else:
        end_time_sec_exp = df_half.iloc[-1].tot_seconds

    print(start_time_sec_exp, end_time_sec_exp, (end_time_sec_exp - start_time_sec_exp) / 60)

    control_time_start = start_time_sec_exp
    control_team = ''  # '' means nobody is currently in control
    control_time = {'Barcelona': 0,
                    'Real Madrid': 0}

    for _, row in df_half[df_half['tot_seconds'] > start_time_sec_exp].iterrows():
        # Clearances are deliberately not counted: they significantly worsen the results.
        if row['tot_seconds'] > end_time_sec_exp:
            break

        event_type = row['type_displayName']
        successful = row['outcomeType_displayName'] == 'Successful'

        # Events that contribute to ball control.
        if event_type in control_events or (event_type in contested_events and successful):
            if control_team != row['team_name']:
                if control_team != '':
                    # Close the previous team's spell before handing over.
                    control_time[control_team] += row['tot_seconds'] - control_time_start
                control_time_start = row['tot_seconds']
                control_team = row['team_name']

        # Events that end the current spell without giving control to anyone.
        if event_type in reset_events and control_team != '' and successful:
            control_time[control_team] += row['tot_seconds'] - control_time_start
            control_time_start = row['tot_seconds']
            control_team = ''

    # Credit the spell still open at the end of the window, if any.
    if control_team != '':
        control_time[control_team] += end_time_sec_exp - control_time_start

    print(control_time)
    sec_total = control_time['Barcelona'] + control_time['Real Madrid']
    if sec_total == 0:
        # No control time measured; return the zeros instead of dividing by zero.
        return control_time

    control_time['Barcelona'] = round(control_time['Barcelona'] / sec_total * 100)
    control_time['Real Madrid'] = round(control_time['Real Madrid'] / sec_total * 100)

    return control_time
        

In [334]:
# Ball-control split for minutes 52-60 of the second half.
calculate_ball_control(events_df, 'SecondHalf', start=52, end=60)

45 96
52 45
3300 3780 8.0
{'Barcelona': 198, 'Real Madrid': 225}


{'Barcelona': 47, 'Real Madrid': 53}

In [299]:
# Ball-control split over the whole second half (no start/end window).
calculate_ball_control(events_df, half='SecondHalf')

45 96
2880 5944 51.06666666666667
{'Barcelona': 1468, 'Real Madrid': 1595}


{'Barcelona': 48, 'Real Madrid': 52}

In [74]:
# tot_seconds of the first event recorded in the second half.
events_df.query("period_displayName == 'SecondHalf'").iloc[0]['tot_seconds']

2880

In [347]:
# Flatten the nested event records into columns such as 'type_displayName'.
events_df = pd.json_normalize(events, sep='_')
# Drop the numeric code columns and the record id. errors='ignore' keeps the
# cell working if one of these columns is absent from the export (presumably
# possible, e.g. when no event carries a cardType - verify against other matches).
events_df = events_df.drop(
    columns=['period_value', 'type_value', 'outcomeType_value', 'cardType_value', 'id'],
    errors='ignore',
)

# Label each event with its team; events whose teamId matches neither side keep ''.
events_df['team_name'] = ''
events_df.loc[events_df['teamId'] == data['matchCentreData']['home']['teamId'], 'team_name'] = 'Barcelona'
events_df.loc[events_df['teamId'] == data['matchCentreData']['away']['teamId'], 'team_name'] = 'Real Madrid'

# Events without a 'second' value are treated as happening at second 0 of their minute.
events_df['second'] = events_df['second'].fillna(0)
# Absolute event time in seconds on the expandedMinute scale (vectorized, no row-wise apply).
events_df['tot_seconds'] = (
    events_df['expandedMinute'].astype('int64') * 60 + events_df['second'].astype('int64')
)
# A stable sort keeps the feed order of events that share the same second; the
# default quicksort may reorder them, which changes who is credited with control.
events_df = events_df.sort_values(by=['tot_seconds'], kind='mergesort')
print(events_df.columns)

Index(['eventId', 'minute', 'second', 'teamId', 'x', 'y', 'expandedMinute',
       'qualifiers', 'satisfiedEventsTypes', 'isTouch', 'period_displayName',
       'type_displayName', 'outcomeType_displayName', 'playerId', 'endX',
       'endY', 'goalMouthZ', 'goalMouthY', 'isGoal', 'isShot',
       'relatedEventId', 'relatedPlayerId', 'blockedX', 'blockedY',
       'cardType_displayName', 'team_name', 'tot_seconds'],
      dtype='object')


In [343]:
def calculate_ball_control(df, half, start=None, end=None):
    """Estimate each team's share of ball control within one period.

    The period's events are walked in row order (the frame is assumed to be
    sorted by ``tot_seconds``; the loop stops at the first row past the window
    end). A "control" event by a team starts or takes over that team's control
    clock, which keeps running until the other team produces a control event
    or the window ends.

    Args:
        df: Events frame with the columns ``period_displayName``, ``minute``,
            ``expandedMinute``, ``tot_seconds``, ``type_displayName``,
            ``outcomeType_displayName`` and ``team_name``.
        half: Value of ``period_displayName`` to analyse, e.g. ``'FirstHalf'``
            or ``'SecondHalf'``.
        start: Optional window start, in minutes on the ``minute`` scale.
            Defaults to the first event of the period.
        end: Optional window end, in minutes on the ``minute`` scale.
            Defaults to the last event of the period.

    Returns:
        ``{'Barcelona': pct, 'Real Madrid': pct}`` with rounded percentages of
        the measured control time (both 0 when no control time was measured),
        or ``None`` when the period has no events or the window is invalid
        (a message is printed in those cases).
    """
    control_events = {'Pass', 'TakeOn', 'KeeperPickup', 'ShieldBallOpp', 'Dispossessed', 'Save',
                      'Smother', 'BallRecovery', 'ShotOnPost', 'SavedShot', 'BlockedPass',
                      'BallTouch', 'MissedShots'}
    contested_events = {'Challenge', 'Aerial'}  # count only when successful

    df_half = df[df['period_displayName'] == half]
    if df_half.empty:
        # Nothing to measure; avoids an IndexError on iloc[0] below.
        print(f"No events found for period {half}")
        return

    start_half_time = df_half.iloc[0].minute
    end_half_time = df_half.iloc[-1].minute

    # The second half restarts at minute 45 while expandedMinute keeps counting
    # through first-half added time; this offset converts minutes to that scale.
    expanded_time = 0
    if half == 'SecondHalf':
        expanded_time = df_half.iloc[0].expandedMinute - 45

    if start is not None:
        if start < start_half_time or start > end_half_time:
            print(f"Invalid start time {start}. Half time: ({start_half_time} - {end_half_time})")
            return

        start_time_sec_exp = (start + expanded_time) * 60
    else:
        start_time_sec_exp = df_half.iloc[0].tot_seconds

    if end is not None:
        # Only compare against start when one was supplied (None is not orderable).
        if end < start_half_time or end > end_half_time or (start is not None and end < start):
            print(f"Invalid end time {end}. Half time: ({start_half_time} - {end_half_time})")
            return

        end_time_sec_exp = (end + expanded_time) * 60
    else:
        end_time_sec_exp = df_half.iloc[-1].tot_seconds

    control_time_start = start_time_sec_exp
    control_team = ''  # '' means nobody is in control yet
    control_time = {'Barcelona': 0,
                    'Real Madrid': 0}

    for _, row in df_half[df_half['tot_seconds'] > start_time_sec_exp].iterrows():
        # Clearances are deliberately not counted: they significantly worsen the results.
        if row['tot_seconds'] > end_time_sec_exp:
            break

        event_type = row['type_displayName']
        # Events that contribute to ball control.
        if event_type in control_events \
                or (event_type in contested_events and row['outcomeType_displayName'] == 'Successful'):
            if control_team != row['team_name']:
                if control_team != '':
                    # Close the previous team's spell before handing over.
                    control_time[control_team] += row['tot_seconds'] - control_time_start
                control_time_start = row['tot_seconds']
                control_team = row['team_name']
        # Control is not reset on Goal / Foul / Card / CornerAwarded events:
        # the clock keeps running for the team last in control.

    # Credit the spell still open at the end of the window, if any.
    if control_team != '':
        control_time[control_team] += end_time_sec_exp - control_time_start

    # Convert the accumulated seconds into percentages of the measured time.
    sec_total = control_time['Barcelona'] + control_time['Real Madrid']
    if sec_total == 0:
        # No control time measured; return the zeros instead of dividing by zero.
        return control_time

    control_time['Barcelona'] = round(control_time['Barcelona'] / sec_total * 100)
    control_time['Real Madrid'] = round(control_time['Real Madrid'] / sec_total * 100)

    return control_time
        

In [345]:
# Ball-control split over the whole first half (no start/end window).
calculate_ball_control(events_df, half='FirstHalf')

{'Barcelona': 55, 'Real Madrid': 45}

{'Barcelona': 48, 'Real Madrid': 52}