### Metrics

In [1]:
import pandas as pd
import json
import os
import numpy as np

pd.options.display.max_columns = 999

Read datasets

In [2]:
# load tracking data
current_directory = os.getcwd()
path_tracking = os.path.join(os.path.join(os.path.dirname(current_directory),'data'),"tracking_set_0")
print(path_tracking)
game_id = 1

df_tracking = pd.read_parquet(f'{path_tracking}/{game_id}_tracking.parquet')

#           ------------------------------------------------------------        

# load events names
path_event_csv = os.path.join(os.path.dirname(current_directory),'data')
df_event_names = pd.read_csv(os.path.join(path_event_csv,'event_names.csv'))
dict_event_names = df_event_names.set_index('event_type_id').to_dict()['event_description']

# load event data
def load_event_data(file_name, base_path):
    # read in event file
    with open(f'{base_path}/{file_name}') as f:
        data=json.loads(f.read())

    f.close()
    
    # transform data into pandas dataframe
    df_events = pd.json_normalize(data['liveData']['event'])
    
    # preprocess event data and keep relevant information only

    # add timeelapsed to each event
    df_events['timestamp'] = pd.to_datetime(df_events.timeStamp).apply(lambda x: x.timestamp())

    df_events = df_events.query('periodId in [1,2]')

    def add_timeelapsed_to_events(df):
        start_time = df.query('typeId==32')['timestamp'].iloc[0]
        df['timestamp_new'] = np.int64((df['timestamp'] - start_time)*1000)

        df['timeelapsed'] = df['timestamp_new'].apply(lambda x: (40 * round(x/40))/1000)

        return df

    df_events = df_events.groupby('periodId').apply(add_timeelapsed_to_events)

    df_events = df_events.drop(columns=['timeStamp','timestamp','timestamp_new'])
    
    # rename some columns
    df_events = df_events.rename(columns=
        {
            'periodId':'current_phase',
            'typeId':'event_type_id',
            'timeMin':'period_minute',
            'timeSec':'period_second'
        }
    )
    
    return df_events

path_events = os.path.join(os.path.join(os.path.dirname(current_directory),'data'),"first_10_events")
print(path_events)

event_file = f'{game_id}.json'

df_events = load_event_data(
    base_path=path_events,
    file_name=event_file
)

# add event descriptions
df_events['event_description'] = df_events['event_type_id'].map(dict_event_names)

# make a copy of it for later usage
events_all = df_events.copy()

display(df_events.head())
print(df_events['event_description'].unique())


#           ------------------------------------------------------------        


# read in qualifier list
path_data = os.path.join(os.path.dirname(current_directory),'data')
qualifier_names = pd.read_csv(os.path.join(path_data,"qualifier_names.csv"))

# explode coverts each element in each list to a separate row
cols = ['id', 'qualifier']
qualifiers = events_all[cols].explode('qualifier')
display(qualifiers.head())

print("------------")

qualifiers = qualifiers[qualifiers.qualifier.notna()].reset_index(drop=True)
print(qualifiers.shape)
print("------------")
display(qualifiers.head())
print("------------")

# save corresponding event ids for each qualifier
event_ids = qualifiers.id.tolist()

qualifiers = pd.json_normalize(qualifiers[qualifiers.qualifier.notna()]['qualifier'])
print(qualifiers.shape)
print("------------")
display(qualifiers.head())
print("------------")

qualifiers['event_id'] = event_ids
display(qualifiers.head())
print("------------")
qualifiers = qualifiers.merge(qualifier_names, how='left', on='qualifierId')
display(qualifiers.head())

c:\Users\Gabriel\OneDrive\Escritorio\SportsAnalyticsCourse\OptaForum\OptaChallenge_Clustering_Player_Styles\data\tracking_set_0
c:\Users\Gabriel\OneDrive\Escritorio\SportsAnalyticsCourse\OptaForum\OptaChallenge_Clustering_Player_Styles\data\first_10_events


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df_events = df_events.groupby('periodId').apply(add_timeelapsed_to_events)


Unnamed: 0,id,eventId,event_type_id,current_phase,period_minute,period_second,contestantId,outcome,x,y,lastModified,qualifier,playerId,lineBreakingPass.linesBroken.value,passOption.player,passTarget.player,xThreat.applied,lineBreakingPass.lastLineBroken.value,pressure.pressureReceived.value,pressure.player,xThreat.removed,keyPass,assist,timeelapsed,event_description
2,2423549045,2,32,1,0,0,3c3jcs7vc1t6vz5lev162jyv7,1,0.0,0.0,2022-05-22T03:17:52Z,"[{'id': 3586084711, 'qualifierId': 127, 'value...",,,,,,,,,,,,0.0,Period start
3,2423549041,2,32,1,0,0,bx0cdmzr2gwr70ez72dorx82p,1,0.0,0.0,2022-05-21T18:59:34Z,"[{'id': 3586084701, 'qualifierId': 127, 'value...",,,,,,,,,,,,0.0,Period start
4,2423549063,3,1,1,0,0,bx0cdmzr2gwr70ez72dorx82p,1,49.9,50.0,2022-05-22T03:34:41Z,"[{'id': 3586084825, 'qualifierId': 56, 'value'...",6u2ob6fv950r1qve8uejkq2uh,,,,,,,,,,,0.04,Pass
5,2423549097,4,1,1,0,2,bx0cdmzr2gwr70ez72dorx82p,1,31.5,57.2,2022-05-22T06:37:07Z,"[{'id': 3586085043, 'qualifierId': 213, 'value...",azuc3tma44xyrbgf5y279o1xx,0.0,"[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...","[{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'sh...",0.0029771626,,,,,,,2.84,Pass
6,2423549113,5,1,1,0,7,bx0cdmzr2gwr70ez72dorx82p,1,49.2,95.4,2022-05-22T06:37:06Z,"[{'id': 3586085129, 'qualifierId': 212, 'value...",7sep6mx2s67mh5fr3raxu7aei,1.0,"[{'playerId': '5qgc6zjc38a5xjl35gs7h3vu1', 'sh...","[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...",0.0309752524,secondToLast,high,"[{'playerId': 'e6ok0deqkoe80184iu509gzu2', 'sh...",,,,7.88,Pass


['Period start' 'Pass' 'Take On' 'Challenge' 'Blocked Pass'
 'Ball recovery' 'Attempted Tackle' 'Out' 'Ball touch' '50/50'
 'Dispossessed' 'Tackle' 'Corner Awarded' 'Clearance' 'Offside Pass'
 'Offside provoked' 'Foul' 'Aerial' 'Keeper pick-up' 'Deleted event'
 'Interception' 'Error' 'Goal' 'Attempt Saved' 'Save' 'Miss' 'Claim'
 'Card' 'Start delay' 'End delay' 'Referee Drop Ball' nan 'End'
 'Player Off' 'Player on' 'Formation change' 'Keeper Sweeper'
 'Shield ball opp']


Unnamed: 0,id,qualifier
2,2423549045,"{'id': 3586084711, 'qualifierId': 127, 'value'..."
3,2423549041,"{'id': 3586084701, 'qualifierId': 127, 'value'..."
4,2423549063,"{'id': 3586084825, 'qualifierId': 56, 'value':..."
4,2423549063,"{'id': 3586084833, 'qualifierId': 213, 'value'..."
4,2423549063,"{'id': 3586084827, 'qualifierId': 140, 'value'..."


------------
(9430, 2)
------------


Unnamed: 0,id,qualifier
0,2423549045,"{'id': 3586084711, 'qualifierId': 127, 'value'..."
1,2423549041,"{'id': 3586084701, 'qualifierId': 127, 'value'..."
2,2423549063,"{'id': 3586084825, 'qualifierId': 56, 'value':..."
3,2423549063,"{'id': 3586084833, 'qualifierId': 213, 'value'..."
4,2423549063,"{'id': 3586084827, 'qualifierId': 140, 'value'..."


------------
(9430, 3)
------------


Unnamed: 0,id,qualifierId,value
0,3586084711,127,Right to Left
1,3586084701,127,Left to Right
2,3586084825,56,Back
3,3586084833,213,2.7
4,3586084827,140,28.5


------------


Unnamed: 0,id,qualifierId,value,event_id
0,3586084711,127,Right to Left,2423549045
1,3586084701,127,Left to Right,2423549041
2,3586084825,56,Back,2423549063
3,3586084833,213,2.7,2423549063
4,3586084827,140,28.5,2423549063


------------


Unnamed: 0,id,qualifierId,value,event_id,qualifier
0,3586084711,127,Right to Left,2423549045,Direction of Play
1,3586084701,127,Left to Right,2423549041,Direction of Play
2,3586084825,56,Back,2423549063,Zone
3,3586084833,213,2.7,2423549063,Angle
4,3586084827,140,28.5,2423549063,Pass End X


Number of passes attempted per player

In [8]:
def event_per_player(df,event):
    df = df[df['playerId'].notna()]
    try:
        df = df[df['event_description']==event]
        return df.groupby('playerId').size().reset_index(name=event)
    except:
        return False

df = event_per_player(df_events,'Pass')
print(df)

                     playerId  Pass
0   2lvit204llltk13iglsa2tjah     2
1   3sc349yey596xp2j6xlyt0frp    54
2   3vx94h32ahujciraspdayj9t6    19
3   4u281v53ges3kimtgac0tidm2    52
4   5ak9fwtqlr2pll0nsv5br7p7u    20
5   5qgc6zjc38a5xjl35gs7h3vu1    36
6   6ekdnbnk56xlxforb5owt3dn9    47
7   6j0ogojh2b7poyceg7i3k09yi    64
8   6u2ob6fv950r1qve8uejkq2uh    59
9   72d5uxwcmvhd6mzthxuvev1sl    38
10  7cp51c8zn7y08iyk0hc9ix5nt    66
11  7k0r5crdh9blj3edt31zwy0dm     1
12  7sep6mx2s67mh5fr3raxu7aei    42
13  8f3bhiy6r5eei1n25exhbwr8p    22
14  8gkexxgf3pypshhqwg6ibp7o4    35
15  8qmm84tue6kuz8e5nhhdhmz8p    41
16  96wcx761pzv5ub4sfwsynp51x    54
17  976riwm0dz0e74d4l28y3ttcl    55
18  a56woizbe4g6jpl3fg4tlgno5    24
19   afymbx9eo87zau8mo99pakbu    41
20  agwvouyocx93y39g7tmwaojx1     5
21  azuc3tma44xyrbgf5y279o1xx    41
22  bvbebtykj45j3luvemk8yc4ph    41
23  ccu7hw3wrcspl1a18g2ldnsh5     6
24  dxb1r4gqgxkngb0pzvfby9iol    13
25  e3kdoxu1kwn2w3wwi1rqhvr9x    14
26  e6ok0deqkoe80184iu509gzu

Number of assist and number of key passes

In [22]:
display(df_passes[df_passes['assist']==True].groupby('playerId').size().reset_index(name='total assist'))
display(df_passes[df_passes['keyPass']==True].groupby('playerId').size().reset_index(name='total assist'))


Unnamed: 0,playerId,total assist
0,6u2ob6fv950r1qve8uejkq2uh,1
1,e6ok0deqkoe80184iu509gzu2,1


Unnamed: 0,playerId,total assist
0,5ak9fwtqlr2pll0nsv5br7p7u,1
1,6j0ogojh2b7poyceg7i3k09yi,1
2,6u2ob6fv950r1qve8uejkq2uh,1
3,7sep6mx2s67mh5fr3raxu7aei,3
4,8gkexxgf3pypshhqwg6ibp7o4,1
5,96wcx761pzv5ub4sfwsynp51x,1
6,976riwm0dz0e74d4l28y3ttcl,1
7,bvbebtykj45j3luvemk8yc4ph,1
8,vja0xo3xiuax8eh0b6q3y09,1


Xthreat 

- 764/1792 values
- 763 pass, 1 offside pass
- Mean values

In [21]:
xthreat = df_events[df_events['xThreat.applied'].notna()]
print(xthreat['event_description'].value_counts())

xthreat = xthreat[xthreat['playerId'].notna()]
xthreat['xThreat.applied'] = pd.to_numeric(xthreat['xThreat.applied'])
xthreat.groupby('playerId')['xThreat.applied'].mean().reset_index(name='xthreat')

Pass            763
Offside Pass      1
Name: event_description, dtype: int64


Unnamed: 0,playerId,xthreat
0,3sc349yey596xp2j6xlyt0frp,0.017285
1,3vx94h32ahujciraspdayj9t6,0.027412
2,4u281v53ges3kimtgac0tidm2,0.014667
3,5ak9fwtqlr2pll0nsv5br7p7u,0.099773
4,5qgc6zjc38a5xjl35gs7h3vu1,0.079592
5,6ekdnbnk56xlxforb5owt3dn9,0.000711
6,6j0ogojh2b7poyceg7i3k09yi,0.046613
7,6u2ob6fv950r1qve8uejkq2uh,0.095785
8,72d5uxwcmvhd6mzthxuvev1sl,0.006289
9,7cp51c8zn7y08iyk0hc9ix5nt,0.008418


Total short and long passes and mean distance of pass by player
- Esta comprovat que tot els pases tenen un qualifier que es length
- Tots els pases tenen un playerId

In [17]:
df_passes = df_events[df_events['event_description']=='Pass']
print(df_passes.shape)
print(len(df_passes['id'].unique()))

# ---------- MEAN DISTANCE PER PLAYER --------------------
df_completo = pd.merge(df_passes, qualifiers[qualifiers['qualifier'] == 'Length'], left_on = 'id',right_on='event_id')
df_completo['value'] = pd.to_numeric(df_completo['value'])
display(df_completo.groupby('playerId')['value'].mean().reset_index(name='mean_distance'))

# ---------- TOTAL SHORT AND LONG PASSES PER PLAYER --------------------

df_completo['long'] = df_completo['value']>25

count = df_completo.groupby('playerId')['long'].value_counts().unstack(fill_value=0)

count.columns = ['Short passes', 'Long passes']
display(count)


(1024, 25)
1024


Unnamed: 0,playerId,mean_distance
0,2lvit204llltk13iglsa2tjah,24.8
1,3sc349yey596xp2j6xlyt0frp,16.642593
2,3vx94h32ahujciraspdayj9t6,12.226316
3,4u281v53ges3kimtgac0tidm2,17.028846
4,5ak9fwtqlr2pll0nsv5br7p7u,18.615
5,5qgc6zjc38a5xjl35gs7h3vu1,18.286111
6,6ekdnbnk56xlxforb5owt3dn9,29.331915
7,6j0ogojh2b7poyceg7i3k09yi,15.4125
8,6u2ob6fv950r1qve8uejkq2uh,16.323729
9,72d5uxwcmvhd6mzthxuvev1sl,20.926316


Unnamed: 0_level_0,Short passes,Long passes
playerId,Unnamed: 1_level_1,Unnamed: 2_level_1
2lvit204llltk13iglsa2tjah,1,1
3sc349yey596xp2j6xlyt0frp,44,10
3vx94h32ahujciraspdayj9t6,18,1
4u281v53ges3kimtgac0tidm2,43,9
5ak9fwtqlr2pll0nsv5br7p7u,14,6
5qgc6zjc38a5xjl35gs7h3vu1,28,8
6ekdnbnk56xlxforb5owt3dn9,24,23
6j0ogojh2b7poyceg7i3k09yi,55,9
6u2ob6fv950r1qve8uejkq2uh,49,10
72d5uxwcmvhd6mzthxuvev1sl,31,7


Number of shots attempted (Goal + miss)

In [77]:
df_shots = df_events[df_events['event_description'].isin(['Goal','Miss'])]

df_shots.groupby('playerId').size().reset_index(name='shots')

Unnamed: 0,playerId,shots
0,3vx94h32ahujciraspdayj9t6,1
1,4u281v53ges3kimtgac0tidm2,1
2,6j0ogojh2b7poyceg7i3k09yi,1
3,6u2ob6fv950r1qve8uejkq2uh,1
4,8gkexxgf3pypshhqwg6ibp7o4,1
5,8qmm84tue6kuz8e5nhhdhmz8p,1
6,e3kdoxu1kwn2w3wwi1rqhvr9x,4


Line Breaking Passes & last line Breaking Passes

In [18]:
df_events[(df_events['lineBreakingPass.linesBroken.value'].notna())]

Unnamed: 0,id,eventId,event_type_id,current_phase,period_minute,period_second,contestantId,outcome,x,y,lastModified,qualifier,playerId,lineBreakingPass.linesBroken.value,passOption.player,passTarget.player,xThreat.applied,lineBreakingPass.lastLineBroken.value,pressure.pressureReceived.value,pressure.player,xThreat.removed,keyPass,assist,timeelapsed,event_description
5,2423549097,4,1,1,0,2,bx0cdmzr2gwr70ez72dorx82p,1,31.5,57.2,2022-05-22T06:37:07Z,"[{'id': 3586085043, 'qualifierId': 213, 'value...",azuc3tma44xyrbgf5y279o1xx,0,"[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...","[{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'sh...",0.0029771626,,,,,,,2.84,Pass
6,2423549113,5,1,1,0,7,bx0cdmzr2gwr70ez72dorx82p,1,49.2,95.4,2022-05-22T06:37:06Z,"[{'id': 3586085129, 'qualifierId': 212, 'value...",7sep6mx2s67mh5fr3raxu7aei,1,"[{'playerId': '5qgc6zjc38a5xjl35gs7h3vu1', 'sh...","[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...",0.0309752524,secondToLast,high,"[{'playerId': 'e6ok0deqkoe80184iu509gzu2', 'sh...",,,,7.88,Pass
15,2423549219,11,1,1,0,21,bx0cdmzr2gwr70ez72dorx82p,1,37.2,56.4,2022-05-22T06:37:01Z,"[{'id': 3586085771, 'qualifierId': 140, 'value...",azuc3tma44xyrbgf5y279o1xx,0,"[{'playerId': '5qgc6zjc38a5xjl35gs7h3vu1', 'sh...","[{'playerId': '6j0ogojh2b7poyceg7i3k09yi', 'sh...",0.0027582049,,,,,,,21.72,Pass
17,2423549257,13,1,1,0,26,bx0cdmzr2gwr70ez72dorx82p,1,32.5,46.7,2022-05-22T06:36:59Z,"[{'id': 3586085949, 'qualifierId': 212, 'value...",azuc3tma44xyrbgf5y279o1xx,2,"[{'playerId': '4u281v53ges3kimtgac0tidm2', 'sh...","[{'playerId': 'e7e68wlpiqqohpg71oh4vrbl6', 'sh...",0.0055874586,secondToLast,high,"[{'playerId': 'e6ok0deqkoe80184iu509gzu2', 'sh...",,,,26.32,Pass
20,2423549347,5,1,1,0,30,3c3jcs7vc1t6vz5lev162jyv7,1,35.5,64.4,2022-05-22T06:36:58Z,"[{'id': 3586086323, 'qualifierId': 213, 'value...",72d5uxwcmvhd6mzthxuvev1sl,1,"[{'playerId': '96wcx761pzv5ub4sfwsynp51x', 'sh...","[{'playerId': '3vx94h32ahujciraspdayj9t6', 'sh...",0.0203064084,secondToLast,medium,"[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...",,,,30.28,Pass
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1749,2423663961,890,1,2,91,20,bx0cdmzr2gwr70ez72dorx82p,1,23.9,80.2,2022-05-22T06:19:00Z,"[{'id': 3586718331, 'qualifierId': 56, 'value'...",6u2ob6fv950r1qve8uejkq2uh,1,"[{'playerId': '7cp51c8zn7y08iyk0hc9ix5nt', 'sh...","[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...",0.0239186883,secondToLast,medium,"[{'playerId': 'fvd7y3f6948713acbas7w3u2', 'shi...",,,,2780.96,Pass
1756,2423668971,816,1,2,92,6,3c3jcs7vc1t6vz5lev162jyv7,1,41.6,33.6,2022-05-22T06:18:58Z,"[{'id': 3587615859, 'qualifierId': 178}, {'id'...",72d5uxwcmvhd6mzthxuvev1sl,0,"[{'playerId': '96wcx761pzv5ub4sfwsynp51x', 'sh...","[{'playerId': '6ekdnbnk56xlxforb5owt3dn9', 'sh...",0.0103171468,,,,,,,2826.00,Pass
1759,2423664769,798,1,2,92,13,3c3jcs7vc1t6vz5lev162jyv7,1,63.7,19.5,2022-05-22T06:18:57Z,"[{'id': 3586722773, 'qualifierId': 56, 'value'...",96wcx761pzv5ub4sfwsynp51x,0,"[{'playerId': '7ty1wdxxnusgkl34k5raipbl5', 'sh...","[{'playerId': 'ccu7hw3wrcspl1a18g2ldnsh5', 'sh...",0.0428849459,,,,,,,2833.84,Pass
1774,2423665291,805,1,2,92,43,3c3jcs7vc1t6vz5lev162jyv7,1,24.4,50.3,2022-05-22T06:18:50Z,"[{'id': 3586725655, 'qualifierId': 56, 'value'...",6ekdnbnk56xlxforb5owt3dn9,1,"[{'playerId': '3sc349yey596xp2j6xlyt0frp', 'sh...","[{'playerId': 'ccu7hw3wrcspl1a18g2ldnsh5', 'sh...",0.0080752969,,high,"[{'playerId': '5ak9fwtqlr2pll0nsv5br7p7u', 'sh...",,,,2863.04,Pass


In [86]:
print(df_events.shape)
df_1 = df_events[(df_events['lineBreakingPass.linesBroken.value'].notna()) & (df_events['lineBreakingPass.linesBroken.value']!=0)].groupby('playerId').size().reset_index(name='line breaking passes')
df_2 = df_events[df_events['lineBreakingPass.lastLineBroken.value'].notna()].groupby('playerId').size().reset_index(name='last_line breaking passes')


(1786, 25)


Unnamed: 0,playerId,line breaking passes
0,3sc349yey596xp2j6xlyt0frp,10
1,3vx94h32ahujciraspdayj9t6,4
2,4u281v53ges3kimtgac0tidm2,26
3,5ak9fwtqlr2pll0nsv5br7p7u,4
4,5qgc6zjc38a5xjl35gs7h3vu1,8
5,6ekdnbnk56xlxforb5owt3dn9,33
6,6j0ogojh2b7poyceg7i3k09yi,28
7,6u2ob6fv950r1qve8uejkq2uh,20
8,72d5uxwcmvhd6mzthxuvev1sl,23
9,7cp51c8zn7y08iyk0hc9ix5nt,27


Unnamed: 0,playerId,line breaking passes
0,3vx94h32ahujciraspdayj9t6,2
1,4u281v53ges3kimtgac0tidm2,5
2,5ak9fwtqlr2pll0nsv5br7p7u,2
3,6ekdnbnk56xlxforb5owt3dn9,4
4,6j0ogojh2b7poyceg7i3k09yi,7
5,6u2ob6fv950r1qve8uejkq2uh,8
6,72d5uxwcmvhd6mzthxuvev1sl,4
7,7cp51c8zn7y08iyk0hc9ix5nt,5
8,7sep6mx2s67mh5fr3raxu7aei,6
9,8f3bhiy6r5eei1n25exhbwr8p,3


Pressure value in Pass, Goal, Miss, Ball touch, Control

In [25]:
listss = ['Pass','Goal','Miss','Ball touch','Control']
print(df_events[df_events['event_description'].isin(listss)].isna().sum())
print(df_events[df_events['event_description'].isin(listss)].shape)

id                                          0
eventId                                     0
event_type_id                               0
current_phase                               0
period_minute                               0
period_second                               0
contestantId                                0
outcome                                     0
x                                           0
y                                           0
lastModified                                0
qualifier                                   0
playerId                                    0
lineBreakingPass.linesBroken.value        703
passOption.player                         170
passTarget.player                         170
xThreat.applied                           338
lineBreakingPass.lastLineBroken.value    1002
pressure.pressureReceived.value           443
pressure.player                           443
xThreat.removed                           933
keyPass                           

Number of take ons and number of succesfull take ons

In [31]:
df_events[df_events['event_description']=='Take On']['outcome'].value_counts()
display(df_events[df_events['event_description']=='Take On'].groupby('playerId').size().reset_index(name='take on'))

condition = (df_events['event_description'] == 'Take On') & (df_events['outcome'] == 1)
display(df_events[condition].groupby('playerId').size().reset_index(name='take on succesfull'))

Unnamed: 0,playerId,take on
0,3sc349yey596xp2j6xlyt0frp,1
1,3vx94h32ahujciraspdayj9t6,4
2,4u281v53ges3kimtgac0tidm2,1
3,5qgc6zjc38a5xjl35gs7h3vu1,3
4,6j0ogojh2b7poyceg7i3k09yi,3
5,6u2ob6fv950r1qve8uejkq2uh,4
6,7cp51c8zn7y08iyk0hc9ix5nt,1
7,7sep6mx2s67mh5fr3raxu7aei,2
8,8f3bhiy6r5eei1n25exhbwr8p,1
9,8qmm84tue6kuz8e5nhhdhmz8p,1


Unnamed: 0,playerId,take on succesfull
0,3sc349yey596xp2j6xlyt0frp,1
1,4u281v53ges3kimtgac0tidm2,1
2,5qgc6zjc38a5xjl35gs7h3vu1,1
3,6j0ogojh2b7poyceg7i3k09yi,3
4,6u2ob6fv950r1qve8uejkq2uh,3
5,7cp51c8zn7y08iyk0hc9ix5nt,1
6,7sep6mx2s67mh5fr3raxu7aei,2
7,8f3bhiy6r5eei1n25exhbwr8p,1
8,976riwm0dz0e74d4l28y3ttcl,1
9,afymbx9eo87zau8mo99pakbu,1


In [32]:
# load data
current_directory = os.getcwd()
path_tracking = os.path.join(os.path.join(os.path.dirname(current_directory),'data'),"tracking_set_0")
print(path_tracking)
game_id = 1

path_events = os.path.join(os.path.join(os.path.dirname(current_directory),'data'),"first_10_events")
print(path_events)

# load the tracking data
df_tracking = pd.read_parquet(f'{path_tracking}/{game_id}_tracking.parquet')

# load event names
path_event_csv = os.path.join(os.path.dirname(current_directory),'data')
df_event_names = pd.read_csv(os.path.join(path_event_csv,'event_names.csv'))
dict_event_names = df_event_names.set_index('event_type_id').to_dict()['event_description']

c:\Users\Gabriel\OneDrive\Escritorio\SportsAnalyticsCourse\OptaForum\OptaChallenge_Clustering_Player_Styles\data\tracking_set_0
c:\Users\Gabriel\OneDrive\Escritorio\SportsAnalyticsCourse\OptaForum\OptaChallenge_Clustering_Player_Styles\data\first_10_events


Merge events with tracking

In [3]:
df_all = pd.merge(df_events,df_tracking, on =['timeelapsed','current_phase'])
display(df_all.head(2))

Unnamed: 0,id,eventId,event_type_id,current_phase,period_minute,period_second,contestantId,outcome,x,y,lastModified,qualifier,playerId,lineBreakingPass.linesBroken.value,passOption.player,passTarget.player,xThreat.applied,lineBreakingPass.lastLineBroken.value,pressure.pressureReceived.value,pressure.player,xThreat.removed,keyPass,assist,timeelapsed,event_description,team_id_opta,player_id,jersey_no,pos_x,pos_y,speed,frame_count,team_id,acc,speed_x,speed_y,dop,team_in_possession
0,2423549045,2,32,1,0,0,3c3jcs7vc1t6vz5lev162jyv7,1,0.0,0.0,2022-05-22T03:17:52Z,"[{'id': 3586084711, 'qualifierId': 127, 'value...",,,,,,,,,,,,0.0,Period start,bx0cdmzr2gwr70ez72dorx82p,azuc3tma44xyrbgf5y279o1xx,16.0,-19.477912,4.240699,0.464332,10000,2.0,0.727075,-0.121969,-0.357286,L,bx0cdmzr2gwr70ez72dorx82p
1,2423549045,2,32,1,0,0,3c3jcs7vc1t6vz5lev162jyv7,1,0.0,0.0,2022-05-22T03:17:52Z,"[{'id': 3586084711, 'qualifierId': 127, 'value...",,,,,,,,,,,,0.0,Period start,bx0cdmzr2gwr70ez72dorx82p,6u2ob6fv950r1qve8uejkq2uh,1.0,-0.117343,-0.234231,0.328293,10000,2.0,0.899067,0.224221,0.126503,L,bx0cdmzr2gwr70ez72dorx82p


Average shot distance per player

In [47]:
df_shots = df_all[(df_all['event_description'].isin(['Goal','Miss'])) & (df_all['player_id']==df_all['playerId'])]

df_shots['pos_x'] = pd.to_numeric(df_shots['pos_x'])
mask = df_shots['dop'] == 'R'
df_shots.loc[mask, 'dop'] = 'L'
df_shots.loc[mask, 'pos_x'] *= -1

# determine the distance
target_point = (52.5, 0)

df_shots['distance_to_target'] = np.sqrt((df_shots['pos_x'] - target_point[0])**2 + (df_shots['pos_y'] - target_point[1])**2)

display(df_shots.groupby('playerId')['distance_to_target'].mean().reset_index(name='avg_shot_distance'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_shots['pos_x'] = pd.to_numeric(df_shots['pos_x'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_shots['distance_to_target'] = np.sqrt((df_shots['pos_x'] - target_point[0])**2 + (df_shots['pos_y'] - target_point[1])**2)


Unnamed: 0,playerId,avg_shot_distance
0,3vx94h32ahujciraspdayj9t6,20.554468
1,4u281v53ges3kimtgac0tidm2,11.381902
2,6j0ogojh2b7poyceg7i3k09yi,27.086573
3,6u2ob6fv950r1qve8uejkq2uh,24.234017
4,8gkexxgf3pypshhqwg6ibp7o4,20.302314
5,8qmm84tue6kuz8e5nhhdhmz8p,5.237487
6,e3kdoxu1kwn2w3wwi1rqhvr9x,21.619386


Defensive metrics (Tackles, Fouls, attempted tackles, aerial, blocked pass, ball recovery, interception)

In [33]:
df_events[df_events['event_description']=='Blocked Pass']

df_events[df_events['eventId']==8]

#qualifiers[qualifiers['event_id']==2423549165]

Unnamed: 0,id,eventId,event_type_id,current_phase,period_minute,period_second,contestantId,outcome,x,y,lastModified,qualifier,playerId,lineBreakingPass.linesBroken.value,passOption.player,passTarget.player,xThreat.applied,lineBreakingPass.lastLineBroken.value,pressure.pressureReceived.value,pressure.player,xThreat.removed,keyPass,assist,timeelapsed,event_description
11,2423549155,8,1,1,0,12,bx0cdmzr2gwr70ez72dorx82p,0,57.8,83.6,2022-05-22T06:37:04Z,"[{'id': 3586095441, 'qualifierId': 141, 'value...",6j0ogojh2b7poyceg7i3k09yi,,"[{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'sh...","[{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'sh...",,,high,"[{'playerId': 'e6ok0deqkoe80184iu509gzu2', 'sh...",0.0053730607,,,12.92,Pass
24,2423549479,8,1,1,0,41,3c3jcs7vc1t6vz5lev162jyv7,1,21.7,82.5,2022-05-22T06:36:54Z,"[{'id': 3587585583, 'qualifierId': 178}, {'id'...",3vx94h32ahujciraspdayj9t6,,"[{'playerId': '6ekdnbnk56xlxforb5owt3dn9', 'sh...","[{'playerId': '72d5uxwcmvhd6mzthxuvev1sl', 'sh...",0.0005321801,,high,"[{'playerId': '4u281v53ges3kimtgac0tidm2', 'sh...",,,,41.4,Pass


Quantitat de pases en cada zona amb pressió alta i el seu % de success

In [20]:
df_passes = df_all[(df_all['event_description']=='Pass') & (df_all['player_id']==df_all['playerId'])]

df_passes['pos_x'] = pd.to_numeric(df_passes['pos_x'])
mask = df_passes['dop'] == 'R'
df_passes.loc[mask, 'dop'] = 'L'
df_passes.loc[mask, 'pos_x'] *= -1
df_passes.loc[mask, 'pos_y'] *= -1


condiciones = [
    (df_passes['pos_x'] <= 0, 'defensive'),
    ((df_passes['pos_x'] > 0) & (-34 <= df_passes['pos_y']) & (df_passes['pos_y'] < -11), 'left_wing'),
    ((df_passes['pos_x'] > 0) & (11 < df_passes['pos_y']) & (df_passes['pos_y'] <= 34), 'right_wing'),
    ((df_passes['pos_x'] > 0) & (df_passes['pos_x'] <= 26.25) & (-11 <= df_passes['pos_y']) & (df_passes['pos_y'] <= 11), 'center_down'),
    ((26.25 < df_passes['pos_x']) & (df_passes['pos_x'] <= 52.5) & (-11 <= df_passes['pos_y']) & (df_passes['pos_y'] <= 11), 'center_up')
]

df_passes['zone'] = np.select([cond[0] for cond in condiciones], [cond[1] for cond in condiciones], default='unknown')
display(df_passes.head())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_passes['pos_x'] = pd.to_numeric(df_passes['pos_x'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_passes['zone'] = np.select([cond[0] for cond in condiciones], [cond[1] for cond in condiciones], default='unknown')


Unnamed: 0,id,eventId,event_type_id,current_phase,period_minute,period_second,contestantId,outcome,x,y,lastModified,qualifier,playerId,lineBreakingPass.linesBroken.value,passOption.player,passTarget.player,xThreat.applied,lineBreakingPass.lastLineBroken.value,pressure.pressureReceived.value,pressure.player,xThreat.removed,keyPass,assist,timeelapsed,event_description,team_id_opta,player_id,jersey_no,pos_x,pos_y,speed,frame_count,team_id,acc,speed_x,speed_y,dop,team_in_possession,zone
50,2423549063,3,1,1,0,0,bx0cdmzr2gwr70ez72dorx82p,1,49.9,50.0,2022-05-22T03:34:41Z,"[{'id': 3586084825, 'qualifierId': 56, 'value'...",6u2ob6fv950r1qve8uejkq2uh,,,,,,,,,,,0.04,Pass,bx0cdmzr2gwr70ez72dorx82p,6u2ob6fv950r1qve8uejkq2uh,1.0,-0.1074,-0.228648,0.361032,10001,2.0,0.945378,0.273445,0.15221,L,bx0cdmzr2gwr70ez72dorx82p,defensive
73,2423549097,4,1,1,0,2,bx0cdmzr2gwr70ez72dorx82p,1,31.5,57.2,2022-05-22T06:37:07Z,"[{'id': 3586085043, 'qualifierId': 213, 'value...",azuc3tma44xyrbgf5y279o1xx,0.0,"[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...","[{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'sh...",0.0029771626,,,,,,,2.84,Pass,bx0cdmzr2gwr70ez72dorx82p,azuc3tma44xyrbgf5y279o1xx,16.0,-19.847447,4.775255,2.387325,10071,2.0,0.28522,0.019709,2.403938,L,bx0cdmzr2gwr70ez72dorx82p,defensive
98,2423549113,5,1,1,0,7,bx0cdmzr2gwr70ez72dorx82p,1,49.2,95.4,2022-05-22T06:37:06Z,"[{'id': 3586085129, 'qualifierId': 212, 'value...",7sep6mx2s67mh5fr3raxu7aei,1.0,"[{'playerId': '5qgc6zjc38a5xjl35gs7h3vu1', 'sh...","[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...",0.0309752524,secondToLast,high,"[{'playerId': 'e6ok0deqkoe80184iu509gzu2', 'sh...",,,,7.88,Pass,bx0cdmzr2gwr70ez72dorx82p,7sep6mx2s67mh5fr3raxu7aei,13.0,-0.955286,30.978778,2.309112,10197,2.0,-0.607715,2.298279,-0.214493,L,bx0cdmzr2gwr70ez72dorx82p,defensive
124,2423549127,6,1,1,0,9,bx0cdmzr2gwr70ez72dorx82p,1,72.1,88.0,2022-05-22T06:37:05Z,"[{'id': 3586085187, 'qualifierId': 56, 'value'...",e3kdoxu1kwn2w3wwi1rqhvr9x,,"[{'playerId': '7cp51c8zn7y08iyk0hc9ix5nt', 'sh...","[{'playerId': '6u2ob6fv950r1qve8uejkq2uh', 'sh...",0.0338825583,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,9.16,Pass,bx0cdmzr2gwr70ez72dorx82p,e3kdoxu1kwn2w3wwi1rqhvr9x,17.0,23.980289,25.796733,4.552663,10229,2.0,-0.53547,-1.150168,4.411378,L,bx0cdmzr2gwr70ez72dorx82p,right_wing
188,2423549153,7,1,1,0,11,bx0cdmzr2gwr70ez72dorx82p,1,63.6,94.3,2022-05-22T06:37:04Z,"[{'id': 3586085373, 'qualifierId': 140, 'value...",6u2ob6fv950r1qve8uejkq2uh,,"[{'playerId': '5qgc6zjc38a5xjl35gs7h3vu1', 'sh...","[{'playerId': '6j0ogojh2b7poyceg7i3k09yi', 'sh...",0.0111802518,,high,"[{'playerId': 'fvd7y3f6948713acbas7w3u2', 'shi...",,,,11.56,Pass,bx0cdmzr2gwr70ez72dorx82p,6u2ob6fv950r1qve8uejkq2uh,1.0,14.403023,29.964194,3.729682,10289,2.0,0.169329,-0.737431,3.661325,L,bx0cdmzr2gwr70ez72dorx82p,right_wing


In [40]:
df_passes['pressure.pressureReceived.value']=df_passes['pressure.pressureReceived.value'].fillna('NaN')
df_p_total=df_passes[df_passes['outcome']==1].groupby(['playerId','zone','pressure.pressureReceived.value']).size().reset_index(name='completed_passes')

total = df_passes.groupby(['playerId','zone','pressure.pressureReceived.value']).size().reset_index(name='total_passes')

df_p_total = pd.merge(df_p_total,total,on=['playerId','zone','pressure.pressureReceived.value'])
df_p_total["% succes"] = df_p_total['completed_passes'] /df_p_total['total_passes']
display(df_p_total)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_passes['pressure.pressureReceived.value']=df_passes['pressure.pressureReceived.value'].fillna('NaN')


Unnamed: 0,playerId,zone,pressure.pressureReceived.value,completed_passes,total_passes,% succes
0,2lvit204llltk13iglsa2tjah,center_down,,1,1,1.000000
1,3sc349yey596xp2j6xlyt0frp,defensive,,11,14,0.785714
2,3sc349yey596xp2j6xlyt0frp,defensive,high,8,12,0.666667
3,3sc349yey596xp2j6xlyt0frp,defensive,medium,10,12,0.833333
4,3sc349yey596xp2j6xlyt0frp,right_wing,,3,4,0.750000
...,...,...,...,...,...,...
226,vja0xo3xiuax8eh0b6q3y09,left_wing,high,4,5,0.800000
227,vja0xo3xiuax8eh0b6q3y09,left_wing,low,1,1,1.000000
228,vja0xo3xiuax8eh0b6q3y09,left_wing,medium,4,4,1.000000
229,vja0xo3xiuax8eh0b6q3y09,right_wing,,1,1,1.000000


Defensive metrics (Tackles, Fouls, attempted tackles, aerial, blocked pass, ball recovery, interception)

Tackle – Unsuccessful Take-on/Dispossessed. A tackle is awarded if a player wins the ball from another player who is in possession. If he is attempting to beat the tackler, the other player will get an unsuccessful Take-on. If he is in possession but not attempting to “beat” his man, then he will get a dispossessed.

Quanta pressio fa cada jugador?   High = 0.99, medium = 0.66, low = 0.33

In [17]:
df_events[df_events['pressure.player'].notna()]['event_description'].unique()

array(['Pass', 'Take On', 'Ball touch', 'Dispossessed', 'Clearance',
       'Offside Pass', 'Foul', 'Interception', 'Goal', 'Attempt Saved',
       'Miss', 'Claim', 'Keeper pick-up', 'Save'], dtype=object)

Pass, Take on, Ball touch, Dispossessed, Clearance, Interception, Goal, Miss

In [3]:
pressure_events = df_events[(df_events['event_description'].isin(['Pass','Take On','Ball touch',
                                                                 'Dispossessed','Clearance','Offisde Pass',
                                                                 'Foul','Interception',
                                                                  'Goal','Miss']))
                                                                 & df_events['pressure.player'].notna()]

# per cada fila, agafar els jugadors que estan presionant 
#i afegirlos a un dataframe amb la intensitat de la pressió 
print(pressure_events.shape)
df_pressure = pd.DataFrame(columns=['playerid', 'intensity'])
for index,row in pressure_events.iterrows():
   pressurePlayers = row['pressure.player']
   players = [d['playerId'] for d in pressurePlayers]
   intensity = [d['intensity'] for d in pressurePlayers]
   player_intensity_dict = dict(zip(players, intensity))
   temp_df = pd.DataFrame(list(player_intensity_dict.items()), columns=['playerid', 'intensity'])
   df_pressure = pd.concat([df_pressure, temp_df], ignore_index=True)

display(df_pressure)


df_pressure['pressure_value'] = np.where(df_pressure['intensity'] == 'high', 0.99, 
                                         np.where(df_pressure['intensity'] == 'medium', 0.66, 
                                                  np.where(df_pressure['intensity'] == 'low', 0.33, 0)))

display(df_pressure)

df_pressure_players = df_pressure.groupby('playerid')['pressure_value'].sum().reset_index(name='pressure_total_value')

display(df_pressure_players)

df_minutes = df_tracking.groupby('player_id').size().reset_index(name='counts')

# fem sumatori i ho dividim entre 60 = 94.62 min!

# max counts = 141928, per tant, 141928/94.62  = 1500 per tant 1500 counts son 1 minut.

df_minutes['minutes'] = df_minutes['counts'] / 1500

df_minutes.drop('counts',axis=1,inplace=True)
df_minutes = df_minutes[df_minutes['player_id'] != 'aaaaaaaaaaaaaaaaaaaaaaaaa']

display(df_minutes)

df_pressure_players=pd.merge(df_pressure_players,df_minutes, left_on = 'playerid',right_on='player_id')
df_pressure_players['pressure_90'] = df_pressure_players['pressure_total_value'] / df_pressure_players['minutes']*90
display(df_pressure_players)

(771, 25)


Unnamed: 0,playerid,intensity
0,e6ok0deqkoe80184iu509gzu2,high
1,8qmm84tue6kuz8e5nhhdhmz8p,high
2,96wcx761pzv5ub4sfwsynp51x,high
3,fvd7y3f6948713acbas7w3u2,high
4,afymbx9eo87zau8mo99pakbu,low
...,...,...
954,6j0ogojh2b7poyceg7i3k09yi,high
955,7k0r5crdh9blj3edt31zwy0dm,high
956,5ak9fwtqlr2pll0nsv5br7p7u,high
957,7k0r5crdh9blj3edt31zwy0dm,high


Unnamed: 0,playerid,intensity,pressure_value
0,e6ok0deqkoe80184iu509gzu2,high,0.99
1,8qmm84tue6kuz8e5nhhdhmz8p,high,0.99
2,96wcx761pzv5ub4sfwsynp51x,high,0.99
3,fvd7y3f6948713acbas7w3u2,high,0.99
4,afymbx9eo87zau8mo99pakbu,low,0.33
...,...,...,...
954,6j0ogojh2b7poyceg7i3k09yi,high,0.99
955,7k0r5crdh9blj3edt31zwy0dm,high,0.99
956,5ak9fwtqlr2pll0nsv5br7p7u,high,0.99
957,7k0r5crdh9blj3edt31zwy0dm,high,0.99


Unnamed: 0,playerid,pressure_total_value
0,2lvit204llltk13iglsa2tjah,11.55
1,3sc349yey596xp2j6xlyt0frp,30.69
2,3vx94h32ahujciraspdayj9t6,27.72
3,4u281v53ges3kimtgac0tidm2,38.28
4,5ak9fwtqlr2pll0nsv5br7p7u,11.88
5,5qgc6zjc38a5xjl35gs7h3vu1,22.11
6,6j0ogojh2b7poyceg7i3k09yi,34.98
7,6u2ob6fv950r1qve8uejkq2uh,49.17
8,72d5uxwcmvhd6mzthxuvev1sl,8.25
9,7cp51c8zn7y08iyk0hc9ix5nt,12.21


Unnamed: 0,player_id,minutes
0,2lvit204llltk13iglsa2tjah,18.538667
1,3sc349yey596xp2j6xlyt0frp,94.618667
2,3vx94h32ahujciraspdayj9t6,62.395333
3,4u281v53ges3kimtgac0tidm2,94.618667
4,5ak9fwtqlr2pll0nsv5br7p7u,38.255333
5,5qgc6zjc38a5xjl35gs7h3vu1,68.112
6,6ekdnbnk56xlxforb5owt3dn9,94.618667
7,6j0ogojh2b7poyceg7i3k09yi,94.618667
8,6u2ob6fv950r1qve8uejkq2uh,94.618667
9,72d5uxwcmvhd6mzthxuvev1sl,94.618667


Unnamed: 0,playerid,pressure_total_value,player_id,minutes,pressure_90
0,2lvit204llltk13iglsa2tjah,11.55,2lvit204llltk13iglsa2tjah,18.538667,56.071994
1,3sc349yey596xp2j6xlyt0frp,30.69,3sc349yey596xp2j6xlyt0frp,94.618667,29.191914
2,3vx94h32ahujciraspdayj9t6,27.72,3vx94h32ahujciraspdayj9t6,62.395333,39.983759
3,4u281v53ges3kimtgac0tidm2,38.28,4u281v53ges3kimtgac0tidm2,94.618667,36.41142
4,5ak9fwtqlr2pll0nsv5br7p7u,11.88,5ak9fwtqlr2pll0nsv5br7p7u,38.255333,27.949044
5,5qgc6zjc38a5xjl35gs7h3vu1,22.11,5qgc6zjc38a5xjl35gs7h3vu1,68.112,29.215116
6,6j0ogojh2b7poyceg7i3k09yi,34.98,6j0ogojh2b7poyceg7i3k09yi,94.618667,33.272504
7,6u2ob6fv950r1qve8uejkq2uh,49.17,6u2ob6fv950r1qve8uejkq2uh,94.618667,46.769841
8,72d5uxwcmvhd6mzthxuvev1sl,8.25,72d5uxwcmvhd6mzthxuvev1sl,94.618667,7.847289
9,7cp51c8zn7y08iyk0hc9ix5nt,12.21,7cp51c8zn7y08iyk0hc9ix5nt,94.618667,11.613987


In [4]:
df_pos = df_tracking[['player_id','pos_x','pos_y','dop']]

# All with the same direction
df_pos['pos_x'] = pd.to_numeric(df_pos['pos_x'])
mask = df_pos['dop'] == 'R'
df_pos.loc[mask, 'dop'] = 'L'
df_pos.loc[mask, 'pos_x'] *= -1
df_pos.loc[mask, 'pos_y'] *= -1

# Divide the field into zones

# Into two zones
df_pos['ofensive'] = np.where(df_pos['pos_x'] > 0, 1, 0)
df_pos['defensive'] = np.where(df_pos['pos_x'] <= 0, 1, 0)

# More especific
df_pos['left_wing'] = np.where((df_pos['pos_x'] > 0) & (-34 <= df_pos['pos_y']) & (df_pos['pos_y'] < -11), 1, 0)
df_pos['right_wing'] = np.where((df_pos['pos_x'] > 0) & (11 < df_pos['pos_y']) & (df_pos['pos_y'] <= 34), 1, 0)
df_pos['center_down'] = np.where((df_pos['pos_x'] > 0) & (df_pos['pos_x'] <= 26.25) & (-11 <= df_pos['pos_y']) & (df_pos['pos_y'] <= 11), 1, 0)
df_pos['center_up'] = np.where((26.25 < df_pos['pos_x']) & (df_pos['pos_x'] <= 52.5) & (-11 <= df_pos['pos_y']) & (df_pos['pos_y'] <= 11), 1, 0)

pos_total = df_pos.groupby('player_id').agg({'ofensive': 'sum', 'defensive': 'sum',
                                             'left_wing': 'sum','right_wing': 'sum',
                                             'center_up': 'sum','center_down': 'sum'}).reset_index()

pos_total = pd.merge(pos_total, df_pos.groupby('player_id').size().reset_index(name='total'),
                      on='player_id', how='inner')

pos_total['ofensive %'] = pos_total['ofensive'] / pos_total['total']
pos_total['defensive %'] = pos_total['defensive'] / pos_total['total']
pos_total['left_wing %'] = pos_total['left_wing'] / pos_total['total']
pos_total['right_wing %'] = pos_total['right_wing'] / pos_total['total']
pos_total['center_up %'] = pos_total['center_up'] / pos_total['total']
pos_total['center_down %'] = pos_total['center_down'] / pos_total['total']

pos_pct_total = pos_total[['player_id','ofensive %','defensive %','left_wing %','right_wing %','center_up %','center_down %']]

#display(pos_pct_total)

pos_pct_total = pos_pct_total[pos_pct_total['ofensive %']>0.5]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pos['pos_x'] = pd.to_numeric(df_pos['pos_x'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pos['ofensive'] = np.where(df_pos['pos_x'] > 0, 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pos['defensive'] = np.where(df_pos['pos_x'] <= 0, 1, 0)
A value is trying to be set on a copy of

In [5]:
players_offensive = pos_pct_total['player_id'].tolist()

df_pressure_off_players = df_pressure_players[df_pressure_players['player_id'].isin(players_offensive)]
display(df_pressure_off_players)

Unnamed: 0,playerid,pressure_total_value,player_id,minutes,pressure_90
0,2lvit204llltk13iglsa2tjah,11.55,2lvit204llltk13iglsa2tjah,18.538667,56.071994
2,3vx94h32ahujciraspdayj9t6,27.72,3vx94h32ahujciraspdayj9t6,62.395333,39.983759
4,5ak9fwtqlr2pll0nsv5br7p7u,11.88,5ak9fwtqlr2pll0nsv5br7p7u,38.255333,27.949044
5,5qgc6zjc38a5xjl35gs7h3vu1,22.11,5qgc6zjc38a5xjl35gs7h3vu1,68.112,29.215116
6,6j0ogojh2b7poyceg7i3k09yi,34.98,6j0ogojh2b7poyceg7i3k09yi,94.618667,33.272504
7,6u2ob6fv950r1qve8uejkq2uh,49.17,6u2ob6fv950r1qve8uejkq2uh,94.618667,46.769841
10,7k0r5crdh9blj3edt31zwy0dm,9.9,7k0r5crdh9blj3edt31zwy0dm,26.505333,33.615876
12,7ty1wdxxnusgkl34k5raipbl5,6.6,7ty1wdxxnusgkl34k5raipbl5,10.188667,58.300072
19,agwvouyocx93y39g7tmwaojx1,14.85,agwvouyocx93y39g7tmwaojx1,32.222,41.477872
23,dxb1r4gqgxkngb0pzvfby9iol,9.9,dxb1r4gqgxkngb0pzvfby9iol,38.122,23.372331


In [None]:
df_pressure_off_players.to_csv('pressure_off_players.csv',index=False)

Shots on target

In [16]:
df_events[df_events['event_description'].isin(['Goal','Miss'])]
#qualifiers[qualifiers['event_id']==2423568337]

Unnamed: 0,id,eventId,event_type_id,current_phase,period_minute,period_second,contestantId,outcome,x,y,lastModified,qualifier,playerId,lineBreakingPass.linesBroken.value,passOption.player,passTarget.player,xThreat.applied,lineBreakingPass.lastLineBroken.value,pressure.pressureReceived.value,pressure.player,xThreat.removed,keyPass,assist,timeelapsed,event_description
184,2423557685,90,16,1,8,1,3c3jcs7vc1t6vz5lev162jyv7,1,77.3,38.5,2022-05-22T06:35:22Z,"[{'id': 3586133347, 'qualifierId': 20}, {'id':...",3vx94h32ahujciraspdayj9t6,,,,,,medium,"[{'playerId': 'azuc3tma44xyrbgf5y279o1xx', 'sh...",,,,481.16,Goal
297,2423564565,127,13,1,13,42,bx0cdmzr2gwr70ez72dorx82p,1,76.4,44.0,2022-05-22T06:34:14Z,"[{'id': 3586168999, 'qualifierId': 18}, {'id':...",6u2ob6fv950r1qve8uejkq2uh,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,822.28,Miss
349,2423568337,185,16,1,16,51,3c3jcs7vc1t6vz5lev162jyv7,1,76.7,57.0,2022-05-22T06:33:34Z,"[{'id': 3586920981, 'qualifierId': 395, 'value...",8gkexxgf3pypshhqwg6ibp7o4,,,,,,high,"[{'playerId': '4u281v53ges3kimtgac0tidm2', 'sh...",,,,1011.52,Goal
1412,2423637613,722,13,2,67,49,bx0cdmzr2gwr70ez72dorx82p,1,73.7,45.4,2022-05-22T06:22:08Z,"[{'id': 3586569373, 'qualifierId': 56, 'value'...",6j0ogojh2b7poyceg7i3k09yi,,,,,,high,"[{'playerId': '8gkexxgf3pypshhqwg6ibp7o4', 'sh...",,,,1369.44,Miss
1517,2423644617,774,16,2,74,4,bx0cdmzr2gwr70ez72dorx82p,1,88.5,50.0,2022-05-22T04:00:16Z,"[{'id': 3586609837, 'qualifierId': 56, 'value'...",e3kdoxu1kwn2w3wwi1rqhvr9x,,,,,,,,,,,1744.84,Goal
1552,2423647403,792,16,2,76,33,bx0cdmzr2gwr70ez72dorx82p,1,85.4,49.9,2022-05-22T06:20:45Z,"[{'id': 3586625857, 'qualifierId': 22}, {'id':...",e3kdoxu1kwn2w3wwi1rqhvr9x,,,,,,medium,"[{'playerId': '8gkexxgf3pypshhqwg6ibp7o4', 'sh...",,,,1893.96,Goal
1580,2423651317,811,13,2,80,5,bx0cdmzr2gwr70ez72dorx82p,1,88.8,56.2,2022-05-22T06:20:25Z,"[{'id': 3586648109, 'qualifierId': 328}, {'id'...",4u281v53ges3kimtgac0tidm2,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,2105.64,Miss
1617,2423653225,832,16,2,81,51,bx0cdmzr2gwr70ez72dorx82p,1,96.1,58.1,2022-05-22T06:44:01Z,"[{'id': 3586664227, 'qualifierId': 214}, {'id'...",e3kdoxu1kwn2w3wwi1rqhvr9x,,,,,,high,"[{'playerId': 'afymbx9eo87zau8mo99pakbu', 'shi...",,,,2211.04,Goal
1762,2423664859,801,13,2,92,19,3c3jcs7vc1t6vz5lev162jyv7,1,94.1,47.7,2022-05-22T07:31:26Z,"[{'id': 3586724977, 'qualifierId': 24}, {'id':...",8qmm84tue6kuz8e5nhhdhmz8p,,,,,,,,,,,2839.6,Miss
1771,2423665119,899,13,2,92,32,bx0cdmzr2gwr70ez72dorx82p,1,51.0,74.6,2022-05-22T06:18:51Z,"[{'id': 3586949291, 'qualifierId': 231, 'value...",e3kdoxu1kwn2w3wwi1rqhvr9x,,,,,,high,"[{'playerId': 'ccu7hw3wrcspl1a18g2ldnsh5', 'sh...",,,,2852.24,Miss


In [17]:
df_all = pd.merge(df_events,df_tracking, on =['timeelapsed','current_phase'])
display(df_all.head(2))

df_all_miss = df_all[df_all['id']==2423564565]
display(df_all_miss)




Unnamed: 0,id,eventId,event_type_id,current_phase,period_minute,period_second,contestantId,outcome,x,y,lastModified,qualifier,playerId,lineBreakingPass.linesBroken.value,passOption.player,passTarget.player,xThreat.applied,lineBreakingPass.lastLineBroken.value,pressure.pressureReceived.value,pressure.player,xThreat.removed,keyPass,assist,timeelapsed,event_description,team_id_opta,player_id,jersey_no,pos_x,pos_y,speed,frame_count,team_id,acc,speed_x,speed_y,dop,team_in_possession
0,2423549045,2,32,1,0,0,3c3jcs7vc1t6vz5lev162jyv7,1,0.0,0.0,2022-05-22T03:17:52Z,"[{'id': 3586084711, 'qualifierId': 127, 'value...",,,,,,,,,,,,0.0,Period start,bx0cdmzr2gwr70ez72dorx82p,azuc3tma44xyrbgf5y279o1xx,16.0,-19.477912,4.240699,0.464332,10000,2.0,0.727075,-0.121969,-0.357286,L,bx0cdmzr2gwr70ez72dorx82p
1,2423549045,2,32,1,0,0,3c3jcs7vc1t6vz5lev162jyv7,1,0.0,0.0,2022-05-22T03:17:52Z,"[{'id': 3586084711, 'qualifierId': 127, 'value...",,,,,,,,,,,,0.0,Period start,bx0cdmzr2gwr70ez72dorx82p,6u2ob6fv950r1qve8uejkq2uh,1.0,-0.117343,-0.234231,0.328293,10000,2.0,0.899067,0.224221,0.126503,L,bx0cdmzr2gwr70ez72dorx82p


Unnamed: 0,id,eventId,event_type_id,current_phase,period_minute,period_second,contestantId,outcome,x,y,lastModified,qualifier,playerId,lineBreakingPass.linesBroken.value,passOption.player,passTarget.player,xThreat.applied,lineBreakingPass.lastLineBroken.value,pressure.pressureReceived.value,pressure.player,xThreat.removed,keyPass,assist,timeelapsed,event_description,team_id_opta,player_id,jersey_no,pos_x,pos_y,speed,frame_count,team_id,acc,speed_x,speed_y,dop,team_in_possession
6785,2423564565,127,13,1,13,42,bx0cdmzr2gwr70ez72dorx82p,1,76.4,44.0,2022-05-22T06:34:14Z,"[{'id': 3586168999, 'qualifierId': 18}, {'id':...",6u2ob6fv950r1qve8uejkq2uh,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,822.28,Miss,3c3jcs7vc1t6vz5lev162jyv7,8gkexxgf3pypshhqwg6ibp7o4,6.0,21.499467,-15.481811,4.408298,30557,1.0,-0.830438,3.357054,2.855477,R,bx0cdmzr2gwr70ez72dorx82p
6786,2423564565,127,13,1,13,42,bx0cdmzr2gwr70ez72dorx82p,1,76.4,44.0,2022-05-22T06:34:14Z,"[{'id': 3586168999, 'qualifierId': 18}, {'id':...",6u2ob6fv950r1qve8uejkq2uh,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,822.28,Miss,3c3jcs7vc1t6vz5lev162jyv7,976riwm0dz0e74d4l28y3ttcl,20.0,34.040936,-10.525651,2.917017,30557,1.0,-0.327736,2.908747,0.190629,R,bx0cdmzr2gwr70ez72dorx82p
6787,2423564565,127,13,1,13,42,bx0cdmzr2gwr70ez72dorx82p,1,76.4,44.0,2022-05-22T06:34:14Z,"[{'id': 3586168999, 'qualifierId': 18}, {'id':...",6u2ob6fv950r1qve8uejkq2uh,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,822.28,Miss,3c3jcs7vc1t6vz5lev162jyv7,3vx94h32ahujciraspdayj9t6,17.0,3.519819,-7.337529,2.3743,30557,1.0,-0.788773,0.779988,2.244148,R,bx0cdmzr2gwr70ez72dorx82p
6788,2423564565,127,13,1,13,42,bx0cdmzr2gwr70ez72dorx82p,1,76.4,44.0,2022-05-22T06:34:14Z,"[{'id': 3586168999, 'qualifierId': 18}, {'id':...",6u2ob6fv950r1qve8uejkq2uh,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,822.28,Miss,3c3jcs7vc1t6vz5lev162jyv7,fvd7y3f6948713acbas7w3u2,21.0,14.431847,-5.836049,4.143205,30557,1.0,-0.977393,3.285065,2.522008,R,bx0cdmzr2gwr70ez72dorx82p
6789,2423564565,127,13,1,13,42,bx0cdmzr2gwr70ez72dorx82p,1,76.4,44.0,2022-05-22T06:34:14Z,"[{'id': 3586168999, 'qualifierId': 18}, {'id':...",6u2ob6fv950r1qve8uejkq2uh,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,822.28,Miss,3c3jcs7vc1t6vz5lev162jyv7,afymbx9eo87zau8mo99pakbu,26.0,28.558585,6.563891,4.558537,30557,1.0,-0.670126,4.382103,-1.251649,R,bx0cdmzr2gwr70ez72dorx82p
6790,2423564565,127,13,1,13,42,bx0cdmzr2gwr70ez72dorx82p,1,76.4,44.0,2022-05-22T06:34:14Z,"[{'id': 3586168999, 'qualifierId': 18}, {'id':...",6u2ob6fv950r1qve8uejkq2uh,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,822.28,Miss,3c3jcs7vc1t6vz5lev162jyv7,8qmm84tue6kuz8e5nhhdhmz8p,16.0,30.743602,-2.535393,4.882442,30557,1.0,3.83961,2.430562,-4.245598,R,bx0cdmzr2gwr70ez72dorx82p
6791,2423564565,127,13,1,13,42,bx0cdmzr2gwr70ez72dorx82p,1,76.4,44.0,2022-05-22T06:34:14Z,"[{'id': 3586168999, 'qualifierId': 18}, {'id':...",6u2ob6fv950r1qve8uejkq2uh,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,822.28,Miss,3c3jcs7vc1t6vz5lev162jyv7,96wcx761pzv5ub4sfwsynp51x,3.0,26.02251,-3.964728,5.755434,30557,1.0,-1.431339,5.176525,2.511721,R,bx0cdmzr2gwr70ez72dorx82p
6792,2423564565,127,13,1,13,42,bx0cdmzr2gwr70ez72dorx82p,1,76.4,44.0,2022-05-22T06:34:14Z,"[{'id': 3586168999, 'qualifierId': 18}, {'id':...",6u2ob6fv950r1qve8uejkq2uh,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,822.28,Miss,3c3jcs7vc1t6vz5lev162jyv7,72d5uxwcmvhd6mzthxuvev1sl,5.0,32.965227,-5.050043,1.953662,30557,1.0,-3.534887,1.752391,-0.850783,R,bx0cdmzr2gwr70ez72dorx82p
6793,2423564565,127,13,1,13,42,bx0cdmzr2gwr70ez72dorx82p,1,76.4,44.0,2022-05-22T06:34:14Z,"[{'id': 3586168999, 'qualifierId': 18}, {'id':...",6u2ob6fv950r1qve8uejkq2uh,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,822.28,Miss,3c3jcs7vc1t6vz5lev162jyv7,3sc349yey596xp2j6xlyt0frp,29.0,31.596956,-17.772419,5.380581,30557,1.0,-0.349731,4.776794,2.474837,R,bx0cdmzr2gwr70ez72dorx82p
6794,2423564565,127,13,1,13,42,bx0cdmzr2gwr70ez72dorx82p,1,76.4,44.0,2022-05-22T06:34:14Z,"[{'id': 3586168999, 'qualifierId': 18}, {'id':...",6u2ob6fv950r1qve8uejkq2uh,,,,,,high,"[{'playerId': '8qmm84tue6kuz8e5nhhdhmz8p', 'sh...",,,,822.28,Miss,3c3jcs7vc1t6vz5lev162jyv7,6ekdnbnk56xlxforb5owt3dn9,18.0,49.3628,-1.066902,0.5209,30557,1.0,1.099635,-0.460566,-0.324449,R,bx0cdmzr2gwr70ez72dorx82p


In [3]:
df_events[df_events['event_description']=='Ball touch']

Unnamed: 0,id,eventId,event_type_id,current_phase,period_minute,period_second,contestantId,outcome,x,y,lastModified,qualifier,playerId,lineBreakingPass.linesBroken.value,passOption.player,passTarget.player,xThreat.applied,lineBreakingPass.lastLineBroken.value,pressure.pressureReceived.value,pressure.player,xThreat.removed,keyPass,assist,timeelapsed,event_description
38,2423549973,22,61,1,1,12,bx0cdmzr2gwr70ez72dorx82p,0,63.1,23.4,2022-05-22T06:36:45Z,"[{'id': 3587586589, 'qualifierId': 21}, {'id':...",e3kdoxu1kwn2w3wwi1rqhvr9x,,,,,,high,"[{'playerId': '72d5uxwcmvhd6mzthxuvev1sl', 'sh...",,,,72.52,Ball touch
126,2423554205,58,61,1,5,25,bx0cdmzr2gwr70ez72dorx82p,0,47.1,13.7,2022-05-22T06:35:53Z,"[{'id': 3587595693, 'qualifierId': 15}, {'id':...",e3kdoxu1kwn2w3wwi1rqhvr9x,,,,,,high,"[{'playerId': '976riwm0dz0e74d4l28y3ttcl', 'sh...",,,,325.20,Ball touch
150,2423555597,76,61,1,6,30,3c3jcs7vc1t6vz5lev162jyv7,0,55.8,4.7,2022-05-22T06:35:42Z,"[{'id': 3586120557, 'qualifierId': 56, 'value'...",fvd7y3f6948713acbas7w3u2,,,,,,low,"[{'playerId': '6j0ogojh2b7poyceg7i3k09yi', 'sh...",,,,390.20,Ball touch
161,2423556435,77,61,1,7,7,bx0cdmzr2gwr70ez72dorx82p,0,73.4,54.8,2022-05-22T06:35:36Z,"[{'id': 3587600013, 'qualifierId': 388}, {'id'...",5qgc6zjc38a5xjl35gs7h3vu1,,,,,,high,"[{'playerId': '96wcx761pzv5ub4sfwsynp51x', 'sh...",,,,427.32,Ball touch
163,2423556479,79,61,1,7,8,3c3jcs7vc1t6vz5lev162jyv7,1,26.2,48.5,2022-05-22T06:35:36Z,"[{'id': 3586125289, 'qualifierId': 56, 'value'...",96wcx761pzv5ub4sfwsynp51x,,,,,,,,,,,428.08,Ball touch
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1721,2423662741,877,61,2,90,12,bx0cdmzr2gwr70ez72dorx82p,0,60.4,94.5,2022-05-22T06:19:12Z,"[{'id': 3586711543, 'qualifierId': 56, 'value'...",6u2ob6fv950r1qve8uejkq2uh,,,,,,medium,"[{'playerId': 'fvd7y3f6948713acbas7w3u2', 'shi...",,,,2712.80,Ball touch
1734,2423663399,884,61,2,90,48,bx0cdmzr2gwr70ez72dorx82p,1,61.8,83.5,2022-05-22T06:19:08Z,"[{'id': 3587613625, 'qualifierId': 178}, {'id'...",6u2ob6fv950r1qve8uejkq2uh,,,,,,,,,,,2748.92,Ball touch
1750,2423663971,891,61,2,91,26,bx0cdmzr2gwr70ez72dorx82p,0,67.1,97.9,2022-05-22T06:19:00Z,"[{'id': 3587615021, 'qualifierId': 15}, {'id':...",e3kdoxu1kwn2w3wwi1rqhvr9x,,,,,,high,"[{'playerId': '72d5uxwcmvhd6mzthxuvev1sl', 'sh...",,,,2786.60,Ball touch
1770,2423705563,819,61,2,92,29,3c3jcs7vc1t6vz5lev162jyv7,1,66.6,6.5,2022-05-22T06:18:53Z,"[{'id': 3587616867, 'qualifierId': 178}, {'id'...",7ty1wdxxnusgkl34k5raipbl5,,,,,,,,,,,2849.36,Ball touch
