## Pass study

In [1]:
import pandas as pd
import json
import os
import numpy as np

pd.options.display.max_columns = 999

In [2]:
# load tracking data
current_directory = os.getcwd()
path_tracking = os.path.join(os.path.join(os.path.dirname(current_directory),'data'),"tracking_set_0")
print(path_tracking)
game_id = 1

df_tracking = pd.read_parquet(f'{path_tracking}/{game_id}_tracking.parquet')

#           ------------------------------------------------------------        

# load events names
path_event_csv = os.path.join(os.path.dirname(current_directory),'data')
df_event_names = pd.read_csv(os.path.join(path_event_csv,'event_names.csv'))
dict_event_names = df_event_names.set_index('event_type_id').to_dict()['event_description']

# load event data
def load_event_data(file_name, base_path):
    # read in event file
    with open(f'{base_path}/{file_name}') as f:
        data=json.loads(f.read())

    f.close()
    
    # transform data into pandas dataframe
    df_events = pd.json_normalize(data['liveData']['event'])
    
    # preprocess event data and keep relevant information only

    # add timeelapsed to each event
    df_events['timestamp'] = pd.to_datetime(df_events.timeStamp).apply(lambda x: x.timestamp())

    df_events = df_events.query('periodId in [1,2]')

    def add_timeelapsed_to_events(df):
        start_time = df.query('typeId==32')['timestamp'].iloc[0]
        df['timestamp_new'] = np.int64((df['timestamp'] - start_time)*1000)

        df['timeelapsed'] = df['timestamp_new'].apply(lambda x: (40 * round(x/40))/1000)

        return df

    df_events = df_events.groupby('periodId').apply(add_timeelapsed_to_events)

    df_events = df_events.drop(columns=['timeStamp','timestamp','timestamp_new'])
    
    # rename some columns
    df_events = df_events.rename(columns=
        {
            'periodId':'current_phase',
            'typeId':'event_type_id',
            'timeMin':'period_minute',
            'timeSec':'period_second'
        }
    )
    
    return df_events

path_events = os.path.join(os.path.join(os.path.dirname(current_directory),'data'),"first_10_events")
print(path_events)

event_file = f'{game_id}.json'

df_events = load_event_data(
    base_path=path_events,
    file_name=event_file
)

# add event descriptions
df_events['event_description'] = df_events['event_type_id'].map(dict_event_names)

# make a copy of it for later usage
events_all = df_events.copy()

display(df_events.head())
print(df_events['event_description'].unique())


#           ------------------------------------------------------------        


# read in qualifier list
path_data = os.path.join(os.path.dirname(current_directory),'data')
qualifier_names = pd.read_csv(os.path.join(path_data,"qualifier_names.csv"))

# explode coverts each element in each list to a separate row
cols = ['id', 'qualifier']
qualifiers = events_all[cols].explode('qualifier')
display(qualifiers.head())

print("------------")

qualifiers = qualifiers[qualifiers.qualifier.notna()].reset_index(drop=True)
print(qualifiers.shape)
print("------------")
display(qualifiers.head())
print("------------")

# save corresponding event ids for each qualifier
event_ids = qualifiers.id.tolist()

qualifiers = pd.json_normalize(qualifiers[qualifiers.qualifier.notna()]['qualifier'])
print(qualifiers.shape)
print("------------")
display(qualifiers.head())
print("------------")

qualifiers['event_id'] = event_ids
display(qualifiers.head())
print("------------")
qualifiers = qualifiers.merge(qualifier_names, how='left', on='qualifierId')
display(qualifiers.head())

c:\Users\Gabriel\OneDrive\Escritorio\SportsAnalyticsCourse\OptaForum\OptaChallenge_Clustering_Player_Styles\data\tracking_set_0
c:\Users\Gabriel\OneDrive\Escritorio\SportsAnalyticsCourse\OptaForum\OptaChallenge_Clustering_Player_Styles\data\first_10_events


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df_events = df_events.groupby('periodId').apply(add_timeelapsed_to_events)


Unnamed: 0,id,eventId,event_type_id,current_phase,period_minute,period_second,contestantId,outcome,x,y,lastModified,qualifier,playerId,lineBreakingPass.linesBroken.value,passOption.player,passTarget.player,xThreat.applied,lineBreakingPass.lastLineBroken.value,pressure.pressureReceived.value,pressure.player,xThreat.removed,keyPass,assist,timeelapsed,event_description
2,2423549045,2,32,1,0,0,3c3jcs7vc1t6vz5lev162jyv7,1,0.0,0.0,2022-05-22T03:17:52Z,"[{'id': 3586084711, 'qualifierId': 127, 'value...",,,,,,,,,,,,0.0,Period start
3,2423549041,2,32,1,0,0,bx0cdmzr2gwr70ez72dorx82p,1,0.0,0.0,2022-05-21T18:59:34Z,"[{'id': 3586084701, 'qualifierId': 127, 'value...",,,,,,,,,,,,0.0,Period start
4,2423549063,3,1,1,0,0,bx0cdmzr2gwr70ez72dorx82p,1,49.9,50.0,2022-05-22T03:34:41Z,"[{'id': 3586084825, 'qualifierId': 56, 'value'...",6u2ob6fv950r1qve8uejkq2uh,,,,,,,,,,,0.04,Pass
5,2423549097,4,1,1,0,2,bx0cdmzr2gwr70ez72dorx82p,1,31.5,57.2,2022-05-22T06:37:07Z,"[{'id': 3586085043, 'qualifierId': 213, 'value...",azuc3tma44xyrbgf5y279o1xx,0.0,"[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...","[{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'sh...",0.0029771626,,,,,,,2.84,Pass
6,2423549113,5,1,1,0,7,bx0cdmzr2gwr70ez72dorx82p,1,49.2,95.4,2022-05-22T06:37:06Z,"[{'id': 3586085129, 'qualifierId': 212, 'value...",7sep6mx2s67mh5fr3raxu7aei,1.0,"[{'playerId': '5qgc6zjc38a5xjl35gs7h3vu1', 'sh...","[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...",0.0309752524,secondToLast,high,"[{'playerId': 'e6ok0deqkoe80184iu509gzu2', 'sh...",,,,7.88,Pass


['Period start' 'Pass' 'Take On' 'Challenge' 'Blocked Pass'
 'Ball recovery' 'Attempted Tackle' 'Out' 'Ball touch' '50/50'
 'Dispossessed' 'Tackle' 'Corner Awarded' 'Clearance' 'Offside Pass'
 'Offside provoked' 'Foul' 'Aerial' 'Keeper pick-up' 'Deleted event'
 'Interception' 'Error' 'Goal' 'Attempt Saved' 'Save' 'Miss' 'Claim'
 'Card' 'Start delay' 'End delay' 'Referee Drop Ball' nan 'End'
 'Player Off' 'Player on' 'Formation change' 'Keeper Sweeper'
 'Shield ball opp']


Unnamed: 0,id,qualifier
2,2423549045,"{'id': 3586084711, 'qualifierId': 127, 'value'..."
3,2423549041,"{'id': 3586084701, 'qualifierId': 127, 'value'..."
4,2423549063,"{'id': 3586084825, 'qualifierId': 56, 'value':..."
4,2423549063,"{'id': 3586084833, 'qualifierId': 213, 'value'..."
4,2423549063,"{'id': 3586084827, 'qualifierId': 140, 'value'..."


------------
(9430, 2)
------------


Unnamed: 0,id,qualifier
0,2423549045,"{'id': 3586084711, 'qualifierId': 127, 'value'..."
1,2423549041,"{'id': 3586084701, 'qualifierId': 127, 'value'..."
2,2423549063,"{'id': 3586084825, 'qualifierId': 56, 'value':..."
3,2423549063,"{'id': 3586084833, 'qualifierId': 213, 'value'..."
4,2423549063,"{'id': 3586084827, 'qualifierId': 140, 'value'..."


------------
(9430, 3)
------------


Unnamed: 0,id,qualifierId,value
0,3586084711,127,Right to Left
1,3586084701,127,Left to Right
2,3586084825,56,Back
3,3586084833,213,2.7
4,3586084827,140,28.5


------------


Unnamed: 0,id,qualifierId,value,event_id
0,3586084711,127,Right to Left,2423549045
1,3586084701,127,Left to Right,2423549041
2,3586084825,56,Back,2423549063
3,3586084833,213,2.7,2423549063
4,3586084827,140,28.5,2423549063


------------


Unnamed: 0,id,qualifierId,value,event_id,qualifier
0,3586084711,127,Right to Left,2423549045,Direction of Play
1,3586084701,127,Left to Right,2423549041,Direction of Play
2,3586084825,56,Back,2423549063,Zone
3,3586084833,213,2.7,2423549063,Angle
4,3586084827,140,28.5,2423549063,Pass End X


In [67]:
df_events.head(1)

Unnamed: 0,id,eventId,event_type_id,current_phase,period_minute,period_second,contestantId,outcome,x,y,lastModified,qualifier,playerId,lineBreakingPass.linesBroken.value,passOption.player,passTarget.player,xThreat.applied,lineBreakingPass.lastLineBroken.value,pressure.pressureReceived.value,pressure.player,xThreat.removed,keyPass,assist,timeelapsed,event_description
2,2423549045,2,32,1,0,0,3c3jcs7vc1t6vz5lev162jyv7,1,0.0,0.0,2022-05-22T03:17:52Z,"[{'id': 3586084711, 'qualifierId': 127, 'value...",,,,,,,,,,,,0.0,Period start


Anem a agafar el expected completion i anem a fer l'avg per cada jugador per veure qué de dificil son els seus pases en general i després a l'outcome li restarem la probabilitat de cada pase (expected completion) per veure si aquest jugador es bo fent pases.

- Expected Pass Completion (xP) - show a player's pass difficulty selection
- Passes Completed Above Expected per pass (PAx per pass) - show how good a player is at completing the passes they attempt given their difficulty. El per pass es per normalitzar-ho

Get the passes with predictions

In [8]:
is_pass_event = df_events.event_type_id == 1
is_without_predictions = df_events['passOption.player'].isna()
remove_passes_without_predictions = df_events['passOption.player'].notna()
remove_passes_without_target_predictions = df_events['passTarget.player'].notna()

pass_events = df_events.loc[is_pass_event & remove_passes_without_predictions & remove_passes_without_target_predictions]

In [9]:
expected_pass = pass_events[['id','outcome','playerId','passOption.player','passTarget.player','xThreat.applied','timeelapsed']]
expected_pass

Unnamed: 0,id,outcome,playerId,passOption.player,passTarget.player,xThreat.applied,timeelapsed
5,2423549097,1,azuc3tma44xyrbgf5y279o1xx,"[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...","[{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'sh...",0.0029771626,2.84
6,2423549113,1,7sep6mx2s67mh5fr3raxu7aei,"[{'playerId': '5qgc6zjc38a5xjl35gs7h3vu1', 'sh...","[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...",0.0309752524,7.88
7,2423549127,1,e3kdoxu1kwn2w3wwi1rqhvr9x,"[{'playerId': '7cp51c8zn7y08iyk0hc9ix5nt', 'sh...","[{'playerId': '6u2ob6fv950r1qve8uejkq2uh', 'sh...",0.0338825583,9.16
10,2423549153,1,6u2ob6fv950r1qve8uejkq2uh,"[{'playerId': '5qgc6zjc38a5xjl35gs7h3vu1', 'sh...","[{'playerId': '6j0ogojh2b7poyceg7i3k09yi', 'sh...",0.0111802518,11.56
11,2423549155,0,6j0ogojh2b7poyceg7i3k09yi,"[{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'sh...","[{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'sh...",,12.92
...,...,...,...,...,...,...,...
1769,2423665113,0,bvbebtykj45j3luvemk8yc4ph,"[{'playerId': '7k0r5crdh9blj3edt31zwy0dm', 'sh...","[{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'sh...",,2849.16
1774,2423665291,1,6ekdnbnk56xlxforb5owt3dn9,"[{'playerId': '3sc349yey596xp2j6xlyt0frp', 'sh...","[{'playerId': 'ccu7hw3wrcspl1a18g2ldnsh5', 'sh...",0.0080752969,2863.04
1777,2423665341,0,ccu7hw3wrcspl1a18g2ldnsh5,"[{'playerId': '976riwm0dz0e74d4l28y3ttcl', 'sh...","[{'playerId': '7ty1wdxxnusgkl34k5raipbl5', 'sh...",,2865.84
1779,2423665377,1,8qmm84tue6kuz8e5nhhdhmz8p,"[{'playerId': 'ccu7hw3wrcspl1a18g2ldnsh5', 'sh...","[{'playerId': 'fvd7y3f6948713acbas7w3u2', 'shi...",0.0200788379,2868.88


avg xP per player

In [10]:
pass_options = expected_pass[['id','playerId','outcome','passTarget.player']].explode('passTarget.player')
display(pass_options.shape)
pass_options_flat = pd.json_normalize(pass_options['passTarget.player'])

# save corresponding event ids for each qualifier
event_ids = pass_options[pass_options['passTarget.player'].notna()].id.tolist()
pass_options_flat['event_id'] = event_ids
pass_options = pass_options.rename(columns={'playerId': 'player_id'})

pass_options = pd.merge(pass_options,pass_options_flat, left_on = 'id', right_on = 'event_id')

display(pass_options)
pass_options['predictions.expectedPassCompletion.value'] = pd.to_numeric(pass_options['predictions.expectedPassCompletion.value'])
xP_player = pass_options.groupby('player_id')['predictions.expectedPassCompletion.value'].mean().reset_index(name='xP')
display(xP_player)

(931, 4)

Unnamed: 0,id,player_id,outcome,passTarget.player,playerId,shirtNumber,predictions.expectedPassReceiver.value,predictions.expectedPassCompletion.value,predictions.expectedThreat.value,predictions.passOptionQuality.value,event_id
0,2423549097,azuc3tma44xyrbgf5y279o1xx,1,"{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'shi...",7sep6mx2s67mh5fr3raxu7aei,13,0.9812835455,0.995218277,0.0029771626,-0.189,2423549097
1,2423549113,7sep6mx2s67mh5fr3raxu7aei,1,"{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'shi...",e3kdoxu1kwn2w3wwi1rqhvr9x,17,0.8744962215,0.5744152069,0.0309752524,-0.061,2423549113
2,2423549127,e3kdoxu1kwn2w3wwi1rqhvr9x,1,"{'playerId': '6u2ob6fv950r1qve8uejkq2uh', 'shi...",6u2ob6fv950r1qve8uejkq2uh,1,0.9626127481,0.697702527,0.0338825583,0.065,2423549127
3,2423549153,6u2ob6fv950r1qve8uejkq2uh,1,"{'playerId': '6j0ogojh2b7poyceg7i3k09yi', 'shi...",6j0ogojh2b7poyceg7i3k09yi,22,0.3720048666,0.4653860033,0.0111802518,-0.424,2423549153
4,2423549155,6j0ogojh2b7poyceg7i3k09yi,0,"{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'shi...",7sep6mx2s67mh5fr3raxu7aei,13,0.2266225219,0.7828329802,0.0053730607,-0.296,2423549155
...,...,...,...,...,...,...,...,...,...,...,...
926,2423665113,bvbebtykj45j3luvemk8yc4ph,0,"{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'shi...",e3kdoxu1kwn2w3wwi1rqhvr9x,17,0.7923606038,0.5883867145,0.0174834132,-0.249,2423665113
927,2423665291,6ekdnbnk56xlxforb5owt3dn9,1,"{'playerId': 'ccu7hw3wrcspl1a18g2ldnsh5', 'shi...",ccu7hw3wrcspl1a18g2ldnsh5,13,0.8176244497,0.5313349962,0.0080752969,-0.425,2423665291
928,2423665341,ccu7hw3wrcspl1a18g2ldnsh5,0,"{'playerId': '7ty1wdxxnusgkl34k5raipbl5', 'shi...",7ty1wdxxnusgkl34k5raipbl5,19,0.6607499123,0.3737411499,0.0542605221,0.146,2423665341
929,2423665377,8qmm84tue6kuz8e5nhhdhmz8p,1,"{'playerId': 'fvd7y3f6948713acbas7w3u2', 'shir...",fvd7y3f6948713acbas7w3u2,21,0.8954467177,0.9761477709,0.0200788379,0.049,2423665377


Unnamed: 0,player_id,xP
0,2lvit204llltk13iglsa2tjah,0.363136
1,3sc349yey596xp2j6xlyt0frp,0.861019
2,3vx94h32ahujciraspdayj9t6,0.84572
3,4u281v53ges3kimtgac0tidm2,0.865266
4,5ak9fwtqlr2pll0nsv5br7p7u,0.585973
5,5qgc6zjc38a5xjl35gs7h3vu1,0.728652
6,6ekdnbnk56xlxforb5owt3dn9,0.854137
7,6j0ogojh2b7poyceg7i3k09yi,0.889823
8,6u2ob6fv950r1qve8uejkq2uh,0.824437
9,72d5uxwcmvhd6mzthxuvev1sl,0.895574


PAx per pass or per 100 passes

In [11]:
PAx = pass_options.groupby('player_id').agg(total_completed_passes = ('outcome','sum'),
                                      total_xp = ('predictions.expectedPassCompletion.value','sum'),
                                      total_attempted = ('outcome','count')).reset_index()

PAx['PAx per pass'] = (PAx['total_completed_passes'] - PAx['total_xp']) / PAx['total_attempted']
PAx['PAx per 100 passes'] = PAx['PAx per pass']*100
display(PAx)

Unnamed: 0,player_id,total_completed_passes,total_xp,total_attempted,PAx per pass,PAx per 100 passes
0,2lvit204llltk13iglsa2tjah,0,0.363136,1,-0.363136,-36.313611
1,3sc349yey596xp2j6xlyt0frp,37,39.606883,46,-0.056671,-5.667137
2,3vx94h32ahujciraspdayj9t6,16,15.222958,18,0.043169,4.316901
3,4u281v53ges3kimtgac0tidm2,38,39.802239,46,-0.039179,-3.91791
4,5ak9fwtqlr2pll0nsv5br7p7u,10,9.96154,17,0.002262,0.226234
5,5qgc6zjc38a5xjl35gs7h3vu1,22,21.859553,30,0.004682,0.468157
6,6ekdnbnk56xlxforb5owt3dn9,28,28.186531,33,-0.005652,-0.565244
7,6j0ogojh2b7poyceg7i3k09yi,56,55.169015,62,0.013403,1.340298
8,6u2ob6fv950r1qve8uejkq2uh,47,46.168449,56,0.014849,1.484913
9,72d5uxwcmvhd6mzthxuvev1sl,33,31.345093,35,0.047283,4.728305


- Desglosar el playerOption.
- Quedarme amb els playerOptions que siguin good oportunity (expected receiver > un limit definit i xp > limit i expected threat > upper quantile)
- Ordenar-ho per threat i els 2 primers son good oportunities. 
- Veure si el target és un d'aquests dos, si ho és, good oportunity taken. Sinó, no.
- Analitzar per cada good oportunity taken, el seu Pax per pass.

In [72]:
pass_options = pass_options.rename(columns={'predictions.expectedThreat.value': 'xT_target'})
pass_options['xT_target'] = pd.to_numeric(pass_options['xT_target'])

xT_upper_quartile = pass_options.xT_target.quantile(0.75)
print(xT_upper_quartile)

0.048556879149999996


In [73]:
expected_pass = pass_events[['id','outcome','playerId','passOption.player','passTarget.player','xThreat.applied','timeelapsed']]

display(expected_pass['passOption.player'].values)

for index,row in expected_pass.iterrows():
   OptionPlayer = row['passOption.player']
   resultados_filtrados = [d for d in OptionPlayer if (float(d['predictions']['expectedPassCompletion']['value']) > 0.65)
                           & (float(d['predictions']['expectedPassReceiver']['value']) > 0.65) & (float(d['predictions']['expectedThreat']['value']) >= xT_upper_quartile)]
   data_ordenada = sorted(resultados_filtrados, key=lambda x: float(x['predictions']['expectedThreat']['value']))
   if len(data_ordenada)>2:
      data_ordenada = data_ordenada[:2]
   expected_pass.at[index, 'passOption.player'] = data_ordenada


pass_options = expected_pass[['id','playerId','outcome','passOption.player','passTarget.player']].explode('passTarget.player')
display(pass_options.shape)
pass_options_flat = pd.json_normalize(pass_options['passTarget.player'])

# save corresponding event ids for each qualifier
event_ids = pass_options[pass_options['passTarget.player'].notna()].id.tolist()
pass_options_flat['event_id'] = event_ids
pass_options = pass_options.rename(columns={'playerId': 'player_id'})

pass_options = pd.merge(pass_options,pass_options_flat, left_on = 'id', right_on = 'event_id')

display(pass_options)

# if player_id del target esta dins del passOption.player vol dir que good oportunity taken

for index,row in pass_options.iterrows():
   OptionPlayer = row['passOption.player']
   optionsplayers = [d['playerId'] for d in OptionPlayer]
   if row['playerId'] in optionsplayers:
      pass_options.at[index, 'good_opo_taken'] = 1
   else:
      pass_options.at[index, 'good_opo_taken'] = 0

display(pass_options.head(10))
    

array([list([{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'shirtNumber': 17, 'predictions': {'expectedPassReceiver': {'value': '0.6592077613'}, 'expectedPassCompletion': {'value': '0.3655609488'}, 'expectedThreat': {'value': '0.0272596478'}, 'passOptionQuality': {'value': '-0.255'}}}, {'playerId': 'e7e68wlpiqqohpg71oh4vrbl6', 'shirtNumber': 11, 'predictions': {'expectedPassReceiver': {'value': '0.0037940145'}, 'expectedPassCompletion': {'value': '0.2479383051'}, 'expectedThreat': {'value': '0.0191572905'}, 'passOptionQuality': {'value': '-0.453'}}}, {'playerId': 'vja0xo3xiuax8eh0b6q3y09', 'shirtNumber': 4, 'predictions': {'expectedPassReceiver': {'value': '0.002256453'}, 'expectedPassCompletion': {'value': '0.4868352115'}, 'expectedThreat': {'value': '0.0069071949'}, 'passOptionQuality': {'value': '-0.472'}}}, {'playerId': '7cp51c8zn7y08iyk0hc9ix5nt', 'shirtNumber': 5, 'predictions': {'expectedPassReceiver': {'value': '0.0033909082'}, 'expectedPassCompletion': {'value': '0.9509418011'}, '

(931, 5)

Unnamed: 0,id,player_id,outcome,passOption.player,passTarget.player,playerId,shirtNumber,predictions.expectedPassReceiver.value,predictions.expectedPassCompletion.value,predictions.expectedThreat.value,predictions.passOptionQuality.value,event_id
0,2423549097,azuc3tma44xyrbgf5y279o1xx,1,[],"{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'shi...",7sep6mx2s67mh5fr3raxu7aei,13,0.9812835455,0.995218277,0.0029771626,-0.189,2423549097
1,2423549113,7sep6mx2s67mh5fr3raxu7aei,1,[],"{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'shi...",e3kdoxu1kwn2w3wwi1rqhvr9x,17,0.8744962215,0.5744152069,0.0309752524,-0.061,2423549113
2,2423549127,e3kdoxu1kwn2w3wwi1rqhvr9x,1,[],"{'playerId': '6u2ob6fv950r1qve8uejkq2uh', 'shi...",6u2ob6fv950r1qve8uejkq2uh,1,0.9626127481,0.697702527,0.0338825583,0.065,2423549127
3,2423549153,6u2ob6fv950r1qve8uejkq2uh,1,[],"{'playerId': '6j0ogojh2b7poyceg7i3k09yi', 'shi...",6j0ogojh2b7poyceg7i3k09yi,22,0.3720048666,0.4653860033,0.0111802518,-0.424,2423549153
4,2423549155,6j0ogojh2b7poyceg7i3k09yi,0,[],"{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'shi...",7sep6mx2s67mh5fr3raxu7aei,13,0.2266225219,0.7828329802,0.0053730607,-0.296,2423549155
...,...,...,...,...,...,...,...,...,...,...,...,...
926,2423665113,bvbebtykj45j3luvemk8yc4ph,0,[],"{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'shi...",e3kdoxu1kwn2w3wwi1rqhvr9x,17,0.7923606038,0.5883867145,0.0174834132,-0.249,2423665113
927,2423665291,6ekdnbnk56xlxforb5owt3dn9,1,[],"{'playerId': 'ccu7hw3wrcspl1a18g2ldnsh5', 'shi...",ccu7hw3wrcspl1a18g2ldnsh5,13,0.8176244497,0.5313349962,0.0080752969,-0.425,2423665291
928,2423665341,ccu7hw3wrcspl1a18g2ldnsh5,0,[],"{'playerId': '7ty1wdxxnusgkl34k5raipbl5', 'shi...",7ty1wdxxnusgkl34k5raipbl5,19,0.6607499123,0.3737411499,0.0542605221,0.146,2423665341
929,2423665377,8qmm84tue6kuz8e5nhhdhmz8p,1,[],"{'playerId': 'fvd7y3f6948713acbas7w3u2', 'shir...",fvd7y3f6948713acbas7w3u2,21,0.8954467177,0.9761477709,0.0200788379,0.049,2423665377


Unnamed: 0,id,player_id,outcome,passOption.player,passTarget.player,playerId,shirtNumber,predictions.expectedPassReceiver.value,predictions.expectedPassCompletion.value,predictions.expectedThreat.value,predictions.passOptionQuality.value,event_id,good_opo_taken
0,2423549097,azuc3tma44xyrbgf5y279o1xx,1,[],"{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'shi...",7sep6mx2s67mh5fr3raxu7aei,13,0.9812835455,0.995218277,0.0029771626,-0.189,2423549097,0.0
1,2423549113,7sep6mx2s67mh5fr3raxu7aei,1,[],"{'playerId': 'e3kdoxu1kwn2w3wwi1rqhvr9x', 'shi...",e3kdoxu1kwn2w3wwi1rqhvr9x,17,0.8744962215,0.5744152069,0.0309752524,-0.061,2423549113,0.0
2,2423549127,e3kdoxu1kwn2w3wwi1rqhvr9x,1,[],"{'playerId': '6u2ob6fv950r1qve8uejkq2uh', 'shi...",6u2ob6fv950r1qve8uejkq2uh,1,0.9626127481,0.697702527,0.0338825583,0.065,2423549127,0.0
3,2423549153,6u2ob6fv950r1qve8uejkq2uh,1,[],"{'playerId': '6j0ogojh2b7poyceg7i3k09yi', 'shi...",6j0ogojh2b7poyceg7i3k09yi,22,0.3720048666,0.4653860033,0.0111802518,-0.424,2423549153,0.0
4,2423549155,6j0ogojh2b7poyceg7i3k09yi,0,[],"{'playerId': '7sep6mx2s67mh5fr3raxu7aei', 'shi...",7sep6mx2s67mh5fr3raxu7aei,13,0.2266225219,0.7828329802,0.0053730607,-0.296,2423549155,0.0
5,2423549181,vja0xo3xiuax8eh0b6q3y09,1,[],"{'playerId': '7cp51c8zn7y08iyk0hc9ix5nt', 'shi...",7cp51c8zn7y08iyk0hc9ix5nt,5,0.9018160105,0.964581728,0.0091148615,-0.119,2423549181,0.0
6,2423549197,7cp51c8zn7y08iyk0hc9ix5nt,1,[],"{'playerId': 'azuc3tma44xyrbgf5y279o1xx', 'shi...",azuc3tma44xyrbgf5y279o1xx,16,0.7530357838,0.9972851872,0.0002713203,-0.227,2423549197,0.0
7,2423549219,azuc3tma44xyrbgf5y279o1xx,1,[],"{'playerId': '6j0ogojh2b7poyceg7i3k09yi', 'shi...",6j0ogojh2b7poyceg7i3k09yi,22,0.9635518789,0.9978380203,0.0027582049,-0.19,2423549219,0.0
8,2423549235,6j0ogojh2b7poyceg7i3k09yi,1,[],"{'playerId': 'azuc3tma44xyrbgf5y279o1xx', 'shi...",azuc3tma44xyrbgf5y279o1xx,16,0.9203734398,0.9768826962,7.33733e-05,-0.243,2423549235,0.0
9,2423549257,azuc3tma44xyrbgf5y279o1xx,1,[],"{'playerId': 'e7e68wlpiqqohpg71oh4vrbl6', 'shi...",e7e68wlpiqqohpg71oh4vrbl6,11,0.9006440639,0.6596499085,0.0055874586,-0.375,2423549257,0.0


In [74]:
good_oportunities = pass_options[pass_options['good_opo_taken']==1]
good_oportunities['predictions.expectedPassCompletion.value'] = pd.to_numeric(good_oportunities['predictions.expectedPassCompletion.value'])

PAx_go = good_oportunities.groupby('player_id').agg(total_completed_passes = ('outcome','sum'),
                                      total_xp = ('predictions.expectedPassCompletion.value','sum'),
                                      total_attempted = ('outcome','count')).reset_index()

PAx_go['PAx per pass'] = (PAx_go['total_completed_passes'] - PAx_go['total_xp']) / PAx_go['total_attempted']
PAx_go['PAx per 100 passes'] = PAx_go['PAx per pass']*100
display(PAx_go)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  good_oportunities['predictions.expectedPassCompletion.value'] = pd.to_numeric(good_oportunities['predictions.expectedPassCompletion.value'])


Unnamed: 0,player_id,total_completed_passes,total_xp,total_attempted,PAx per pass,PAx per 100 passes
0,3sc349yey596xp2j6xlyt0frp,2,1.892772,2,0.053614,5.361405
1,3vx94h32ahujciraspdayj9t6,2,1.680673,2,0.159664,15.966374
2,4u281v53ges3kimtgac0tidm2,1,1.571788,2,-0.285894,-28.589413
3,5ak9fwtqlr2pll0nsv5br7p7u,3,3.366187,4,-0.091547,-9.15468
4,5qgc6zjc38a5xjl35gs7h3vu1,14,12.528931,14,0.105076,10.507636
5,6j0ogojh2b7poyceg7i3k09yi,13,13.137321,14,-0.009809,-0.980863
6,6u2ob6fv950r1qve8uejkq2uh,21,20.092914,22,0.041231,4.123117
7,72d5uxwcmvhd6mzthxuvev1sl,1,0.991905,1,0.008095,0.809485
8,7cp51c8zn7y08iyk0hc9ix5nt,1,2.631052,3,-0.543684,-54.368391
9,7sep6mx2s67mh5fr3raxu7aei,8,8.030122,9,-0.003347,-0.334687


Let's do a plot!

In [12]:
import matplotlib.pyplot as plt

PAx['is_positive'] = PAx['PAx per 100 passes'] > 0
PAx = PAx.sort_values(by='PAx per 100 passes',ascending=False)

PAx = PAx[PAx['total_attempted']>15]
display(PAx)

df = pd.concat([PAx.head(3), PAx.tail(3)])
df.to_csv('PAxper100passes.csv', index=False)


Unnamed: 0,player_id,total_completed_passes,total_xp,total_attempted,PAx per pass,PAx per 100 passes,is_positive
14,8gkexxgf3pypshhqwg6ibp7o4,30,27.301367,33,0.081777,8.177675,True
9,72d5uxwcmvhd6mzthxuvev1sl,33,31.345093,35,0.047283,4.728305,True
2,3vx94h32ahujciraspdayj9t6,16,15.222958,18,0.043169,4.316901,True
16,96wcx761pzv5ub4sfwsynp51x,47,45.08505,53,0.036131,3.613112,True
17,976riwm0dz0e74d4l28y3ttcl,44,42.414043,53,0.029924,2.992373,True
12,7sep6mx2s67mh5fr3raxu7aei,30,28.839349,39,0.02976,2.976029,True
29,vja0xo3xiuax8eh0b6q3y09,40,38.829841,42,0.027861,2.786094,True
26,e6ok0deqkoe80184iu509gzu2,22,21.53277,29,0.016111,1.611138,True
8,6u2ob6fv950r1qve8uejkq2uh,47,46.168449,56,0.014849,1.484913,True
7,6j0ogojh2b7poyceg7i3k09yi,56,55.169015,62,0.013403,1.340298,True
