In [266]:
import pandas as pd
from pandas.io.json import json_normalize
import datetime

# Load data

In [282]:
# Locations of the four figshare JSON exports this notebook reads:
# the England match list and event log, plus the team and player tables.
matches_path = 'data/figshare/match/matches_England.json'
events_path = 'data/figshare/events/events_England.json'
teams_path = 'data/figshare/teams.json'
players_path = 'data/figshare/players.json'

# One DataFrame per file; later cells refer to these four names.
df = pd.read_json(matches_path)
df_events = pd.read_json(events_path)
df_teams = pd.read_json(teams_path)
df_players = pd.read_json(players_path)

# Data Prep

In [283]:
# Work out the forward distance of each pass
def difference_in_x(row):
    '''
    Change in x between the start and the end of a single event.

    inputs
    - row: a mapping (e.g. one row of df_events) with an 'eventName' value
      and a 'positions' list whose entries are dicts carrying an 'x' value.

    returns
    - end x minus start x for 'Pass' events (negative when the end x is
      smaller than the start x)
    - 0 for every other event, and for passes that do not record both a
      start and an end position.
    '''
    if row['eventName'] != 'Pass':
        return 0

    positions = row['positions']
    # A pass without both endpoints has no measurable gain. Treating it as 0
    # stops one malformed event from aborting the whole DataFrame.apply.
    if not isinstance(positions, (list, tuple)) or len(positions) < 2:
        return 0

    return positions[1]['x'] - positions[0]['x']
    
# Evaluate difference_in_x once per event row, then store the result
# alongside the events as the 'percent_x_gain' column.
x_gain_per_event = df_events.apply(difference_in_x, axis=1)
df_events['percent_x_gain'] = x_gain_per_event


In [284]:
# Expand the nested json data for players here.
# The expansion is guarded so this cell can be re-run: after the first run
# df_players no longer has a 'role' column, and expanding it again would
# raise KeyError.
if 'role' in df_players.columns:
    role_details = json_normalize(df_players['role'])
    df_players = pd.concat([df_players, role_details], axis=1, sort=False)

# Keep only the columns used downstream
# ('code3' presumably comes from the expanded role -- confirm against players.json).
df_players = df_players[['shortName','code3','wyId','birthDate','height','weight','foot']]

# Explore One Match 

- Liverpool v Brighton
- May 13th 2018

In [295]:
class Match():

    '''
    A class to store a single match.
    Intention is to make it a little easier to navigate analysing one match.

    inputs
    - match_id: the wyId of the match in question
    - df_matches, df_events, df_teams, df_players: the original datasets;
      only the data relating to this specific match is extracted.
      df_events must already carry a 'percent_x_gain' column.

    attributes
    - match_id: the id passed in
    - match: the row(s) of df_matches for this match (index reset)
    - events: the rows of df_events for this match (index reset)
    - home_team / away_team: team names parsed from the match label
    - homeid / awayid: wyId of each team, looked up by name in df_teams
    - team_data: flattened (json_normalize) view of the match's teamsData
    - home_lineup / away_lineup: one row per lineup entry, left-joined with
      df_players and with that player's summed percent_x_gain in this match

    raises
    - KeyError if match_id is not in df_matches, or a team name from the
      label is not in df_teams.
    '''

    def __init__(self, match_id, df_matches, df_events, df_teams,df_players):
        self.match_id = match_id
        self.match = df_matches[df_matches['wyId'] == match_id].reset_index()
        if self.match.empty:
            raise KeyError('No match with wyId {} in df_matches'.format(match_id))

        self.events = df_events[df_events['matchId'] == match_id].reset_index()
        print('Number of events in this match is {}'.format(len(self.events)))

        # Easiest to use the label (names of teams) to get home and away team:
        # the part before the first comma is read as "<home> - <away>".
        team_names = self.match['label'][0].split(',')[0].split(' - ')
        self.home_team = team_names[0]
        self.away_team = team_names[1]

        self.homeid = self._team_id(df_teams, self.home_team)
        self.awayid = self._team_id(df_teams, self.away_team)

        self.team_data = json_normalize(self.match['teamsData'][0])

        # Net % distance made up the pitch, summed per player over this match
        grouped_forward_distance = self.events.groupby('playerId', as_index=False).agg({'percent_x_gain':'sum'})

        # Create lineups for each team
        self.home_lineup = self._build_lineup(self.homeid, df_players, grouped_forward_distance)
        self.away_lineup = self._build_lineup(self.awayid, df_players, grouped_forward_distance)

    @staticmethod
    def _team_id(df_teams, team_name):
        '''Return the wyId of the first row of df_teams whose name is team_name.'''
        ids = df_teams.loc[df_teams['name'] == team_name, 'wyId']
        if ids.empty:
            raise KeyError('No team named {!r} in df_teams'.format(team_name))
        return ids.iloc[0]

    def _build_lineup(self, team_id, df_players, forward_distance):
        '''Return one team's lineup joined with player details and summed x gain.'''
        # The flattened teamsData columns are prefixed with the team id as a string.
        # Read from self.team_data: the previous code used an undefined global
        # `teams_data`, which raises NameError on a fresh kernel.
        lineup = json_normalize(self.team_data[str(team_id) + '.formation.lineup'][0])

        # Join with the player data so we have their names and characteristics.
        lineup = lineup.merge(df_players, how='left', left_on='playerId', right_on='wyId')

        # Left join: players with no events in this match keep a missing gain.
        return lineup.merge(forward_distance, how='left', on='playerId')
        
     

In [296]:
# wyId of the match explored in this section (Liverpool v Brighton, May 13th 2018)
EXAMPLE_MATCH_ID = 2500092
example_game = Match(EXAMPLE_MATCH_ID, df, df_events, df_teams, df_players)

Number of events in this match is 1703


In [298]:
# Away team's lineup: lineup stats joined with player details and summed percent_x_gain
example_game.away_lineup

Unnamed: 0,goals,ownGoals,playerId,redCards,yellowCards,shortName,code3,wyId,birthDate,height,weight,foot,percent_x_gain
0,0.0,2,120,0,0,J. Locadia,FWD,120,1993-11-07,185,78,right,8
1,0.0,2,61961,0,0,B. Kayal,MID,61961,1988-05-02,178,74,right,-23
2,0.0,2,8242,0,0,S. Duffy,DEF,8242,1992-01-01,193,76,right,82
3,,0,61390,0,0,M. Ryan,GKP,61390,1992-04-08,184,82,right,761
4,,0,9097,0,0,L. Dunk,DEF,9097,1991-11-21,192,88,right,225
5,,0,8086,0,0,D. Stephens,MID,8086,1989-12-12,185,71,right,287
6,,0,26495,0,0,G. Bong,DEF,26495,1988-04-25,187,74,left,362
7,,0,25950,0,0,A. Knockaert,MID,25950,1991-11-20,172,69,left,-17
8,,0,247248,0,0,S. March,MID,247248,1994-07-20,180,72,left,131
9,,0,466,0,0,D. Pr\u00f6pper,MID,466,1991-09-02,185,72,right,251


In [299]:
# Home team's lineup: lineup stats joined with player details and summed percent_x_gain
example_game.home_lineup

Unnamed: 0,goals,ownGoals,playerId,redCards,yellowCards,shortName,code3,wyId,birthDate,height,weight,foot,percent_x_gain
0,0.0,2,25747,0,0,S. Man\u00e9,FWD,25747,1992-04-10,175,75,right,-54
1,0.0,2,15808,0,0,Roberto Firmino,FWD,15808,1991-10-02,181,76,right,161
2,1.0,2,120353,0,0,Mohamed Salah,FWD,120353,1992-06-15,175,72,left,-34
3,1.0,2,25393,0,0,D. Lovren,DEF,25393,1989-07-05,188,84,right,558
4,1.0,2,286831,0,0,D. Solanke,FWD,286831,1997-09-14,185,80,right,10
5,,0,346101,0,0,T. Alexander-Arnold,DEF,346101,1998-10-07,175,69,right,244
6,1.0,2,222220,0,0,A. Robertson,DEF,222220,1994-03-11,178,63,left,89
7,,0,116,0,0,G. Wijnaldum,MID,116,1990-11-11,175,74,right,147
8,,0,7964,0,0,J. Henderson,MID,7964,1990-06-17,182,67,right,659
9,,0,15175,0,0,L. Karius,GKP,15175,1993-06-22,190,87,right,433
