In [1]:
import json
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import copy

In [2]:
from utils_football import *

In [3]:
# Load the raw event records for the Spanish league.
# The encoding is passed explicitly: without it open() uses the platform's
# locale encoding, so the same JSON file could be read differently (or fail)
# from one machine to another.
with open("events/events_Spain.json", encoding="utf-8") as events_file:
    data_spain = json.load(events_file)

In [4]:
# Load the common player and team files.
# The encoding is passed explicitly so the read does not depend on the
# platform's locale encoding.
with open("players.json", encoding="utf-8") as players_file:
    players = json.load(players_file)
with open("teams.json", encoding="utf-8") as teams_file:
    teams = json.load(teams_file)

In [5]:
# Read the two CSV lookup tables: tag names first, then event names.
tags_name, events_name = (
    pd.read_csv(csv_path) for csv_path in ("tags2name.csv", "eventid2name.csv")
)

In [6]:
# Reduce events_name to its distinct (event, event_label) pairs, indexed by event.
# groupby().size() is used only to collapse repeated pairs; the count column it
# produces is discarded by the column selection that follows.
events_label = (
    events_name
    .groupby(['event', 'event_label'])
    .size()
    .reset_index()
    .loc[:, ['event', 'event_label']]
    .set_index("event")
)

In [7]:
# Build teams_df: indexed by wyId, with the 'name' column renamed to 'team'.
teams_obeject = utils(teams)
teams_df = (
    teams_obeject.creat_df()
    .set_index("wyId")
    .rename(columns = {'name': 'team'})
)
# Some team names did not display correctly as loaded (presumably they carry
# literal \uXXXX escape sequences -- TODO confirm), so each name is decoded with
# 'unicode_escape'. The name is first encoded to ASCII with 'backslashreplace',
# which turns any genuine non-ASCII character into an escape too; a plain
# .encode() (UTF-8) would leave multi-byte sequences that 'unicode_escape'
# then reads back as Latin-1, garbling those characters.
teams_df['team'] = [
    name.encode('ascii', 'backslashreplace').decode('unicode_escape')
    for name in teams_df['team']
]

In [8]:
# Build players_df: indexed by wyId, with the 'shortName' column renamed to 'player'.
players_obeject = utils(players)
players_df = (
    players_obeject.creat_df()
    .set_index("wyId")
    .rename(columns = {"shortName": "player"})
)
# Some player names did not display correctly as loaded (presumably they carry
# literal \uXXXX escape sequences -- TODO confirm), so each name is decoded with
# 'unicode_escape'. The name is first encoded to ASCII with 'backslashreplace',
# which turns any genuine non-ASCII character into an escape too; a plain
# .encode() (UTF-8) would leave multi-byte sequences that 'unicode_escape'
# then reads back as Latin-1, garbling those characters.
players_df['player'] = [
    name.encode('ascii', 'backslashreplace').decode('unicode_escape')
    for name in players_df['player']
]

In [10]:
# Turn the raw Spanish-league event records into a usable dataframe.
spain_object = events(
    data_spain,
    'Spain',
    tags_name,
    events_label,
)
# output = True so the processed data is also kept in its own variable, in case
# it needs to be worked with separately.
spain_event_data = spain_object.process(
    teams_df,
    players_df,
    output = True,
)

In [54]:
# Query team-level data. Each query below produces dataframes that are stored
# in a list, which is later combined with concat_df().
# The event filter is kept in `event_names` rather than `events`: assigning a
# list to `events` would overwrite the `events` callable used earlier to build
# spain_object, so re-running that earlier cell would fail.
by = 'team'
# Attacking tags, counted over passes, shots and free kicks.
tags = ['Goal', 'assist', 'keyPass', 'opportunity', 'counter_attack']
event_names = ['Pass', 'Shot', 'Free Kick']
spain_object.query_eventsub_tag(tags, events = event_names, subevents = None, by = by, output = False, combine = True)
# Duel outcome tags.
tags = ['lost', 'won', 'sliding_tackle']
event_names = ['Duel']
spain_object.query_eventsub_tag(tags, events = event_names, subevents = None, by = by, output = False, combine = False)
# Accurate corners (filtered by subevent instead of event).
tags = ['accurate']
subevents = ['Corner']
spain_object.query_eventsub_tag(tags, events = None, subevents = subevents, by = by, output = False, combine = False)
# Subevent query; `by` is passed explicitly so this call uses the same grouping
# as the queries above instead of relying on the method's default.
spain_object.query_eventOrSub(['Air duel', 'Cross', 'Free Kick'], by = by, output = False, level = 'subevent')

In [55]:
# Combine the list of dataframes produced by the team-level queries in the
# previous step into a single dataframe.
spain_team_data = spain_object.concat_df() 

In [56]:
# Preview the first five rows of the combined team-level dataframe.
spain_team_data.head(5)

Unnamed: 0_level_0,total_assist,total_opportunity,total_keyPass,total_counter_attack,total_Goal,Duel_lost,Duel_won,Duel_sliding_tackle,Corner_accurate,total_Air duel,total_Cross,total_Free Kick
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Athletic Club,24,306,154,222,39,3447,3428,223,121,1803,720,462
Atlético Madrid,35,261,82,371,57,3500,3498,217,116,1529,480,415
Barcelona,67,469,167,328,96,2694,3305,185,158,889,447,536
Celta de Vigo,37,315,115,412,58,3166,3417,147,116,1522,605,535
Deportivo Alavés,27,238,84,231,40,3478,3311,235,77,1706,481,452


In [64]:
# The query list must be cleared before querying at player level. Do this
# whenever the queries switch from 'team' to 'player' or vice versa.
spain_object.refresh_query_list() 

In [65]:
# Query player-level data. Each query below produces dataframes that are stored
# in a list, which is later combined with concat_df().
# The event filter is kept in `event_names` rather than `events`: assigning a
# list to `events` would overwrite the `events` callable used earlier to build
# spain_object, so re-running that earlier cell would fail.
by = 'player'
# Attacking tags, counted over passes, shots and free kicks.
tags = ['Goal', 'assist', 'keyPass', 'opportunity', 'counter_attack']
event_names = ['Pass', 'Shot', 'Free Kick']
spain_object.query_eventsub_tag(tags, events = event_names, subevents = None, by = by, output = False, combine = True)
# Duel outcome tags.
tags = ['lost', 'won', 'sliding_tackle']
event_names = ['Duel']
spain_object.query_eventsub_tag(tags, events = event_names, subevents = None, by = by, output = False, combine = False)
# Accurate corners (filtered by subevent instead of event).
tags = ['accurate']
subevents = ['Corner']
spain_object.query_eventsub_tag(tags, events = None, subevents = subevents, by = by, output = False, combine = False)
# Subevent query, grouped by player like the queries above.
spain_object.query_eventOrSub(['Air duel', 'Cross', 'Free Kick'], by = by, output = False, level = 'subevent')

In [66]:
# Combine the list of dataframes produced by the player-level queries in the
# previous step into a single dataframe.
spain_player_data = spain_object.concat_df()  

In [69]:
# Preview the first ten rows of the combined player-level dataframe.
spain_player_data.head(10)

Unnamed: 0_level_0,total_assist,total_opportunity,total_keyPass,total_counter_attack,total_Goal,Duel_lost,Duel_won,Duel_sliding_tackle,Corner_accurate,total_Air duel,total_Cross,total_Free Kick
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
A. Griezmann,9,57,16,49,19,215,183,30,10,92,27,6
A. Guardado,6,19,18,22,2,185,164,18,33,54,49,37
A. Halilović,1,28,4,19,2,67,137,2,19,17,50,8
A. Januzaj,5,42,12,19,3,195,271,3,23,22,50,7
A. Szymanowski,2,11,6,17,2,92,73,4,5,14,36,1
Aarón,0,3,16,15,0,206,168,18,7,72,129,12
Aday,0,21,11,6,2,135,226,4,2,68,137,6
Adrián,0,21,4,19,3,255,215,23,2,192,14,15
Ager Aketxe,0,1,1,0,0,5,5,0,0,1,3,1
Aleix García,0,6,5,5,1,53,54,2,22,15,8,16
