In [1]:
import pandas as pd
import networkx as nx
print(nx.__version__)
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

from utils import *
from plot_utils import *
from metrics import *

import warnings
warnings.filterwarnings('ignore')

init_notebook_mode(connected=True)

2.5


# Load Dataset

In [2]:
data_download()

Downloading matches data
Downloading teams data
Downloading players data
Downloading competitions data
Downloading events data
Download completed


In [3]:
#specifica country, base è italy
match_id2match, match_id2events, player_id2player, competition_id2competition, team_id2team = load_public_dataset(data_folder=data_folder, tournament = 'Italy')

In [7]:
match_id = 2576335 # a single match in Italy
match_events = match_id2events[match_id]
match_label = match_id2match[match_id]['label']
            
match_df = pd.DataFrame(match_events)
match_df.head()

Unnamed: 0,eventId,subEventName,tags,playerId,positions,matchId,eventName,teamId,matchPeriod,eventSec,subEventId,id
0,8,Simple pass,[{'id': 1801}],3344,"[{'y': 50, 'x': 49}, {'y': 58, 'x': 38}]",2576335,Pass,3161,1H,2.41759,85,253668302
1,8,Simple pass,[{'id': 1801}],116349,"[{'y': 58, 'x': 38}, {'y': 91, 'x': 37}]",2576335,Pass,3161,1H,3.904412,85,253668303
2,8,Simple pass,[{'id': 1801}],135903,"[{'y': 91, 'x': 37}, {'y': 72, 'x': 34}]",2576335,Pass,3161,1H,6.484211,85,253668304
3,8,Simple pass,[{'id': 1801}],138408,"[{'y': 72, 'x': 34}, {'y': 14, 'x': 36}]",2576335,Pass,3161,1H,10.043835,85,253668306
4,8,Simple pass,[{'id': 1801}],21094,"[{'y': 14, 'x': 36}, {'y': 39, 'x': 30}]",2576335,Pass,3161,1H,14.03207,85,253668308


In [8]:
pass_df = match_df[match_df['eventName'] == 'Pass']

# Graph

In [10]:
def passing_networks(match_id=2576105):
    """
    Construct the passing networks of the teams in the match.
    
    Parameters
    ----------
    match_id : int, optional
        identifier of the match to plot
        
    Returns
    -------
    tuple
        the two constructed networks, as networkx objects.
    """
    
    # take the names of the two teams of the match
    match_label = match_id2match[match_id]['label']
    team1_name = match_label.split('-')[0].split(' ')[0]
    team2_name = match_label.split('-')[1].split(' ')[1].split(',')[0]
    
    # take all the events of the match
    match_events = []
    for event in match_id2events[match_id]:
        if event['eventName'] == 'Pass':
            match_events.append(event)

    match_events_df = pd.DataFrame(match_events)
    first_half_max_duration = np.max(match_events_df[match_events_df['matchPeriod'] == '1H']['eventSec'])

    # sum 1H time end to all the time in 2H
    for event in match_events:
        if event['matchPeriod'] == '2H':
            event['eventSec'] += first_half_max_duration
    
    team2pass2weight = defaultdict(lambda: defaultdict(int))
    send = []
    receive = []
    for event, next_event, next_next_event in zip(match_events, match_events[1:], match_events[2:]):
        try:
            if event['eventName'] == 'Pass' and ACCURATE_PASS in [tag['id'] for tag in event['tags']]:
                sender = player_id2player[event['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape')
                send.append(sender)
                # case of duel
                if next_event['eventName'] == 'Duel':
                    # if the next event of from a playero of the same tema
                    if next_event['teamId'] == event['teamId']:
                        receiver = player_id2player[next_event['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape')
                        team2pass2weight[team_id2team[event['teamId']]['name']][(sender, receiver)] += 1
                        receive.append(receive)
                    else:
                        receiver = player_id2player[next_next_event['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape')
                        team2pass2weight[team_id2team[event['teamId']]['name']][(sender, receiver)] += 1
                        receive.append(receive)
                else:  # any other event 
                    if next_event['teamId'] == event['teamId']:
                        receiver = player_id2player[next_event['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape')
                        team2pass2weight[team_id2team[event['teamId']]['name']][(sender, receiver)] += 1
                        receive.append(receive)
        except KeyError:
            pass
    # craete networkx graphs
    G1, G2 = nx.DiGraph(team=team1_name), nx.DiGraph(team=team2_name)
    for (sender, receiver), weight in team2pass2weight[team1_name].items():
        G1.add_edge(sender, receiver, weight=weight)
    for (sender, receiver), weight in team2pass2weight[team2_name].items():
        G2.add_edge(sender, receiver, weight=weight)    
    
    return G1, G2

In [None]:
def plot_passing_networks(G1, G2):
    """
    Plot the two passing networks in input.
    
    Parameters
    ----------
    G1 : networkx object
        the object representing the first network
        
    G2 : networkx object
        the object representing the second network
    """
    #POSIZIONI
    pos1 = nx.spring_layout(G1, 4)
    pos2 = nx.spring_layout(G2, 4)
    nome2degree = dict(G1.degree)
    nx.draw(G1, pos=pos1, nodelist=list(nome2degree.keys()), 
            node_size=[deg * 50 for deg in nome2degree.values()], 
            node_color='red', edge_color='black',
            with_labels=True, font_weight='bold', alpha=0.75)
    plt.show()
    
    nome2degree = dict(G2.degree)
    nx.draw(G2, pos=pos2, nodelist=list(nome2degree.keys()), 
            node_size=[deg * 50 for deg in nome2degree.values()],
            node_color='blue', edge_color='black',
            with_labels=True, font_weight='bold', alpha=0.75)
    plt.show()

In [None]:
G1, G2 = passing_networks(match_id=2576105)

In [None]:
plot_passing_networks(G1, G2)

In [52]:
team2pass2weight

defaultdict(<function __main__.<lambda>()>,
            {'Internazionale': defaultdict(int,
                         {('Rafinha', 'M. Vecino'): 5,
                          ('M. Vecino', 'João Cancelo'): 8,
                          ('João Cancelo', 'M. Škriniar'): 10,
                          ('M. Škriniar', "D. D'Ambrosio"): 7,
                          ("D. D'Ambrosio", 'João Miranda'): 10,
                          ('João Miranda', 'M. Škriniar'): 11,
                          ("D. D'Ambrosio", 'I. Perišić'): 9,
                          ('I. Perišić', 'M. Brozović'): 3,
                          ('M. Brozović', 'Rafinha'): 12,
                          ('Rafinha', "D. D'Ambrosio"): 7,
                          ("D. D'Ambrosio", 'M. Škriniar'): 2,
                          ('M. Škriniar', 'M. Vecino'): 5,
                          ('M. Vecino', 'M. Škriniar'): 5,
                          ('M. Škriniar', 'João Cancelo'): 13,
                          ('João Cancelo', 'A. Candreva'

In [17]:
    # take the names of the two teams of the match
    match_label = match_id2match[match_id]['label']
    team1_name = match_label.split('-')[0].split(' ')[0]
    team2_name = match_label.split('-')[1].split(' ')[1].split(',')[0]
    
    # take all the events of the match
    match_events = []
    for event in match_id2events[match_id]:
        if event['eventName'] == 'Pass':
            match_events.append(event)

    match_events_df = pd.DataFrame(match_events)
    first_half_max_duration = np.max(match_events_df[match_events_df['matchPeriod'] == '1H']['eventSec'])

    # sum 1H time end to all the time in 2H
    for event in match_events:
        if event['matchPeriod'] == '2H':
            event['eventSec'] += first_half_max_duration
    
    team2pass2weight = defaultdict(lambda: defaultdict(int))
    couple =[]
    for event, next_event, next_next_event in zip(match_events, match_events[1:], match_events[2:]):
        try:
            if event['eventName'] == 'Pass' and ACCURATE_PASS in [tag['id'] for tag in event['tags']]:
                sender = player_id2player[event['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape')
                send.append(sender)
                # case of duel
                if next_event['eventName'] == 'Duel':
                    # if the next event of from a playero of the same tema
                    if next_event['teamId'] == event['teamId']:
                        receiver = player_id2player[next_event['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape')
                        team2pass2weight[team_id2team[event['teamId']]['name']][(sender, receiver)] += 1
                    else:
                        receiver = player_id2player[next_next_event['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape')
                        team2pass2weight[team_id2team[event['teamId']]['name']][(sender, receiver)] += 1
                else:  # any other event 
                    if next_event['teamId'] == event['teamId']:
                        receiver = player_id2player[next_event['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape')
                        team2pass2weight[team_id2team[event['teamId']]['name']][(sender, receiver)] += 1
                couple.append((sender,receiver))
        except KeyError:
            pass

In [18]:
couple

[('Rafinha', 'M. Vecino'),
 ('M. Vecino', 'João Cancelo'),
 ('João Cancelo', 'M. Škriniar'),
 ('M. Škriniar', "D. D'Ambrosio"),
 ("D. D'Ambrosio", 'João Miranda'),
 ('João Miranda', 'M. Škriniar'),
 ('M. Škriniar', "D. D'Ambrosio"),
 ("D. D'Ambrosio", 'I. Perišić'),
 ('I. Perišić', 'M. Brozović'),
 ('M. Brozović', 'Rafinha'),
 ('Rafinha', "D. D'Ambrosio"),
 ("D. D'Ambrosio", 'M. Škriniar'),
 ('M. Škriniar', 'M. Vecino'),
 ('M. Vecino', 'M. Škriniar'),
 ('M. Škriniar', 'João Cancelo'),
 ('João Cancelo', 'A. Candreva'),
 ('A. Candreva', 'A. Candreva'),
 ('S. de Vrij', 'Ş. Radu'),
 ('Ş. Radu', 'S. Lulić'),
 ('Luiz Felipe', 'C. Immobile'),
 ('S. Handanovič', 'M. Brozović'),
 ('M. Brozović', 'M. Škriniar'),
 ('M. Škriniar', 'A. Candreva'),
 ('A. Candreva', 'M. Vecino'),
 ('M. Vecino', 'M. Škriniar'),
 ('M. Škriniar', 'M. Vecino'),
 ('M. Vecino', 'M. Brozović'),
 ('M. Brozović', 'M. Vecino'),
 ('M. Vecino', 'M. Brozović'),
 ('M. Brozović', 'João Miranda'),
 ('João Miranda', 'M. Vecino'),
 ('

In [28]:
count = {}
for i in couple:
    if i in count:
        count[i] += 1
    else:
        count[i] = 1

{('Rafinha', 'M. Vecino'): 5, ('M. Vecino', 'João Cancelo'): 8, ('João Cancelo', 'M. Škriniar'): 10, ('M. Škriniar', "D. D'Ambrosio"): 7, ("D. D'Ambrosio", 'João Miranda'): 10, ('João Miranda', 'M. Škriniar'): 11, ("D. D'Ambrosio", 'I. Perišić'): 9, ('I. Perišić', 'M. Brozović'): 3, ('M. Brozović', 'Rafinha'): 12, ('Rafinha', "D. D'Ambrosio"): 7, ("D. D'Ambrosio", 'M. Škriniar'): 2, ('M. Škriniar', 'M. Vecino'): 5, ('M. Vecino', 'M. Škriniar'): 5, ('M. Škriniar', 'João Cancelo'): 13, ('João Cancelo', 'A. Candreva'): 6, ('A. Candreva', 'A. Candreva'): 3, ('S. de Vrij', 'Ş. Radu'): 3, ('Ş. Radu', 'S. Lulić'): 5, ('Luiz Felipe', 'C. Immobile'): 2, ('S. Handanovič', 'M. Brozović'): 2, ('M. Brozović', 'M. Škriniar'): 3, ('M. Škriniar', 'A. Candreva'): 1, ('A. Candreva', 'M. Vecino'): 2, ('M. Vecino', 'M. Brozović'): 9, ('M. Brozović', 'M. Vecino'): 11, ('M. Brozović', 'João Miranda'): 5, ('João Miranda', 'M. Vecino'): 10, ('Rafinha', 'M. Brozović'): 12, ('M. Brozović', "D. D'Ambrosio"): 9, 

In [43]:
df = pd.DataFrame(count, index=['count'])

In [44]:
df

Unnamed: 0_level_0,Rafinha,M. Vecino,João Cancelo,M. Škriniar,D. D'Ambrosio,João Miranda,D. D'Ambrosio,I. Perišić,M. Brozović,Rafinha,...,Felipe Anderson,Eder,Luiz Felipe,Nani,A. Marušić,Nani,J. Lukaku,I. Perišić,Bastos,Nani
Unnamed: 0_level_1,M. Vecino,João Cancelo,M. Škriniar,D. D'Ambrosio,João Miranda,M. Škriniar,I. Perišić,M. Brozović,Rafinha,D. D'Ambrosio,...,J. Lukaku,M. Icardi,Nani,A. Marušić,Nani,J. Lukaku,A. Marušić,A. Marušić,Nani,Luiz Felipe
count,5,8,10,7,10,11,9,3,12,7,...,1,1,1,1,1,1,1,1,1,1


In [45]:
df = df.transpose()

In [46]:
df['pair'] = df.index

In [48]:
df.reset_index(drop=True, inplace=True)

In [51]:
df

Unnamed: 0,count,pair
0,5,"(Rafinha, M. Vecino)"
1,8,"(M. Vecino, João Cancelo)"
2,10,"(João Cancelo, M. Škriniar)"
3,7,"(M. Škriniar, D. D'Ambrosio)"
4,10,"(D. D'Ambrosio, João Miranda)"
...,...,...
218,1,"(Nani, J. Lukaku)"
219,1,"(J. Lukaku, A. Marušić)"
220,1,"(I. Perišić, A. Marušić)"
221,1,"(Bastos, Nani)"
