In [1]:
import sys
sys.path.append('../../src/')

import pandas as pd
import numpy as np
import networkx as nx
import scipy.sparse as sp
from stellargraph import StellarDiGraph

from extract_network import extract_network

2021-12-15 11:48:31.226169: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-12-15 11:48:35.450506: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 12966 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:18:00.0, compute capability: 7.5
2021-12-15 11:48:35.452112: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13524 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:19:00.0, compute capability: 7.5
2021-12-15 11:48:35.453548: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 1352

In [2]:
# Directory holding the input CSV; the same prefix is reused when the
# adjacency matrix and node list are written out later in the notebook.
folder_name = './'
# header=None: the first line of the file is already a match record, so pandas
# labels the four columns 0..3 instead of consuming it as a header.
data = pd.read_csv(folder_name + 'England_2009_2010.csv', header=None)

In [3]:
# Inspect the loaded results table: columns 0 and 1 hold team names, columns 2
# and 3 hold integer scores (presumably home team, away team, home goals,
# away goals -- confirm against the CSV's provenance).
data

Unnamed: 0,0,1,2,3
0,Aston Villa,Wigan,0,2
1,Blackburn,Man City,0,2
2,Bolton,Sunderland,0,1
3,Chelsea,Hull,2,1
4,Everton,Arsenal,1,6
...,...,...,...,...
375,Everton,Portsmouth,1,0
376,Hull,Liverpool,0,0
377,Man United,Stoke,4,0
378,West Ham,Man City,1,1


In [4]:
# Independent (deep) copy of the results, to be relabelled into the mirrored
# view of every match without touching `data`.
data2 = data.copy(deep=True)

In [5]:
# Swap the labels pairwise (0<->1, 2<->3). Only the labels change; when the
# frame is later aligned with `data` by label, each row reads as the same
# match seen from the other team's side.
data2.columns = pd.Index([1, 0, 3, 2])

In [6]:
# Shift the row labels to len(data) .. 2*len(data)-1 so they do not collide
# with the labels of `data` once the two frames are stacked.
data2.index = np.arange(len(data)) + len(data)

In [7]:
# Inspect the mirrored copy: same cell values as `data`, but with the column
# labels swapped pairwise and the index shifted past the original rows.
data2

Unnamed: 0,1,0,3,2
380,Aston Villa,Wigan,0,2
381,Blackburn,Man City,0,2
382,Bolton,Sunderland,0,1
383,Chelsea,Hull,2,1
384,Everton,Arsenal,1,6
...,...,...,...,...
755,Everton,Portsmouth,1,0
756,Hull,Liverpool,0,0
757,Man United,Stoke,4,0
758,West Ham,Man City,1,1


In [87]:
# Stack the original rows on top of the mirrored rows. concat aligns on column
# labels, so the mirrored rows end up with the two teams (and scores) swapped.
df = pd.concat([data, data2], axis=0)

In [88]:
# Signed score difference from the point of view of the team in column 0:
# positive when that team out-scored the team in column 1.
df['wins'] = df[2].sub(df[3])

In [89]:
# Inspect the stacked table: original rows followed by their mirrored
# counterparts, each with its signed score difference in 'wins'.
df

Unnamed: 0,0,1,2,3,wins
0,Aston Villa,Wigan,0,2,-2
1,Blackburn,Man City,0,2,-2
2,Bolton,Sunderland,0,1,-1
3,Chelsea,Hull,2,1,1
4,Everton,Arsenal,1,6,-5
...,...,...,...,...,...
755,Portsmouth,Everton,0,1,-1
756,Liverpool,Hull,0,0,0
757,Stoke,Man United,0,4,-4
758,Man City,West Ham,1,1,0


In [90]:
# Most negative score difference. Because every match appears once from each
# side, this should be the negation of the largest difference.
df['wins'].min()

-8

In [91]:
# Largest score difference; expected to mirror the minimum with opposite sign.
df['wins'].max()

8

In [92]:
# Discard draws and the losing-side view of each match, leaving one row per
# decisive match, oriented winner -> loser. The row labels are unique at this
# point, so masking is equivalent to dropping by label.
df = df[~(df['wins'] <= 0)]

In [93]:
# Sanity check after filtering: the smallest remaining difference must be
# strictly positive.
df['wins'].min()

1

In [94]:
# Keep only what an edge list needs: the two teams and the winning margin.
df = df.loc[:, [0, 1, 'wins']]

In [95]:
# Inspect the reduced table: winner (0), loser (1) and winning margin ('wins').
df

Unnamed: 0,0,1,wins
3,Chelsea,Hull,1
6,Stoke,Burnley,2
8,Man United,Birmingham,1
9,Tottenham,Liverpool,1
12,Birmingham,Portsmouth,1
...,...,...,...
744,Chelsea,Liverpool,2
745,Man United,Sunderland,1
748,Stoke,Fulham,1
749,Tottenham,Man City,1


In [96]:
# Give the columns the names used for the edge list handed to StellarDiGraph:
# winner -> 'source', loser -> 'target', winning margin -> 'weight'.
df = df.rename(columns={0: 'source', 1: 'target', 'wins': 'weight'})

In [97]:
# Build a directed graph from the edge list. The frame carries 'source',
# 'target' and 'weight' columns; the recorded summary reports edge weights in
# [1, 8], so the 'weight' column is picked up as the edge weight.
graph = StellarDiGraph(edges=df)

In [98]:
# Print StellarGraph's text summary (node/edge counts and edge-weight stats).
print(graph.info())

StellarDiGraph: Directed multigraph
 Nodes: 20, Edges: 284

 Node types:
  default: [20]
    Features: none
    Edge types: default-default->default

 Edge types:
    default-default->default: [284]
        Weights: range=[1, 8], mean=1.96831, std=1.262
        Features: none


In [99]:
# Weighted adjacency matrix of the directed multigraph. The recorded outputs
# show 284 edges but only 215 stored entries, and a maximum entry of 11 (above
# the largest single edge weight of 8), so parallel edges between the same
# ordered pair appear to be summed -- confirm against the StellarGraph docs.
adj = graph.to_adjacency_matrix(weighted=True)

In [100]:
# Show the sparse matrix summary (shape, dtype, number of stored entries).
adj

<20x20 sparse matrix of type '<class 'numpy.int64'>'
	with 215 stored elements in Compressed Sparse Row format>

In [101]:
# Persist the sparse adjacency matrix next to the input data. save_npz appends
# the '.npz' extension when the name lacks it, so this writes 'adj.npz' under
# folder_name.
sp.save_npz(folder_name+'adj', adj)

In [102]:
# Difference between the matrix and its transpose, displayed only (not stored).
# Any stored nonzero entry here means the adjacency matrix is not symmetric,
# i.e. the graph is genuinely directed.
adj-adj.transpose()

<20x20 sparse matrix of type '<class 'numpy.int64'>'
	with 328 stored elements in Compressed Sparse Row format>

In [103]:
# Largest entry of the adjacency matrix (largest accumulated winning margin
# of one team over another).
adj.max()

11

In [104]:
# Smallest entry; 0 is expected because absent edges are implicit zeros and
# every stored weight is positive.
adj.min()

0

In [105]:
# Re-display the final edge list ('source', 'target', 'weight') for reference.
df

Unnamed: 0,source,target,weight
3,Chelsea,Hull,1
6,Stoke,Burnley,2
8,Man United,Birmingham,1
9,Tottenham,Liverpool,1
12,Birmingham,Portsmouth,1
...,...,...,...
744,Chelsea,Liverpool,2
745,Man United,Sunderland,1
748,Stoke,Fulham,1
749,Tottenham,Man City,1


In [106]:
# Materialise the graph's node identifiers (team names) as a plain list.
# NOTE(review): presumably in the same order as the rows/columns of the
# adjacency matrix -- confirm against the StellarGraph docs.
node_list = [node for node in graph.nodes()]

In [107]:
# Persist the node identifiers alongside the adjacency matrix. np.save appends
# '.npy' when the name lacks it, so this writes 'node_list.npy' under
# folder_name.
np.save(folder_name+'node_list', node_list)

In [2]:
def _win_margin_edges(data):
    """Convert a four-column results table into a winner -> loser edge list.

    Parameters
    ----------
    data : pandas.DataFrame
        Exactly four columns, read positionally as: first team, second team,
        first team's score, second team's score. The column labels themselves
        are ignored.

    Returns
    -------
    pandas.DataFrame
        Columns 'source' (winner), 'target' (loser) and 'weight' (positive
        score difference), one row per decisive match. Draws are omitted.

    Raises
    ------
    AssertionError
        If the table is empty or contains no decisive match.
    ValueError
        Raised by pandas if `data` does not have exactly four columns.
    """
    matches = data.copy()
    # Normalise labels so frames loaded with a header row work as well.
    matches.columns = [0, 1, 2, 3]

    # Mirrored view: only the labels are swapped, so after label alignment in
    # concat each mirrored row is the same match seen from the other side.
    mirrored = matches.copy()
    mirrored.columns = [1, 0, 3, 2]
    mirrored.index = np.arange(len(matches), 2 * len(matches))

    both = pd.concat([matches, mirrored])
    both['wins'] = both[2] - both[3]
    assert both['wins'].max() + both['wins'].min() == 0, \
        'mirrored score differences are not symmetric (is the table empty?)'

    # Boolean mask rather than drop-by-label: it stays correct even if the
    # caller's frame has a non-default index, and it excludes rows whose score
    # difference is missing (NaN) instead of passing them on as edge weights.
    winners = both[both['wins'] > 0]
    assert winners['wins'].min() > 0, 'no decisive match found in the results'

    edges = winners[[0, 1, 'wins']]
    edges.columns = ['source', 'target', 'weight']
    return edges


def football_prepare(dataset, data=None, lowest_degree=1):
    """Build and save the directed win-margin network for one season.

    Each decisive match becomes an edge winner -> loser weighted by the score
    difference. The resulting weighted adjacency matrix is passed through
    `extract_network` and written to ``<dataset>adj.npz``; the matching node
    list is written to ``<dataset>node_list.npy`` (both in the working
    directory). Progress information is printed along the way.

    Parameters
    ----------
    dataset : str
        Season identifier. Used as the CSV base name (``./<dataset>.csv``)
        when `data` is not given, and as the prefix of the output files.
    data : pandas.DataFrame, optional
        Already-loaded results with four columns (team, team, score, score).
        When omitted, ``./<dataset>.csv`` is read with ``header=None``.
    lowest_degree : int, optional
        Forwarded unchanged to `extract_network`.
        NOTE(review): presumably the minimum degree a node needs to be kept --
        confirm against src/extract_network.

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If the results contain no decisive match, or if the adjacency matrix
        turns out to be symmetric (no directional information).
    """
    if data is None:
        data = pd.read_csv('./' + dataset + '.csv', header=None)

    df = _win_margin_edges(data)

    graph = StellarDiGraph(edges=df)
    print(graph.info())

    adj = graph.to_adjacency_matrix(weighted=True)
    # A symmetric matrix would mean the network carries no direction at all.
    assert np.abs(adj - adj.transpose()).sum() != 0, \
        'adjacency matrix is symmetric; the network has no direction'

    node_list = list(graph.nodes())
    # Node count before and after extract_network, for a quick visual check.
    print(adj.shape[0])
    adj, node_list = extract_network(
        adj, np.array(node_list), lowest_degree=lowest_degree)
    print(adj.shape[0])

    sp.save_npz(dataset + 'adj', adj)
    np.save(dataset + 'node_list', node_list)
    print(dataset + ' done!')

In [3]:
# Process the five seasons 2009-10 through 2013-14, whose CSV files are all
# named England_<start>_<end>.
for year in range(2009, 2014):
    dataset = f'England_{year}_{year + 1}'
    football_prepare(dataset)

20
20
England_2009_2010 done!
20
20
England_2010_2011 done!
20
20
England_2011_2012 done!
20
20
England_2012_2013 done!
20
20
England_2013_2014 done!


In [4]:
# The 2014-15 file has a different layout from the earlier seasons: it has a
# header row and many extra columns (odds etc.), so it is read with pandas'
# default header handling instead of header=None.
# NOTE(review): the recorded output only runs to early December 2014, so this
# looks like a partial season -- confirm that is intended.
dataset = 'England_2014_2015'
data = pd.read_csv('./'+dataset+'.csv')
data

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,...,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA
0,E0,16/08/14,Arsenal,Crystal Palace,2,1,H,1,1,D,...,1.77,1.72,2.26,2.10,24,-1.50,1.81,1.78,2.20,2.10
1,E0,16/08/14,Leicester,Everton,2,2,D,1,2,A,...,2.10,2.00,1.90,1.80,22,0.25,1.88,1.85,2.10,2.02
2,E0,16/08/14,Man United,Swansea,1,2,A,0,1,A,...,1.77,1.71,2.30,2.13,25,-1.50,2.18,2.08,1.87,1.79
3,E0,16/08/14,QPR,Hull,0,1,A,0,0,D,...,2.52,2.36,1.65,1.58,24,0.00,1.80,1.73,2.25,2.14
4,E0,16/08/14,Stoke,Aston Villa,0,1,A,0,0,D,...,2.42,2.31,1.67,1.60,23,-0.50,1.95,1.91,2.02,1.96
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,E0,06/12/14,Stoke,Arsenal,3,2,H,3,0,H,...,2.10,2.00,1.87,1.80,27,0.50,1.93,1.90,2.02,1.97
146,E0,06/12/14,Tottenham,Crystal Palace,0,0,D,0,0,D,...,1.89,1.77,2.12,2.04,31,-1.00,2.12,2.05,1.86,1.82
147,E0,07/12/14,Aston Villa,Leicester,2,1,H,1,1,D,...,2.15,2.06,1.83,1.75,27,-0.25,2.08,2.02,1.90,1.85
148,E0,07/12/14,West Ham,Swansea,3,1,H,1,1,D,...,2.13,2.02,1.85,1.78,27,-0.25,2.08,2.01,1.90,1.86


In [5]:
# Reduce the wide table to the four fields the pipeline needs: home team,
# away team, full-time home goals and full-time away goals.
data = data.loc[:, ['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]

In [6]:
# Inspect the reduced table (HomeTeam, AwayTeam, FTHG, FTAG).
data

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG
0,Arsenal,Crystal Palace,2,1
1,Leicester,Everton,2,2
2,Man United,Swansea,1,2
3,QPR,Hull,0,1
4,Stoke,Aston Villa,0,1
...,...,...,...,...
145,Stoke,Arsenal,3,2
146,Tottenham,Crystal Palace,0,0
147,Aston Villa,Leicester,2,1
148,West Ham,Swansea,3,1


In [7]:
# Relabel the columns 0..3 so the frame matches the header-less layout of the
# earlier seasons (team, team, score, score).
data.columns = list(range(4))

In [8]:
# Inspect the relabelled table; it now has the same 0..3 column layout as the
# earlier seasons' files.
data

Unnamed: 0,0,1,2,3
0,Arsenal,Crystal Palace,2,1
1,Leicester,Everton,2,2
2,Man United,Swansea,1,2
3,QPR,Hull,0,1
4,Stoke,Aston Villa,0,1
...,...,...,...,...
145,Stoke,Arsenal,3,2
146,Tottenham,Crystal Palace,0,0
147,Aston Villa,Leicester,2,1
148,West Ham,Swansea,3,1


In [9]:
# Same pipeline as football_prepare, inlined because this season's frame was
# loaded and relabelled by hand in the cells above.

# Mirrored view of every match: swap the labels pairwise and shift the index
# past the original rows so the stacked frame has unique row labels.
data2 = data.copy(deep=True)
data2.columns = [1, 0, 3, 2]
data2.index = np.arange(len(data)) + len(data)

# Stack both views and compute the signed score difference for the team in
# column 0; the two extremes must cancel because every match appears twice.
df = pd.concat([data, data2], axis=0)
df['wins'] = df[2].sub(df[3])
assert df['wins'].max() + df['wins'].min() == 0

# Keep only the winner's view of decisive matches (row labels are unique, so
# masking is equivalent to dropping by label).
df = df[~(df['wins'] <= 0)]
assert df['wins'].min() > 0

# Shape the result as an edge list: winner -> loser, weighted by the margin.
df = df.loc[:, [0, 1, 'wins']]
df = df.rename(columns={0: 'source', 1: 'target', 'wins': 'weight'})

graph = StellarDiGraph(edges=df)
print(graph.info())

adj = graph.to_adjacency_matrix(weighted=True)
# The matrix must not be symmetric, otherwise the network has no direction.
assert np.abs(adj - adj.transpose()).sum() != 0

node_list = [node for node in graph.nodes()]
# Node count before and after extract_network.
print(adj.shape[0])
adj, node_list = extract_network(adj, np.array(node_list), lowest_degree=1)
print(adj.shape[0])

# Outputs land in the working directory, prefixed with the season name.
sp.save_npz(dataset + 'adj', adj)
np.save(dataset + 'node_list', node_list)
print(dataset + ' done!')

20
20
England_2014_2015 done!
