In [186]:
import torch
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


In [187]:
import pandas as pd
import torch
import dgl
# Load data
player_df = pd.read_csv("data/fifa21/tbl_player.csv")
skill_df = pd.read_csv("data/fifa21/tbl_player_skill.csv")
team_df = pd.read_csv("data/fifa21/tbl_team.csv")

# Extract subsets
player_df = player_df[["int_player_id", "str_player_name", "str_positions", "int_overall_rating", "int_team_id"]]
skill_df = skill_df[["int_player_id", "int_long_passing", "int_ball_control", "int_dribbling"]]
team_df = team_df[["int_team_id", "str_team_name", "int_overall"]]

# Merge data
player_df = player_df.merge(skill_df, on='int_player_id')
fifa_df = player_df.merge(team_df, on='int_team_id')

# Sort dataframe
fifa_df = fifa_df.sort_values(by="int_overall_rating", ascending=False)
print("Players: ", fifa_df.shape[0])
fifa_df.head()

Players:  18767


Unnamed: 0,int_player_id,str_player_name,str_positions,int_overall_rating,int_team_id,int_long_passing,int_ball_control,int_dribbling,str_team_name,int_overall
0,1,Lionel Andrés Messi Cuccittini,"RW, ST, CF",93,5.0,91,96,96,FC Barcelona,84
33,2,Cristiano Ronaldo dos Santos Aveiro,"ST, LW",92,6.0,77,92,88,Juventus,83
57,3,Jan Oblak,GK,91,8.0,40,30,12,Atlético Madrid,83
121,5,Neymar da Silva Santos Júnior,"LW, CAM",91,7.0,81,95,95,Paris Saint-Germain,83
89,4,Kevin De Bruyne,"CAM, CM",91,2.0,93,92,88,Manchester City,85


In [188]:
# Make sure that we have no duplicate nodes
max(fifa_df["int_player_id"].value_counts())

1

In [189]:
# Sort to define the order of nodes
sorted_df = fifa_df.sort_values(by="int_player_id")
# Select node features
node_features = sorted_df[["str_positions", "int_long_passing", "int_ball_control", "int_dribbling"]]
# Convert non-numeric columns
pd.set_option('mode.chained_assignment', None)
positions = node_features["str_positions"].str.split(",", expand=True)
node_features["first_position"] = positions[0]
# One-hot encoding
node_features = pd.concat([node_features, pd.get_dummies(node_features["first_position"])], axis=1, join='inner')
node_features.drop(["str_positions", "first_position"], axis=1, inplace=True)
node_features.head() 

Unnamed: 0,int_long_passing,int_ball_control,int_dribbling,CAM,CB,CDM,CF,CM,GK,LB,LM,LW,LWB,RB,RM,RW,RWB,ST
0,91,96,96,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
33,77,92,88,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
57,40,30,12,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
89,93,92,88,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
121,81,95,95,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0


In [190]:
# Convert to numpy
x = node_features.to_numpy()
x # [num_nodes x num_features]

array([[91, 96, 96, ...,  1,  0,  0],
       [77, 92, 88, ...,  0,  0,  1],
       [40, 30, 12, ...,  0,  0,  0],
       ...,
       [29, 42, 27, ...,  0,  0,  0],
       [50, 48, 46, ...,  0,  0,  0],
       [47, 43, 45, ...,  0,  0,  0]])

In [191]:
# Sort to define the order of nodes
sorted_df = fifa_df.sort_values(by="int_player_id")
# Select node features
labels = sorted_df[["int_overall"]]
labels.head()

Unnamed: 0,int_overall
0,84
33,83
57,83
89,85
121,83


In [192]:
# Convert to numpy
y = labels.to_numpy()
y.shape # [num_nodes, 1] --> node regression
y

array([[84],
       [83],
       [83],
       ...,
       [67],
       [62],
       [67]])

In [193]:
# Remap player IDs
fifa_df["int_player_id"] = fifa_df.reset_index().index

In [194]:
# This tells us how many players per team we have to connect
fifa_df["str_team_name"].value_counts()

Everton                   36
Valencia CF               34
FC Nantes                 34
Villarreal CF             34
Real Valladolid CF        34
                          ..
Wellington Phoenix        19
Central Coast Mariners    19
Melbourne Victory         19
Brisbane Roar             19
Adelaide United           19
Name: str_team_name, Length: 681, dtype: int64

In [195]:
import itertools
import numpy as np

teams = fifa_df["str_team_name"].unique()
all_edges = np.array([], dtype=np.int32).reshape((0, 2))
for team in teams:
    team_df = fifa_df[fifa_df["str_team_name"] == team]
    players = team_df["int_player_id"].values
    # Build all combinations, as all players are connected
    permutations = list(itertools.combinations(players, 2))
    edges_source = [e[0] for e in permutations]
    edges_target = [e[1] for e in permutations]
    team_edges = np.column_stack([edges_source, edges_target])
    all_edges = np.vstack([all_edges, team_edges])
# Convert to Pytorch Geometric format
edge_index = all_edges.transpose()
edge_index # [2, num_edges]
test = torch.from_numpy(edge_index)
edges_src = test[0]
edges_dst = test[1]


In [196]:
## Creating DGLGRAPH

from models import GCN


graph = dgl.graph((edges_src, edges_dst), num_nodes = fifa_df.shape[0])
#isolated_nodes = ((graph.in_degrees() == 0) & (graph.out_degrees() == 0)).nonzero().squeeze(1)
#graph = dgl.remove_nodes(graph,isolated_nodes)
graph.ndata['feat'] = torch.from_numpy(x).float()
graph.ndata['label'] = torch.from_numpy(sorted_df[["int_overall"]].to_numpy()).float()
n_nodes = fifa_df.shape[0]

#TRAINING 0.6, VALIDATION 0.2, TESTING 0.2
n_train = int(n_nodes * 0.6)
n_val = int(n_nodes * 0.2)
n_test = int(n_nodes * 0.2)
train_mask = torch.zeros(n_nodes, dtype=torch.bool)
val_mask = torch.zeros(n_nodes, dtype=torch.bool)
test_mask = torch.zeros(n_nodes, dtype=torch.bool)
train_mask[:n_train] = True
val_mask[n_train : n_train + n_val] = True
test_mask[n_train + n_val :] = True
graph.ndata["train_mask"] = train_mask
graph.ndata["val_mask"] = val_mask
graph.ndata["test_mask"] = test_mask

graph = dgl.add_self_loop(graph)

#features for first layer
in_features = graph.ndata['feat'].shape[1]

#output only one because is a regression task
out_features = 1 

hidden_layout_size = 32

model = GCN(graph.ndata['feat'].shape[1], hidden_layout_size, out_features)
print(graph.ndata['feat'], graph.ndata['feat'].dtype)


tensor([[91., 96., 96.,  ...,  1.,  0.,  0.],
        [77., 92., 88.,  ...,  0.,  0.,  1.],
        [40., 30., 12.,  ...,  0.,  0.,  0.],
        ...,
        [29., 42., 27.,  ...,  0.,  0.,  0.],
        [50., 48., 46.,  ...,  0.,  0.,  0.],
        [47., 43., 45.,  ...,  0.,  0.,  0.]]) torch.float32


In [197]:
##training
import torch.nn.functional as F
def train(g, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    best_val_acc = 0
    best_test_acc = 0
    features = g.ndata['feat']
    labels = g.ndata['label']
    for e in range(100):
        # Forward
        logits = model(g, features)

        # Compute prediction
        pred = logits.argmax(1)

        # Compute loss
        # Note that you should only compute the losses of the nodes in the training set.
        loss = F.mse_loss(logits[train_mask], labels[train_mask])

        # Compute accuracy on training/validation/test
        train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
        val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
        test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

        # Save the best validation accuracy and the corresponding test accuracy.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if e % 10 == 0:
            print('In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})'.format(
                e, loss, val_acc, best_val_acc, test_acc, best_test_acc))
train(graph,model)

In epoch 0, loss: 4442.891, val acc: 0.000 (best 0.000), test acc: 0.000 (best 0.000)


  assert input.numel() == input.storage().size(), (


In epoch 10, loss: 2017.152, val acc: 0.000 (best 0.000), test acc: 0.000 (best 0.000)
In epoch 20, loss: 2102.713, val acc: 0.000 (best 0.000), test acc: 0.000 (best 0.000)
In epoch 30, loss: 1962.792, val acc: 0.000 (best 0.000), test acc: 0.000 (best 0.000)
In epoch 40, loss: 1948.778, val acc: 0.000 (best 0.000), test acc: 0.000 (best 0.000)
In epoch 50, loss: 1938.501, val acc: 0.000 (best 0.000), test acc: 0.000 (best 0.000)
In epoch 60, loss: 1923.617, val acc: 0.000 (best 0.000), test acc: 0.000 (best 0.000)
In epoch 70, loss: 1914.748, val acc: 0.000 (best 0.000), test acc: 0.000 (best 0.000)
In epoch 80, loss: 1905.447, val acc: 0.000 (best 0.000), test acc: 0.000 (best 0.000)
In epoch 90, loss: 1895.524, val acc: 0.000 (best 0.000), test acc: 0.000 (best 0.000)


In [198]:

## Creating DGLGRAPH
""" graph = dgl.graph((edges_src, edges_dst), num_nodes = fifa_df.shape[0])
#int_long_passing	int_ball_control	int_dribbling	CAM	CB	CDM	CF	CM	GK	LB	LM	LW	LWB	RB	RM	RW	RWB	ST
graph.ndata['int_long_passing'] = torch.from_numpy(node_features['int_long_passing'].to_numpy())
graph.ndata['int_ball_control'] = torch.from_numpy(node_features['int_ball_control'].to_numpy())
graph.ndata['int_dribbling'] = torch.from_numpy(node_features['int_dribbling'].to_numpy())
graph.ndata['CAM'] = torch.from_numpy(node_features['CAM'].to_numpy())
graph.ndata['CB'] = torch.from_numpy(node_features['CDM'].to_numpy())
graph.ndata['CF'] = torch.from_numpy(node_features['CM'].to_numpy())
graph.ndata['GK'] = torch.from_numpy(node_features['GK'].to_numpy())
graph.ndata['LB'] = torch.from_numpy(node_features['LB'].to_numpy())
graph.ndata['LM'] = torch.from_numpy(node_features['LM'].to_numpy())
graph.ndata['LW'] = torch.from_numpy(node_features['LW'].to_numpy())
graph.ndata['LWB'] = torch.from_numpy(node_features['LWB'].to_numpy())
graph.ndata['RB'] = torch.from_numpy(node_features['RB'].to_numpy())
graph.ndata['RM'] = torch.from_numpy(node_features['RM'].to_numpy())
graph.ndata['RW'] = torch.from_numpy(node_features['RW'].to_numpy())
graph.ndata['RWB'] = torch.from_numpy(node_features['RWB'].to_numpy())
graph.ndata['ST'] = torch.from_numpy(node_features['ST'].to_numpy())
graph.ndata['int_overall'] = torch.from_numpy(sorted_df[["int_overall"]].to_numpy())
n_nodes = fifa_df.shape[0]
#TRAINING 0.6, VALIDATION 0.2, TESTING 0.2
n_train = int(n_nodes * 0.6)
n_val = int(n_nodes * 0.2)
n_test = int(n_nodes * 0.2)
train_mask = torch.zeros(n_nodes, dtype=torch.bool)
val_mask = torch.zeros(n_nodes, dtype=torch.bool)
test_mask = torch.zeros(n_nodes, dtype=torch.bool)
train_mask[:n_train] = True
val_mask[n_train : n_train + n_val] = True
test_mask[n_train + n_val :] = True
graph.ndata["train_mask"] = train_mask
graph.ndata["val_mask"] = val_mask
graph.ndata["test_mask"] = test_mask """

' graph = dgl.graph((edges_src, edges_dst), num_nodes = fifa_df.shape[0])\n#int_long_passing\tint_ball_control\tint_dribbling\tCAM\tCB\tCDM\tCF\tCM\tGK\tLB\tLM\tLW\tLWB\tRB\tRM\tRW\tRWB\tST\ngraph.ndata[\'int_long_passing\'] = torch.from_numpy(node_features[\'int_long_passing\'].to_numpy())\ngraph.ndata[\'int_ball_control\'] = torch.from_numpy(node_features[\'int_ball_control\'].to_numpy())\ngraph.ndata[\'int_dribbling\'] = torch.from_numpy(node_features[\'int_dribbling\'].to_numpy())\ngraph.ndata[\'CAM\'] = torch.from_numpy(node_features[\'CAM\'].to_numpy())\ngraph.ndata[\'CB\'] = torch.from_numpy(node_features[\'CDM\'].to_numpy())\ngraph.ndata[\'CF\'] = torch.from_numpy(node_features[\'CM\'].to_numpy())\ngraph.ndata[\'GK\'] = torch.from_numpy(node_features[\'GK\'].to_numpy())\ngraph.ndata[\'LB\'] = torch.from_numpy(node_features[\'LB\'].to_numpy())\ngraph.ndata[\'LM\'] = torch.from_numpy(node_features[\'LM\'].to_numpy())\ngraph.ndata[\'LW\'] = torch.from_numpy(node_features[\'LW\'].t

In [199]:
# import dgl
# import torch
# import torch.nn as nn
# import torch.nn.functional as F

# # define a GCN model for node regression
# class GCN(nn.Module):
#     def __init__(self, in_feats, hidden_size, out_feats):
#         super(GCN, self).__init__()
#         self.conv1 = dgl.nn.GraphConv(in_feats, hidden_size)
#         self.conv2 = dgl.nn.GraphConv(hidden_size, out_feats)

#     def forward(self, g, h):
#         h = F.relu(self.conv1(g, h))
#         h = self.conv2(g, h)
#         return h

# def train(g, model):
#     optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
#     best_val_acc = 0
#     best_test_acc = 0

#     features = g.ndata["feat"]
#     labels = g.ndata["label"]
#     train_mask = g.ndata["train_mask"]
#     val_mask = g.ndata["val_mask"]
#     test_mask = g.ndata["test_mask"]
#     for e in range(100):
#         # Forward
#         logits = model(g, features)

#         # Compute prediction
#         pred = logits.argmax(1)

#         # Compute loss
#         # Note that you should only compute the losses of the nodes in the training set.
#         loss = F.mse_loss(logits[train_mask], labels[train_mask])

#         # Compute accuracy on training/validation/test
#         train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
#         val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
#         test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

#         # Save the best validation accuracy and the corresponding test accuracy.
#         if best_val_acc < val_acc:
#             best_val_acc = val_acc
#             best_test_acc = test_acc

#         # Backward
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#         if e % 5 == 0:
#             print(
#                 "In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})".format(
#                     e, loss, val_acc, best_val_acc, test_acc, best_test_acc
#                 )
#             )


# model = GCN(g.ndata["feat"].shape[1], 16, dataset.num_classes)
# train(g, model) 
