In [1]:
import torch
import numpy as np
import pandas as pd

In [2]:
url = 'https://raw.githubusercontent.com/amanthedorkknight/fifa18-all-player-statistics/master/2019/data.csv'
df = pd.read_csv(url, index_col=0)
print('\n',df.columns)
df.head(2)


 Index(['ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag', 'Overall',
       'Potential', 'Club', 'Club Logo', 'Value', 'Wage', 'Special',
       'Preferred Foot', 'International Reputation', 'Weak Foot',
       'Skill Moves', 'Work Rate', 'Body Type', 'Real Face', 'Position',
       'Jersey Number', 'Joined', 'Loaned From', 'Contract Valid Until',
       'Height', 'Weight', 'LS', 'ST', 'RS', 'LW', 'LF', 'CF', 'RF', 'RW',
       'LAM', 'CAM', 'RAM', 'LM', 'LCM', 'CM', 'RCM', 'RM', 'LWB', 'LDM',
       'CDM', 'RDM', 'RWB', 'LB', 'LCB', 'CB', 'RCB', 'RB', 'Crossing',
       'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingTackle', 'GKD

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,...,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Release Clause
0,158023,L. Messi,31,https://cdn.sofifa.org/players/4/19/158023.png,Argentina,https://cdn.sofifa.org/flags/52.png,94,94,FC Barcelona,https://cdn.sofifa.org/teams/2/light/241.png,...,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0,€226.5M
1,20801,Cristiano Ronaldo,33,https://cdn.sofifa.org/players/4/19/20801.png,Portugal,https://cdn.sofifa.org/flags/38.png,94,94,Juventus,https://cdn.sofifa.org/teams/2/light/45.png,...,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0,€127.1M


In [3]:
subset = df[['Overall', 'Age', 'International Reputation', 'Weak Foot', 'Skill Moves']]
subset = subset.dropna(axis=0, how='any')

In [4]:
columns = subset.columns[1:]
players = torch.tensor(subset.values).float()
print(f'{players.shape}\n\n{players.type()}')

torch.Size([18159, 5])

torch.FloatTensor


In [5]:
data = players[:, 1:]
print(f'{data.shape}\n\n{data}')

torch.Size([18159, 4])

tensor([[31.,  5.,  4.,  4.],
        [33.,  5.,  4.,  5.],
        [26.,  5.,  5.,  5.],
        ...,
        [16.,  1.,  3.,  2.],
        [17.,  1.,  3.,  2.],
        [16.,  1.,  3.,  2.]])


In [6]:
target = players[:, 0]
print(f'{target.shape}\n\n{target}')

torch.Size([18159])

tensor([94., 94., 92.,  ..., 47., 47., 46.])


In [7]:
mean = torch.mean(data, dim=0)
std = torch.std(data, dim=0)

print(f'MEAN \n{mean}\n\nSTD\n{std}')

MEAN 
tensor([25.1225,  1.1132,  2.9473,  2.3613])

STD
tensor([4.6706, 0.3940, 0.6605, 0.7562])


In [8]:
norm = (data - mean) / std**2
norm

tensor([[ 0.2694, 25.0340,  2.4133,  2.8659],
        [ 0.3611, 25.0340,  2.4133,  4.6148],
        [ 0.0402, 25.0340,  4.7058,  4.6148],
        ...,
        [-0.4182, -0.7292,  0.1208, -0.6319],
        [-0.3724, -0.7292,  0.1208, -0.6319],
        [-0.4182, -0.7292,  0.1208, -0.6319]])

In [9]:
good = data[torch.ge(target, 85)]
avg = data[torch.ge(target, 70) & torch.lt(target, 85)]
bad = data[torch.lt(target, 70)]

good_mean = torch.mean(good, dim=0)
avg_mean = torch.mean(avg, dim=0)
bad_mean = torch.mean(bad, dim=0)

print(f'GOOD\n{good_mean}\n\nAVG\n{avg_mean}\n\nBAD\n{bad_mean}')

GOOD
tensor([28.3455,  3.3818,  3.4455,  3.0364])

AVG
tensor([27.1686,  1.3066,  3.0910,  2.7241])

BAD
tensor([24.1909,  1.0079,  2.8795,  2.1952])


In [10]:
for _, args in enumerate( zip(columns, good_mean, avg_mean, bad_mean) ):
    print( '{:30} {:6.2f} {:6.2f}  {:6.2f}'.format(*args) )

Age                             28.35  27.17   24.19
International Reputation         3.38   1.31    1.01
Weak Foot                        3.45   3.09    2.88
Skill Moves                      3.04   2.72    2.20
