In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-


# Deep Neural Networks

## Session 27a: Lecture
## Introduction to Pytorch
- one hidden layer 
- ${ReLU}$ activation function
- multi-class output
- FIFA dataset
- with torch framework

<img src='../../images/prasami_color_tutorials_small.png' width='400' alt="By Pramod Sharma : pramod.sharma@prasami.com" align="left"/>

In [2]:
###-----------------
### Import Libraries
###-----------------

import os
#import numpy as np
import pandas as pd

from collections.abc import Callable
from typing import Literal


from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset

In [44]:
###----------------------
### Some basic parameters
###----------------------

inpDir = '../input'
outDir = '../output'

RANDOM_STATE = 177013
torch.manual_seed(RANDOM_STATE) # Set Random Seed for reproducible  results

EPOCHS = 50 # number of epochs
ALPHA = 0.001 # learning rate
TEST_SIZE = 0.2

## All about CUDA

In [45]:
print ('Is CUDA available: ', torch.cuda.is_available())

print ('CUDA version: ', torch.version.cuda )

# print ('Current Device ID: ', torch.cuda.current_device())

# print ('Name of the CUDA device: ', torch.cuda.get_device_name(torch.cuda.current_device()))

Is CUDA available:  False
CUDA version:  11.7


In [46]:
# Get cpu or gpu device for training.

device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using {device} device")

Using cpu device


## Read FIFA 2019 data set
<img src = '../../images/fifa_2019.png' alt = 'Fifa 2019'>

In [47]:
data_df = pd.read_csv(os.path.join(inpDir, 'fifa_2019.csv'))
data_df.shape

(18207, 89)

In [48]:
# removing rows with position == null
data_df = data_df[data_df["Position"].notnull()]
data_df.head()

Unnamed: 0.1,Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,...,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Release Clause
0,0,158023,L. Messi,31,https://cdn.sofifa.org/players/4/19/158023.png,Argentina,https://cdn.sofifa.org/flags/52.png,94,94,FC Barcelona,...,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0,€226.5M
1,1,20801,Cristiano Ronaldo,33,https://cdn.sofifa.org/players/4/19/20801.png,Portugal,https://cdn.sofifa.org/flags/38.png,94,94,Juventus,...,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0,€127.1M
2,2,190871,Neymar Jr,26,https://cdn.sofifa.org/players/4/19/190871.png,Brazil,https://cdn.sofifa.org/flags/54.png,92,93,Paris Saint-Germain,...,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0,€228.1M
3,3,193080,De Gea,27,https://cdn.sofifa.org/players/4/19/193080.png,Spain,https://cdn.sofifa.org/flags/45.png,91,93,Manchester United,...,68.0,15.0,21.0,13.0,90.0,85.0,87.0,88.0,94.0,€138.6M
4,4,192985,K. De Bruyne,27,https://cdn.sofifa.org/players/4/19/192985.png,Belgium,https://cdn.sofifa.org/flags/7.png,91,92,Manchester City,...,88.0,68.0,58.0,51.0,15.0,13.0,5.0,10.0,13.0,€196.4M


In [49]:
# Following columns appear to be relevant for our analysis
rel_cols = ["Position", 'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
            'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
            'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
            'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
            'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
            'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
            'GKKicking', 'GKPositioning', 'GKReflexes']

In [50]:
goalkeeper = 'GK'
forward = ['ST', 'LW', 'RW', 'LF', 'RF', 'RS','LS', 'CF']
midfielder = ['CM','RCM','LCM', 'CDM','RDM','LDM', 'CAM', 'LAM', 'RAM', 'RM', 'LM']
defender = ['CB', 'RCB', 'LCB', 'LWB', 'RWB', 'LB', 'RB']

In [51]:
#Assign labels to goalkeepers
data_df.loc[data_df["Position"] == "GK", "Position"] = 0

#Defenders
data_df.loc[data_df["Position"].isin(defender), "Position"] = 1

#Midfielders
data_df.loc[data_df["Position"].isin(midfielder), "Position"] = 2

#Forward
data_df.loc[data_df["Position"].isin(forward), "Position"] = 3

# Helps in preventing pandas from complaining while get_dummies
data_df['Position'] = pd.to_numeric(data_df['Position'], downcast="integer")

In [52]:
# Keeping relevent columns.
data_df = data_df[rel_cols]
data_df.head()

Unnamed: 0,Position,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,...,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes
0,3,95.0,70.0,90.0,86.0,97.0,93.0,94.0,87.0,96.0,...,75.0,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0
1,3,94.0,89.0,81.0,87.0,88.0,81.0,76.0,77.0,94.0,...,85.0,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0
2,3,87.0,62.0,84.0,84.0,96.0,88.0,87.0,78.0,95.0,...,81.0,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0
3,0,13.0,21.0,50.0,13.0,18.0,21.0,19.0,51.0,42.0,...,40.0,68.0,15.0,21.0,13.0,90.0,85.0,87.0,88.0,94.0
4,2,82.0,55.0,92.0,82.0,86.0,85.0,83.0,91.0,91.0,...,79.0,88.0,68.0,58.0,51.0,15.0,13.0,5.0,10.0,13.0


In [53]:
feature = data_df.drop('Position', axis = 1).to_numpy()
label = data_df['Position'].to_numpy()

In [54]:
X_train, X_test, y_train, y_test = train_test_split(feature, label,
                                                    stratify=label,
                                                    test_size=TEST_SIZE, 
                                                    random_state=RANDOM_STATE )

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((14517, 33), (3630, 33), (14517,), (3630,))

In [55]:
y_train

array([1, 2, 1, ..., 2, 1, 2], dtype=int8)

In [56]:
features = torch.tensor(X_train, dtype=torch.float32, device=device)
labels = torch.tensor(y_train, dtype=torch.int64, device=device)
features

tensor([[34., 40., 36.,  ...,  9.,  9., 11.],
        [48., 51., 64.,  ..., 12., 13.,  9.],
        [43., 75., 56.,  ..., 13., 12., 13.],
        ...,
        [49., 69., 68.,  ...,  9., 11.,  7.],
        [55., 66., 71.,  ..., 14.,  8., 15.],
        [57., 41., 57.,  ..., 14., 11.,  7.]])

## Prepare Network

In [57]:
# nn.Sequential network with nn.Linear layers


net = nn.Sequential(nn.Linear(X_train.shape[1],16),nn.ReLU(),nn.Linear(16,4),nn.Softmax(dim=1))
net=net.to(device)


In [58]:
net

Sequential(
  (0): Linear(in_features=33, out_features=16, bias=True)
  (1): ReLU()
  (2): Linear(in_features=16, out_features=4, bias=True)
  (3): Softmax(dim=1)
)

In [59]:
# Test its working
net(features).shape

torch.Size([14517, 4])

## Train Your model
- Define the loss function and optimizer
- Train the model for multiple epochs

- Loop for epochs
    - Zero your gradients for every batch!
    - Make predictions for this batch
    - Compute the loss and its gradients
    - Adjust learning weights
    - print (train_loss.item())

In [60]:
#define the loss fn and optimiser
loss_fn=nn.CrossEntropyLoss() # cross entopy loss
optimiser=torch.optim.Adam(net.parameters(),lr=ALPHA) #optimiser


for e in range(EPOCHS):
    #Zero your Gradients for Every batch!
    optimiser.zero_grad()
    
    #Make prediction for this batch
    outputs=net(features)
    
    #compute the loss and its gradients
    train_loss=loss_fn(outputs,labels)
    train_loss.backward()
    
    #adjust learning rates
    optimiser.step()
    
    print(train_loss.item())

1.3857232332229614
1.3260260820388794
1.2794106006622314
1.2473117113113403
1.2275009155273438
1.2156352996826172
1.2080714702606201
1.2025781869888306
1.198068380355835
1.1941041946411133
1.1906239986419678
1.1876922845840454
1.1853578090667725
1.183508276939392
1.181989312171936
1.1804871559143066
1.1788431406021118
1.1769939661026
1.1749660968780518
1.1729214191436768
1.1709972620010376
1.1692066192626953
1.1675934791564941
1.1661609411239624
1.1648521423339844
1.1636381149291992
1.1625059843063354
1.1614291667938232
1.1603933572769165
1.1594164371490479
1.158503532409668
1.1576290130615234
1.1567745208740234
1.1559574604034424
1.1551291942596436
1.1542446613311768
1.153249979019165
1.1520627737045288
1.1507081985473633
1.1492282152175903
1.1479904651641846
1.1476032733917236
1.1476573944091797
1.1470789909362793
1.146156668663025
1.1452322006225586
1.144642949104309
1.14443039894104
1.144228219985962
1.143842101097107


## Test Your Model
- Switch off grads
- set model to eval mode
- make predictions
- calculate accuracy

In [61]:
test_features=torch.tensor(X_test,device=device,dtype=torch.float32)
test_labels=torch.tensor(y_test,device=device,dtype=torch.int64)

In [62]:
output=net(test_features)
y_pred=output.argmax(dim=1).cpu().numpy()

In [63]:
accuracy_score(y_test,y_pred)

0.5969696969696969

In [64]:
test_features

tensor([[47., 42., 64.,  ..., 15., 12.,  6.],
        [12., 12., 36.,  ..., 67., 69., 68.],
        [37., 66., 69.,  ..., 14., 11., 13.],
        ...,
        [33., 80., 65.,  ...,  5.,  5.,  5.],
        [83., 68., 78.,  ..., 13.,  8., 16.],
        [55., 35., 74.,  ...,  9.,  7., 16.]])