In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-


# Deep Neural Networks

## Session 07b
## Working with PyTorch
- one hidden layer 
- $\tanh$ activation function
- multi-class output
- FIFA dataset 

<img src='../../images/prasami_color_tutorials_small.png' width='400' alt="By Pramod Sharma : pramod.sharma@prasami.com" align="left"/>

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.autograd import Variable


In [None]:
# ---- Basic parameters -------------------------------------------------------
# Directories used for reading data and writing results
inpDir = '../input'
outDir = '../output'

# Seed PyTorch's random number generator for reproducible results
RANDOM_STATE = 24
torch.manual_seed(RANDOM_STATE)

# Hyper-parameters
EPOCHS = 20001      # number of epochs
ALPHA = 0.1         # learning rate
NUM_SAMPLES = 1280  # How many samples we want to generate
NOISE = 0.2         # Noise to be introduced in the data
TEST_SIZE = 0.2     # value passed as test_size to train_test_split

# ---- Matplotlib defaults ----------------------------------------------------
# Every font-size related setting uses the same 'x-large' value
params = {
    key: 'x-large'
    for key in ('legend.fontsize', 'axes.labelsize', 'axes.titlesize',
                'xtick.labelsize', 'ytick.labelsize')
}
params['figure.figsize'] = (15, 10)
plt.rcParams.update(params)

CMAP = plt.cm.Spectral

## Helper Function to Plot the Loss History

In [None]:
def _draw_loss_panel(ax, panel_df, first_loss, last_loss, title):
    """Plot loss vs. epoch for `panel_df` on `ax` and annotate first/last loss."""
    panel_df.plot(x='epoch', y='loss', ax=ax)

    # little beautification: text box summarising the first and last loss value
    txtstr = "Errors: \n  Start : {:7.4f}\n   End : {:7.4f}".format(first_loss, last_loss)
    # properties of the box drawn around the text (matplotlib.patch.Patch)
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

    # place the text box using axes coordinates
    ax.text(0.6, 0.95, txtstr, transform=ax.transAxes, fontsize=14,
            verticalalignment='top', bbox=props)

    ax.set_xlabel("Epochs")
    ax.set_ylabel("Error")
    ax.set_title(title)
    ax.grid()


def plot_hist(hist_df, l_range=10):
    """Plot the training loss: the full history and a zoom on the last records.

    Parameters
    ----------
    hist_df : pandas.DataFrame
        Training history with an 'epoch' and a 'loss' column.
    l_range : int, optional
        Number of trailing records shown in the right-hand panel (default 10).
        It is clamped to the number of rows available, so short histories
        no longer raise an IndexError.

    Raises
    ------
    ValueError
        If `hist_df` has no rows.
    """
    # Read the loss from the frame that was passed in, not from a notebook global.
    loss = hist_df['loss'].values
    if len(loss) == 0:
        raise ValueError("hist_df must contain at least one row")

    # Never ask for more trailing records than exist (and at least one).
    n_tail = max(1, min(l_range, len(loss)))

    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    _draw_loss_panel(axes[0], hist_df, loss[0], loss[-1], 'Overall')
    _draw_loss_panel(axes[1], hist_df.iloc[-n_tail:], loss[-n_tail], loss[-1],
                     'Last {} records'.format(n_tail))

    plt.tight_layout()

## Read FIFA 2019 data set

In [None]:
# Load the FIFA 2019 player table from the input directory
fifa_csv = os.path.join(inpDir, 'fifa_2019.csv')
data_df = pd.read_csv(fifa_csv)
data_df.shape

In [None]:
# Keep only the rows that have a position. The explicit .copy() makes data_df
# an independent frame, so the label assignments made later through .loc do
# not trigger pandas' SettingWithCopyWarning.
data_df = data_df[data_df["Position"].notnull()].copy()
data_df.head()

In [None]:
# Player attributes that appear to be relevant for our analysis
feature_cols = [
    'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys',
    'Dribbling', 'Curve', 'FKAccuracy', 'LongPassing',
    'BallControl', 'Acceleration', 'SprintSpeed', 'Agility',
    'Reactions', 'Balance', 'ShotPower', 'Jumping',
    'Stamina', 'Strength', 'LongShots', 'Aggression',
    'Interceptions', 'Positioning', 'Vision', 'Penalties',
    'Composure', 'Marking', 'StandingTackle', 'SlidingTackle',
    'GKDiving', 'GKHandling', 'GKKicking', 'GKPositioning',
    'GKReflexes',
]

# The target column comes first, followed by the features
rel_cols = ["Position"] + feature_cols

In [None]:
# Detailed position codes grouped by playing role
goalkeeper = 'GK'
forward = 'ST LW RW LF RF RS LS CF'.split()
midfielder = 'CM RCM LCM CDM RDM LDM CAM LAM RAM RM LM'.split()
defender = 'CB RCB LCB LWB RWB LB RB'.split()

In [None]:
# Collapse the detailed position codes into four role classes:
#   0 = goalkeeper, 1 = defender, 2 = midfielder, 3 = forward.
# The `goalkeeper` constant is used instead of repeating the 'GK' literal, so
# every role group is defined in exactly one place.
role_to_positions = {
    0: [goalkeeper],
    1: defender,
    2: midfielder,
    3: forward,
}

for role_label, role_positions in role_to_positions.items():
    is_role = data_df["Position"].isin(role_positions)
    data_df.loc[is_role, "Position"] = role_label

In [None]:
# Restrict the frame to the target and the selected attribute columns
data_df = data_df.loc[:, rel_cols]
data_df.head()

In [None]:
# Features: every selected column except the target
X = data_df.drop(columns="Position")

# Labels: one hot encoding of the role classes
y = pd.get_dummies(data_df["Position"].values).values

X.shape, y.shape

In [None]:
# Hold out part of the data as a test set to measure the model's performance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Shapes of the four resulting arrays
print(*(part.shape for part in (X_train, y_train, X_test, y_test)))

In [None]:
# Standardise the features. The scaler is fitted on the training split only and
# the same mean/std are then applied to the test split; refitting on the test
# data would scale the two splits differently and leak test-set statistics.
sc = StandardScaler()

X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


## All about CUDA

In [None]:
# Report the CUDA set-up. The device queries are made only when CUDA is
# available, because torch.cuda.current_device() raises on CPU-only machines.
cuda_available = torch.cuda.is_available()

print ('Is CUDA available: ', cuda_available)

print ('CUDA version: ', torch.version.cuda )

if cuda_available:
    print ('Current Device ID: ', torch.cuda.current_device())

    print ('Name of the CUDA device: ', torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print ('No CUDA device found; device queries skipped.')

In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device} device")

In [None]:

# NOTE(review): `net` is not defined in any cell visible above — the model has
# to be created and assigned to `net` before this cell can run.
print (net)

In [None]:
# Wrap the data in PyTorch tensors and transfer them to the GPU
# (torch.autograd.Variable is deprecated since PyTorch 0.4; plain tensors work directly)


In [None]:
# Tensor.detach() is the supported way to drop autograd history (`.data` is a
# legacy accessor); .numpy() hands scikit-learn a plain array.
# NOTE(review): assumes predict_y is a tensor of predicted class indices — confirm.
print ('prediction accuracy : {:.4f}'.format(
    accuracy_score(y_test.argmax(axis = 1), predict_y.detach().cpu().numpy())))

In [None]:
# Per-class precision / recall report. detach() replaces the legacy `.data`
# accessor and .numpy() gives scikit-learn a plain array.
# NOTE(review): assumes predict_y is a tensor of predicted class indices — confirm.
print(classification_report(y_test.argmax(axis = 1),
                            predict_y.detach().cpu().numpy()))

In [None]:
# plot_hist plots the 'loss' column against the 'epoch' column.
# NOTE(review): loss_df is not created in any cell visible above — confirm it
# is built during training.
plot_hist(loss_df)

## Using DataSets

So far we used pandas to load the data and then moved it to the GPU manually. PyTorch also provides built-in functionality for loading data.

This time we will load images from a subdirectory on the local drive.

In [None]:
from torchvision import datasets, transforms

In [None]:
# Images come in all kinds of shapes and sizes, so we need to standardize them:
# resize, centre-crop to a fixed size, then convert to a tensor.
preprocessing_steps = [
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# Build the dataset from the image folder; batches of 32 are drawn in shuffled order
flower_dir = os.path.join(inpDir, 'flower_photos')
dataset = datasets.ImageFolder(flower_dir, transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
def imshow(image, ax=None, title=None, normalize=True):
    """Display a channels-first image tensor on a Matplotlib axes.

    Parameters
    ----------
    image : torch.Tensor
        Image tensor with the channel axis first; it is transposed with
        (1, 2, 0) so that the channel axis comes last before plotting.
    ax : matplotlib axes, optional
        Axes to draw on. A new figure/axes pair is created when None.
    title : str, optional
        Title for the axes. Nothing is set when None.
    normalize : bool, optional
        When True, the image is mapped through ``std * image + mean`` with
        fixed per-channel constants and clipped to [0, 1].
        NOTE(review): the constants look like the usual ImageNet channel
        statistics — confirm they match the normalisation used upstream.

    Returns
    -------
    The axes the image was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots()

    # detach()/cpu() make the conversion work for tensors that track gradients
    # or live on the GPU; a plain CPU tensor is unaffected.
    image = image.detach().cpu().numpy().transpose((1, 2, 0))

    if normalize:
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        image = np.clip(std * image + mean, 0, 1)

    ax.imshow(image)

    if title is not None:
        ax.set_title(title)

    # Strip all axes decoration so only the picture remains
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)
    ax.tick_params(axis='both', length=0)
    ax.set_xticklabels([])
    ax.set_yticklabels([])

    return ax

In [None]:
# Run this to test your data loader: fetch one batch and show its first image
batch_iter = iter(dataloader)
images, labels = next(batch_iter)
imshow(images[0], normalize=False);

In [None]:
images[0].shape # channels-first tensor: expected (3, 224, 224) after CenterCrop(224) + ToTensor(), not (224, 224, 3)

In [None]:
# Raw values of the first image tensor in the batch
images[0]

## [Recommended Reading](https://pytorch.org/tutorials/beginner/pytorch_with_examples.html)

For more PyTorch examples, follow the link above.