### 1. Import all the required libraries and read data:

In [1]:
# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd 
# Keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD, Adam, Adadelta, RMSprop
import keras.backend as K
# Train-Test
from sklearn.model_selection import train_test_split
# Scaling data
from sklearn.preprocessing import StandardScaler
# Classification Report
from sklearn.metrics import classification_report
from keras.utils.np_utils import to_categorical
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 

In [2]:
# Load the raw player table from the CSV file, decoding it as ISO-8859-1 (Latin-1).
df = pd.read_csv(
    "kl.csv",
    encoding="ISO-8859-1",
)

### 2. Remove rows whose `Position` value is missing:

In [3]:
# Discard every row that has no value in the "Position" column;
# rows with a position are kept unchanged.
df = df.dropna(subset=["Position"])

### 3. Get the required features for our classification problem:

In [4]:
# Keep only the target column ("Position") and the 33 skill-attribute columns
# that are used as model inputs.
# `.copy()` makes the result an independent frame, so the later in-place
# relabelling of "Position" does not trigger pandas' SettingWithCopyWarning.
df = df[["Position", 'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
       'GKKicking', 'GKPositioning', 'GKReflexes']].copy()

### 4. Categorizing positions:

In [5]:
# Collapse the detailed position codes into four integer classes:
#   0 = goalkeeper, 1 = defender, 2 = midfielder, 3 = forward.
forward_player = ["ST", "LW", "RW", "LF", "RF", "RS","LS", "CF"]
midfielder_player = ["CM","RCM","LCM", "CDM","RDM","LDM", "CAM", "LAM", "RAM", "RM", "LM"]
defender_player = ["CB", "RCB", "LCB", "LWB", "RWB", "LB", "RB"]

# One lookup table instead of four sequential in-place assignments.
position_to_class = {"GK": 0}
position_to_class.update(dict.fromkeys(defender_player, 1))
position_to_class.update(dict.fromkeys(midfielder_player, 2))
position_to_class.update(dict.fromkeys(forward_player, 3))
# Class ids map to themselves so that re-running this cell on an already
# converted column is a no-op rather than an error.
position_to_class.update({class_id: class_id for class_id in range(4)})

position_class = df["Position"].map(position_to_class)

# Fail loudly on any position code that is not covered by the lists above,
# instead of leaving raw strings in the target column.
unmapped = df.loc[position_class.isna(), "Position"].unique()
if len(unmapped) > 0:
    raise ValueError(f"Unrecognised Position values: {sorted(map(str, unmapped))}")

# Store the target as a proper integer column rather than mixed-type objects.
df["Position"] = position_class.astype(int)

### 5. Standard Scaling of features:

In [6]:
# Separate the feature columns from the class target.
features = df.drop(columns="Position")
y = df["Position"]

# Standardise each feature column with scikit-learn's StandardScaler
# (imported in the first cell).  The column names and the row index are
# carried over so that `x` stays aligned with `y` and remains readable.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics influence the transform — consider fitting it on the
# training rows only.
sc = StandardScaler()
x = pd.DataFrame(
    sc.fit_transform(features),
    columns=features.columns,
    index=features.index,
)

### 6. Converting the target labels to one-hot (categorical) encoding:

In [7]:
# One-hot encode the integer class target.  The number of classes is fixed at 4
# (goalkeeper, defender, midfielder, forward) so the encoded width always
# matches the 4-unit softmax output layer, even if a class were absent from `y`.
y_cat = to_categorical(y, num_classes=4)

In [8]:
# Inspect the one-hot encoded target produced by to_categorical.
y_cat 

array([[0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       ...,
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.]], dtype=float32)

### 7. Test Train Split:

In [9]:
# Hold out 20% of the rows for evaluation (train_test_split is imported in the
# first cell).
#  - random_state pins the shuffle so the split is reproducible across runs.
#  - stratify keeps the class proportions the same in both partitions; the
#    class index is recovered from the one-hot rows with argmax.
x_train, x_test, y_train, y_test = train_test_split(
    x.values,
    y_cat,
    test_size=0.2,
    random_state=42,
    stratify=y_cat.argmax(axis=1),
)

In [10]:
# Shape of the training feature matrix, as (rows, columns).
x_train.shape

(14517, 33)

In [11]:
# Shape of the one-hot training labels, as (rows, columns).
y_train.shape

(14517, 4)

### 8. Build the model:

In [None]:
# Derive the layer sizes from the data instead of hard-coding them, so the
# model still fits if the feature list or the number of classes changes.
n_features = x_train.shape[1]
n_classes = y_train.shape[1]

# Small fully-connected classifier: two ReLU hidden layers, dropout for
# regularisation, and a softmax layer with one unit per class.
model = Sequential()
model.add(Dense(60, input_shape=(n_features,), activation="relu"))
model.add(Dense(15, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(n_classes, activation="softmax"))

# `learning_rate` replaces the deprecated `lr` argument of the optimizer.
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

### 9. Fit the model and run for 10 epochs:

In [13]:
# Train the network on the training split for 10 epochs with progress output.
model.fit(
    x_train,
    y_train,
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1b892600040>

### 10. Confusion Matrix:

In [None]:
from sklearn.metrics import confusion_matrix

# Run the model once on the test rows; each output row holds the class
# probabilities from the softmax layer.
y_pred = model.predict(x_test)

# `Sequential.predict_classes` has been removed from recent Keras/TensorFlow
# releases; taking the argmax of the predicted probabilities is the
# equivalent for a softmax classifier.
y_pred_class = np.argmax(y_pred, axis=1)

# Recover the true class index from the one-hot test labels.
y_test_class = np.argmax(y_test, axis=1)

# Rows are the true classes, columns the predicted classes.
confusion_matrix(y_test_class, y_pred_class)

In [15]:
# Raw model output for the test rows: one value per class from the softmax layer.
y_pred

array([[0.0000000e+00, 6.4129857e-17, 7.6268923e-05, 9.9992371e-01],
       [1.4602358e-12, 1.2538767e-02, 9.8740476e-01, 5.6391800e-05],
       [7.4885314e-18, 9.3622899e-01, 6.3326858e-02, 4.4420600e-04],
       ...,
       [3.6254666e-10, 9.2757724e-02, 9.0721786e-01, 2.4420928e-05],
       [2.1254230e-26, 5.9106458e-09, 7.8228086e-02, 9.2177188e-01],
       [3.8759559e-22, 9.7102666e-01, 2.8917419e-02, 5.5919689e-05]],
      dtype=float32)

In [16]:
# True class index of each test row, obtained via argmax over the one-hot labels.
y_test_class

array([3, 2, 1, ..., 2, 3, 1], dtype=int64)

### 11. Classification Report:

In [17]:
# Per-class precision / recall / F1 on the test split (classification_report is
# imported in the first cell).  Explicit labels and names make the rows
# readable: 0 = goalkeeper, 1 = defender, 2 = midfielder, 3 = forward.
print(
    classification_report(
        y_test_class,
        y_pred_class,
        labels=[0, 1, 2, 3],
        target_names=["Goalkeeper", "Defender", "Midfielder", "Forward"],
    )
)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       415
           1       0.93      0.90      0.91      1228
           2       0.82      0.87      0.84      1337
           3       0.84      0.78      0.81       650

    accuracy                           0.88      3630
   macro avg       0.90      0.89      0.89      3630
weighted avg       0.88      0.88      0.88      3630

