In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import ast
import keras

In [2]:
csv_path = 'archive/GoalKeepers.csv'
GK = pd.read_csv(csv_path)

GK['Attribute Vector'] = GK['Attribute Vector'].apply(ast.literal_eval)

attribute_names = [
    'PSxG-GA', 'Goals Against', 'Save Percentage', 'PSxG/SoT',
    'Save% (Penalty Kicks)', 'Clean Sheet Percentage', 'Touches',
    'Launch %', 'Goal Kicks', 'Avg. Length of Goal Kicks',
    'Crosses Stopped %', 'Def. Actions Outside Pen. Area'
]

new_GK = pd.DataFrame(columns=['Name'])

for _, row in GK.iterrows():
    new_row = {'Name': row['Name']}
    for attribute_name, value in zip(attribute_names, row['Attribute Vector']):
        new_row[attribute_name] = value
    new_GK = pd.concat([new_GK, pd.DataFrame([new_row])])

new_GK = new_GK.drop_duplicates(subset=['Name'])

new_GK

Unnamed: 0,Name,PSxG-GA,Goals Against,Save Percentage,PSxG/SoT,Save% (Penalty Kicks),Clean Sheet Percentage,Touches,Launch %,Goal Kicks,Avg. Length of Goal Kicks,Crosses Stopped %,Def. Actions Outside Pen. Area
0,Julen Agirrezabala,-0.07,0.80,68.4,0.30,28.6,33.73,29.60,8.27,55.40,11.5,1.33,15.40
0,Doğan Alemdar,-0.47,1.85,54.5,0.29,50.0,20.00,33.64,42.00,7.57,54.6,6.50,0.84
0,Alisson,0.24,1.22,70.3,0.34,40.0,37.80,40.44,18.30,5.11,31.4,6.60,2.20
0,Alphonse Areola,0.02,0.62,81.1,0.20,0.0,46.20,29.69,42.00,6.69,48.6,3.20,0.54
0,Kepa Arrizabalaga,0.18,1.09,74.2,0.29,0.0,32.40,37.52,18.90,6.08,34.1,5.00,1.65
...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Sven Ulreich,0.22,0.67,77.8,0.24,100.0,66.70,43.00,20.30,6.50,19.4,3.90,1.17
0,Guglielmo Vicario,0.04,1.26,73.9,0.27,20.0,23.30,44.14,33.20,5.50,42.9,5.60,0.71
0,Iván Villar,-0.09,1.26,66.1,0.31,0.0,21.10,33.16,34.70,8.26,44.8,3.10,0.74
0,Danny Ward,-0.21,1.77,63.0,0.29,33.3,23.10,38.46,29.70,7.31,35.1,5.60,1.62


In [3]:
new_GK['Total Stats'] = new_GK[attribute_names].sum(axis=1)
new_GK['Total Stats'] = new_GK['Total Stats'] - new_GK['Goals Against']  - new_GK['Avg. Length of Goal Kicks']

new_GK

Unnamed: 0,Name,PSxG-GA,Goals Against,Save Percentage,PSxG/SoT,Save% (Penalty Kicks),Clean Sheet Percentage,Touches,Launch %,Goal Kicks,Avg. Length of Goal Kicks,Crosses Stopped %,Def. Actions Outside Pen. Area,Total Stats
0,Julen Agirrezabala,-0.07,0.80,68.4,0.30,28.6,33.73,29.60,8.27,55.40,11.5,1.33,15.40,240.96
0,Doğan Alemdar,-0.47,1.85,54.5,0.29,50.0,20.00,33.64,42.00,7.57,54.6,6.50,0.84,214.87
0,Alisson,0.24,1.22,70.3,0.34,40.0,37.80,40.44,18.30,5.11,31.4,6.60,2.20,221.33
0,Alphonse Areola,0.02,0.62,81.1,0.20,0.0,46.20,29.69,42.00,6.69,48.6,3.20,0.54,209.64
0,Kepa Arrizabalaga,0.18,1.09,74.2,0.29,0.0,32.40,37.52,18.90,6.08,34.1,5.00,1.65,176.22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Sven Ulreich,0.22,0.67,77.8,0.24,100.0,66.70,43.00,20.30,6.50,19.4,3.90,1.17,319.83
0,Guglielmo Vicario,0.04,1.26,73.9,0.27,20.0,23.30,44.14,33.20,5.50,42.9,5.60,0.71,206.66
0,Iván Villar,-0.09,1.26,66.1,0.31,0.0,21.10,33.16,34.70,8.26,44.8,3.10,0.74,167.38
0,Danny Ward,-0.21,1.77,63.0,0.29,33.3,23.10,38.46,29.70,7.31,35.1,5.60,1.62,202.17


In [4]:
features = new_GK[attribute_names].values
target = new_GK['Total Stats'].values

class GoalkeeperDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return {'features': self.features[idx], 'target': self.targets[idx]}
    
class GoalkeeperModel(nn.Module):
    def __init__(self, input_size):
        super(GoalkeeperModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(64, 1)
        self.regularization = nn.Linear(input_size, 1) 

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        x = self.fc4(x)
    
        reg_loss = torch.norm(self.regularization.weight, 2)
        return x + reg_loss

In [5]:
X_train, X_val, y_train, y_val = train_test_split(features, target, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

train_dataset = GoalkeeperDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

model = GoalkeeperModel(input_size=len(attribute_names))
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)

num_epochs = 5000
train_rmse_values = []
val_rmse_values = []
accuracy_values = []
mae_values = []

for epoch in range(num_epochs):
    for batch in train_loader:
        inputs, targets = batch['features'], batch['target']
        outputs = model(inputs)
        loss = criterion(outputs, targets.view(-1, 1))
        reg_loss = model.regularization.weight.norm(2)
        loss += 0.001 * reg_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        val_predictions = model(X_val_tensor)
        val_loss = criterion(val_predictions, y_val_tensor.view(-1, 1))
        rmse = torch.sqrt(val_loss)
        val_rmse_values.append(rmse.item())
        mae = torch.mean(torch.abs(val_predictions - y_val_tensor.view(-1, 1)))  
        mae_values.append(mae.item())
        target_range = y_val_tensor.max() - y_val_tensor.min()
        accuracy = (1 - rmse / target_range) * 100
        accuracy_values.append(accuracy.item()) 
        print(f'Epoch {epoch+1}/{num_epochs}, RMSE: {rmse.item()}, MAE: {mae.item()}, Accuracy: {accuracy.item()}%')

Epoch 1/5000, RMSE: 197.90054321289062, MAE: 194.77752685546875, Accuracy: -26.981426239013672%
Epoch 2/5000, RMSE: 193.326171875, MAE: 190.26817321777344, Accuracy: -24.046314239501953%
Epoch 3/5000, RMSE: 183.7449493408203, MAE: 180.59214782714844, Accuracy: -17.898595809936523%
Epoch 4/5000, RMSE: 166.7928924560547, MAE: 163.3193359375, Accuracy: -7.021427154541016%
Epoch 5/5000, RMSE: 144.16404724121094, MAE: 137.8804473876953, Accuracy: 7.498198509216309%
Epoch 6/5000, RMSE: 104.77166748046875, MAE: 93.79459381103516, Accuracy: 32.77403259277344%
Epoch 7/5000, RMSE: 91.4249038696289, MAE: 80.48682403564453, Accuracy: 41.33788299560547%
Epoch 8/5000, RMSE: 93.09579467773438, MAE: 69.35436248779297, Accuracy: 40.265769958496094%
Epoch 9/5000, RMSE: 158.57376098632812, MAE: 112.98905181884766, Accuracy: -1.7476916313171387%
Epoch 10/5000, RMSE: 146.5928192138672, MAE: 107.08789825439453, Accuracy: 5.939793586730957%
Epoch 11/5000, RMSE: 119.58355712890625, MAE: 79.35202026367188, Acc