In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import ast
import keras

In [2]:
csv_path = 'archive/Midfielders.csv'
MD = pd.read_csv(csv_path)

MD['Attribute Vector'] = MD['Attribute Vector'].apply(ast.literal_eval)

new_attribute_names = [
    'Non-Penalty Goals', 'Non-Penalty xG', 'Shots Total', 'Assists',
    'xAG', 'npxG + xAG', 'Shot-Creating Actions', 'Passes Attempted',
    'Pass Completion %', 'Progressive Passes', 'Progressive Carries',
    'Successful Take-Ons', 'Touches (Att Pen)', 'Progressive Passes Rec',
    'Tackles', 'Interceptions', 'Blocks', 'Clearances', 'Aerials won'
]

new_MD = pd.DataFrame(columns=['Name'])

for _, row in MD.iterrows():
    new_row = {'Name': row['Name']}
    for attribute_name, value in zip(new_attribute_names, row['Attribute Vector']):
        new_row[attribute_name] = value
    new_MD = pd.concat([new_MD, pd.DataFrame([new_row])])

new_MD

Unnamed: 0,Name,Non-Penalty Goals,Non-Penalty xG,Shots Total,Assists,xAG,npxG + xAG,Shot-Creating Actions,Passes Attempted,Pass Completion %,Progressive Passes,Progressive Carries,Successful Take-Ons,Touches (Att Pen),Progressive Passes Rec,Tackles,Interceptions,Blocks,Clearances,Aerials won
0,Himad Abdelli,0.08,0.06,1.01,0.08,0.12,0.18,2.91,52.94,83.0,6.36,2.44,1.81,0.97,4.67,2.70,1.18,1.26,0.63,0.76
0,Salis Abdul Samed,0.03,0.03,0.40,0.03,0.05,0.08,1.55,61.61,91.8,5.13,1.03,0.90,0.50,1.37,1.43,1.12,1.18,0.81,0.44
0,Laurent Abergel,0.00,0.02,0.50,0.04,0.05,0.07,1.27,56.99,88.2,5.50,0.50,0.88,0.15,0.54,3.54,1.31,1.81,1.50,0.31
0,Oliver Abildgaard,0.00,0.01,0.41,0.00,0.06,0.08,1.24,31.33,61.0,2.34,0.27,0.14,1.79,1.10,1.92,0.69,1.10,2.47,7.28
0,Tyler Adams,0.00,0.00,0.17,0.00,0.06,0.06,1.80,56.06,82.5,5.68,0.79,0.29,0.21,0.83,3.72,1.46,2.00,1.21,1.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Martín Zubimendi,0.03,0.05,0.36,0.08,0.05,0.10,1.48,52.89,85.2,5.04,1.30,0.29,0.55,0.81,1.61,1.27,1.27,1.59,1.72
0,Szymon Żurkowski,0.00,0.14,1.76,0.20,0.00,0.14,1.18,23.53,72.5,2.16,2.35,1.57,1.37,2.75,1.76,0.78,1.76,1.18,1.37
0,Szymon Żurkowski,0.00,0.14,1.76,0.20,0.00,0.14,1.18,23.53,72.5,2.16,2.35,1.57,1.37,2.75,1.76,0.78,1.76,1.18,1.37
0,Martin Ødegaard,0.40,0.27,2.56,0.19,0.26,0.53,4.81,52.02,80.4,7.74,2.46,1.31,3.55,5.53,1.10,0.21,0.56,0.32,0.35


In [3]:
attribute_names = ['Tackles', 'Interceptions', 'npxG + xAG', 'Shot-Creating Actions', 'Passes Attempted',
    'Pass Completion %', 'Progressive Passes', 'Progressive Carries',
    'Successful Take-Ons']
new_MD = new_MD.drop_duplicates(subset=['Name'])
new_MD['Total Stats'] = new_MD[attribute_names].sum(axis=1)

new_MD = new_MD[['Name'] + attribute_names + ['Total Stats']]
new_MD

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_MD['Total Stats'] = new_MD[attribute_names].sum(axis=1)


Unnamed: 0,Name,Tackles,Interceptions,npxG + xAG,Shot-Creating Actions,Passes Attempted,Pass Completion %,Progressive Passes,Progressive Carries,Successful Take-Ons,Total Stats
0,Himad Abdelli,2.70,1.18,0.18,2.91,52.94,83.0,6.36,2.44,1.81,153.52
0,Salis Abdul Samed,1.43,1.12,0.08,1.55,61.61,91.8,5.13,1.03,0.90,164.65
0,Laurent Abergel,3.54,1.31,0.07,1.27,56.99,88.2,5.50,0.50,0.88,158.26
0,Oliver Abildgaard,1.92,0.69,0.08,1.24,31.33,61.0,2.34,0.27,0.14,99.01
0,Tyler Adams,3.72,1.46,0.06,1.80,56.06,82.5,5.68,0.79,0.29,152.36
...,...,...,...,...,...,...,...,...,...,...,...
0,Piotr Zieliński,0.77,0.56,0.46,5.45,59.40,83.5,5.08,2.29,0.93,158.44
0,Martín Zubimendi,1.61,1.27,0.10,1.48,52.89,85.2,5.04,1.30,0.29,149.18
0,Szymon Żurkowski,1.76,0.78,0.14,1.18,23.53,72.5,2.16,2.35,1.57,105.97
0,Martin Ødegaard,1.10,0.21,0.53,4.81,52.02,80.4,7.74,2.46,1.31,150.58


In [4]:
features = new_MD[attribute_names].values
target = new_MD['Total Stats'].values

class MDDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return {'features': self.features[idx], 'target': self.targets[idx]}


class MDModel(nn.Module):
    def __init__(self, input_size):
        super(MDModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(64, 1)
        self.regularization = nn.Linear(input_size, 1) 

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        x = self.fc4(x)
    
        reg_loss = torch.norm(self.regularization.weight, 2)
        return x + reg_loss

In [5]:
X_train, X_val, y_train, y_val = train_test_split(features, target, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

train_dataset = MDDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

modelMD = MDModel(input_size=len(attribute_names))
criterion = nn.MSELoss()
optimizer = optim.Adam(modelMD.parameters(), lr=0.005)
mae_values = []
num_epochs = 1500
for epoch in range(num_epochs):
    for batch in train_loader:
        inputs, targets = batch['features'], batch['target']
        outputs = modelMD(inputs)
        loss = criterion(outputs, targets.view(-1, 1))
        reg_loss = modelMD.regularization.weight.norm(2)
        loss += 0.001 * reg_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        val_predictions = modelMD(X_val_tensor)
        val_loss = criterion(val_predictions, y_val_tensor.view(-1, 1))
        rmse = torch.sqrt(val_loss)
        mae = torch.mean(torch.abs(val_predictions - y_val_tensor.view(-1, 1)))
        mae_values.append(mae.item())
        target_range = y_val_tensor.max() - y_val_tensor.min()
        accuracy = (1 - rmse / target_range) * 100
        print(f'Epoch {epoch+1}/{num_epochs}, RMSE: {rmse.item()}, MAE: {mae.item()}, Accuracy: {accuracy.item()}%')


Epoch 1/1500, RMSE: 127.777099609375, MAE: 126.38075256347656, Accuracy: -30.905746459960938%
Epoch 2/1500, RMSE: 83.2966537475586, MAE: 79.71858215332031, Accuracy: 14.663809776306152%
Epoch 3/1500, RMSE: 79.05095672607422, MAE: 53.3559684753418, Accuracy: 19.013463973999023%
Epoch 4/1500, RMSE: 50.86490249633789, MAE: 35.040870666503906, Accuracy: 47.88966369628906%
Epoch 5/1500, RMSE: 49.40584182739258, MAE: 40.98419952392578, Accuracy: 49.38444519042969%
Epoch 6/1500, RMSE: 42.389896392822266, MAE: 32.582462310791016, Accuracy: 56.572174072265625%
Epoch 7/1500, RMSE: 55.048248291015625, MAE: 36.579559326171875, Accuracy: 43.603885650634766%
Epoch 8/1500, RMSE: 43.96925354003906, MAE: 31.20399284362793, Accuracy: 54.95414733886719%
Epoch 9/1500, RMSE: 45.481483459472656, MAE: 34.04853439331055, Accuracy: 53.40489196777344%
Epoch 10/1500, RMSE: 40.72443771362305, MAE: 28.15323257446289, Accuracy: 58.278419494628906%
Epoch 11/1500, RMSE: 34.7953987121582, MAE: 28.266469955444336, Accu