In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import ast
import keras

In [2]:
csv_path = 'archive/CenterBacks.csv'
DC = pd.read_csv(csv_path)

DC['Attribute Vector'] = DC['Attribute Vector'].apply(ast.literal_eval)

new_attribute_names = [
    'Non-Penalty Goals', 'Non-Penalty xG', 'Shots Total', 'Assists',
    'xAG', 'npxG + xAG', 'Shot-Creating Actions', 'Passes Attempted',
    'Pass Completion %', 'Progressive Passes', 'Progressive Carries',
    'Successful Take-Ons', 'Touches (Att Pen)', 'Progressive Passes Rec',
    'Tackles', 'Interceptions', 'Blocks', 'Clearances', 'Aerials won'
]

new_DC = pd.DataFrame(columns=['Name'])

for _, row in DC.iterrows():
    new_row = {'Name': row['Name']}
    for attribute_name, value in zip(new_attribute_names, row['Attribute Vector']):
        new_row[attribute_name] = value
    new_DC = pd.concat([new_DC, pd.DataFrame([new_row])])

new_DC

Unnamed: 0,Name,Non-Penalty Goals,Non-Penalty xG,Shots Total,Assists,xAG,npxG + xAG,Shot-Creating Actions,Passes Attempted,Pass Completion %,Progressive Passes,Progressive Carries,Successful Take-Ons,Touches (Att Pen),Progressive Passes Rec,Tackles,Interceptions,Blocks,Clearances,Aerials won
0,Yunis Abdelhamid,0.03,0.06,0.86,0.05,0.03,0.09,1.16,54.89,82.7,5.81,1.08,0.57,1.05,0.27,2.22,1.73,1.76,3.14,2.51
0,Francesco Acerbi,0.00,0.04,0.70,0.05,0.03,0.07,1.18,59.34,88.3,3.47,1.26,0.22,0.89,0.89,1.16,1.40,0.89,3.52,2.77
0,Tosin Adarabioyo,0.04,0.03,0.69,0.00,0.02,0.05,0.78,56.12,82.0,4.10,0.69,0.22,0.82,0.13,1.30,1.12,0.86,6.04,2.55
0,Emmanuel Agbadou,0.00,0.06,0.83,0.11,0.09,0.15,1.17,56.27,85.8,4.94,0.57,0.30,0.79,0.30,2.68,1.21,1.13,4.03,2.26
0,Nayef Aguerd,0.11,0.12,0.68,0.00,0.03,0.15,0.79,43.05,78.5,1.41,0.11,0.23,1.30,0.23,1.52,1.02,1.47,4.46,1.81
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Illia Zabarnyi,0.00,0.00,0.00,0.00,0.00,0.00,0.62,66.20,86.3,3.97,0.74,0.50,0.74,0.00,1.98,0.87,1.74,3.22,0.99
0,Dan-Axel Zagadou,0.00,0.12,0.92,0.00,0.04,0.16,0.99,69.75,85.4,3.12,0.21,0.07,1.42,0.00,0.99,1.49,1.35,5.74,3.75
0,Tanguy Zoukrou,0.00,0.00,0.00,0.00,0.02,0.02,0.53,38.92,86.4,1.94,0.00,0.35,0.18,0.53,2.47,0.35,3.17,4.23,1.23
0,Kurt Zouma,0.09,0.10,0.86,0.00,0.00,0.10,0.41,37.60,84.4,1.40,0.14,0.09,1.31,0.05,0.41,1.36,0.77,5.43,2.49


In [3]:
attribute_names = ['Tackles', 'Interceptions', 'Blocks', 'Clearances', 'Aerials won', 'Pass Completion %']
new_DC = new_DC.drop_duplicates(subset=['Name'])
new_DC['Total Stats'] = new_DC[attribute_names].sum(axis=1)

new_DC = new_DC[['Name'] + attribute_names + ['Total Stats']]
new_DC

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_DC['Total Stats'] = new_DC[attribute_names].sum(axis=1)


Unnamed: 0,Name,Tackles,Interceptions,Blocks,Clearances,Aerials won,Pass Completion %,Total Stats
0,Yunis Abdelhamid,2.22,1.73,1.76,3.14,2.51,82.7,94.06
0,Francesco Acerbi,1.16,1.40,0.89,3.52,2.77,88.3,98.04
0,Tosin Adarabioyo,1.30,1.12,0.86,6.04,2.55,82.0,93.87
0,Emmanuel Agbadou,2.68,1.21,1.13,4.03,2.26,85.8,97.11
0,Nayef Aguerd,1.52,1.02,1.47,4.46,1.81,78.5,88.78
...,...,...,...,...,...,...,...,...
0,Illia Zabarnyi,1.98,0.87,1.74,3.22,0.99,86.3,95.10
0,Dan-Axel Zagadou,0.99,1.49,1.35,5.74,3.75,85.4,98.72
0,Tanguy Zoukrou,2.47,0.35,3.17,4.23,1.23,86.4,97.85
0,Kurt Zouma,0.41,1.36,0.77,5.43,2.49,84.4,94.86


In [5]:
features = new_DC[attribute_names].values
target = new_DC['Total Stats'].values

class DCDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return {'features': self.features[idx], 'target': self.targets[idx]}


class DCModel(nn.Module):
    def __init__(self, input_size):
        super(DCModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(64, 1)
        self.regularization = nn.Linear(input_size, 1) 

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        x = self.fc4(x)
    
        reg_loss = torch.norm(self.regularization.weight, 2)
        return x + reg_loss

In [7]:
X_train, X_val, y_train, y_val = train_test_split(features, target, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

train_dataset = DCDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

model = DCModel(input_size=len(attribute_names))
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)
val_rmse_values = []
accuracy_values = []
mae_values = []

num_epochs = 2000
for epoch in range(num_epochs):
    for batch in train_loader:
        inputs, targets = batch['features'], batch['target']
        outputs = model(inputs)
        loss = criterion(outputs, targets.view(-1, 1))
        reg_loss = model.regularization.weight.norm(2)
        loss += 0.001 * reg_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        val_predictions = model(X_val_tensor)
        val_loss = criterion(val_predictions, y_val_tensor.view(-1, 1))
        rmse = torch.sqrt(val_loss)
        val_rmse_values.append(rmse.item())
        mae = torch.mean(torch.abs(val_predictions - y_val_tensor.view(-1, 1)))
        mae_values.append(mae.item())
        target_range = y_val_tensor.max() - y_val_tensor.min()
        accuracy = (1 - rmse / target_range) * 100
        accuracy_values.append(accuracy.item()) 
        print(f'Epoch {epoch+1}/{num_epochs}, RMSE: {rmse.item()}, MAE: {mae.item()}, Accuracy: {accuracy.item()}%')


Epoch 1/2000, RMSE: 88.16705322265625, MAE: 88.02268981933594, Accuracy: -353.53436279296875%
Epoch 2/2000, RMSE: 59.37904357910156, MAE: 57.51424789428711, Accuracy: -205.44784545898438%
Epoch 3/2000, RMSE: 51.6259651184082, MAE: 35.440284729003906, Accuracy: -165.56573486328125%
Epoch 4/2000, RMSE: 31.29303741455078, MAE: 22.955455780029297, Accuracy: -60.972450256347656%
Epoch 5/2000, RMSE: 38.80641555786133, MAE: 35.12495803833008, Accuracy: -99.62153625488281%
Epoch 6/2000, RMSE: 33.541954040527344, MAE: 30.01593017578125, Accuracy: -72.54096221923828%
Epoch 7/2000, RMSE: 26.951923370361328, MAE: 19.069570541381836, Accuracy: -38.64162063598633%
Epoch 8/2000, RMSE: 27.838272094726562, MAE: 19.629623413085938, Accuracy: -43.20103073120117%
Epoch 9/2000, RMSE: 24.546232223510742, MAE: 20.748519897460938, Accuracy: -26.266658782958984%
Epoch 10/2000, RMSE: 26.512081146240234, MAE: 22.994461059570312, Accuracy: -36.379051208496094%
Epoch 11/2000, RMSE: 20.691905975341797, MAE: 15.9075