In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import ast
import keras

In [2]:
csv_path = 'archive/Forwards.csv'
FW = pd.read_csv(csv_path)

FW['Attribute Vector'] = FW['Attribute Vector'].apply(ast.literal_eval)

new_attribute_names = [
    'Non-Penalty Goals', 'Non-Penalty xG', 'Shots Total', 'Assists',
    'xAG', 'npxG + xAG', 'Shot-Creating Actions', 'Passes Attempted',
    'Pass Completion %', 'Progressive Passes', 'Progressive Carries',
    'Successful Take-Ons', 'Touches (Att Pen)', 'Progressive Passes Rec',
    'Tackles', 'Interceptions', 'Blocks', 'Clearances', 'Aerials won'
]

new_FW = pd.DataFrame(columns=['Name'])

for _, row in FW.iterrows():
    new_row = {'Name': row['Name']}
    for attribute_name, value in zip(new_attribute_names, row['Attribute Vector']):
        new_row[attribute_name] = value
    new_FW = pd.concat([new_FW, pd.DataFrame([new_row])])

new_FW

Unnamed: 0,Name,Non-Penalty Goals,Non-Penalty xG,Shots Total,Assists,xAG,npxG + xAG,Shot-Creating Actions,Passes Attempted,Pass Completion %,Progressive Passes,Progressive Carries,Successful Take-Ons,Touches (Att Pen),Progressive Passes Rec,Tackles,Interceptions,Blocks,Clearances,Aerials won
0,Matthis Abline,0.26,0.28,3.07,0.09,0.08,0.37,2.13,12.21,65.7,1.11,1.96,1.20,5.81,5.72,0.51,0.34,0.77,0.34,1.20
0,Matthis Abline,0.26,0.28,3.07,0.09,0.08,0.37,2.13,12.21,65.7,1.11,1.96,1.20,5.81,5.72,0.51,0.34,0.77,0.34,1.20
0,Tammy Abraham,0.27,0.43,2.38,0.21,0.15,0.58,2.77,20.96,64.5,2.03,1.44,0.79,4.86,5.68,0.85,0.09,0.56,0.53,3.56
0,Che Adams,0.23,0.30,2.12,0.14,0.13,0.42,2.58,22.50,66.3,1.54,0.77,0.72,4.65,5.20,0.77,0.18,0.59,0.99,2.76
0,Sargis Adamyan,0.14,0.48,2.93,0.28,0.11,0.57,3.07,25.02,68.7,3.35,1.68,0.84,6.71,8.11,1.96,0.56,1.68,0.70,1.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Chris Wood,0.22,0.22,1.21,0.00,0.06,0.28,1.21,19.47,61.6,1.21,0.33,0.11,2.64,3.85,0.55,0.22,0.44,0.99,4.62
0,Chris Wood,0.22,0.22,1.21,0.00,0.06,0.28,1.21,19.47,61.6,1.21,0.33,0.11,2.64,3.85,0.55,0.22,0.44,0.99,4.62
0,Duván Zapata,0.13,0.25,2.65,0.13,0.07,0.33,2.98,26.68,68.2,2.59,2.27,1.30,6.02,9.06,0.52,0.26,0.39,1.10,2.72
0,Joshua Zirkzee,0.22,0.34,3.18,0.22,0.15,0.49,3.94,28.14,77.0,1.86,1.09,1.75,3.61,8.10,0.44,0.44,0.44,0.44,1.31


In [3]:
attribute_names = ['Non-Penalty Goals', 'Non-Penalty xG', 'Shots Total', 'Assists', 'xAG', 'npxG + xAG',
                   'Shot-Creating Actions']
new_FW = new_FW.drop_duplicates(subset=['Name'])
new_FW['Total Stats'] = new_FW[attribute_names].sum(axis=1)

new_FW = new_FW[['Name'] + attribute_names + ['Total Stats']]
new_FW

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_FW['Total Stats'] = new_FW[attribute_names].sum(axis=1)


Unnamed: 0,Name,Non-Penalty Goals,Non-Penalty xG,Shots Total,Assists,xAG,npxG + xAG,Shot-Creating Actions,Total Stats
0,Matthis Abline,0.26,0.28,3.07,0.09,0.08,0.37,2.13,6.28
0,Tammy Abraham,0.27,0.43,2.38,0.21,0.15,0.58,2.77,6.79
0,Che Adams,0.23,0.30,2.12,0.14,0.13,0.42,2.58,5.92
0,Sargis Adamyan,0.14,0.48,2.93,0.28,0.11,0.57,3.07,7.58
0,Felix Afena-Gyan,0.00,0.21,2.31,0.00,0.01,0.22,1.65,4.40
...,...,...,...,...,...,...,...,...,...
0,Jonas Wind,0.42,0.31,2.02,0.00,0.09,0.40,2.58,5.82
0,Chris Wood,0.22,0.22,1.21,0.00,0.06,0.28,1.21,3.20
0,Duván Zapata,0.13,0.25,2.65,0.13,0.07,0.33,2.98,6.54
0,Joshua Zirkzee,0.22,0.34,3.18,0.22,0.15,0.49,3.94,8.54


In [4]:
features = new_FW[attribute_names].values
target = new_FW['Total Stats'].values

class FWDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return {'features': self.features[idx], 'target': self.targets[idx]}


class FWModel(nn.Module):
    def __init__(self, input_size):
        super(FWModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(64, 1)
        self.regularization = nn.Linear(input_size, 1) 

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        x = self.fc4(x)
    
        reg_loss = torch.norm(self.regularization.weight, 2)
        return x + reg_loss

In [5]:
X_train, X_val, y_train, y_val = train_test_split(features, target, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

train_dataset = FWDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

modelFW = FWModel(input_size=len(attribute_names))
criterion = nn.MSELoss()
optimizer = optim.Adam(modelFW.parameters(), lr=0.005)
mae_values = []
num_epochs = 2000
for epoch in range(num_epochs):
    for batch in train_loader:
        inputs, targets = batch['features'], batch['target']
        outputs = modelFW(inputs)
        loss = criterion(outputs, targets.view(-1, 1))
        reg_loss = modelFW.regularization.weight.norm(2)
        loss += 0.001 * reg_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        val_predictions = modelFW(X_val_tensor)
        val_loss = criterion(val_predictions, y_val_tensor.view(-1, 1))
        rmse = torch.sqrt(val_loss)
        mae = torch.mean(torch.abs(val_predictions - y_val_tensor.view(-1, 1)))
        mae_values.append(mae.item())
        target_range = y_val_tensor.max() - y_val_tensor.min()
        accuracy = (1 - rmse / target_range) * 100
        print(f'Epoch {epoch+1}/{num_epochs}, RMSE: {rmse.item()}, MAE: {mae.item()}, Accuracy: {accuracy.item()}%')

Epoch 1/2000, RMSE: 2.4712586402893066, MAE: 1.9191550016403198, Accuracy: 70.22579956054688%
Epoch 2/2000, RMSE: 1.8312872648239136, MAE: 1.5909395217895508, Accuracy: 77.93629455566406%
Epoch 3/2000, RMSE: 2.6768555641174316, MAE: 2.434343099594116, Accuracy: 67.74872589111328%
Epoch 4/2000, RMSE: 1.6285611391067505, MAE: 1.4625355005264282, Accuracy: 80.3787841796875%
Epoch 5/2000, RMSE: 1.5365715026855469, MAE: 1.2298389673233032, Accuracy: 81.48709106445312%
Epoch 6/2000, RMSE: 2.281106472015381, MAE: 1.951714038848877, Accuracy: 72.51678466796875%
Epoch 7/2000, RMSE: 1.824061393737793, MAE: 1.5449167490005493, Accuracy: 78.02335357666016%
Epoch 8/2000, RMSE: 1.1427463293075562, MAE: 0.9415776133537292, Accuracy: 86.23197174072266%
Epoch 9/2000, RMSE: 1.3549853563308716, MAE: 1.0474284887313843, Accuracy: 83.67487335205078%
Epoch 10/2000, RMSE: 1.4753988981246948, MAE: 1.2307597398757935, Accuracy: 82.22410583496094%
Epoch 11/2000, RMSE: 1.241829752922058, MAE: 1.0294134616851807,