In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import ast
import keras

In [4]:
csv_path = 'archive/FullBacks.csv'
FB = pd.read_csv(csv_path)

FB['Attribute Vector'] = FB['Attribute Vector'].apply(ast.literal_eval)

new_attribute_names = [
    'Non-Penalty Goals', 'Non-Penalty xG', 'Shots Total', 'Assists',
    'xAG', 'npxG + xAG', 'Shot-Creating Actions', 'Passes Attempted',
    'Pass Completion %', 'Progressive Passes', 'Progressive Carries',
    'Successful Take-Ons', 'Touches (Att Pen)', 'Progressive Passes Rec',
    'Tackles', 'Interceptions', 'Blocks', 'Clearances', 'Aerials won'
]

new_FB = pd.DataFrame(columns=['Name'])

for _, row in FB.iterrows():
    new_row = {'Name': row['Name']}
    for attribute_name, value in zip(new_attribute_names, row['Attribute Vector']):
        new_row[attribute_name] = value
    new_FB = pd.concat([new_FB, pd.DataFrame([new_row])])

new_FB

Unnamed: 0,Name,Non-Penalty Goals,Non-Penalty xG,Shots Total,Assists,xAG,npxG + xAG,Shot-Creating Actions,Passes Attempted,Pass Completion %,Progressive Passes,Progressive Carries,Successful Take-Ons,Touches (Att Pen),Progressive Passes Rec,Tackles,Interceptions,Blocks,Clearances,Aerials won
0,Abner,0.00,0.21,0.75,0.00,0.02,0.24,1.12,48.86,71.7,2.12,1.12,0.75,1.75,3.49,1.62,1.00,1.62,3.74,1.25
0,Marcos Acuña,0.10,0.05,0.92,0.10,0.16,0.20,2.66,57.39,73.1,4.40,2.06,1.23,1.08,4.08,2.85,0.95,0.85,1.65,1.30
0,Ruben Aguilar,0.00,0.02,0.35,0.00,0.08,0.10,1.39,64.93,82.6,7.85,2.50,0.49,1.74,5.90,1.39,1.32,1.11,1.94,0.83
0,Ola Aina,0.10,0.08,0.82,0.10,0.22,0.30,2.77,55.89,73.9,4.42,3.60,2.36,1.34,4.42,1.75,1.13,1.23,2.67,1.23
0,Rayan Aït Nouri,0.08,0.06,0.84,0.00,0.08,0.14,1.92,48.35,81.3,3.60,2.51,2.34,2.34,6.36,2.76,0.75,0.25,2.34,1.17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Mehdi Zeffane,0.14,0.15,1.09,0.14,0.10,0.25,2.05,37.34,73.6,2.46,1.23,0.55,1.50,4.24,1.64,3.01,1.91,1.64,0.41
0,Jordan Zemura,0.00,0.02,0.25,0.06,0.12,0.14,2.17,36.77,71.2,2.79,2.41,1.11,1.92,2.97,2.41,0.99,1.92,1.98,0.43
0,Oleksandr Zinchenko,0.04,0.06,1.16,0.08,0.09,0.15,2.36,81.12,86.3,9.34,2.67,0.70,1.36,2.87,1.71,0.97,0.85,1.55,2.09
0,Nadir Zortea,0.10,0.03,0.92,0.10,0.12,0.15,2.25,52.01,72.8,4.81,3.58,1.64,1.54,3.89,2.56,0.41,1.43,2.56,1.13


In [5]:
attribute_names = ['npxG + xAG', 'Tackles', 'Interceptions', 'Blocks', 'Clearances', 'Aerials won', 'Pass Completion %']
new_FB = new_FB.drop_duplicates(subset=['Name'])
new_FB['Total Stats'] = new_FB[attribute_names].sum(axis=1)

new_FB = new_FB[['Name'] + attribute_names + ['Total Stats']]
new_FB

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_FB['Total Stats'] = new_FB[attribute_names].sum(axis=1)


Unnamed: 0,Name,npxG + xAG,Tackles,Interceptions,Blocks,Clearances,Aerials won,Pass Completion %,Total Stats
0,Abner,0.24,1.62,1.00,1.62,3.74,1.25,71.7,81.17
0,Marcos Acuña,0.20,2.85,0.95,0.85,1.65,1.30,73.1,80.90
0,Ruben Aguilar,0.10,1.39,1.32,1.11,1.94,0.83,82.6,89.29
0,Ola Aina,0.30,1.75,1.13,1.23,2.67,1.23,73.9,82.21
0,Rayan Aït Nouri,0.14,2.76,0.75,0.25,2.34,1.17,81.3,88.71
...,...,...,...,...,...,...,...,...,...
0,Akim Zedadka,0.11,1.66,1.15,0.92,2.63,1.34,78.4,86.21
0,Mehdi Zeffane,0.25,1.64,3.01,1.91,1.64,0.41,73.6,82.46
0,Jordan Zemura,0.14,2.41,0.99,1.92,1.98,0.43,71.2,79.07
0,Oleksandr Zinchenko,0.15,1.71,0.97,0.85,1.55,2.09,86.3,93.62


In [6]:
features = new_FB[attribute_names].values
target = new_FB['Total Stats'].values

class FBDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return {'features': self.features[idx], 'target': self.targets[idx]}


class FBModel(nn.Module):
    def __init__(self, input_size):
        super(FBModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(64, 1)
        self.regularization = nn.Linear(input_size, 1) 

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        x = self.fc4(x)
    
        reg_loss = torch.norm(self.regularization.weight, 2)
        return x + reg_loss

In [7]:
X_train, X_val, y_train, y_val = train_test_split(features, target, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

train_dataset = FBDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

modelFB = FBModel(input_size=len(attribute_names))
criterion = nn.MSELoss()
optimizer = optim.Adam(modelFB.parameters(), lr=0.005)
mae_values = []

num_epochs = 2000
for epoch in range(num_epochs):
    for batch in train_loader:
        inputs, targets = batch['features'], batch['target']
        outputs = modelFB(inputs)
        loss = criterion(outputs, targets.view(-1, 1))
        reg_loss = modelFB.regularization.weight.norm(2)
        loss += 0.001 * reg_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        val_predictions = modelFB(X_val_tensor)
        val_loss = criterion(val_predictions, y_val_tensor.view(-1, 1))
        rmse = torch.sqrt(val_loss)
        mae = torch.mean(torch.abs(val_predictions - y_val_tensor.view(-1, 1)))
        mae_values.append(mae.item())
        target_range = y_val_tensor.max() - y_val_tensor.min()
        accuracy = (1 - rmse / target_range) * 100
        print(f'Epoch {epoch+1}/{num_epochs}, RMSE: {rmse.item()}, MAE: {mae.item()}, Accuracy: {accuracy.item()}%')

Epoch 1/2000, RMSE: 75.23184967041016, MAE: 74.94963073730469, Accuracy: -173.66981506347656%
Epoch 2/2000, RMSE: 42.54132843017578, MAE: 40.11097717285156, Accuracy: -54.75197982788086%
Epoch 3/2000, RMSE: 27.47073745727539, MAE: 21.396778106689453, Accuracy: 0.07008910179138184%
Epoch 4/2000, RMSE: 21.734716415405273, MAE: 18.00450325012207, Accuracy: 20.93593406677246%
Epoch 5/2000, RMSE: 31.46221923828125, MAE: 28.107789993286133, Accuracy: -14.449667930603027%
Epoch 6/2000, RMSE: 21.31898307800293, MAE: 17.973081588745117, Accuracy: 22.4482421875%
Epoch 7/2000, RMSE: 20.44917869567871, MAE: 15.735651969909668, Accuracy: 25.61231231689453%
Epoch 8/2000, RMSE: 17.654861450195312, MAE: 14.653091430664062, Accuracy: 35.777164459228516%
Epoch 9/2000, RMSE: 22.71152114868164, MAE: 19.537382125854492, Accuracy: 17.38262176513672%
Epoch 10/2000, RMSE: 18.138771057128906, MAE: 14.724847793579102, Accuracy: 34.01685333251953%
Epoch 11/2000, RMSE: 17.18950080871582, MAE: 14.361566543579102, 