In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import ast
import keras

In [2]:
csv_path = 'archive/AtMid_Wingers.csv'
WG = pd.read_csv(csv_path)

WG['Attribute Vector'] = WG['Attribute Vector'].apply(ast.literal_eval)

new_attribute_names = [
    'Non-Penalty Goals', 'Non-Penalty xG', 'Shots Total', 'Assists',
    'xAG', 'npxG + xAG', 'Shot-Creating Actions', 'Passes Attempted',
    'Pass Completion %', 'Progressive Passes', 'Progressive Carries',
    'Successful Take-Ons', 'Touches (Att Pen)', 'Progressive Passes Rec',
    'Tackles', 'Interceptions', 'Blocks', 'Clearances', 'Aerials won'
]

new_WG = pd.DataFrame(columns=['Name'])

for _, row in WG.iterrows():
    new_row = {'Name': row['Name']}
    for attribute_name, value in zip(new_attribute_names, row['Attribute Vector']):
        new_row[attribute_name] = value
    new_WG = pd.concat([new_WG, pd.DataFrame([new_row])])

new_WG

Unnamed: 0,Name,Non-Penalty Goals,Non-Penalty xG,Shots Total,Assists,xAG,npxG + xAG,Shot-Creating Actions,Passes Attempted,Pass Completion %,Progressive Passes,Progressive Carries,Successful Take-Ons,Touches (Att Pen),Progressive Passes Rec,Tackles,Interceptions,Blocks,Clearances,Aerials won
0,Brenden Aaronson,0.04,0.15,1.56,0.11,0.16,0.31,3.60,30.24,74.3,3.26,1.63,1.29,2.73,5.73,1.71,0.19,1.63,0.23,0.30
0,Zakaria Aboukhlal,0.36,0.42,3.02,0.18,0.12,0.54,2.91,28.28,74.1,2.62,3.64,1.53,5.46,10.04,1.20,0.80,0.66,0.84,0.44
0,Karim Adeyemi,0.41,0.31,2.45,0.26,0.25,0.56,3.88,28.50,73.8,2.86,3.68,2.25,4.44,8.27,0.87,0.46,1.07,0.20,0.56
0,Amine Adli,0.34,0.33,2.40,0.15,0.13,0.46,4.16,37.88,75.1,3.13,3.08,1.96,4.40,9.20,1.37,0.39,1.86,0.44,1.96
0,Michel Aebischer,0.06,0.09,0.88,0.00,0.04,0.12,2.04,39.34,82.6,2.68,0.82,0.35,1.63,4.96,0.70,0.41,0.82,0.53,0.58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Nicolò Zaniolo,0.16,0.37,3.60,0.08,0.13,0.50,3.19,19.90,65.4,2.05,3.36,1.23,5.57,7.12,0.66,0.25,0.98,0.00,0.41
0,Arber Zeneli,0.00,0.19,2.79,0.18,0.28,0.47,4.23,38.70,70.2,4.59,4.05,1.53,5.04,10.62,0.81,0.18,0.72,0.18,0.36
0,Edon Zhegrova,0.26,0.31,3.58,0.34,0.39,0.70,6.65,51.48,76.8,4.43,6.31,4.52,6.73,16.53,1.53,0.17,0.77,0.17,0.00
0,Hakim Ziyech,0.00,0.13,3.12,0.33,0.28,0.42,4.67,54.29,65.0,6.12,3.45,2.00,3.23,10.57,2.23,0.78,1.56,0.56,0.00


In [3]:
attribute_names = ['Non-Penalty Goals', 'Non-Penalty xG', 'Shots Total', 'Assists',
    'xAG', 'npxG + xAG', 'Shot-Creating Actions', 'Passes Attempted',
    'Pass Completion %', 'Progressive Passes', 'Progressive Carries',
    'Successful Take-Ons']
new_WG = new_WG.drop_duplicates(subset=['Name'])
new_WG['Total Stats'] = new_WG[attribute_names].sum(axis=1)

new_WG = new_WG[['Name'] + attribute_names + ['Total Stats']]
new_WG

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_WG['Total Stats'] = new_WG[attribute_names].sum(axis=1)


Unnamed: 0,Name,Non-Penalty Goals,Non-Penalty xG,Shots Total,Assists,xAG,npxG + xAG,Shot-Creating Actions,Passes Attempted,Pass Completion %,Progressive Passes,Progressive Carries,Successful Take-Ons,Total Stats
0,Brenden Aaronson,0.04,0.15,1.56,0.11,0.16,0.31,3.60,30.24,74.3,3.26,1.63,1.29,116.65
0,Zakaria Aboukhlal,0.36,0.42,3.02,0.18,0.12,0.54,2.91,28.28,74.1,2.62,3.64,1.53,117.72
0,Karim Adeyemi,0.41,0.31,2.45,0.26,0.25,0.56,3.88,28.50,73.8,2.86,3.68,2.25,119.21
0,Amine Adli,0.34,0.33,2.40,0.15,0.13,0.46,4.16,37.88,75.1,3.13,3.08,1.96,129.12
0,Michel Aebischer,0.06,0.09,0.88,0.00,0.04,0.12,2.04,39.34,82.6,2.68,0.82,0.35,129.02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Nicolò Zaniolo,0.16,0.37,3.60,0.08,0.13,0.50,3.19,19.90,65.4,2.05,3.36,1.23,99.97
0,Arber Zeneli,0.00,0.19,2.79,0.18,0.28,0.47,4.23,38.70,70.2,4.59,4.05,1.53,127.21
0,Edon Zhegrova,0.26,0.31,3.58,0.34,0.39,0.70,6.65,51.48,76.8,4.43,6.31,4.52,155.77
0,Hakim Ziyech,0.00,0.13,3.12,0.33,0.28,0.42,4.67,54.29,65.0,6.12,3.45,2.00,139.81


In [4]:
features = new_WG[attribute_names].values
target = new_WG['Total Stats'].values

class WGDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return {'features': self.features[idx], 'target': self.targets[idx]}


class WGModel(nn.Module):
    def __init__(self, input_size):
        super(WGModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(64, 1)
        self.regularization = nn.Linear(input_size, 1) 

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        x = self.fc4(x)
    
        reg_loss = torch.norm(self.regularization.weight, 2)
        return x + reg_loss

In [5]:
X_train, X_val, y_train, y_val = train_test_split(features, target, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

train_dataset = WGDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

modelWG = WGModel(input_size=len(attribute_names))
criterion = nn.MSELoss()
optimizer = optim.Adam(modelWG.parameters(), lr=0.005)
mae_values = []
num_epochs = 1500
for epoch in range(num_epochs):
    for batch in train_loader:
        inputs, targets = batch['features'], batch['target']
        outputs = modelWG(inputs)
        loss = criterion(outputs, targets.view(-1, 1))
        reg_loss = modelWG.regularization.weight.norm(2)
        loss += 0.001 * reg_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        val_predictions = modelWG(X_val_tensor)
        val_loss = criterion(val_predictions, y_val_tensor.view(-1, 1))
        rmse = torch.sqrt(val_loss)
        mae = torch.mean(torch.abs(val_predictions - y_val_tensor.view(-1, 1)))
        mae_values.append(mae.item())
        target_range = y_val_tensor.max() - y_val_tensor.min()
        accuracy = (1 - rmse / target_range) * 100
        print(f'Epoch {epoch+1}/{num_epochs}, RMSE: {rmse.item()}, MAE: {mae.item()}, Accuracy: {accuracy.item()}%')


Epoch 1/1500, RMSE: 118.22999572753906, MAE: 117.04853820800781, Accuracy: -40.86738967895508%
Epoch 2/1500, RMSE: 98.44209289550781, MAE: 96.84052276611328, Accuracy: -17.29071044921875%
Epoch 3/1500, RMSE: 51.67814254760742, MAE: 45.6783561706543, Accuracy: 38.42708969116211%
Epoch 4/1500, RMSE: 71.57669830322266, MAE: 52.0851936340332, Accuracy: 14.71858024597168%
Epoch 5/1500, RMSE: 41.620601654052734, MAE: 32.28333282470703, Accuracy: 50.410343170166016%
Epoch 6/1500, RMSE: 39.35707473754883, MAE: 33.21780776977539, Accuracy: 53.107261657714844%
Epoch 7/1500, RMSE: 44.763248443603516, MAE: 38.93988800048828, Accuracy: 46.66597366333008%
Epoch 8/1500, RMSE: 35.97791290283203, MAE: 30.710268020629883, Accuracy: 57.13343048095703%
Epoch 9/1500, RMSE: 35.069095611572266, MAE: 27.46542739868164, Accuracy: 58.21625518798828%
Epoch 10/1500, RMSE: 32.826114654541016, MAE: 26.663333892822266, Accuracy: 60.888694763183594%
Epoch 11/1500, RMSE: 38.63283920288086, MAE: 32.996337890625, Accura