In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import math
import csv
import tensorflow as tf

from tensorflow.keras import layers, models
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Reshape, Flatten, Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from torch.utils.data import DataLoader, TensorDataset
from itertools import islice





In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
def process_dataset(file_path, pitch_length, pitch_width):
   
    # Read every 25 line starting from the 4th line of the dataset
    with open(file_path, 'r') as file:
        lines = list(csv.reader(islice(file, 3, None, 25)))
 
    # Format the data into vectors of size 2 for x and y coordinates
    processed_data = []
    Ball_data = []
    
    for line in lines:
        period, frame, time, *coordinates = line
        p = int(period)
        processed_coordinates = []
        for i in range(0, len(coordinates), 2):
            x, y = float(coordinates[i]), float(coordinates[i + 1])
            if not math.isnan(x):
                if p == 1:
                    processed_coordinates.append([round(x * pitch_length, 2), round(y * pitch_width, 2)])
                else:
                    processed_coordinates.append([round((1-x) * pitch_length, 2), round(y * pitch_width, 2)])
        Ball = processed_coordinates[-1]
        processed_coordinates.pop()
        new_line =  processed_coordinates
        if len(new_line)==11:
            last_coord = new_line[-1]
        else:
            new_line.append(last_coord)
            
        processed_data.append(new_line)
        Ball_data.append(Ball)
    
    return processed_data, Ball_data

In [4]:
# Define the dimensions of the football pitch (in meters)
pitch_length = 105.0
pitch_width = 68.0

file_path_Home = 'C:/Users/user/Notebooks/Soccer Valuation/Sample_Game_1_RawTrackingData_Home_Team.csv'
file_path_Away = 'C:/Users/user/Notebooks/Soccer Valuation/Sample_Game_1_RawTrackingData_Away_Team.csv'
Home_team, Ball_team = process_dataset(file_path_Home, pitch_length, pitch_width)
Away_team, Ball_team = process_dataset(file_path_Away, pitch_length, pitch_width)


Compute States

In [5]:
def find_element(matrix, element):
    for i in range(len(matrix)):
        for j in range(len(matrix[0])):
            if matrix[i][j] == element:
                return np.array([i, j]) 
    return None 
    
def counting(list, mat):
    n=0
    for matrix in list:       
        if (matrix == mat).all():
            n+=1
    return n

def find_matrix(list, mat):
    index = 0
    for matrix in list:
        if (matrix == mat).all():
            return index
        index+=1

def mirror_matrix(matrix):
    mirrored_matrix = []
    for row in matrix:
        mirrored_row = list(reversed(row))  # Reverse the order of columns in the row
        mirrored_matrix.append(mirrored_row)
    return mirrored_matrix

def place_in_compo(role):
    role = torch.floor(torch.tensor(role/2))
    i,j = role
    return int(5*j + i)+2

In [6]:
def relative_positions(players, goal):
    n = len(players)
    mat = torch.zeros(n,n)
    sorted_v = sorted(players, key=lambda x: x[0])
    sorted_h = sorted(players, key=lambda x: x[1])
    k = 1
    if goal==False:
        k=2
    for player in players:
        i = sorted_h.index(player)
        j = sorted_v.index(player)
        if mat[i][j] != 0:
            if mat[i+1][j+1] == 0:
                mat[i+1][j+1] = k
            elif mat[i+1][j-1] == 0:
                mat[i+1][j-1] = k
            elif mat[i-1][j-1] == 0:
                mat[i-1][j-1] = k
            elif mat[i-1][j+1] == 0:
                mat[i-1][j+1] = k
            else:
                print("ERROR")
        else:
            mat[i][j] = k
        k+=1
    return mat

#This function gets the average team relative positions in a match
def get_average_compo(Team):
    Compositions = []
    
    for t in range(len(Team)):
        In_field_players = Team[t][1:]
        Compo = relative_positions(In_field_players, False)
        Compositions.append(Compo)
    
    #Find the most frequent Composition
    max = 0
    index = 0
    for comp in Compositions:
        n = counting(Compositions, comp)
        if n>= max:
            max = n
            index = find_matrix(Compositions, comp)
        
    average_compo = Compositions[index]
    return average_compo

In [7]:
def get_features(player, Team, average_compo_team, rel_pos, t, Delta):

    #First get indexes of the relative positions : Index_Game
    index_player = find_element(rel_pos, player)
    
    # Now compute features   
    #Team label
    if player<=11:
        team_label = 1
    elif player<=22:
        team_label = -1
    else:
        team_label = 23
    
    #Role_team
    if team_label==1:
        player -= 1
    if team_label == -1:
        player -= 12

    Role_team = find_element(average_compo_team, player)
    if Role_team is None:
        Role_team = [0,0]


    if player < 23:
        #position
        pos = Team[t][player]
        #Velocity
        if t>0:
            vel = (torch.tensor(Team[t][player]) - torch.tensor(Team[t-1][player])) / Delta
        else:
            vel = [0,0]
            vel = torch.tensor(vel)
        #Acceleration
        if t>1:
            acc = (vel - ( torch.tensor(Team[t-1][player]) - torch.tensor(Team[t-2][player])) / Delta ) /Delta
        else:
            acc = [0,0]
    else:
        #position
        pos = [Team[t]][0]
        #Velocity
        if t>0:
            vel = (torch.tensor( [Team[t]][0]) - torch.tensor( [Team[t-1]][0])) / Delta
        else:
            vel = [0,0]
            vel = torch.tensor(vel)
        #Acceleration
        if t>1:
            acc = (vel - ( torch.tensor([Team[t-1]][0]) -  torch.tensor([Team[t-2]][0])) / Delta ) /Delta
        else:
            acc = [0,0]

    return index_player, team_label, Role_team, pos, vel, acc
    
    
        

In [8]:
def get_state(Home, Away, Ball, average_compo_home, average_compo_away, t, Delta):

    # Initialize state matrices
    Mt = torch.zeros(23,23)
    Mr1 = torch.zeros(23,23)
    Mr2 = torch.zeros(23,23)
    Mp1 = torch.zeros(23,23)
    Mp2 = torch.zeros(23,23)
    Mv1 = torch.zeros(23,23)
    Mv2 = torch.zeros(23,23)
    Ma1 = torch.zeros(23,23)
    Ma2 = torch.zeros(23,23)
   
    Ball[t]
    frame = Home[t] + Away[t] + [Ball[t]]
    rel_pos = relative_positions(frame, True)
    
    for p in range(2,12):
        index_player, team_label, Role_team, pos, vel, acc = get_features(p, Home, average_compo_home, rel_pos, t, Delta)
        i,j = index_player
        Mt[i][j] = team_label
        Mr1[i][j] = Role_team[0] 
        Mr2[i][j] = Role_team[1] 
        Mp1[i][j] = pos[0] 
        Mp2[i][j] = pos[1] 
        Mv1[i][j] = vel[0] 
        Mv2[i][j] = vel[1] 
        Ma1[i][j] = acc[0] 
        Ma2[i][j] = acc[1] 
    for p in range(12,23):
        index_player, team_label, Role_team, pos, vel, acc = get_features(p, Away, average_compo_home, rel_pos, t, Delta)
        i,j = index_player
        Mt[i][j] = team_label
        Mr1[i][j] = Role_team[0] 
        Mr2[i][j] = Role_team[1] 
        Mp1[i][j] = pos[0] 
        Mp2[i][j] = pos[1] 
        Mv1[i][j] = vel[0] 
        Mv2[i][j] = vel[1] 
        Ma1[i][j] = acc[0] 
        Ma2[i][j] = acc[1] 
    p = 23
    index_player, team_label, Role_team, pos, vel, acc = get_features(p, Ball, average_compo_home, rel_pos, t, Delta)
    i,j = index_player
    Mt[i][j] = team_label
    Mr1[i][j] =i
    Mr2[i][j] = j 
    Mp1[i][j] = pos[0] 
    Mp2[i][j] = pos[1] 
    Mv1[i][j] = vel[0] 
    Mv2[i][j] = vel[1] 
    Ma1[i][j] = acc[0] 
    Ma2[i][j] = acc[1] 

    
    state = torch.stack([Mt, Mr1, Mr2, Mp1, Mp2, Mv1, Mv2, Ma1, Ma2], axis=0)
    
    return state


In [9]:

#Compute the average team relative positions in the match

average_compo_home = get_average_compo(Home_team)
average_compo_away = get_average_compo(Away_team)
average_compo_away = mirror_matrix(average_compo_away)

torch.save(average_compo_home, 'average_compo_home.pt')
torch.save(average_compo_away, 'average_compo_away.pt')


"\n#Compute the average team relative positions in the match\n\naverage_compo_home = get_average_compo(Home_team)\naverage_compo_away = get_average_compo(Away_team)\naverage_compo_away = mirror_matrix(average_compo_away)\n\ntorch.save(average_compo_home, 'average_compo_home.pt')\ntorch.save(average_compo_away, 'average_compo_away.pt')\n"

In [10]:

#Compute the states
Delta = 1
Mt = np.zeros((23, 23))
Mr1 = np.zeros((23, 23))
Mr2 = np.zeros((23, 23))
Mp1 = np.zeros((23, 23))
Mp2 = np.zeros((23, 23))
Mv1 = np.zeros((23, 23))
Mv2 = np.zeros((23, 23))
Ma1 = np.zeros((23, 23))
Ma2 = np.zeros((23, 23))
state = np.stack([Mt,Mr1, Mr2, Mp1, Mp2, Mv1, Mv2, Ma1, Ma2], axis = 2)
States = []
for t in range (len(Home_team)):
    if t%500==0:
        print(t, "/", len(Home_team))
    new_state = get_state(Home_team, Away_team, Ball_team, average_compo_home, average_compo_away, t, Delta)
    States.append(new_state)
    state = new_state
States = torch.stack(States)

torch.save(States, 'States.pt')


'\n#Compute the states\nDelta = 1\nMt = np.zeros((23, 23))\nMr1 = np.zeros((23, 23))\nMr2 = np.zeros((23, 23))\nMp1 = np.zeros((23, 23))\nMp2 = np.zeros((23, 23))\nMv1 = np.zeros((23, 23))\nMv2 = np.zeros((23, 23))\nMa1 = np.zeros((23, 23))\nMa2 = np.zeros((23, 23))\nstate = np.stack([Mt,Mr1, Mr2, Mp1, Mp2, Mv1, Mv2, Ma1, Ma2], axis = 2)\nStates = []\nfor t in range (len(Home_team)):\n    if t%500==0:\n        print(t, "/", len(Home_team))\n    new_state = get_state(Home_team, Away_team, Ball_team, average_compo_home, average_compo_away, t, Delta)\n    States.append(new_state)\n    state = new_state\nStates = torch.stack(States)\n\ntorch.save(States, \'States.pt\')\n'

In [11]:
"""
average_compo_home = torch.load('average_compo_home.pt')
average_compo_away =  torch.load('average_compo_away.pt')
States = torch.load('States.pt')
"""

Convolutional Autoencoder

In [12]:
# Create DataLoader
batch_size = 32
data_loader = DataLoader(States, batch_size=batch_size, shuffle=True)

class ConvAutoencoder(nn.Module):
    def __init__(self):
        super(ConvAutoencoder, self).__init__()
        
         # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(9, 32, kernel_size=3, stride=1, padding=1),  # Input size: 9x23x23, Output size: 32x23x23
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), # Input size: 32x23x23, Output size: 64x23x23
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),                # Input size: 64x23x23, Output size: 64x11x11
            nn.Flatten(),                                         # Flatten the output into a vector
            nn.Linear(64*11*11, 128),                             # Fully connected layer
            nn.ReLU(),
            nn.Linear(128, 10)                                    # Output layer with 10 units
        )

        
        
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(10, 128),                       # Input size: 1x10, Output size: 1x128
            nn.ReLU(),
            nn.Linear(128, 64*11*11),                 # Fully connected layer to match desired shape
            nn.ReLU(),
            nn.Unflatten(1, (64, 11, 11)),            # Reshape the output into a 3D tensor: 64x11x11
            nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2), # Transposed convolutional layer: 32x23x23
            nn.ReLU(),
            nn.ConvTranspose2d(32, 9, kernel_size=3, stride=1, padding=0), # Transposed convolutional layer: 9x23x23
            nn.ReLU(),
            nn.ZeroPad2d((0, -1, 0, -1))  # Crop the output to match input size
        )        
        
        
        
    def encode(self, x):
        x = self.encoder(x)
        return x

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
        
   
model = ConvAutoencoder()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
Encoded_States = []

# Training loop
num_epochs = 15
for epoch in range(num_epochs):
    running_loss = 0.0
    # Inside the training loop
    for data in data_loader:
        optimizer.zero_grad()
        input_data = data 
        data = torch.tensor(data)

        #save encoded states for the LSTM
        encoded =  model.encode(data)
        for d in encoded:
            Encoded_States.append(d)
            
        reconstructions= model(input_data)
        loss = criterion(reconstructions, input_data)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(data_loader)}")

   
print('Finished Training')
torch.save(Encoded_States, 'Encoded_States.pt') 

  data = torch.tensor(data)


Epoch 1/15, Loss: 22.368934390309093
Epoch 2/15, Loss: 20.496269194634404
Epoch 3/15, Loss: 19.66093844109839
Epoch 4/15, Loss: 19.072323945852425
Epoch 5/15, Loss: 18.57374625153594
Epoch 6/15, Loss: 18.13703979240669
Epoch 7/15, Loss: 17.7287781893552
Epoch 8/15, Loss: 17.33902326520983
Epoch 9/15, Loss: 17.001966162042304
Epoch 10/15, Loss: 16.676660710638696
Epoch 11/15, Loss: 16.310682747390246
Epoch 12/15, Loss: 15.9401720225156
Epoch 13/15, Loss: 15.626561971811148
Epoch 14/15, Loss: 15.329637013948881
Epoch 15/15, Loss: 15.012875593625582
Finished Training


LSTM

In [13]:
#Create Dataset
Datasets = []
Delta = 1
n = len(Home_team)
T = 10
m = int(torch.floor(torch.tensor(n/T)))
#n=5

for p in range(23):
    X = torch.zeros(m, 10, 17)
    Y = torch.zeros(m, 2)
    D = torch.zeros(10,17)
    i = 0
    for t in range(n):
        if i==T:
            X[int(torch.floor(torch.tensor(t/T)))] = D
            Y[int(torch.floor(torch.tensor(t/T)))] = pos_tensor
            i = 0
        else:
            frame = Home_team[t] + Away_team[t] + [Ball_team[t]]
            rel_pos = relative_positions(frame, True)
            if p<=10:
                index_player, team_label, Role_team, pos, vel, acc = get_features(p, Home_team, average_compo_home, rel_pos, t, Delta)
            elif p<=21:
                index_player, team_label, Role_team, pos, vel, acc = get_features(p, Away_team, average_compo_home, rel_pos, t, Delta)
            else:
                index_player, team_label, Role_team, pos, vel, acc = get_features(23, Ball_team, average_compo_home, rel_pos, t, Delta)
            pos_tensor = torch.tensor(pos)
            vel_tensor = torch.tensor(vel)
            acc_tensor = torch.tensor(acc)
            team_label = [team_label]
            team_tensor = torch.tensor(team_label)
            features = torch.cat((pos_tensor, vel_tensor, acc_tensor, team_tensor))
            input = torch.cat((features, Encoded_States[t]))
            D[i] = input
            i+=1
    Datasets.append([X,Y])

# Define the percentage of data for training and testing
train_ratio = 0.8  

# Initialize lists to store the split datasets
split_datasets = []

# Loop over each dataset
for X, Y in Datasets:
    # Determine the number of samples for training and testing
    num_samples = X.size(0)
    num_train = int(train_ratio * num_samples)
    num_test = num_samples - num_train
    
    # Split the dataset into training and testing sets
    X_train, X_test = X[:num_train], X[num_train:]
    y_train, y_test = Y[:num_train], Y[num_train:]
    
    # Append the split dataset to the list
    split_datasets.append((X_train, y_train, X_test, y_test))
torch.save(split_datasets, 'split_datasets.pt')

  vel_tensor = torch.tensor(vel)
  acc_tensor = torch.tensor(acc)


In [None]:
"""
split_datasets = torch.load('split_datasets.pt')
"""

In [14]:

# Parameters
input_dim = 17
output_dim = 2
num_units = 128
epochs = 200
batch_size = 32
sequence_length = 10

# Define the LSTM model class
class LSTMModel(nn.Module):
    def __init__(self, input_dim, num_units, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, num_units, batch_first=True) 
        self.fc = nn.Linear(num_units, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Initialize and save all models
for i in range(27):
    model = LSTMModel(input_dim, num_units, output_dim)
    torch.save(model.state_dict(), f'lstm_model_{i+1}.pt')

# Loop to train and evaluate models
for i, data in enumerate(split_datasets):
    X_train, y_train, X_test, y_test = data
    
    # Convert data to PyTorch tensors
    X_train_tensor = torch.tensor(X_train).float()
    y_train_tensor = torch.tensor(y_train).float()
    X_test_tensor = torch.tensor(X_test).float()
    y_test_tensor = torch.tensor(y_test).float()
    
    # Create DataLoader for training and testing
    train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=batch_size)

    # Find corresponding model
    k = i+1
    if k == 1 or k==12:
        m = 1
    elif k > 1 and k < 12:
        role = find_element(average_compo_home, k)
        m = place_in_compo(role)
    elif k > 11 and k < 23:
        role = find_element(average_compo_away, k-11)
        
        m = place_in_compo(role)   
    else:
        m = 27

    # Load the model
    model = LSTMModel(input_dim, num_units, output_dim)
    model.load_state_dict(torch.load(f'lstm_model_{m}.pt'))

    # Define loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())

    # Learning rate scheduler
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

    # Training loop
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)  # Gradient clipping
            optimizer.step()
            epoch_loss += loss.item() * inputs.size(0)
        epoch_loss /= len(train_loader.dataset)
        
        # Step the learning rate scheduler
        scheduler.step()

        if epoch % 10 == 0:
            print(f"Epoch [{epoch}/{epochs}], Loss: {epoch_loss:.4f}")

    # Save the trained model
    torch.save(model.state_dict(), f'lstm_model_{m}.pt')

    # Evaluate the model
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            test_loss += criterion(outputs, targets).item() * inputs.size(0)
    test_loss /= len(test_loader.dataset)
    print(f"Model {m} - Test Loss: {test_loss:.4f}")


  X_train_tensor = torch.tensor(X_train).float()
  y_train_tensor = torch.tensor(y_train).float()
  X_test_tensor = torch.tensor(X_test).float()
  y_test_tensor = torch.tensor(y_test).float()


Epoch [0/200], Loss: 2148.3388
Epoch [10/200], Loss: 827.1777
Epoch [20/200], Loss: 310.9864
Epoch [30/200], Loss: 144.2034
Epoch [40/200], Loss: 81.0035
Epoch [50/200], Loss: 47.6100
Epoch [60/200], Loss: 42.9464
Epoch [70/200], Loss: 38.8891
Epoch [80/200], Loss: 35.2047
Epoch [90/200], Loss: 31.7666
Epoch [100/200], Loss: 28.8495
Epoch [110/200], Loss: 28.5225
Epoch [120/200], Loss: 28.2153
Epoch [130/200], Loss: 27.8947
Epoch [140/200], Loss: 27.5804
Epoch [150/200], Loss: 27.2779
Epoch [160/200], Loss: 27.2463
Epoch [170/200], Loss: 27.2139
Epoch [180/200], Loss: 27.1823
Epoch [190/200], Loss: 27.1502
Model 1 - Test Loss: 36.3433
Epoch [0/200], Loss: 568.7514
Epoch [10/200], Loss: 71.1043
Epoch [20/200], Loss: 18.3242
Epoch [30/200], Loss: 5.4721
Epoch [40/200], Loss: 1.8768
Epoch [50/200], Loss: 0.8390
Epoch [60/200], Loss: 0.6455
Epoch [70/200], Loss: 0.5339
Epoch [80/200], Loss: 0.4451
Epoch [90/200], Loss: 0.3776
Epoch [100/200], Loss: 0.3178
Epoch [110/200], Loss: 0.3094
Epoc