### General Prediction Model for Football Players

This prediction model evaluates a player's overall performance and impact based on the following aggregated metrics

---

*Performance Index*:

A weighted index combining:
- Goals and assists (G+A)
- xG+xAG (expected contributions)
- PrgC, PrgP, PrgR (progression metrics)
- Defensive contributions (Tkl, Int, Blocks)

In [75]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_lightning import Trainer
import torch
import torchmetrics
from pytorch_lightning.core.module import LightningModule

In [76]:
class TFTLightningModule(LightningModule):
    def __init__(self, tft_model):
        super().__init__()
        self.model = tft_model

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        y_hat = self.model(batch)
        loss = torchmetrics.functional.mean_squared_error(y_hat, batch["target"])
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.03)

In [77]:
# Load datasets
def load_data():
    df_22_23 = pd.read_csv('Data/df_22_23.csv')
    df_23_24 = pd.read_csv('Data/df_23_24.csv')
    df_24_25 = pd.read_csv('Data/df_24_25.csv')
    return df_22_23, df_23_24, df_24_25

In [78]:
# Preprocess data for TFT
def preprocess_data_tft(df):
    # Add time_idx for temporal ordering
    df = df.reset_index()  # Reset index to ensure uniqueness
    df['time_idx'] = pd.factorize(df['MP'])[0]

    # Feature Engineering
    df['G+A'] = df['Gls'] + df['Ast']
    df['xG+xAG'] = df['xG'] + df['xAG']

    df['Performance_Index'] = (
        df['G+A'] * 0.4 + 
        df['xG+xAG'] * 0.3 + 
        (df['PrgC'] + df['PrgP'] + df['PrgR']) * 0.2 + 
        (df['Tkl'] + df['Int'] + df['Blocks']) * 0.1
    )

    df['Future_Potential'] = (
        (1 / (df['Age'] + 1)) * df['MP'] + 
        df.groupby('Player')['G+A'].transform(lambda x: x.diff().fillna(0))
    )

    # Verify uniqueness of the index
    if not df.index.is_unique:
        raise ValueError("Data index must be unique.")

    return df

In [79]:
# Create TimeSeriesDataSet
def create_tft_dataset(df):
    df = preprocess_data_tft(df)
    
    max_prediction_length = 1  # predict one season ahead
    max_encoder_length = 3  # use data from the last three seasons
    
    training = TimeSeriesDataSet(
        df,
        time_idx="time_idx",
        target="G+A",
        group_ids=["Player"],
        max_encoder_length=max_encoder_length,
        max_prediction_length=max_prediction_length,
        static_categoricals=["Player"],
        static_reals=["Age"],
        time_varying_known_reals=["time_idx"],
        time_varying_unknown_reals=["Gls", "Ast", "xG", "xAG", "PrgC", "PrgP", "PrgR", "Tkl", "Int", "Blocks", "Performance_Index", "Future_Potential"],
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
        allow_missing_timesteps=True
    )

    return training

In [80]:
# Train TFT model
def train_tft(training):
    trainer = Trainer(accelerator="cpu", max_epochs=30)

    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=0.03,
        hidden_size=32,
        attention_head_size=4,
        dropout=0.1,
        hidden_continuous_size=16,
        output_size=7,  # for regression
        loss=torchmetrics.MeanSquaredError(),
    )

    model = TFTLightningModule(tft)
    trainer.fit(model, train_dataloaders=training.to_dataloader(train=True, batch_size=64))
    return model

In [81]:
# Evaluate TFT model
def evaluate_tft(model, test_dataloader):
    predictions = model.model.predict(test_dataloader)
    return predictions

In [82]:
# Main workflow
def main():
    df_22_23, df_23_24, df_24_25 = load_data()

    df_train = pd.concat([df_22_23, df_23_24])
    df_test = df_24_25

    training = create_tft_dataset(df_train)
    testing = create_tft_dataset(df_test)

    model = train_tft(training)

    test_dataloader = testing.to_dataloader(train=False, batch_size=64)
    predictions = evaluate_tft(model, test_dataloader)

    print(predictions)

In [83]:
# Execute the workflow
if __name__ == "__main__":
    main()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\adity\AppData\Local\Programs\Python\Python312\Lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\adity\AppData\Local\Programs\Python\Python312\Lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)

  | Name  | Type                      | Params | Mode 
------------------------------------------------------------
0 | model | TemporalFusionTransformer | 239 K  | train
--------------------------------------

AttributeError: module 'tensorflow.lite.python.lite_constants' has no attribute 'UNSET'