### General Prediction Model for Football Players

This prediction model evaluates a player's overall performance and impact based on the following aggregated metrics

---

*Performance Index*:

A weighted index combining:
- Goals and assists (G+A)
- xG+xAG (expected contributions)
- PrgC, PrgP, PrgR (progression metrics)
- Defensive contributions (Tkl, Int, Blocks)

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_lightning import Trainer
import torch

  from tqdm.autonotebook import tqdm


In [15]:
# Load datasets
def load_data():
    df_22_23 = pd.read_csv('Data/df_22_23.csv')
    df_23_24 = pd.read_csv('Data/df_23_24.csv')
    df_24_25 = pd.read_csv('Data/df_24_25.csv')
    return df_22_23, df_23_24, df_24_25

In [29]:
# Preprocess data for TFT
def preprocess_data_tft(df):
    # Add time_idx for temporal ordering
    df = df.reset_index()  # Reset index to ensure uniqueness
    df['time_idx'] = pd.factorize(df['MP'])[0]

    # Feature Engineering
    df['G+A'] = df['Gls'] + df['Ast']
    df['xG+xAG'] = df['xG'] + df['xAG']

    df['Performance_Index'] = (
        df['G+A'] * 0.4 + 
        df['xG+xAG'] * 0.3 + 
        (df['PrgC'] + df['PrgP'] + df['PrgR']) * 0.2 + 
        (df['Tkl'] + df['Int'] + df['Blocks']) * 0.1
    )

    df['Future_Potential'] = (
        (1 / (df['Age'] + 1)) * df['MP'] + 
        df.groupby('Player')['G+A'].transform(lambda x: x.diff().fillna(0))
    )

    # Verify uniqueness of the index
    if not df.index.is_unique:
        raise ValueError("Data index must be unique.")

    return df

In [30]:
# Create TimeSeriesDataSet
def create_tft_dataset(df):
    df = preprocess_data_tft(df)
    
    max_prediction_length = 1  # predict one season ahead
    max_encoder_length = 3  # use data from the last three seasons
    
    training = TimeSeriesDataSet(
        df,
        time_idx="time_idx",
        target="G+A",
        group_ids=["Player"],
        max_encoder_length=max_encoder_length,
        max_prediction_length=max_prediction_length,
        static_categoricals=["Player"],
        static_reals=["Age"],
        time_varying_known_reals=["time_idx"],
        time_varying_unknown_reals=["Gls", "Ast", "xG", "xAG", "PrgC", "PrgP", "PrgR", "Tkl", "Int", "Blocks", "Performance_Index", "Future_Potential"],
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
        allow_missing_timesteps=True
    )

    return training

In [31]:
# Train TFT model
def train_tft(training):
    trainer = Trainer(gpus=1 if torch.cuda.is_available() else 0, max_epochs=30)

    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=0.03,
        hidden_size=32,
        attention_head_size=4,
        dropout=0.1,
        hidden_continuous_size=16,
        output_size=7,  # for regression
        loss=torch.nn.MSELoss(),
    )

    trainer.fit(tft, train_dataloaders=training.to_dataloader(train=True, batch_size=64))
    return tft

In [32]:
# Evaluate TFT model
def evaluate_tft(tft, test_dataloader):
    predictions = tft.predict(test_dataloader)
    return predictions

In [33]:
# Main workflow
def main():
    df_22_23, df_23_24, df_24_25 = load_data()

    df_train = pd.concat([df_22_23, df_23_24])
    df_test = df_24_25

    training = create_tft_dataset(df_train)
    testing = create_tft_dataset(df_test)

    tft = train_tft(training)

    test_dataloader = testing.to_dataloader(train=False, batch_size=64)
    predictions = evaluate_tft(tft, test_dataloader)

    print(predictions)

In [34]:
# Execute the workflow
if __name__ == "__main__":
    main()



TypeError: Trainer.__init__() got an unexpected keyword argument 'gpus'