In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
from tqdm import tqdm
import os
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, ConfusionMatrixDisplay, classification_report

In [3]:
def readNFLPlay():
    subDirPath = '../NFL-Play/'
    plays = pd.read_csv(subDirPath+'plays.csv')
    return plays

In [3]:
NFLplays_raw = readNFLPlay()

In [4]:
NFLplays_raw.shape

(870384, 44)

In [5]:
NFLplays_raw.columns

Index(['playId', 'gameId', 'playSequence', 'quarter', 'possessionTeamId',
       'nonpossessionTeamId', 'playType', 'playType2', 'playTypeDetailed',
       'playNumberByTeam', 'gameClock', 'gameClockSecondsExpired',
       'gameClockStoppedAfterPlay', 'down', 'distance', 'fieldPosition',
       'distanceToGoalPre', 'noPlay', 'playDescription', 'playStats',
       'playDescriptionFull', 'typeOfPlay', 'changePossession', 'turnover',
       'safety', 'offensiveYards', 'netYards', 'firstDown', 'efficientPlay',
       'evPre', 'evPost', 'evPlay', 'fourthDownConversion',
       'thirdDownConversion', 'scorePossession', 'scoreNonpossession',
       'homeScorePre', 'visitingScorePre', 'homeScorePost',
       'visitingScorePost', 'distanceToGoalPost', 'fieldGoalProbability',
       'huddle', 'formation'],
      dtype='object')

In [6]:
NFLplays_raw.head()

Unnamed: 0,playId,gameId,playSequence,quarter,possessionTeamId,nonpossessionTeamId,playType,playType2,playTypeDetailed,playNumberByTeam,...,scorePossession,scoreNonpossession,homeScorePre,visitingScorePre,homeScorePost,visitingScorePost,distanceToGoalPost,fieldGoalProbability,huddle,formation
0,30298,26909,1,1,2200,3200,kickoff,"kickoff, returned","kickoff, returned",1,...,0,0,0,0,0,0,,,,
1,30299,26909,2,1,3200,2200,pass,"pass, complete","pass, complete",1,...,0,0,0,0,0,0,44.0,0.26,huddle,
2,30300,26909,3,1,3200,2200,pass,"pass, complete","pass, complete",2,...,0,0,0,0,0,0,30.0,0.74,no huddle,shotgun
3,30301,26909,4,1,3200,2200,pass,"pass, incomplete","pass, incomplete",3,...,0,0,0,0,0,0,30.0,0.91,no huddle,shotgun
4,30302,26909,5,1,3200,2200,pass,"pass, complete","pass, complete",4,...,0,0,0,0,0,0,28.0,0.91,no huddle,


In [7]:
indices = ['playId', 'gameId']

playCircumstance = ['playSequence', 
                'quarter', 
                'possessionTeamId',
                'nonpossessionTeamId', 
                'playNumberByTeam',
                'gameClock', 
                'down', 
                'distance',
                'distanceToGoalPre',
                'netYards',
                'scorePossession',
                'scoreNonpossession',
                'fieldGoalProbability',
                'fieldPosition',]

# classification
playType = ['playType'
            'huddle',
            'formation']

playResult = ['playType2', # only second item
                'gameClockSecondsExpired',
                'gameClockStoppedAfterPlay', 
                'noPlay', # is the play a penalty
                'offensiveYards'
                'distanceToGoalPost',
                'isClockRunning', 
                'changePossession', 
                'turnover',
                'safety',
                'firstDown',]

idk = [ 'typeOfPlay',
        'fourthDownConversion',
        'thirdDownConversion',
        'homeScorePre', 
        'visitingScorePre',
        'homeScorePost',
        'visitingScorePost',]

# the original dataset has 3 columns of their own prediction of the play we may be able to use them as a reference
reference = ['evPre',
             'evPost', 
             'evPlay',]

exclude = [ 'playTypeDetailed', # redundant to playType2
            'playDescription',
            'playStats',
            'playDescriptionFull', 
            'efficientPlay']

In [8]:
from Preprocess import printColumnsHasNan, printNonNumericColumns, runPreprocess, getStringValue, getCircumstance, getPlayType, getPlayResult

In [9]:
NFLplays_raw['playType'].unique()

array(['kickoff', 'pass', 'penalty', 'field goal', 'rush', 'punt', 'xp',
       'spike', 'kneel', 'aborted', 'two-point'], dtype=object)

In [10]:
printColumnsHasNan(NFLplays_raw)

Columns with NaN values:
playType2
playTypeDetailed
fieldPosition
distanceToGoalPre
playStats
typeOfPlay
distanceToGoalPost
fieldGoalProbability
huddle
formation


In [11]:
printNonNumericColumns(NFLplays_raw)

Non-numeric columns:
playType
playType2
playTypeDetailed
gameClock
fieldPosition
playDescription
playStats
playDescriptionFull
typeOfPlay
huddle
formation


In [12]:
NFLplays = runPreprocess(NFLplays_raw, exclude, idk)

In [14]:
NFLplays.columns

Index(['playId', 'gameId', 'playSequence', 'quarter', 'possessionTeamId',
       'nonpossessionTeamId', 'playType', 'playNumberByTeam', 'gameClock',
       'gameClockSecondsExpired', 'gameClockStoppedAfterPlay', 'down',
       'distance', 'fieldPosition', 'distanceToGoalPre', 'noPlay',
       'changePossession', 'turnover', 'safety', 'offensiveYards', 'netYards',
       'firstDown', 'evPre', 'evPost', 'evPlay', 'scorePossession',
       'scoreNonpossession', 'distanceToGoalPost', 'fieldGoalProbability',
       'huddle', 'formation', 'playResult'],
      dtype='object')

In [15]:
NFLplays.shape

(870384, 32)

In [20]:
printColumnsHasNan(NFLplays)

Columns with NaN values:
distanceToGoalPost


In [21]:
printNonNumericColumns(NFLplays)

Non-numeric columns:


In [28]:
NFLplays.head(20)

Unnamed: 0,playId,gameId,playSequence,quarter,possessionTeamId,nonpossessionTeamId,playType,playNumberByTeam,gameClock,gameClockSecondsExpired,...,evPre,evPost,evPlay,scorePossession,scoreNonpossession,distanceToGoalPost,fieldGoalProbability,huddle,formation,playResult
0,30298,26909,1,1,2200,3200,0,1,900,9,...,0.0,-0.853652,-0.853652,0,0,,-1.0,0,0,0
1,30299,26909,2,1,3200,2200,1,1,891,46,...,0.853652,2.080026,1.226374,0,0,44.0,0.26,1,0,1
2,30300,26909,3,1,3200,2200,1,2,845,38,...,2.080026,2.98367,0.903644,0,0,30.0,0.74,2,1,1
3,30301,26909,4,1,3200,2200,1,3,807,4,...,2.98367,2.422331,-0.561339,0,0,30.0,0.91,2,1,2
4,30302,26909,5,1,3200,2200,1,4,803,31,...,2.422331,2.069898,-0.352433,0,0,28.0,0.91,2,0,1
5,30303,26909,6,1,3200,2200,1,5,772,40,...,2.069898,4.016406,1.946508,0,0,14.0,0.93,2,1,1
6,30304,26909,7,1,3200,2200,1,6,732,54,...,4.016406,4.293374,0.276968,0,0,10.0,0.99,2,1,1
7,30305,26909,8,1,3200,2200,2,7,678,19,...,4.293374,3.226098,-1.067276,0,0,,0.99,1,1,-1
8,30306,26909,9,1,3200,2200,1,8,659,37,...,3.226098,2.505833,-0.720265,0,0,15.0,0.99,1,1,1
9,30307,26909,10,1,3200,2200,1,9,622,10,...,2.505833,1.66463,-0.841203,0,0,15.0,0.99,2,1,2


LSTM model Jaume:

In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Masking
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
# Load data
NFLplays_LSTM = readNFLPlay()

In [5]:
# Feature selection for prediction

indices = ['gameId']

circumstances = ['playSequence', 
                'quarter', 
                'possessionTeamId',
                'nonpossessionTeamId', 
                'playNumberByTeam',
                'gameClock', 
                'down', 
                'distance',
                'distanceToGoalPre',
                'netYards',
                'fieldGoalProbability',
                'fieldPosition',
                'homeScorePre', 
                'visitingScorePre',] #scores from home and visiting team before the play

target_play = ['playType',
            'huddle',
            'formation',]

play_result = ['playResult', # only second item from playType2
                'gameClockSecondsExpired',
                'gameClockStoppedAfterPlay', 
                'noPlay', # is the play a penalty
                'offensiveYards',
                'distanceToGoalPost', 
                'changePossession', 
                'turnover',
                'safety',
                'firstDown',
                'scorePossession', # does the possesion team score, and how many points, is result of the play
                'scoreNonpossession',
                'homeScorePost',
                'visitingScorePost',] # scores from home and visiting team after the play

final_score = ['homeScorePost',
                'visitingScorePost',]

idk = [ 'playId',
        'typeOfPlay',
        'fourthDownConversion',
        'thirdDownConversion',]

exclude = [ 'playTypeDetailed', # redundant to playType2 
            'playDescription',
            'playStats',
            'playDescriptionFull', 
            'efficientPlay']

In [6]:
import Preprocess as pp

In [None]:
# Handle NaNs
# Drop rows with NaNs in target_play
#data = data.dropna(subset=[target_play])

# Impute NaNs in features with the mean of each column
#for circumstance in circumstances:
    #data[circumstance].fillna(data[circumstance].mean(), inplace=True)

In [7]:
NFLplays_LSTM_preprocess = pp.runPreprocess(NFLplays_LSTM, exclude, idk)

In [8]:
NFLplays_LSTM_preprocess.columns

Index(['gameId', 'playSequence', 'quarter', 'possessionTeamId',
       'nonpossessionTeamId', 'playType', 'playNumberByTeam', 'gameClock',
       'gameClockSecondsExpired', 'gameClockStoppedAfterPlay', 'down',
       'distance', 'fieldPosition', 'distanceToGoalPre', 'noPlay',
       'changePossession', 'turnover', 'safety', 'offensiveYards', 'netYards',
       'firstDown', 'evPre', 'evPost', 'evPlay', 'scorePossession',
       'scoreNonpossession', 'homeScorePre', 'visitingScorePre',
       'homeScorePost', 'visitingScorePost', 'distanceToGoalPost',
       'fieldGoalProbability', 'huddle', 'formation', 'playResult'],
      dtype='object')

In [9]:
NFLplays_LSTM_preprocess.shape

(870384, 35)

In [10]:
pp.printColumnsHasNan(NFLplays_LSTM_preprocess)

Columns with NaN values:


In [11]:
pp.printNonNumericColumns(NFLplays_LSTM_preprocess)

Non-numeric columns:


In [12]:
# Encoding and scaling
#Encode target labels with value between 0 and n_classes-1.
#This transformer should be used to encode target values, i.e. y, and not the input X

#label_encoder = LabelEncoder()
#NFLplays_LSTM['playType'] = label_encoder.fit_transform(NFLplays_LSTM['playType']) #later can use the label_encoder.inverse_transform for the outputs calculated from the model to get back the original names
#NFLplays_LSTM['huddle'] = label_encoder.fit_transform(NFLplays_LSTM['huddle'])
#NFLplays_LSTM['formation'] = label_encoder.fit_transform(NFLplays_LSTM['formation'])

#scalers = {col: StandardScaler() for col in circumstances}
#for col in circumstances:
    #NFLplays_LSTM_preprocess[col] = scalers[col].fit_transform(NFLplays_LSTM_preprocess[[col]])

In [30]:
NFLplays_LSTM_preprocess['playType'].unique()
#reality no need to encode after the preprocess from Çyan's code, it ends with similar results, a value to categorise each word into a class

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=int64)

Haven't preprocessed the data for this run (in terms of MinMax scaling, Standard scaling, Quantile transformation,...)

In [14]:
X = [] #circumstances
y_target_play = [] # predict each play
y_circumstances = [] # predict next circumstances, 
y_play_result = [] # predict the result of each play
y_final_score = [] # final score of the game

for game_id in NFLplays_LSTM_preprocess['gameId'].unique():
    game_data = NFLplays_LSTM_preprocess[NFLplays_LSTM_preprocess['gameId'] == game_id]
    if len(game_data) > 0:  # Ensure the game has at least one play
        X.append(game_data[circumstances].values)
        y_target_play.append(game_data[target_play].values)
        y_circumstances.append(game_data[circumstances].values[1:])
        y_play_result.append(game_data[play_result].values)
        y_final_score.append(game_data[final_score].values[-1])
    else:
        print(f"No valid plays for game_id: {game_id}")

In [34]:
# Function to determine dimensionality
def get_dimensionality(lst):
    if isinstance(lst, list):
        if all(isinstance(sublist, list) for sublist in lst):
            dimensions = 1 + get_dimensionality(lst[0])
            return dimensions
    return 1

# Test the function
print("Dimensionality of list_1d:", get_dimensionality(X))

Dimensionality of list_1d: 1


In [18]:
# Pad sequences
X_padded = pad_sequences(X, dtype='float32', padding='post', value=0)
y_target_play_padded = pad_sequences(y_target_play, padding='post', value=-1)
y_circumstances_padded = pad_sequences(y_circumstances, dtype='float32', padding='post', value=0)
y_play_result_padded = pad_sequences(y_play_result, padding='post', value=-1)
y_final_score = np.array(y_final_score)

# Add an extra timestep filled with zeros to y_circumstances_padded
extra_timestep = np.zeros((y_circumstances_padded.shape[0], 1, y_circumstances_padded.shape[2]))
y_circumstances_padded = np.concatenate([y_circumstances_padded, extra_timestep], axis=1)

# Print shapes after padding
print(f"Shape of X_padded: {X_padded.shape}")
print(f"Shape of y_play_padded: {y_target_play_padded.shape}")
print(f"Shape of y_circumstances_padded: {y_circumstances_padded.shape}")
print(f"Shape of y_play_result_padded: {y_play_result_padded.shape}")
print(f"Shape of y_final_score: {y_final_score.shape}")

Shape of X_padded: (5308, 225, 14)
Shape of y_play_padded: (5308, 225, 3)
Shape of y_circumstances_padded: (5308, 225, 14)
Shape of y_play_result_padded: (5308, 225, 14)
Shape of y_final_score: (5308, 2)


In [19]:
def split_data(X_padded, y_target_play_padded, y_circumstances_padded, y_play_result_padded, test_size=0.67):
    """
    Split the padded sequences into train and test sets based on the desired ratio.

    Parameters:
    - X_padded: numpy array, 3D tensor of padded sequences (batch, timesteps, features)
    - test_size: float, proportion of the dataset to include in the test split (default: 0.67)

    Returns:
    - df_train: numpy array, padded sequences for training
    - df_test: numpy array, padded sequences for testing
    """

    # Calculate the number of samples for train and test sets
    num_samples_total = X_padded.shape[0]
    num_samples_test = int(num_samples_total * test_size)
    num_samples_train = num_samples_total - num_samples_test

    # Split the data into train and test sets based on the number of samples
    X_train = X_padded[:num_samples_train]
    X_test = X_padded[num_samples_train:]
    y_target_play_train = y_target_play_padded[:num_samples_train]
    y_target_play_test = y_target_play_padded[num_samples_train:]
    y_circumstances_train = y_circumstances_padded[:num_samples_train]
    y_circumstances_test = y_circumstances_padded[num_samples_train:]
    y_play_result_train = y_play_result_padded[:num_samples_train]
    y_play_result_test = y_play_result_padded[num_samples_train:]

    return X_train, X_test, y_target_play_train, y_target_play_test, y_circumstances_train, y_circumstances_test, y_play_result_train, y_play_result_test

def split_target_data(y_final_score, test_size=0.67):
    """
    Split the target final score data into train and test sets based on the desired ratio.

    Parameters:
    - y_final_score: numpy array, target final score data (batch, 2)
    - test_size: float, proportion of the dataset to include in the test split (default: 0.67)

    Returns:
    - y_train: numpy array, target final score data for training
    - y_test: numpy array, target final score data for testing
    """

    # Calculate the number of samples for train and test sets
    num_samples_total = y_final_score.shape[0]
    num_samples_test = int(num_samples_total * test_size)
    num_samples_train = num_samples_total - num_samples_test

    # Split the data into train and test sets based on the number of samples
    y_final_score_train = y_final_score[:num_samples_train]
    y_final_score_test = y_final_score[num_samples_train:]

    return y_final_score_train, y_final_score_test


In [69]:
print(f"Shape of y_result_padded: {y_play_result_padded.shape}")

Shape of y_result_padded: (5308, 225, 14)


In [20]:
X_train, X_test, y_target_play_train, y_target_play_test, y_circumstances_train, y_circumstances_test, y_play_result_train, y_play_result_test = split_data(X_padded, y_target_play_padded, y_circumstances_padded, y_play_result_padded, test_size=0.67)

y_final_score_train, y_final_score_test = split_target_data(y_final_score, test_size=0.67)

print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of X_test: {X_test.shape}")
print(f"Shape of y_play_train: {y_target_play_train.shape}")
print(f"Shape of y_play_test: {y_target_play_test.shape}")
print(f"Shape of y_circumstances_train: {y_circumstances_train.shape}")
print(f"Shape of y_circumstances_test: {y_circumstances_test.shape}")
print(f"Shape of y_result_train: {y_play_result_train.shape}")
print(f"Shape of y_result_test: {y_play_result_test.shape}")
print(f"Shape of y_final_score_train: {y_final_score_train.shape}")
print(f"Shape of y_final_score_test: {y_final_score_test.shape}")

Shape of X_train: (1752, 225, 14)
Shape of X_test: (3556, 225, 14)
Shape of y_play_train: (1752, 225, 3)
Shape of y_play_test: (3556, 225, 3)
Shape of y_circumstances_train: (1752, 225, 14)
Shape of y_circumstances_test: (3556, 225, 14)
Shape of y_result_train: (1752, 225, 14)
Shape of y_result_test: (3556, 225, 14)
Shape of y_final_score_train: (1752, 2)
Shape of y_final_score_test: (3556, 2)


In [21]:
# Custom loss function to incorporate final score prediction
def custom_loss(y_true, y_pred):
    play_loss = K.sparse_categorical_crossentropy(y_true[0], y_pred[0])
    result_loss = K.mean(K.square(y_true[1] - y_pred[1]))
    final_score_loss = K.mean(K.square(y_true[2] - y_pred[2]))
    circumstances_loss = K.mean(K.square(y_true[3] - y_pred[3]))
    return play_loss + result_loss + final_score_loss + circumstances_loss

In [29]:
def custom_loss(y_true, y_pred):
    play_loss = K.mean(K.square(y_true[0] - y_pred[0]))
    result_loss = K.mean(K.square(y_true[1] - y_pred[1]))
    final_score_loss = K.mean(K.square(y_true[2] - y_pred[2]))
    circumstances_loss = K.mean(K.square(y_true[3] - y_pred[3]))
    return play_loss + result_loss + final_score_loss + circumstances_loss


In [30]:
# Input layer
inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))

# Masking layer to handle variable-length sequences
masked_inputs = Masking(mask_value=0.0)(inputs)

# LSTM layers
x = LSTM(128, return_sequences=True)(masked_inputs)
x = Dropout(0.2)(x)
x, state_h, state_c = LSTM(64, return_sequences=True, return_state=True)(x)
x = Dropout(0.2)(x)

# Output layers
play_output = Dense(3, activation='linear', name='play_output')(x)
result_output = Dense(len(play_result), activation='linear', name='result_output')(x)
final_score_output = Dense(len(final_score), activation='linear', name='final_score_output')(state_c)
circumstances_output = Dense(len(circumstances), activation='linear', name='circumstances_output')(x)

# Define the model
model = Model(inputs=inputs, outputs=[play_output, result_output, final_score_output, circumstances_output])
model.compile(optimizer=Adam(), loss=custom_loss, 
              metrics={'play_output': 'mse', 'result_output': 'mse', 'final_score_output': 'mse', 'circumstances_output': 'mse'})


In [95]:
# Define the number of classes for each categorical feature
num_classes_playType = len(np.unique(y_target_play_train[0]))
num_classes_huddle = len(np.unique(y_target_play_train[1]))
num_classes_formation = len(np.unique(y_target_play_train[2]))

print(num_classes_playType)
print(num_classes_huddle)
print(num_classes_formation)

10
10
10


In [31]:
# Training
model.fit(X_train, [y_target_play_train, y_play_result_train, y_final_score_train, y_circumstances_train], 
          epochs=50, batch_size=16, validation_split=0.2)

Epoch 1/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 116ms/step - circumstances_output_mse: 894419.0625 - final_score_output_mse: 312.7032 - loss: 3526947.0000 - play_output_mse: 3.0021 - result_output_mse: 169.1388 - val_circumstances_output_mse: 884540.1875 - val_final_score_output_mse: 114.6602 - val_loss: 3651565.0000 - val_play_output_mse: 2.1411 - val_result_output_mse: 143.5530
Epoch 2/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 112ms/step - circumstances_output_mse: 886986.6875 - final_score_output_mse: 111.4797 - loss: 3398725.7500 - play_output_mse: 2.4117 - result_output_mse: 136.0815 - val_circumstances_output_mse: 880561.2500 - val_final_score_output_mse: 108.0687 - val_loss: 3635293.7500 - val_play_output_mse: 2.1418 - val_result_output_mse: 125.6510
Epoch 3/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 117ms/step - circumstances_output_mse: 872612.3750 - final_score_output_mse: 107.6569 - loss: 3598138.

Epoch 21/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 141ms/step - circumstances_output_mse: 814919.6875 - final_score_output_mse: 101.9662 - loss: 3268576.7500 - play_output_mse: 2.0776 - result_output_mse: 108.5581 - val_circumstances_output_mse: 817635.3750 - val_final_score_output_mse: 130.8243 - val_loss: 3379022.5000 - val_play_output_mse: 2.1438 - val_result_output_mse: 109.5056
Epoch 22/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 144ms/step - circumstances_output_mse: 822206.5000 - final_score_output_mse: 113.6536 - loss: 3123531.2500 - play_output_mse: 2.0686 - result_output_mse: 108.8757 - val_circumstances_output_mse: 814670.0625 - val_final_score_output_mse: 103.0585 - val_loss: 3366814.7500 - val_play_output_mse: 2.1441 - val_result_output_mse: 109.4863
Epoch 23/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 144ms/step - circumstances_output_mse: 819381.1875 - final_score_output_mse: 99.6279 - loss: 341502

Epoch 41/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 144ms/step - circumstances_output_mse: 763543.0625 - final_score_output_mse: 47.9679 - loss: 3059755.5000 - play_output_mse: 2.0752 - result_output_mse: 109.9092 - val_circumstances_output_mse: 761058.3125 - val_final_score_output_mse: 33.4094 - val_loss: 3147720.2500 - val_play_output_mse: 2.1408 - val_result_output_mse: 109.4924
Epoch 42/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 139ms/step - circumstances_output_mse: 778698.8125 - final_score_output_mse: 47.1581 - loss: 3070284.2500 - play_output_mse: 2.0802 - result_output_mse: 110.1428 - val_circumstances_output_mse: 758367.7500 - val_final_score_output_mse: 49.3601 - val_loss: 3136778.5000 - val_play_output_mse: 2.1447 - val_result_output_mse: 109.4854
Epoch 43/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 138ms/step - circumstances_output_mse: 766413.9375 - final_score_output_mse: 35.5893 - loss: 2904042.75

<keras.src.callbacks.history.History at 0x2f680271010>

hi, keep going you'are doing great

In [38]:
# Evaluate the model
results = model.evaluate(X_test, [y_target_play_test, y_play_result_test, y_final_score_test, y_circumstances_test])

# Print the length and content of the results
print(f'Number of values in results: {len(results)}')
print(f'Contents of results: {results}')


# Unpack the results based on the actual number of returned values
if len(results) == 5:
    total_loss, play_mse, result_mse, final_score_mse, circumstances_mse = results

    # Print results
    print(f'Total Loss: {total_loss}')
    print(f'Play MSE: {play_mse}')
    print(f'Result MSE: {result_mse}')
    print(f'Final Score MSE: {final_score_mse}')
    print(f'Circumstances MSE: {circumstances_mse}')
else:
    # Handle cases where the number of returned values is not as expected
    print("Unexpected number of values returned from model evaluation. Check model.compile configuration.")

# Unpack the results
#loss = results[0]
#play_loss = results[1]
#result_loss = results[2]
#final_score_loss = results[3]
#circumstances_loss = results[4]

# Assuming the metrics are returned in the order of the outputs, unpack them accordingly
# Note: Ensure you have defined the metrics correctly in model.compile and match this with the expected outputs
#play_mse = results[5]
#result_mse = results[6]
#final_score_mse = results[7]
#circumstances_mse = results[8]

# Print results
#print(f'Test Play Loss: {play_loss}')
#print(f'Test Result Loss: {result_loss}')
#print(f'Test Final Score Loss: {final_score_loss}')
#print(f'Test Circumstances Loss: {circumstances_loss}')
#print(f'Test Play MSE: {play_mse}')
#print(f'Test Result MSE: {result_mse}')
#print(f'Test Final Score MSE: {final_score_mse}')
#print(f'Test Circumstances MSE: {circumstances_mse}')


[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 53ms/step - circumstances_output_mse: 751135.5000 - final_score_output_mse: 16.2153 - loss: 2995759.2500 - play_output_mse: 2.1829 - result_output_mse: 111.5998
Number of values in results: 5
Contents of results: [3012344.5, 755758.5, 16.08893585205078, 2.2054314613342285, 111.06853485107422]
Total Loss: 3012344.5
Play MSE: 755758.5
Result MSE: 16.08893585205078
Final Score MSE: 2.2054314613342285
Circumstances MSE: 111.06853485107422


In [35]:
# Predict using model.predict
predictions = model.predict(X_test)

[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 53ms/step


In [41]:
print(len(predictions))

4


In [42]:
from sklearn.metrics import mean_squared_error

# Reshape predictions to 2D arrays
predictions_flat = [pred.reshape(-1, pred.shape[-1]) for pred in predictions]

# Compute mean squared error
play_mse = mean_squared_error(y_target_play_test.reshape(-1, y_target_play_test.shape[-1]), predictions_flat[0])
result_mse = mean_squared_error(y_play_result_test.reshape(-1, y_play_result_test.shape[-1]), predictions_flat[1])
final_score_mse = mean_squared_error(y_final_score_test.reshape(-1, y_final_score_test.shape[-1]), predictions_flat[2])
circumstances_mse = mean_squared_error(y_circumstances_test.reshape(-1, y_circumstances_test.shape[-1]), predictions_flat[3])


print(f'Play MSE: {play_mse}')
print(f'Result MSE: {result_mse}')
print(f'Final Score MSE: {final_score_mse}')
print(f'Circumstances MSE: {final_score_mse}')



play_mse = mean_squared_error(y_target_play_test, predictions[0])
result_mse = mean_squared_error(y_play_result_test, predictions[1])
final_score_mse = mean_squared_error(y_final_score_test, predictions[2])
circumstances_mse = mean_squared_error(y_circumstances_test, predictions[3])

print(f'Play MSE: {play_mse}')
print(f'Result MSE: {result_mse}')
print(f'Final Score MSE: {final_score_mse}')
print(f'Circumstances MSE: {circumstances_mse}')

ValueError: Found array with dim 3. None expected <= 2.

In [83]:
# Before model.fit()

# Get the model's predictions
predictions = model.predict(X_train)

# Print shapes of labels and predictions for play_output
print("Shapes of labels and predictions for play_output:")
print("Labels shape:", y_target_play_train.shape)
print("Predictions shape:", predictions[0].shape)  # Assuming play_output is the first output

# Now proceed with model.fit()

[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 54ms/step
Shapes of labels and predictions for play_output:
Labels shape: (1752, 225, 3)
Predictions shape: (1752, 225, 3)


In [84]:
# Before model.fit()

# Print shapes of labels and predictions for result_output
print("Shapes of labels and predictions for result_output:")
print("Labels shape:", y_play_result_train.shape)
print("Predictions shape:", predictions[1].shape)  # Assuming result_output is the second output

# Print shapes of labels and predictions for final_score_output
print("Shapes of labels and predictions for final_score_output:")
print("Labels shape:", y_final_score_train.shape)
print("Predictions shape:", predictions[2].shape)  # Assuming final_score_output is the third output

# Print shapes of labels and predictions for circumstances_output
print("Shapes of labels and predictions for circumstances_output:")
print("Labels shape:", y_circumstances_train.shape)
print("Predictions shape:", predictions[3].shape)  # Assuming circumstances_output is the fourth output

# Now proceed with model.fit()


Shapes of labels and predictions for result_output:
Labels shape: (1752, 225, 14)
Predictions shape: (1752, 225, 14)
Shapes of labels and predictions for final_score_output:
Labels shape: (1752, 2)
Predictions shape: (1752, 2)
Shapes of labels and predictions for circumstances_output:
Labels shape: (1752, 225, 13)
Predictions shape: (1752, 225, 13)
