In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils.class_weight import compute_sample_weight

In [2]:
# Read the season CSV into the working DataFrame.
# The path is relative to the notebook's working directory.
season_csv_path = 'Football 49 Data_Season 2023.csv'
data = pd.read_csv(season_csv_path)

In [3]:
# Integer-encode every object-dtype column of `data` in place.
# One LabelEncoder is kept per column (keyed by column name) so the same
# label -> code mapping can be reused later, e.g. on the holdout data or to
# decode predictions.
categorical_columns = data.select_dtypes(include=['object']).columns
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    # fit_transform learns the column's labels and returns their codes.
    data[name] = encoder.fit_transform(data[name])

In [4]:
# Add the encoded 'QB Comment' of each of the previous `num_lags` rows as
# new columns QB_Comment_lag_1 ... QB_Comment_lag_5.
# The first `lag` rows of each lag column are NaN because shift() has no
# earlier row to pull from.
num_lags = 5
qb_comment = data['QB Comment']
for lag in range(1, num_lags + 1):
    lag_column = f'QB_Comment_lag_{lag}'
    data[lag_column] = qb_comment.shift(periods=lag)

In [5]:
# Remove incomplete rows. dropna() with its defaults discards every row that
# has a NaN in ANY column, which includes (but is not limited to) the leading
# rows whose lag features are undefined.
data = data.dropna()

In [6]:
# Separate the prediction target from the predictors:
# the target is the encoded 'QB Comment' column, the features are all
# remaining columns (lag columns included).
target_seq = data['QB Comment']
features_seq = data.drop('QB Comment', axis='columns')

In [7]:
# Stratified 60 / 20 / 20 split into training, validation and test sets.
# Step 1 holds back 40% of the rows; step 2 halves that remainder.
# Both steps stratify on the target so class proportions are preserved.
# NOTE(review): train_test_split shuffles rows by default, while the lag
# features are built from neighbouring rows -- confirm that a shuffled
# (non-chronological) split is acceptable for this evaluation.
first_split = train_test_split(
    features_seq,
    target_seq,
    test_size=0.4,
    stratify=target_seq,
    random_state=42,
)
X_train, X_remaining, y_train, y_remaining = first_split

second_split = train_test_split(
    X_remaining,
    y_remaining,
    test_size=0.5,
    stratify=y_remaining,
    random_state=42,
)
X_validation, X_test, y_validation, y_test = second_split

In [8]:
# Columns fed to the model. The order is kept exactly as originally chosen,
# because it defines the column order of the training matrix.
# NOTE(review): 'Pass Result' and 'Gain' look like outcomes of the same play
# whose 'QB Comment' is being predicted -- confirm they are known at
# prediction time.
relevant_features = [
    'QB_Comment_lag_1',
    'Down', 'Distance', 'Field Position', 'Series', 'Play', 'Play Number',
    'QB_Comment_lag_2',
    'Formation', 'The_Play', 'Run Concept',
    'QB_Comment_lag_4',
    'R/P', 'Pass Result', 'Gain',
    'QB_Comment_lag_3',
    'Motion',
    'QB_Comment_lag_5',
]

In [9]:
# Restrict each of the three splits to the selected feature columns,
# in the order given by `relevant_features`.
X_train_reduced, X_validation_reduced, X_test_reduced = (
    split_frame[relevant_features]
    for split_frame in (X_train, X_validation, X_test)
)

In [10]:
# Hyper-parameter grid explored by the grid search: 3 x 3 x 3 = 27 candidates.
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.05, 0.1, 0.2],
    max_depth=[3, 4, 5],
)

In [11]:
# Configure (but do not yet run) an exhaustive 5-fold cross-validated search
# over `param_grid`, scored by accuracy and parallelised over all cores.
base_classifier = GradientBoostingClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=base_classifier,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

In [13]:
# Per-sample weights that balance the classes of y_train.
# Despite its name, `class_weights` holds one weight per training row
# (not one per class); it is passed to fit() as `sample_weight`.
class_weights = compute_sample_weight('balanced', y_train)

In [14]:
# Run the grid search on the reduced training set, forwarding the balancing
# sample weights to every underlying classifier fit.
grid_search.fit(
    X=X_train_reduced,
    y=y_train,
    sample_weight=class_weights,
)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [15]:
# Report the winning hyper-parameters and their mean cross-validated accuracy.
search_summary = (
    ("Best parameters found: ", grid_search.best_params_),
    ("Best accuracy found: ", grid_search.best_score_),
)
for caption, value in search_summary:
    print(caption, value)

Best parameters found:  {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 100}
Best accuracy found:  0.8302521008403362


In [16]:
# Keep a handle on the best estimator found by GridSearchCV.
# With GridSearchCV's default refit=True (not overridden above), this is the
# best parameter combination refit on the whole of X_train_reduced.
gb_classifier_best = grid_search.best_estimator_

In [30]:
# Score the tuned model on the data it was fitted on. These are in-sample
# figures, useful only as an overfitting reference for the later splits.
y_pred_train_best = gb_classifier_best.predict(X_train_reduced)
accuracy_train_best = accuracy_score(y_true=y_train, y_pred=y_pred_train_best)
classification_rep_train_best = classification_report(
    y_true=y_train,
    y_pred=y_pred_train_best,
)
print("Training Set Accuracy (Best Model):", accuracy_train_best)
print(
    "Training Set Classification Report (Best Model):\n",
    classification_rep_train_best,
)

Training Set Accuracy (Best Model): 1.0
Training Set Classification Report (Best Model):
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        47
           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00         8
           3       1.00      1.00      1.00        54
           4       1.00      1.00      1.00        16
           5       1.00      1.00      1.00        22
           6       1.00      1.00      1.00        25
           7       1.00      1.00      1.00       178
           8       1.00      1.00      1.00        63

    accuracy                           1.00       424
   macro avg       1.00      1.00      1.00       424
weighted avg       1.00      1.00      1.00       424



In [17]:
# Score the tuned model on the validation split (not used for fitting).
y_pred_validation_best = gb_classifier_best.predict(X_validation_reduced)
accuracy_validation_best = accuracy_score(
    y_true=y_validation,
    y_pred=y_pred_validation_best,
)
classification_rep_validation_best = classification_report(
    y_true=y_validation,
    y_pred=y_pred_validation_best,
)
print("Validation Set Accuracy (Best Model):", accuracy_validation_best)
print(
    "Validation Set Classification Report (Best Model):\n",
    classification_rep_validation_best,
)

Validation Set Accuracy (Best Model): 0.851063829787234
Validation Set Classification Report (Best Model):
               precision    recall  f1-score   support

           0       0.80      0.80      0.80        15
           1       1.00      1.00      1.00         4
           2       1.00      1.00      1.00         3
           3       0.88      0.83      0.86        18
           4       0.75      0.60      0.67         5
           5       1.00      0.75      0.86         8
           6       0.80      1.00      0.89         8
           7       0.85      0.86      0.86        59
           8       0.82      0.86      0.84        21

    accuracy                           0.85       141
   macro avg       0.88      0.86      0.86       141
weighted avg       0.85      0.85      0.85       141



In [18]:
# Score the tuned model on the test split (not used for fitting).
y_pred_test_best = gb_classifier_best.predict(X_test_reduced)
accuracy_test_best = accuracy_score(y_true=y_test, y_pred=y_pred_test_best)
classification_rep_test_best = classification_report(
    y_true=y_test,
    y_pred=y_pred_test_best,
)
print("Test Set Accuracy (Best Model):", accuracy_test_best)
print(
    "Test Set Classification Report (Best Model):\n",
    classification_rep_test_best,
)

Test Set Accuracy (Best Model): 0.8098591549295775
Test Set Classification Report (Best Model):
               precision    recall  f1-score   support

           0       0.93      0.81      0.87        16
           1       1.00      0.75      0.86         4
           2       0.33      0.33      0.33         3
           3       0.79      0.83      0.81        18
           4       0.75      0.60      0.67         5
           5       0.80      0.57      0.67         7
           6       0.67      0.75      0.71         8
           7       0.83      0.90      0.86        60
           8       0.80      0.76      0.78        21

    accuracy                           0.81       142
   macro avg       0.77      0.70      0.73       142
weighted avg       0.81      0.81      0.81       142



In [19]:
# Read the separate holdout CSV. This cell only loads it; encoding, lag
# features and row filtering happen in the cells that follow.
holdout_csv_path = 'Hold-out Data.csv'
holdout_data = pd.read_csv(holdout_csv_path)

In [20]:
# Encode the holdout data with the encoders fitted on the training data.
# Only columns that were label-encoded during training AND exist in the
# holdout frame are touched.
for col in categorical_columns:
    if col in holdout_data.columns:
        # Build the label -> integer-code lookup once per column instead of
        # calling LabelEncoder.transform() for every single row. A label's
        # code is its position in the encoder's `classes_` array.
        # Missing-value entries are left out of the lookup so that missing
        # holdout values stay NaN, exactly as before.
        code_by_label = {
            label: code
            for code, label in enumerate(label_encoders[col].classes_)
            if not pd.isna(label)
        }
        # Series.map() yields NaN for any label absent from the lookup,
        # i.e. labels never seen during training become NaN.
        holdout_data[col] = holdout_data[col].map(code_by_label)

In [21]:
# Recreate the lag features on the holdout data: QB_Comment_lag_k holds the
# encoded 'QB Comment' from k rows earlier (NaN where no such row exists).
holdout_qb_comment = holdout_data['QB Comment']
for lag in range(1, num_lags + 1):
    lag_column = f'QB_Comment_lag_{lag}'
    holdout_data[lag_column] = holdout_qb_comment.shift(periods=lag)

In [22]:
# Discard every holdout row containing a NaN in any column: rows with
# undefined lags, labels unseen in training, or values missing in the CSV.
holdout_data = holdout_data.dropna()

In [23]:
# Build the holdout target and feature matrix, using the same feature
# columns (and column order) as the training matrix.
y_holdout = holdout_data['QB Comment']
X_holdout_reduced = holdout_data.loc[:, relevant_features]

In [24]:
# Evaluate the best model on the holdout dataset
y_pred_holdout_best = gb_classifier_best.predict(X_holdout_reduced)
accuracy_holdout_best = accuracy_score(y_holdout, y_pred_holdout_best)
# The holdout set is tiny, so some classes appear only among the predictions
# or only among the true labels. Precision/recall are undefined for those
# classes; zero_division=0 reports them as 0.0 (the same number the default
# setting prints) without emitting an UndefinedMetricWarning each time.
classification_rep_holdout_best = classification_report(
    y_holdout,
    y_pred_holdout_best,
    zero_division=0,
)
print("Holdout Dataset Accuracy (Best Model):", accuracy_holdout_best)
print("Holdout Dataset Classification Report (Best Model):\n", classification_rep_holdout_best)

Holdout Dataset Accuracy (Best Model): 0.5714285714285714
Holdout Dataset Classification Report (Best Model):
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           3       1.00      1.00      1.00         1
           4       0.00      0.00      0.00         0
           5       1.00      1.00      1.00         1
           7       0.00      0.00      0.00         1
           8       0.50      0.33      0.40         3

    accuracy                           0.57         7
   macro avg       0.58      0.56      0.57         7
weighted avg       0.64      0.57      0.60         7



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
def predict_next_play(sample, model, label_encoders, feature_names=None):
    """Return the model's class probabilities for one sample, most likely first.

    Parameters
    ----------
    sample : pandas.Series
        A single row, indexable by feature name, containing at least the
        columns listed in ``feature_names``.
    model : fitted classifier
        Must expose ``predict_proba`` and ``classes_``.
    label_encoders : dict
        Maps column name to its fitted encoder; the ``'QB Comment'`` entry
        is used to turn encoded class ids back into outcome labels.
    feature_names : list of str, optional
        Feature columns, in the order the model was trained on. Defaults to
        the notebook-level ``relevant_features``.

    Returns
    -------
    dict
        ``{outcome_label: probability}`` ordered by descending probability.
    """
    if feature_names is None:
        feature_names = relevant_features

    # Shape the selected values as a single-row 2-D array, as predict_proba
    # expects (n_samples, n_features).
    prepared_sample = sample[feature_names].to_numpy().reshape(1, -1)

    # Predict the probabilities for each class known to the model.
    probabilities = model.predict_proba(prepared_sample)[0]

    # predict_proba's columns follow model.classes_ (the encoded ids actually
    # present when the model was fitted), NOT the encoder's full class list.
    # Decode those ids so each probability is paired with the right label
    # even if some outcome never occurred in the training data.
    encoded_classes = np.asarray(model.classes_, dtype=int)
    outcome_labels = label_encoders['QB Comment'].inverse_transform(encoded_classes)

    # Map each outcome to its probability, then order by descending probability.
    prob_dict = dict(zip(outcome_labels, probabilities))
    sorted_prob_dict = dict(
        sorted(prob_dict.items(), key=lambda item: item[1], reverse=True)
    )

    return sorted_prob_dict

In [26]:
# Example usage of the prediction function with the holdout data.
# Select a single sample (one row, returned as a Series) from the reduced
# holdout features. `.iloc` is positional, so 6 means "the 7th remaining row"
# and raises IndexError if fewer than 7 rows survived the holdout filtering.
example_data = X_holdout_reduced.iloc[6]  # Adjust the index if you want to test a different sample

In [28]:
# Get the outcome -> probability mapping (sorted, most likely first) that the
# tuned model assigns to the selected example row.
predicted_probabilities = predict_next_play(
    sample=example_data,
    model=gb_classifier_best,
    label_encoders=label_encoders,
)



In [29]:
# Print one line per outcome, in the (descending-probability) order of the
# dictionary, with probabilities rounded to four decimals.
print("Predicted Probabilities for Each Possible Outcome of the Next Play:")
for outcome in predicted_probabilities:
    prob = predicted_probabilities[outcome]
    print(f"  {outcome}: {prob:.4f}")

Predicted Probabilities for Each Possible Outcome of the Next Play:
  TOUCHDOWN: 0.9036
  PUNT: 0.0875
  FUMBLE: 0.0087
  FIELD GOAL: 0.0001
  MISSED FG: 0.0000
  DOWNS: 0.0000
  INTERCEPTION: 0.0000
  END OF HALF: 0.0000
  END OF GAME: 0.0000
