<a href="https://colab.research.google.com/github/SantiagoAlvarezb/Thesis/blob/main/Simulations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
# Mount Google Drive at /content/drive; the data paths and the animation
# output paths used later in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!apt-get update && apt-get in/tall ffmpeg freeglut3-dev xvfb
!pip install "stable-baselines3[extra]>=2.0.0a4"
!pip install setuptools==66
!pip install stable-baselines3[extra]

In [None]:
#For visualization and data manipulation
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import pandas as pd
from matplotlib.animation import FuncAnimation
from IPython import display
import os
import random
import tensorflow as tf
import tensorboard
import torch.optim as optim
from torch import nn

# Reinforcement Learning
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import heapq

# Import Data

The data cleaning process was done in the previous notebook. Therefore we only need to import the cleaned datasets.

In [None]:
#Define the path of each CSV file that contains the cleaned tracking data (one file per season)
tracking_2018_path  = '/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/clean-data-nfl-big-data-bowl-2020/tracking2018.csv'
tracking_2019_path  = '/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/clean-data-nfl-big-data-bowl-2020/tracking2019.csv'
tracking_2020_path  = '/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/clean-data-nfl-big-data-bowl-2020/tracking2020.csv'

In [None]:
#Read the three season files, in order, into one dataframe per season
tracking_2018, tracking_2019, tracking_2020 = (
    pd.read_csv(csv_path)
    for csv_path in (tracking_2018_path, tracking_2019_path, tracking_2020_path)
)

Furthermore, we extract the unique (`gameId`, `playId`) pairs of each season in order to run simulations.

In [None]:
#Unique (gameId, playId) combinations per season, then pooled over the three seasons
pairs_2018, pairs_2019, pairs_2020 = (
    list(set(zip(season_tracking['gameId'], season_tracking['playId'])))
    for season_tracking in (tracking_2018, tracking_2019, tracking_2020)
)
pairs = pairs_2018 + pairs_2019 + pairs_2020

# Field

We use the dimensions given in the NFL 2022 Big Data Bowl to make the visualization as accurate as possible.

In [None]:
def generate_nfl_field():
  """Draw an empty NFL pitch and return its figure and axes.

  The pitch spans x in [0, 120] and y in [0, 53.3]. It is drawn with an
  outline, a vertical line every 10 units, yard numbers along both sidelines,
  hashmarks and alternating shaded bands. The axes frame is hidden.

  Returns
  -------
  (fig, ax)
      The matplotlib figure (18.5 x 10.5 inches) and its single axes.
  """
  pitch_length = 120
  pitch_width = 53.3

  #General figure settings
  fig = plt.figure()
  ax = fig.add_subplot(1, 1, 1)
  fig.set_size_inches(18.5, 10.5)

  #Pitch outline: left, top, right and bottom edges, in that order
  outline_segments = (
      ([0, 0], [0, pitch_width]),
      ([0, pitch_length], [pitch_width, pitch_width]),
      ([pitch_length, pitch_length], [pitch_width, 0]),
      ([pitch_length, 0], [0, 0]),
  )
  for xs, ys in outline_segments:
    ax.plot(xs, ys, color="#50AD80", linewidth=2)

  #Vertical yard lines every 10 units, from x=10 to x=110
  for yard in range(10, 120, 10):
    ax.vlines(x=yard, ymin=0, ymax=pitch_width, color='#202924', linewidth=1)

  #Yard numbers: they count up to 50 at midfield and back down again
  number_style = dict(horizontalalignment='center', fontsize=20, color='white', fontdict={'family': 'serif'})
  for x in range(20, 110, 10):
    label = str((x if x <= 50 else 120 - x) - 10)
    ax.text(x, 5, label, **number_style)
    #The far-side numbers are drawn upside down
    ax.text(x - 0.35, 48.3, label, rotation=180, **number_style)

  #Sideline and middle hashmarks at every unit between x=11 and x=109
  hash_spans = ([0, 0.7], [53.3, 52.6], [23.3, 24.3], [29, 30])
  for x in range(11, 110):
    for span in hash_spans:
      ax.plot([x, x], span, color='#50AD80', linewidth=1)

  #Two short white marks at x=12 and x=108
  for x in (12, 108):
    ax.plot([x, x], [26.3, 27], color='white', linewidth=1)

  #Pitch shading: alternating 10-unit bands, drawn in the same order as before
  band = {'ymin': 0.049, 'ymax': 0.955}
  for left in (10, 30, 50, 70, 90):
    ax.axvspan(xmin=left, xmax=left + 10, alpha=0.9, color='#2c3530', **band)
  ax.axvspan(xmin=110, xmax=120, color='#252e29', **band)

  ax.axvspan(xmin=0, xmax=10, color='#252e29', **band)
  for left in (20, 40, 60, 80, 100):
    ax.axvspan(xmin=left, xmax=left + 10, alpha=0.9, color='#252e29', **band)

  ax.axis('off')
  return fig, ax

# Simulation functions

We create two separate functions, one for each agent, that both simulate the play frame by frame.

In [None]:
def simulation_returner(season, game_id, play_id, frame):
  """Plot one frame of a real play on the pitch, highlighting the returner.

  Frames are counted from the first "kick_received"/"punt_received" event of
  the play, so frame 0 is the frame in which that event is recorded.

  The returner is taken to be the first row (in table order) of the reception
  frame whose x lies within +/-3 and whose y lies within +/-1.5 of the last
  row of that frame.

  Parameters
  ----------
  season : int
      2018, 2019 or 2020; selects tracking_2018, tracking_2019 or tracking_2020.
  game_id, play_id
      Values matched against the "gameId" and "playId" columns.
  frame : int
      Frame to draw, counted from the reception frame.

  Raises
  ------
  ValueError
      If season is not 2018, 2019 or 2020, or if the identified returner is in
      neither team at the requested frame.

  Side effects
  ------------
  Calls set_data on the module-level artists home_players, away_players, poi,
  football and player_line, which must exist before this function is called.
  """
  wanted_plays = ["kick_received", "punt_received"]

  #Pick the tracking data of the requested season (the three seasons share one code path)
  if season == 2018:
    tracking = tracking_2018
  elif season == 2019:
    tracking = tracking_2019
  elif season == 2020:
    tracking = tracking_2020
  else:
    #If season value doesnt match, error is raised
    raise ValueError("Season must have values: 2018, 2019 or 2020")

  play = tracking[(tracking["gameId"] == game_id) & (tracking["playId"] == play_id)]

  def rows_from_reception(team):
    #Rows of `team` from its reception frame onwards, with frameId re-based so reception is frame 0.
    #Subtracting the reception frame replaces the former `list(range(n)) * 11` relabelling, which
    #only worked for exactly 11 players stored player by player, and which wrote into a slice.
    rows = play[play["team"] == team]
    reception_frame = rows.loc[rows["event"].isin(wanted_plays), "frameId"].unique()[0]
    rows = rows[rows["frameId"] >= reception_frame]
    return rows.assign(frameId=rows["frameId"] - reception_frame), reception_frame

  home_team_line, _ = rows_from_reception("home")
  away_team_line, _ = rows_from_reception("away")
  ball, start_frame = rows_from_reception("football")

  home_team = home_team_line[home_team_line["frameId"] == frame]
  away_team = away_team_line[away_team_line["frameId"] == frame]
  ball = ball[ball["frameId"] == frame]

  #Every tracked row of the play at the reception frame
  df = play[play["frameId"] == start_frame]

  # NOTE(review): the last row of the frame is presumably the football - confirm row order of the cleaned data
  football_x = df.iloc[-1]["x"]
  football_y = df.iloc[-1]["y"]

  #First row inside the box around the football is taken as the returner
  ret = df.loc[(df['x'] > football_x-3) & (df['x'] < football_x+3) & (df['y'] > football_y-1.5) & (df['y'] < football_y+1.5)]
  ret_id = ret.iloc[0]["nflId"]

  if ret_id in home_team["nflId"].values:
    returner = home_team[home_team["nflId"] == ret_id]
    returner_line = home_team_line[home_team_line["nflId"] == ret_id]
  elif ret_id in away_team["nflId"].values:
    returner = away_team[away_team["nflId"] == ret_id]
    returner_line = away_team_line[away_team_line["nflId"] == ret_id]
  else:
    #Previously this only printed a message and then crashed on the unset `returner`
    raise ValueError("Something went wrong: the returner could not be found in either team")

  #Data found in the previous code lines is plotted onto the pitch
  home_players.set_data(home_team['x'], home_team['y'])
  away_players.set_data(away_team['x'], away_team['y'])
  poi.set_data(returner['x'], returner['y'])
  football.set_data(ball['x'], ball['y'])
  # NOTE(review): columns 1 and 2 are presumably x and y - confirm against the CSV layout
  player_line.set_data(returner_line.iloc[:,1], returner_line.iloc[:,2])

In [None]:
def simulation_pursuer(season, game_id, play_id, frame, player_num):
  """Plot one frame of a real play on the pitch, highlighting one pursuer.

  Frames are counted from the first "kick_received"/"punt_received" event of
  the play, so frame 0 is the frame in which that event is recorded.

  The returner is identified as the first row (in table order) of the
  reception frame whose x lies within +/-3 and whose y lies within +/-1.5 of
  the last row of that frame. The pursuer is then taken from the other team.

  Parameters
  ----------
  season : int
      2018, 2019 or 2020; selects tracking_2018, tracking_2019 or tracking_2020.
  game_id, play_id
      Values matched against the "gameId" and "playId" columns.
  frame : int
      Frame to draw, counted from the reception frame.
  player_num : int
      Positional index of the pursuer among the rows of the team opposing the
      returner at the requested frame.

  Raises
  ------
  ValueError
      If season is not 2018, 2019 or 2020, or if the identified returner is in
      neither team at the requested frame.

  Side effects
  ------------
  Calls set_data on the module-level artists home_players, away_players, poi,
  football and player_line, which must exist before this function is called.
  """
  wanted_plays = ["kick_received", "punt_received"]

  #Pick the tracking data of the requested season (the three seasons share one code path)
  if season == 2018:
    tracking = tracking_2018
  elif season == 2019:
    tracking = tracking_2019
  elif season == 2020:
    tracking = tracking_2020
  else:
    #If season value doesnt match, error is raised
    raise ValueError("Season must have values: 2018, 2019 or 2020")

  play = tracking[(tracking["gameId"] == game_id) & (tracking["playId"] == play_id)]

  def rows_from_reception(team):
    #Rows of `team` from its reception frame onwards, with frameId re-based so reception is frame 0.
    #Subtracting the reception frame replaces the former `list(range(n)) * 11` relabelling, which
    #only worked for exactly 11 players stored player by player, and which wrote into a slice.
    rows = play[play["team"] == team]
    reception_frame = rows.loc[rows["event"].isin(wanted_plays), "frameId"].unique()[0]
    rows = rows[rows["frameId"] >= reception_frame]
    return rows.assign(frameId=rows["frameId"] - reception_frame), reception_frame

  home_team_line, _ = rows_from_reception("home")
  away_team_line, _ = rows_from_reception("away")
  ball, start_frame = rows_from_reception("football")

  home_team = home_team_line[home_team_line["frameId"] == frame]
  away_team = away_team_line[away_team_line["frameId"] == frame]
  ball = ball[ball["frameId"] == frame]

  #Every tracked row of the play at the reception frame
  df = play[play["frameId"] == start_frame]

  # NOTE(review): the last row of the frame is presumably the football - confirm row order of the cleaned data
  football_x = df.iloc[-1]["x"]
  football_y = df.iloc[-1]["y"]

  #First row inside the box around the football is taken as the returner
  ret = df.loc[(df['x'] > football_x-3) & (df['x'] < football_x+3) & (df['y'] > football_y-1.5) & (df['y'] < football_y+1.5)]
  ret_id = ret.iloc[0]["nflId"]

  #The pursuer belongs to the team that is NOT returning the ball
  if ret_id in home_team["nflId"].values:
    pursuing_team, pursuing_team_line = away_team, away_team_line
  elif ret_id in away_team["nflId"].values:
    pursuing_team, pursuing_team_line = home_team, home_team_line
  else:
    #Previously this only printed a message and then crashed on the unset `chaser`
    raise ValueError("Something went wrong: the returner could not be found in either team")

  #The pursuer is the player at position `player_num` among that team's rows of this frame
  chaser_id = pursuing_team["nflId"].iloc[player_num]
  chaser = pursuing_team[pursuing_team["nflId"] == chaser_id]
  chaser_line = pursuing_team_line[pursuing_team_line["nflId"] == chaser_id]

  #Data found in the previous code lines is plotted onto the pitch
  home_players.set_data(home_team['x'], home_team['y'])
  away_players.set_data(away_team['x'], away_team['y'])
  poi.set_data(chaser['x'], chaser['y'])
  football.set_data(ball['x'], ball['y'])
  # NOTE(review): columns 1 and 2 are presumably x and y - confirm against the CSV layout
  player_line.set_data(chaser_line.iloc[:,1], chaser_line.iloc[:,2])

Furthermore, we obtain the total number of frames from the moment the ball is fielded.

In [None]:
def total_frames(season, game_id, play_id):
  """Count the frames of a play from the reception frame to its last frame, inclusive.

  The reception frame is the frameId of the first row of the play whose event
  is "kick_received" or "punt_received".

  Parameters
  ----------
  season : int
      2018, 2019 or 2020; selects tracking_2018, tracking_2019 or tracking_2020.
  game_id, play_id
      Values matched against the "gameId" and "playId" columns.

  Returns
  -------
  The largest frameId of the play minus the reception frameId, plus one.

  Raises
  ------
  ValueError
      If season is not 2018, 2019 or 2020.
  """
  if season == 2018:
    season_tracking = tracking_2018
  elif season == 2019:
    season_tracking = tracking_2019
  elif season == 2020:
    season_tracking = tracking_2020
  else:
    raise ValueError("Season must have values: 2018, 2019 or 2020")

  same_game = season_tracking["gameId"] == game_id
  same_play = season_tracking["playId"] == play_id
  play_rows = season_tracking[same_game & same_play]

  reception_rows = play_rows.loc[play_rows['event'].isin(["kick_received", "punt_received"])]
  reception_frame = reception_rows["frameId"].unique()[0]

  last_frame = play_rows["frameId"].max()
  return (last_frame - reception_frame) + 1

# Simulations

Choose a random play

In [None]:
#Draw one (gameId, playId) pair at random from all seasons
random_pair = random.choice(pairs)
random_game, random_play = random_pair

#The season is the first one, in chronological order, whose tracking data contains the game
for candidate_season, candidate_tracking in ((2018, tracking_2018), (2019, tracking_2019), (2020, tracking_2020)):
  if random_game in candidate_tracking["gameId"].unique():
    random_season = candidate_season
    break

print("This is game:", random_game)
print("This is play:", random_play)
print("This is season:", random_season)

Visualize the actual simulations

In [None]:
# Simulation for the Returner
# Animates the randomly chosen play with the returner highlighted, saves it
# as an mp4 on Drive and embeds it in the notebook.

season = random_season
game_id = random_game
play_id = random_play

#Set NFL pitch
fig, ax = generate_nfl_field()

#Set color of plot points to differentiate between home team, away team and football
#These artists are module-level names: simulation_returner updates them through set_data
marker_kwargs = {'marker': 'o', 'markeredgecolor': 'black', 'linestyle': 'None'}
home_players, = ax.plot([], [], ms=12, markerfacecolor='#E01E38',**marker_kwargs)
away_players, = ax.plot([], [], ms=12, markerfacecolor='#EC4B27',**marker_kwargs)
poi, = ax.plot([], [], ms=12, markerfacecolor='#00b4d8',**marker_kwargs)
football, = ax.plot([], [], ms=10, markerfacecolor='#F7A41A',**marker_kwargs)
player_line, = ax.plot([], [], color='#48cae4')

#Update formula used in animation
def visualize_play(frame):
  simulation_returner(season, game_id, play_id,frame)

#With positional data of players and football for every frame, we animate
animation = FuncAnimation(fig, visualize_play, frames=total_frames(season, game_id, play_id), interval=100) #number of frames is computed by total_frames
animation.save("/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/actual-simulation/Real_Animation_Returner_5.mp4")

#Embed the animation in the notebook output, then close the figure
video =animation.to_html5_video()
html = display.HTML(video)
display.display(html)
plt.close()

In [None]:
#Simulation for the Pursuer
#Animates the randomly chosen play with one pursuer highlighted, saves it
#as an mp4 on Drive and embeds it in the notebook.

season = random_season
game_id = random_game
play_id = random_play

#Set NFL pitch
fig, ax = generate_nfl_field()

#set color of plot points to differentiate between home team, away team and football
#These artists are module-level names: simulation_pursuer updates them through set_data
marker_kwargs = {'marker': 'o', 'markeredgecolor': 'black', 'linestyle': 'None'}
home_players, = ax.plot([], [], ms=12, markerfacecolor='#E01E38',**marker_kwargs)
away_players, = ax.plot([], [], ms=12, markerfacecolor='#EC4B27',**marker_kwargs)
poi, = ax.plot([], [], ms=12, markerfacecolor='#00b4d8',**marker_kwargs)
football, = ax.plot([], [], ms=10, markerfacecolor='#F7A41A',**marker_kwargs)
player_line, = ax.plot([], [], color='#48cae4')


#Update formula used in animation
#The last argument (5) is the player_num passed to simulation_pursuer
def visualize_play(frame):
  simulation_pursuer(season, game_id, play_id,frame,5)

#With positional data of players and football for every frame, we animate
animation = FuncAnimation(fig, visualize_play, frames=total_frames(season, game_id, play_id), interval=100) #number of frames is computed by total_frames
animation.save("/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/actual-simulation/Real_Animation_Chaser_5.mp4")

#Embed the animation in the notebook output, then close the figure
video =animation.to_html5_video()
html = display.HTML(video)
display.display(html)
plt.close()

# Reinforcement Learning

We create custom environments using tools derived from `Gym`, an open source library for developing and comparing reinforcement learning algorithms, one for each agent.


## Dataframe functions

### Agent 1: Returner

In [None]:
# Output: initial position of returner, home and away players, time, direction

def initial_positions_returner(season, game_id, play_id):
  """Return the state of a play at the frame in which the ball is received.

  The reception frame is the frameId of the first row whose event is
  "kick_received" or "punt_received". The returner is taken to be the first
  row (in table order) of that frame whose x lies within +/-3 and whose y lies
  within +/-1.5 of the last row of that frame.

  Parameters
  ----------
  season : int
      2018, 2019 or 2020; selects tracking_2018, tracking_2019 or tracking_2020.
  game_id, play_id
      Values matched against the "gameId" and "playId" columns.

  Returns
  -------
  returner_initial_position : DataFrame
      Row(s) of the returner at the reception frame.
  home_team_initial_position : DataFrame
      Home-team rows at the reception frame, without the returner.
  away_team_initial_position : DataFrame
      Away-team rows at the reception frame, without the returner.
  time
      Largest frameId of the play minus the reception frameId.
  direction
      First "playDirection" value of the returner rows.

  In the three dataframes "frameId" is re-based so that the reception frame is 0.

  Raises
  ------
  ValueError
      If season is not 2018, 2019 or 2020 (previously the function silently
      returned None), or if the identified returner is in neither team.
  """
  # Home, Away and Returner Data ---------------------------------------------
  wanted_plays = ["kick_received", "punt_received"]

  #Pick the tracking data of the requested season (the three seasons share one code path)
  if season == 2018:
    tracking = tracking_2018
  elif season == 2019:
    tracking = tracking_2019
  elif season == 2020:
    tracking = tracking_2020
  else:
    #Same error as the other season-dependent functions of this notebook
    raise ValueError("Season must have values: 2018, 2019 or 2020")

  play = tracking[(tracking["gameId"] == game_id) & (tracking["playId"] == play_id)]

  def rows_at_reception(team):
    #Rows of `team` at its reception frame, relabelled as frame 0.
    #Replaces the former `list(range(n)) * 11` relabelling followed by a `frameId == 0` filter,
    #which only worked for exactly 11 players stored player by player.
    rows = play[play["team"] == team]
    reception_frame = rows.loc[rows["event"].isin(wanted_plays), "frameId"].unique()[0]
    at_reception = rows[rows["frameId"] == reception_frame]
    return at_reception.assign(frameId=0), reception_frame

  home_team, _ = rows_at_reception("home")
  away_team, start_frame = rows_at_reception("away")

  #Every tracked row of the play at the reception frame
  df = play[play["frameId"] == start_frame]

  # NOTE(review): the last row of the frame is presumably the football - confirm row order of the cleaned data
  football_x = df.iloc[-1]["x"]
  football_y = df.iloc[-1]["y"]

  #First row inside the box around the football is taken as the returner
  ret = df.loc[(df['x'] > football_x-3) & (df['x'] < football_x+3) & (df['y'] > football_y-1.5) & (df['y'] < football_y+1.5)]
  ret_id = ret.iloc[0]["nflId"]

  #Split the returner off from his team (filtering instead of an in-place drop on a slice)
  if ret_id in home_team["nflId"].values:
    returner = home_team[home_team["nflId"] == ret_id]
    home_team = home_team[home_team["nflId"] != ret_id]
  elif ret_id in away_team["nflId"].values:
    returner = away_team[away_team["nflId"] == ret_id]
    away_team = away_team[away_team["nflId"] != ret_id]
  else:
    #Previously this only printed a message and then crashed on the unset `returner`
    raise ValueError("Something went wrong: the returner could not be found in either team")

  #Frames left in the play after the reception frame
  play_start_frame = play.loc[play["event"].isin(wanted_plays), "frameId"].unique()[0]
  time = play["frameId"].max() - play_start_frame

  direction = returner["playDirection"].unique()[0]

  return returner, home_team, away_team, time, direction

In [None]:
# Output: dataframes starting at initial frame

def data_frames_returner(season, game_id, play_id, frame):
  """Return the home and away tracking rows of one play at a given return frame.

  Frames are renumbered so that 0 is the frame of the first "kick_received" /
  "punt_received" event. The first tracked row lying inside a box around the
  football at that frame is treated as the returner, and that player is removed
  from whichever team he belongs to.

  Args:
    season: 2018, 2019 or 2020; selects the matching ``tracking_<season>`` table.
    game_id: value matched against the "gameId" column.
    play_id: value matched against the "playId" column.
    frame: frame offset from the reception (0 is the reception frame).

  Returns:
    ``(home_team, away_team)``: the rows of each team whose renumbered
    "frameId" equals ``frame``.

  Raises:
    ValueError: if ``season`` is not 2018, 2019 or 2020.
    IndexError: if a team has no reception event in this play.
  """
  wanted_plays = ["kick_received", "punt_received"]
  # Half-extents of the search box around the football, in tracking
  # coordinates (presumably yards -- confirm against the dataset description).
  box_half_x, box_half_y = 3, 1.5

  # Pick the table lazily so only the requested season has to be loaded.
  if season == 2018:
    tracking = tracking_2018
  elif season == 2019:
    tracking = tracking_2019
  elif season == 2020:
    tracking = tracking_2020
  else:
    raise ValueError(f"Unsupported season {season!r}; expected 2018, 2019 or 2020")

  play = tracking[(tracking["gameId"] == game_id) & (tracking["playId"] == play_id)]
  home_team, _ = _returner_rows_from_reception(play[play["team"] == "home"], wanted_plays)
  away_team, start_frame = _returner_rows_from_reception(play[play["team"] == "away"], wanted_plays)

  # Every tracked row at the reception frame.
  # NOTE(review): assumes the football is the last row of that frame -- confirm
  # against the cleaned dataset.
  reception = play[play["frameId"] == start_frame]
  football_x = reception.iloc[-1]["x"]
  football_y = reception.iloc[-1]["y"]

  near_ball = reception[
      (reception["x"] > football_x - box_half_x) & (reception["x"] < football_x + box_half_x)
      & (reception["y"] > football_y - box_half_y) & (reception["y"] < football_y + box_half_y)
  ]
  ret_id = near_ball.iloc[0]["nflId"]

  # Remove the returner from his own team; filtering builds a new frame instead
  # of mutating a slice in place.
  if ret_id in home_team["nflId"].values:
    home_team = home_team[home_team["nflId"] != ret_id]
  elif ret_id in away_team["nflId"].values:
    away_team = away_team[away_team["nflId"] != ret_id]
  else:
    # No player inside the box: both teams are returned untouched.
    print("Something went wrong")

  return home_team[home_team["frameId"] == frame], away_team[away_team["frameId"] == frame]


def _returner_rows_from_reception(team_rows, reception_events):
  """Trim one team's rows to the return phase and renumber its frames from 0.

  Returns ``(rows, start_frame)`` where ``start_frame`` is the original frameId
  of the first row of ``team_rows`` whose event is in ``reception_events``.
  """
  start_frame = team_rows.loc[team_rows["event"].isin(reception_events), "frameId"].iloc[0]
  rows = team_rows[team_rows["frameId"] >= start_frame].copy()
  # Subtracting the start frame does not depend on the row order or on there
  # being exactly 11 players, unlike assigning a pre-built list of labels.
  rows["frameId"] = rows["frameId"] - start_frame
  return rows, start_frame

### Agent 2: Pursuer

In [None]:
# Output: initial position of the chaser, of the home and of the away players, play length (in frames), play direction

def initial_positions_chasers(season, game_id, play_id, player_num):
  """Return the reception-frame positions for a play, singling out one chaser.

  The first tracked row lying inside a box around the football at the frame of
  the first "kick_received" / "punt_received" event is treated as the returner.
  The chaser is the ``player_num``-th player (in row order) of the team the
  returner does NOT belong to; he is removed from that team's rows. The
  returner stays in his own team's rows.

  Args:
    season: 2018, 2019 or 2020; selects the matching ``tracking_<season>`` table.
    game_id: value matched against the "gameId" column.
    play_id: value matched against the "playId" column.
    player_num: positional index of the chaser within the opposing team.

  Returns:
    ``(chaser, home_team, away_team, time, direction)`` where the first three
    are dataframes of reception-frame rows with "frameId" set to 0, ``time`` is
    the play's last frameId minus the reception frameId, and ``direction`` is
    the returner's "playDirection" value.

  Raises:
    ValueError: if ``season`` is unsupported or no player is found near the ball.
    IndexError: if a team has no reception event, or ``player_num`` is out of range.
  """
  wanted_plays = ["kick_received", "punt_received"]
  # Half-extents of the search box around the football, in tracking
  # coordinates (presumably yards -- confirm against the dataset description).
  box_half_x, box_half_y = 3, 1.5

  # Pick the table lazily so only the requested season has to be loaded.
  if season == 2018:
    tracking = tracking_2018
  elif season == 2019:
    tracking = tracking_2019
  elif season == 2020:
    tracking = tracking_2020
  else:
    raise ValueError(f"Unsupported season {season!r}; expected 2018, 2019 or 2020")

  play = tracking[(tracking["gameId"] == game_id) & (tracking["playId"] == play_id)]
  home_team, _ = _chaser_reception_rows(play[play["team"] == "home"], wanted_plays)
  away_team, start_frame = _chaser_reception_rows(play[play["team"] == "away"], wanted_plays)

  # Every tracked row at the reception frame.
  # NOTE(review): assumes the football is the last row of that frame -- confirm
  # against the cleaned dataset.
  reception = play[play["frameId"] == start_frame]
  football_x = reception.iloc[-1]["x"]
  football_y = reception.iloc[-1]["y"]

  near_ball = reception[
      (reception["x"] > football_x - box_half_x) & (reception["x"] < football_x + box_half_x)
      & (reception["y"] > football_y - box_half_y) & (reception["y"] < football_y + box_half_y)
  ]
  ret_id = near_ball.iloc[0]["nflId"]

  # The chaser always comes from the team opposing the returner.
  if ret_id in home_team["nflId"].values:
    returner = home_team[home_team["nflId"] == ret_id]
    chaser_id = away_team["nflId"].iloc[player_num]
    chaser = away_team[away_team["nflId"] == chaser_id]
    away_team = away_team[away_team["nflId"] != chaser_id]
  elif ret_id in away_team["nflId"].values:
    returner = away_team[away_team["nflId"] == ret_id]
    chaser_id = home_team["nflId"].iloc[player_num]
    chaser = home_team[home_team["nflId"] == chaser_id]
    home_team = home_team[home_team["nflId"] != chaser_id]
  else:
    # Previously this only printed a message and then failed on an unbound
    # variable; fail with an explicit reason instead.
    raise ValueError(
        f"No player found near the football at the reception frame "
        f"(season={season}, gameId={game_id}, playId={play_id})"
    )

  # Play length: frames between the reception and the last tracked frame.
  play_start = play.loc[play["event"].isin(wanted_plays), "frameId"].iloc[0]
  time = play["frameId"].max() - play_start

  direction = returner["playDirection"].unique()[0]

  return chaser, home_team, away_team, time, direction


def _chaser_reception_rows(team_rows, reception_events):
  """Return one team's rows at its reception frame, relabelled as frame 0.

  Returns ``(rows, start_frame)`` where ``start_frame`` is the original frameId
  of the first row of ``team_rows`` whose event is in ``reception_events``.
  """
  start_frame = team_rows.loc[team_rows["event"].isin(reception_events), "frameId"].iloc[0]
  # Selecting the frame directly does not depend on the row order or on there
  # being exactly 11 players per team.
  rows = team_rows[team_rows["frameId"] == start_frame].copy()
  rows["frameId"] = 0
  return rows, start_frame

In [None]:
# Output: home, away (chaser removed) and returner tracking rows at a given frame offset from the reception

def data_frames_chaser(season, game_id, play_id, frame, player_num):
  """Return home, away and returner tracking rows of a play at a given frame.

  Frames are renumbered so that 0 is the frame of the first "kick_received" /
  "punt_received" event. The first tracked row lying inside a box around the
  football at that frame is treated as the returner. The chaser is the
  ``player_num``-th player (in reception-frame row order) of the opposing team
  and is removed from that team's rows; the returner stays in his own team.

  Args:
    season: 2018, 2019 or 2020; selects the matching ``tracking_<season>`` table.
    game_id: value matched against the "gameId" column.
    play_id: value matched against the "playId" column.
    frame: frame offset from the reception (0 is the reception frame).
    player_num: positional index of the chaser within the opposing team.

  Returns:
    ``(home_team, away_team, returner)``: the rows of each whose renumbered
    "frameId" equals ``frame``.

  Raises:
    ValueError: if ``season`` is unsupported or no player is found near the ball.
    IndexError: if a team has no reception event, or ``player_num`` is out of range.
  """
  wanted_plays = ["kick_received", "punt_received"]
  # Half-extents of the search box around the football, in tracking
  # coordinates (presumably yards -- confirm against the dataset description).
  box_half_x, box_half_y = 3, 1.5

  # Pick the table lazily so only the requested season has to be loaded.
  if season == 2018:
    tracking = tracking_2018
  elif season == 2019:
    tracking = tracking_2019
  elif season == 2020:
    tracking = tracking_2020
  else:
    raise ValueError(f"Unsupported season {season!r}; expected 2018, 2019 or 2020")

  play = tracking[(tracking["gameId"] == game_id) & (tracking["playId"] == play_id)]
  home_team, _ = _chaser_rows_from_reception(play[play["team"] == "home"], wanted_plays)
  away_team, start_frame = _chaser_rows_from_reception(play[play["team"] == "away"], wanted_plays)

  # Every tracked row at the reception frame.
  # NOTE(review): assumes the football is the last row of that frame -- confirm
  # against the cleaned dataset.
  reception = play[play["frameId"] == start_frame]
  football_x = reception.iloc[-1]["x"]
  football_y = reception.iloc[-1]["y"]

  near_ball = reception[
      (reception["x"] > football_x - box_half_x) & (reception["x"] < football_x + box_half_x)
      & (reception["y"] > football_y - box_half_y) & (reception["y"] < football_y + box_half_y)
  ]
  ret_id = near_ball.iloc[0]["nflId"]

  # The chaser is looked up among the frame-0 rows (one row per player) so that
  # player_num indexes players, exactly as initial_positions_chasers does.
  # Indexing the all-frames table would pick the player_num-th ROW instead.
  if ret_id in home_team["nflId"].values:
    returner = home_team[home_team["nflId"] == ret_id]
    chaser_id = away_team.loc[away_team["frameId"] == 0, "nflId"].iloc[player_num]
    away_team = away_team[away_team["nflId"] != chaser_id]
  elif ret_id in away_team["nflId"].values:
    returner = away_team[away_team["nflId"] == ret_id]
    chaser_id = home_team.loc[home_team["frameId"] == 0, "nflId"].iloc[player_num]
    home_team = home_team[home_team["nflId"] != chaser_id]
  else:
    # Previously this only printed a message and then failed on an unbound
    # variable; fail with an explicit reason instead.
    raise ValueError(
        f"No player found near the football at the reception frame "
        f"(season={season}, gameId={game_id}, playId={play_id})"
    )

  return (
      home_team[home_team["frameId"] == frame],
      away_team[away_team["frameId"] == frame],
      returner[returner["frameId"] == frame],
  )


def _chaser_rows_from_reception(team_rows, reception_events):
  """Trim one team's rows to the return phase and renumber its frames from 0.

  Returns ``(rows, start_frame)`` where ``start_frame`` is the original frameId
  of the first row of ``team_rows`` whose event is in ``reception_events``.
  """
  start_frame = team_rows.loc[team_rows["event"].isin(reception_events), "frameId"].iloc[0]
  rows = team_rows[team_rows["frameId"] >= start_frame].copy()
  # Subtracting the start frame does not depend on the row order or on there
  # being exactly 11 players, unlike assigning a pre-built list of labels.
  rows["frameId"] = rows["frameId"] - start_frame
  return rows, start_frame

## Environments


### Agent 1: Returner

First we generate the agent that has no knowledge of its surroundings.

In [None]:
class CustomEnvironment_Returner_Random(gym.Env):
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  def __init__(self, season, game_id, play_id):
    """Set up the environment for one kick/punt return play.

    The returner's start position, the other players' start positions, the play
    length and the play direction all come from ``initial_positions_returner``.

    Args:
      season: season of the play, passed to ``initial_positions_returner``.
      game_id: game identifier of the play.
      play_id: play identifier of the play.
    """
    super(CustomEnvironment_Returner_Random,self).__init__()

    # Remember which play this environment replays.
    # NOTE(review): step() currently reads `season`, `game_id` and `play_id` as
    # bare (global) names; it should use these attributes so the environment
    # does not depend on notebook-level variables.
    self.season = season
    self.game_id = game_id
    self.play_id = play_id

    # Home, Away, Returner, Time and Direction ---------------------------------------------
    returner_initial_position, home_team_initial_position, away_team_initial_position, time, direction = initial_positions_returner(season, game_id, play_id)

    # Initial Data -------------------------------------------------------------
    # Initial position of the returner as an (x, y) array
    self.start_position = np.array([returner_initial_position["x"].unique()[0],returner_initial_position["y"].unique()[0]])
    # Copy so that in-place updates of the current position can never alter the stored start
    self.position = self.start_position.copy()

    # Teammates
    # NOTE(review): the home team is always treated as the returner's teammates,
    # even when the returner was found on the away team -- confirm this is intended.
    self.start_teammates_positions = [
        np.array([x, y])
        for x, y in zip(home_team_initial_position["x"], home_team_initial_position["y"])
    ]
    self.teammates_positions = list(self.start_teammates_positions)

    # Opponents
    self.start_opponent_positions = [
        np.array([x, y])
        for x, y in zip(away_team_initial_position["x"], away_team_initial_position["y"])
    ]
    self.opponents_positions = list(self.start_opponent_positions)

    # All players but the returner (teammates first, then opponents)
    self.players = self.teammates_positions + self.opponents_positions

    # Total reward
    self.total_reward = 0

    # Direction
    self.direction = direction

    # Length of the play: `time` keeps the full length, `play_length` is the countdown
    self.time = time
    self.play_length = time

    # Frame counter for future movement
    self.frame = 0

    # Yards moved per step
    self.yards_step = 1

    # Number of nearest players to track
    self.N_nearest = 3

    # Closest n players to the returner per frame
    self.closest_n_players_per_frame = []

    # Centroid list and the distance threshold used with it
    self.centroid = []
    self.threshold = 1

    # Dictionary with possible actions and action space as well as observation space
    self.action_dictionary = {0: "Forward", 1: "Backwards", 2: "Left", 3: "Right", 4: "Forward Left", 5: "Forward Right", 6: "Backwards Left", 7: "Backwards Right"}
    self.action_space = gym.spaces.Discrete(8)
    # Observation is the returner's (x, y), bounded by 0..120 and 0..53.3
    self.observation_space = gym.spaces.Box(low = np.array([0,0]), high = np.array([120,53.3]), dtype=np.float64)

# ------------------------------------------------------
  def step(self, action):
    done = False
    info = {}
    reward = 0

    # Home, Away and Returner Data----------------------------------------------
    home_team, away_team = data_frames_returner(season, game_id, play_id, self.frame)

    # Step Variations-----------------------------------------------------------
    # Reduces the play length by 1 second
    self.play_length -= 1

    # Increases number of frame
    self.frame += 1

    # Take into account boundaries of the field
    self.position = np.array([np.clip(self.position[0],0,120), np.clip(self.position[1],0,53.3)])
    self.teammates_position = np.array([np.clip(self.position[0],0,120), np.clip(self.position[1],0,53.3)])
    self.opponents_position = np.array([np.clip(self.position[0],0,120), np.clip(self.position[1],0,53.3)])

    # Distances between returner and players------------------------------------
    distances = []
    dictionary = {}

    def get_key(val):
      for key, value in dictionary.items():
          if (val == value) and (key not in self.closest_players_idx):
            return key

    for i in range(1):
      for j in range(i, len(self.players)):
        distance = ((self.position[0] - self.players[j][0])**2 + (self.position[1] - self.players[j][1])**2)**0.5
        distances.append(distance)
        dictionary[j] = distance

      n_shortest = heapq.nsmallest(self.N_nearest, distances)

      self.closest_players_idx=[]
      for i in n_shortest:
        self.closest_players_idx.append(get_key(i))

      positions = []
      for i in self.closest_players_idx:
        self.closest_players_idx_coordinates = []
        positions.append(self.players[i])
        self.closest_players_idx_coordinates.append(self.players[i][0])
        self.closest_players_idx_coordinates.append(self.players[i][1])
        self.closest_n_players_per_frame.append(self.closest_players_idx_coordinates)

    # Teammates and Opponents movement------------------------------------------
    self.current_teammates_positions = []
    self.current_opponent_positions = []

    for ind in home_team.index:
      self.current_teammates_positions.append(np.array([home_team["x"][ind], home_team["y"][ind]]))
    self.teammates_positions = self.current_teammates_positions

    for ind in away_team.index:
      self.current_opponent_positions.append(np.array([away_team["x"][ind], away_team["y"][ind]]))
    self.opponents_positions = self.current_opponent_positions

    self.players = self.teammates_positions + self.opponents_positions

    # Centroids
    def Centroid(list_of_points):
      x_cord = sum(list(zip(*list_of_points))[0])
      y_cord = sum(list(zip(*list_of_points))[1])
      centroid = (x_cord/len(list_of_points),y_cord/len(list_of_points))
      return centroid

    self.centroid.append(Centroid(positions))
    distance_centroid = ((self.position[0] - Centroid(positions)[0])**2 + (self.position[1] - Centroid(positions)[1])**2)**0.5

    # Agent movement and Rewards------------------------------------------------
    # Movement (x,y) if scoring in the left endzone
    if self.direction == "right":

      # Forward
      if action == 0:
        self.position -= (self.yards_step,0)
        self.total_reward += 1
        reward = 1
        if distance_centroid > self.threshold:
          self.total_reward -= 0.5
          reward = 0.5
        else:
          pass

      # Backwards
      elif action == 1:
        self.position += (self.yards_step,0)
        self.total_reward -= 1
        reward = -1

      # Left
      elif action == 2:
        self.position -= (0,self.yards_step)
        self.total_reward += 0
        reward = 0

      # Right
      elif action == 3:
        self.position += (0,self.yards_step)
        self.total_reward += 0
        reward = 0

      # Forwards Left
      elif action == 4:
        self.position -= (self.yards_step,self.yards_step)
        self.total_reward += 1
        reward = 1
        if distance_centroid > self.threshold:
          self.total_reward -= 0.5
          reward = 0.5
        else:
          pass

      # Forwards Right
      elif action == 5:
        self.position += (-(self.yards_step),self.yards_step)
        self.total_reward += 1
        reward = 1
        if distance_centroid > self.threshold:
          self.total_reward -= 0.5
          reward = 0.5
        else:
          pass

      # Backwards Left
      elif action == 6:
        self.position += (self.yards_step,-(self.yards_step))
        self.total_reward -= 1
        reward = -1

      # Backwards Right
      elif action == 7:
        self.position += (self.yards_step,self.yards_step)
        self.total_reward -= 1
        reward = -1

      # Rewards taking into account out of bounds and TDs
      # Touchdown
      if self.position[0] <= 10:
        self.total_reward += 4
        reward = 4

      # Out of Bounds
      elif self.position[1] <= 0:
        self.total_reward -= 3
        reward = -3
      elif self.position[1] >= 53.3:
        self.total_reward -= 3
        reward = -3

      # Hit an obstacle
      elif any((i == self.position).all() for i in self.players):
        self.total_reward -= 2
        reward = -2

    # Movement (x,y) if scoring in the right endzone
    elif self.direction == "left":

      # Forward
      if action == 0:
        self.position += (self.yards_step,0)
        self.total_reward += 1
        reward = 1
        if distance_centroid > self.threshold:
          self.total_reward -= 0.5
          reward = 0.5
        else:
          pass

      # Backwards
      elif action == 1:
        self.position -= (self.yards_step,0)
        self.total_reward -= 1
        reward = -1

      # Left
      elif action == 2:
        self.position += (0,self.yards_step)
        self.total_reward += 0
        reward = 0

      # Right
      elif action == 3:
        self.position -= (0,self.yards_step)
        self.total_reward += 0
        reward = 0

      # Forwards Left
      elif action == 4:
        self.position += (self.yards_step,self.yards_step)
        self.total_reward += 1
        reward = 1
        if distance_centroid > self.threshold:
          self.total_reward -= 0.5
          reward = 0.5
        else:
          pass

      # Forwards Right
      elif action == 5:
        self.position += (self.yards_step,-(self.yards_step))
        self.total_reward += 1
        reward = 1
        if distance_centroid > self.threshold:
          self.total_reward -= 0.5
          reward = 0.5
        else:
          pass

      # Backwards Left
      elif action == 6:
        self.position += (-(self.yards_step),self.yards_step)
        self.total_reward -= 1
        reward = -1

      # Backwards Right
      elif action == 7:
        self.position -= (self.yards_step,self.yards_step)
        self.total_reward -= 1
        reward = -1

      # Rewards taking into account out of bounds and TDs
      # Touchdown
      if self.position[0] >= 110:
        self.total_reward += 4
        reward = 4

      # Out of Bounds
      elif self.position[1] <= 0:
        self.total_reward -= 3
        reward = -3
      elif self.position[1] >= 53.3:
        self.total_reward -= 3
        reward = -3

      # Hit an obstacle
      elif any((i == self.position).all() for i in self.players):
        self.total_reward -= 2
        reward = -2

    # Stopping Criteria---------------------------------------------------------
    # Time expires
    if self.play_length == 0:
      print("STOP: time expired")
      done = True
      positions.append(self.position)
      for i in self.players:
        blocker_position.append(list(i))
      return self.position, reward, done, info

    # Went out of bounds
    elif (self.position[1] <= 0):
      print("STOP: Went out of bounds")
      done = True
      self.position[1] = 0
      positions.append(self.position)
      for i in self.players:
        blocker_position.append(list(i))
      return self.position, reward, done, info

    # Went out of bounds
    elif (self.position[1] >= 53.3):
      print("STOP: Went out of bounds")
      done = True
      self.position[1] = 53.3
      positions.append(self.position)
      for i in self.players:
        blocker_position.append(list(i))

      return self.position, reward, done, info

    # Did we reach the end zone
    elif self.direction == "right":
      if (self.position[0] <= 10):
        print("STOP: Scored a TD")
        done = True
        positions.append(self.position)
        for i in self.players:
          blocker_position.append(i)

        return self.position, reward, done, info

    elif self.direction == "left":
      if (self.position[0] >= 110):
        print("STOP: Scored a TD")
        done = True
        positions.append(self.position)
        for i in self.players:
          blocker_position.append(i)

        return self.position, reward, done, info

    # Did we hit an opponent
    elif any((i == self.position).all() for i in self.players):
      print("STOP: Hit an opponent")
      done = True
      positions.append(self.position)
      for i in self.players:
        blocker_position.append(i)

      return self.position, reward, done, info

    return self.position, reward, done, info

#---------------------------------------------------------------------------
  def add_positions(self):
    """Log the current frame into the module-level trajectory lists.

    The agent's position is appended to ``positions`` and every entry of
    ``self.players`` is appended, in order, to ``blocker_position``.
    """
    positions.append(self.position)
    blocker_position.extend(self.players)

# ------------------------------------------------------
  def animation(self, episode_list, mode='console', blocker_episodes=None, save_path=None):
    """Render the first recorded episode as a video, save it and display it inline.

    Args:
      episode_list: sequence of episodes; only ``episode_list[0]`` is drawn. It
        must be convertible to a DataFrame with the columns x and y, one row
        per frame.
      mode: unused; kept so that existing calls keep working.
      blocker_episodes: per-episode lists of (x, y) rows of all other players,
        ``len(self.players)`` rows per frame. Defaults to the module-level
        ``blocker_episode_list``.
      save_path: file the video is written to. Defaults to the path this
        method has always written to.
    """
    if blocker_episodes is None:
      blocker_episodes = blocker_episode_list
    if save_path is None:
      save_path = "/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/random-simulation/Random_Animation_Returner_5.mp4"

    column_values = ['x', 'y']

    # Returner df---------------------------------------------------------------
    returner_df = pd.DataFrame(data = episode_list[0], columns = column_values)
    returner_df['frames'] = returner_df.index

    # Blocker df ---------------------------------------------------------------
    # Rows arrive frame after frame, each frame holding the teammates first and
    # the opponents afterwards (the order of self.players).
    blocker_rows = blocker_episodes[0]
    players_per_frame = len(self.players)
    n_teammates = len(self.teammates_positions)
    blocker_df = pd.DataFrame(data = blocker_rows, columns = column_values)
    blocker_df['frames'] = [row // players_per_frame for row in range(len(blocker_rows))]
    # Team label derived from the slot inside the frame instead of a fixed
    # 10 + 11 pattern, so any roster size is labelled correctly.
    blocker_df['teams'] = [0 if (row % players_per_frame) < n_teammates else 1 for row in range(len(blocker_rows))]

    # Distance df---------------------------------------------------------------
    # self.N_nearest rows are logged per frame.
    distance_df = pd.DataFrame(data=self.closest_n_players_per_frame, columns = column_values)
    distance_df['frames'] = [row // self.N_nearest for row in range(len(self.closest_n_players_per_frame))]

    # Centroid df---------------------------------------------------------------
    centroid_df = pd.DataFrame(data = self.centroid, columns = column_values)
    centroid_df['frames'] = centroid_df.index

    # NFL Pitch-----------------------------------------------------------------
    fig, ax = generate_nfl_field()

    # One artist per thing drawn; all of them live on the same axes
    marker_kwargs = {'marker': 'o', 'markeredgecolor': 'black', 'linestyle': 'None'}
    returner, = ax.plot([], [], ms=12, markerfacecolor='#00b4d8',**marker_kwargs)
    centroid, = ax.plot([], [], ms=10, markerfacecolor='#F7A41A',**marker_kwargs)
    distance, = ax.plot([], [], '#FB8500', animated=True)
    fav, = ax.plot([], [], ms=10, markerfacecolor='#E01E38',**marker_kwargs)
    opp, = ax.plot([], [], ms=10, markerfacecolor='#EC4B27',**marker_kwargs)
    player_line, = ax.plot([], [], color='#48cae4')

    # Update Formula------------------------------------------------------------
    def visualize_play(frame):
      player = returner_df[returner_df["frames"] == frame]
      center = centroid_df[centroid_df["frames"] == frame]
      nearest = distance_df[distance_df["frames"] == frame]
      frame_blockers = blocker_df[blocker_df["frames"] == frame]
      local = frame_blockers[frame_blockers["teams"] == 0]
      visiting = frame_blockers[frame_blockers["teams"] == 1]

      # Star-shaped polyline: returner -> neighbour -> returner -> next neighbour ...
      # A frame with fewer logged neighbours simply draws fewer spokes.
      p1 = player[column_values].iloc[0].tolist()
      connections = [p1]
      for p2 in nearest[column_values].iloc[:self.N_nearest].values.tolist():
        connections.append(p2)
        connections.append(p1)
      x, y = zip(*connections)

      returner.set_data(player['x'], player['y'])
      centroid.set_data(center['x'], center['y'])
      distance.set_data(x, y)
      player_line.set_data(returner_df.iloc[:,0], returner_df.iloc[:,1])
      fav.set_data(local["x"], local["y"])
      opp.set_data(visiting["x"], visiting["y"])

    # With positional data of every player for every frame, we animate
    anim = FuncAnimation(fig, visualize_play, frames=len(returner_df), interval=100)
    anim.save(save_path)
    video = anim.to_html5_video()
    html = display.HTML(video)
    display.display(html)
    plt.close(fig)

# ------------------------------------------------------
  def reset(self):
    """Rewind the environment to the start of the play.

    Restores the agent and player positions from the ``start_*`` attributes and
    clears the accumulated reward, the time/frame counters and the per-frame
    logs.

    Returns:
      The agent's starting position as a fresh array, so that in-place changes
      made later cannot alter ``self.start_position``.
    """
    # self.position is edited in place elsewhere in this class (+= and item
    # assignment); a copy keeps the stored start position intact across episodes.
    self.position = np.array(self.start_position)
    self.teammates_positions = self.start_teammates_positions
    self.opponents_positions = self.start_opponent_positions
    self.players = self.teammates_positions + self.opponents_positions
    self.total_reward = 0
    self.play_length = self.time
    self.frame = 0
    self.closest_n_players_per_frame = []
    self.centroid = []
    return self.position

Then we define the trainable version of the returner agent.

In [None]:
class CustomEnvironment_Returner(gym.Env):
  """Trainable returner environment built on one recorded play.

  The agent moves the returner ``yards_step`` at a time with eight discrete
  actions, while every other player is re-read from ``data_frames_returner``
  for the current frame. The observation is the returner's ``[x, y]`` position.

  Reward per step: forward moves earn 1 (0.5 when the returner is farther than
  ``threshold`` from the centroid of its ``N_nearest`` closest players),
  backward moves earn -1 and lateral moves 0. Reaching the end zone (4),
  leaving the field sideways (-3) or landing exactly on another player (-2)
  replace the movement reward of that step.
  """

  # (dx, dy) sign of every action when direction == "right"; for
  # direction == "left" both components are mirrored.
  _MOVES_RIGHT = {
    0: (-1, 0),    # Forward
    1: (1, 0),     # Backwards
    2: (0, -1),    # Left
    3: (0, 1),     # Right
    4: (-1, -1),   # Forward Left
    5: (-1, 1),    # Forward Right
    6: (1, -1),    # Backwards Left
    7: (1, 1),     # Backwards Right
  }
  _FORWARD_ACTIONS = (0, 4, 5)
  _BACKWARD_ACTIONS = (1, 6, 7)

# ------------------------------------------------------
  def __init__(self, season, game_id, play_id):
    """Load the opening frame of the play and define the action/observation spaces.

    Args:
      season, game_id, play_id: identify the play. They are forwarded to
        ``initial_positions_returner`` here and to ``data_frames_returner`` on
        every step.
    """
    super(CustomEnvironment_Returner,self).__init__()

    # Remember which play is replayed, so step() reads the frames of this play
    # rather than whatever same-named notebook globals currently hold.
    self.season = season
    self.game_id = game_id
    self.play_id = play_id

    # Home, Away, Returner, Time and Direction ---------------------------------
    returner_start, home_start, away_start, time, direction = initial_positions_returner(season, game_id, play_id)

    # Initial Data -------------------------------------------------------------
    self.start_position = np.array([returner_start["x"].unique()[0], returner_start["y"].unique()[0]])
    self.start_teammates_positions = self._xy_rows(home_start)
    self.start_opponent_positions = self._xy_rows(away_start)

    # Direction of the return ("right" or "left")
    self.direction = direction

    # Length of the play (number of steps before time expires)
    self.time = time

    # Distance covered by one action along each moved axis
    self.yards_step = 1

    # Number of closest players tracked each frame
    self.N_nearest = 3

    # Distance to the centroid above which a forward move only earns 0.5
    self.threshold = 1

    # Dictionary with possible actions and action space as well as observation space
    self.action_dictionary = {0: "Forward", 1: "Backwards", 2: "Left", 3: "Right", 4: "Forward Left", 5: "Forward Right", 6: "Backwards Left", 7: "Backwards Right"}
    self.action_space = gym.spaces.Discrete(8)
    self.observation_space = gym.spaces.Box(low = np.array([0,0]), high = np.array([120,53.3]), dtype=np.float64)

    # Position, players, reward, counters and per-frame logs
    self._reset_episode_state()

# ------------------------------------------------------
  @staticmethod
  def _xy_rows(frame):
    """Return one ``np.array([x, y])`` per row of a DataFrame with x and y columns."""
    return [np.array([x, y]) for x, y in zip(frame["x"], frame["y"])]

# ------------------------------------------------------
  def _reset_episode_state(self):
    """Restore everything that changes during an episode to its initial value."""
    # Copies keep the stored start state safe from later in-place changes.
    self.position = self.start_position.copy()
    self.teammates_positions = list(self.start_teammates_positions)
    self.opponents_positions = list(self.start_opponent_positions)
    # All players but the returner, teammates first
    self.players = self.teammates_positions + self.opponents_positions
    self.total_reward = 0
    self.play_length = self.time
    self.frame = 0
    self.closest_n_players_per_frame = []
    self.centroid = []

# ------------------------------------------------------
  def _reached_end_zone(self):
    """True when the returner is at or beyond the goal line it is running towards."""
    if self.direction == "right":
      return self.position[0] <= 10
    if self.direction == "left":
      return self.position[0] >= 110
    return False

# ------------------------------------------------------
  def _hit_player(self):
    """True when the returner stands on exactly the same coordinates as another player."""
    return any((other == self.position).all() for other in self.players)

# ------------------------------------------------------
  def _record_blockers(self):
    """Append the current position of every other player to ``blocker_position``."""
    for other in self.players:
      blocker_position.append(list(other))

# ------------------------------------------------------
  def step(self, action):
    """Advance the play by one frame and move the returner.

    Args:
      action: integer in 0..7 (see ``self.action_dictionary``). NumPy integer
        scalars and size-1 arrays are accepted as well. Any other integer
        leaves the returner where it is.

    Returns:
      ``(position, reward, done, info)``: the returner's position after the
      move, the reward of this step, whether the episode ended and an empty
      info dict.
    """
    done = False
    info = {}
    reward = 0
    action = int(np.asarray(action).item())

    # Home and Away data of the frame that is about to be played ----------------
    home_team, away_team = data_frames_returner(self.season, self.game_id, self.play_id, self.frame)

    # Step Variations-----------------------------------------------------------
    self.play_length -= 1
    self.frame += 1

    # Keep the returner inside the field before measuring distances
    self.position = np.array([np.clip(self.position[0],0,120), np.clip(self.position[1],0,53.3)])
    # NOTE(review): these two attributes mirror the returner's clipped position
    # and are not read anywhere in this class; kept only so that code relying on
    # them keeps working.
    self.teammates_position = self.position.copy()
    self.opponents_position = self.position.copy()

    # Distances between returner and players (previous frame) -------------------
    distances = [((self.position[0] - other[0])**2 + (self.position[1] - other[1])**2)**0.5 for other in self.players]

    # Indices of the N_nearest closest players, nearest first; ties keep list order
    self.closest_players_idx = heapq.nsmallest(self.N_nearest, range(len(distances)), key=distances.__getitem__)

    nearest_positions = []
    for idx in self.closest_players_idx:
      nearest = self.players[idx]
      nearest_positions.append(nearest)
      self.closest_players_idx_coordinates = [nearest[0], nearest[1]]
      self.closest_n_players_per_frame.append(self.closest_players_idx_coordinates)

    # Teammates and Opponents movement------------------------------------------
    self.current_teammates_positions = self._xy_rows(home_team)
    self.current_opponent_positions = self._xy_rows(away_team)
    self.teammates_positions = self.current_teammates_positions
    self.opponents_positions = self.current_opponent_positions
    self.players = self.teammates_positions + self.opponents_positions

    # Centroid of the nearest players and the returner's distance to it ---------
    centroid = (sum(p[0] for p in nearest_positions)/len(nearest_positions),
                sum(p[1] for p in nearest_positions)/len(nearest_positions))
    self.centroid.append(centroid)
    distance_centroid = ((self.position[0] - centroid[0])**2 + (self.position[1] - centroid[1])**2)**0.5

    # Agent movement and Rewards------------------------------------------------
    # "right" advances towards decreasing x, "left" towards increasing x
    mirror = {"right": 1, "left": -1}.get(self.direction)
    if mirror is not None:
      move = self._MOVES_RIGHT.get(action)
      if move is not None:
        self.position += (mirror * move[0] * self.yards_step, mirror * move[1] * self.yards_step)

      if action in self._FORWARD_ACTIONS:
        # Advancing far away from the nearest players is worth only half
        reward = 0.5 if distance_centroid > self.threshold else 1
      elif action in self._BACKWARD_ACTIONS:
        reward = -1
      self.total_reward += reward

      # Touchdown, out of bounds and collisions replace the movement reward
      if self._reached_end_zone():
        self.total_reward += 4
        reward = 4
      elif self.position[1] <= 0 or self.position[1] >= 53.3:
        self.total_reward -= 3
        reward = -3
      elif self._hit_player():
        self.total_reward -= 2
        reward = -2

    # Stopping Criteria---------------------------------------------------------
    stop_reason = None
    if self.play_length <= 0:
      stop_reason = "STOP: time expired"
    elif self.position[1] <= 0:
      stop_reason = "STOP: Went out of bounds"
      self.position[1] = 0
    elif self.position[1] >= 53.3:
      stop_reason = "STOP: Went out of bounds"
      self.position[1] = 53.3
    elif self._reached_end_zone():
      stop_reason = "STOP: Scored a TD"
    elif self._hit_player():
      stop_reason = "STOP: Hit an opponent"

    if stop_reason is not None:
      print(stop_reason)
      done = True
      # NOTE(review): only the other players are logged here. The returner's own
      # trajectory is appended to ``positions`` by add_positions(); confirm the
      # driving loop calls it for the final frame too.
      self._record_blockers()

    return self.position, reward, done, info

#---------------------------------------------------------------------------
  def add_positions(self):
    """Append the returner's position to ``positions`` and every other player to ``blocker_position``."""
    positions.append(self.position)
    for i in self.players:
      blocker_position.append(i)

# ------------------------------------------------------
  def animation(self, episode_list, mode='console', blocker_episodes=None, save_path=None):
    """Render the first recorded episode as a video, save it and display it inline.

    Args:
      episode_list: sequence of episodes; only ``episode_list[0]`` is drawn. It
        must be convertible to a DataFrame with the columns x and y, one row
        per frame.
      mode: unused; kept so that existing calls keep working.
      blocker_episodes: per-episode lists of (x, y) rows of all other players,
        ``len(self.players)`` rows per frame. Defaults to the module-level
        ``blocker_episode_list``.
      save_path: file the video is written to. Defaults to the path this
        method has always written to.
    """
    if blocker_episodes is None:
      blocker_episodes = blocker_episode_list
    if save_path is None:
      save_path = "/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/learned-simulation/Learned_Animation_Returner_5.mp4"

    column_values = ['x', 'y']

    # Returner df---------------------------------------------------------------
    returner_df = pd.DataFrame(data = episode_list[0], columns = column_values)
    returner_df['frames'] = returner_df.index

    # Blocker df ---------------------------------------------------------------
    # Rows arrive frame after frame, each frame holding the teammates first and
    # the opponents afterwards (the order of self.players).
    blocker_rows = blocker_episodes[0]
    players_per_frame = len(self.players)
    n_teammates = len(self.teammates_positions)
    blocker_df = pd.DataFrame(data = blocker_rows, columns = column_values)
    blocker_df['frames'] = [row // players_per_frame for row in range(len(blocker_rows))]
    # Team label derived from the slot inside the frame instead of a fixed
    # 10 + 11 pattern, so any roster size is labelled correctly.
    blocker_df['teams'] = [0 if (row % players_per_frame) < n_teammates else 1 for row in range(len(blocker_rows))]

    # Distance df---------------------------------------------------------------
    # self.N_nearest rows are logged per frame.
    distance_df = pd.DataFrame(data=self.closest_n_players_per_frame, columns = column_values)
    distance_df['frames'] = [row // self.N_nearest for row in range(len(self.closest_n_players_per_frame))]

    # Centroid df---------------------------------------------------------------
    centroid_df = pd.DataFrame(data = self.centroid, columns = column_values)
    centroid_df['frames'] = centroid_df.index

    # NFL Pitch-----------------------------------------------------------------
    fig, ax = generate_nfl_field()

    # One artist per thing drawn; all of them live on the same axes
    marker_kwargs = {'marker': 'o', 'markeredgecolor': 'black', 'linestyle': 'None'}
    returner, = ax.plot([], [], ms=12, markerfacecolor='#00b4d8',**marker_kwargs)
    centroid, = ax.plot([], [], ms=10, markerfacecolor='#F7A41A',**marker_kwargs)
    distance, = ax.plot([], [], '#FB8500', animated=True)
    fav, = ax.plot([], [], ms=10, markerfacecolor='#E01E38',**marker_kwargs)
    opp, = ax.plot([], [], ms=10, markerfacecolor='#EC4B27',**marker_kwargs)
    player_line, = ax.plot([], [], color='#48cae4')

    # Update Formula------------------------------------------------------------
    def visualize_play(frame):
      player = returner_df[returner_df["frames"] == frame]
      center = centroid_df[centroid_df["frames"] == frame]
      nearest = distance_df[distance_df["frames"] == frame]
      frame_blockers = blocker_df[blocker_df["frames"] == frame]
      local = frame_blockers[frame_blockers["teams"] == 0]
      visiting = frame_blockers[frame_blockers["teams"] == 1]

      # Star-shaped polyline: returner -> neighbour -> returner -> next neighbour ...
      # A frame with fewer logged neighbours simply draws fewer spokes.
      p1 = player[column_values].iloc[0].tolist()
      connections = [p1]
      for p2 in nearest[column_values].iloc[:self.N_nearest].values.tolist():
        connections.append(p2)
        connections.append(p1)
      x, y = zip(*connections)

      returner.set_data(player['x'], player['y'])
      centroid.set_data(center['x'], center['y'])
      distance.set_data(x, y)
      player_line.set_data(returner_df.iloc[:,0], returner_df.iloc[:,1])
      fav.set_data(local["x"], local["y"])
      opp.set_data(visiting["x"], visiting["y"])

    # With positional data of every player for every frame, we animate
    anim = FuncAnimation(fig, visualize_play, frames=len(returner_df), interval=100)
    anim.save(save_path)
    video = anim.to_html5_video()
    html = display.HTML(video)
    display.display(html)
    plt.close(fig)

# ------------------------------------------------------
  def reset(self):
    """Rewind the environment to the first frame of the play.

    Returns:
      The returner's starting position (a copy of ``self.start_position``).
    """
    self._reset_episode_state()
    return self.position

### Agent 2: Pursuer

First, we generate the agent that has no knowledge of its surroundings.

In [None]:
class CustomEnvironment_pursuer_Random(gym.Env):
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  def __init__(self, season, game_id, play_id):
    """Set up the random pursuer environment from the opening frame of a play.

    Args:
      season, game_id, play_id: passed unchanged to ``initial_positions_pursuer``
        together with a fixed index of 5.
    """
    super(CustomEnvironment_pursuer_Random,self).__init__()

    # Opening frame: controlled player, both teams, play length and direction
    # NOTE(review): idx presumably selects which player acts as the pursuer;
    # confirm against initial_positions_pursuer.
    idx = 5
    agent_start, home_start, away_start, time, direction = initial_positions_pursuer(season, game_id, play_id, idx)

    def as_points(frame):
      # One np.array([x, y]) per row label, in index order
      return [np.array([frame["x"][label], frame["y"][label]]) for label in frame.index]

    # Controlled player: first distinct x and y value of its opening frame
    start_x = agent_start["x"].unique()[0]
    start_y = agent_start["y"].unique()[0]
    self.start_position = np.array([start_x, start_y])
    self.position = self.start_position

    # Teammates and opponents; the "current" lists start out as the very same
    # list objects as the stored starting lists
    self.start_teammates_positions = as_points(home_start)
    self.teammates_positions = self.start_teammates_positions
    self.start_opponent_positions = as_points(away_start)
    self.opponents_positions = self.start_opponent_positions

    # Every player except the controlled one, teammates first
    self.players = self.teammates_positions + self.opponents_positions

    # Reward accumulated over the episode
    self.total_reward = 0

    # Direction of the play
    self.direction = direction

    # Length of the play, plus the countdown started from it
    self.time = time
    self.play_length = time

    # Index of the next frame
    self.frame = 0

    # Step size of a single action
    self.yards_step = 1

    # Number of nearest players tracked, and the catch radius
    self.N_nearest = 1
    self.catch_radius = 5

    # Per-frame logs
    self.closest_n_players_per_frame = []
    self.centroid = []
    self.threshold = 1

    # Action names, action space and observation space
    self.action_dictionary = {
      0: "Forward",
      1: "Backwards",
      2: "Left",
      3: "Right",
      4: "Forward Left",
      5: "Forward Right",
      6: "Backwards Left",
      7: "Backwards Right",
    }
    self.action_space = gym.spaces.Discrete(8)
    lower_bounds = np.array([0,0])
    upper_bounds = np.array([120,53.3])
    self.observation_space = gym.spaces.Box(low = lower_bounds, high = upper_bounds, dtype=np.float64)

# ------------------------------------------------------
  def step(self, action):
    done = False
    info = {}
    reward = 0
    idx = 5

    # Home, Away and Returner Data----------------------------------------------
    home_team, away_team, target = data_frames_pursuer(season, game_id, play_id, self.frame, idx)
    self.target =  np.array([target["x"].unique()[0],target["y"].unique()[0]])

    # Step Variations-----------------------------------------------------------
    # Reduces the play length by 1 second
    self.play_length -= 1

    # Increases number of frame
    self.frame += 1

    # Take into account boundaries of the field
    self.position = np.array([np.clip(self.position[0],0,120), np.clip(self.position[1],0,53.3)])
    self.teammates_position = np.array([np.clip(self.position[0],0,120), np.clip(self.position[1],0,53.3)])
    self.opponents_position = np.array([np.clip(self.position[0],0,120), np.clip(self.position[1],0,53.3)])

    # Distances between returner and players------------------------------------
    distances = []
    dictionary = {}

    def get_key(val):
      for key, value in dictionary.items():
          if (val == value) and (key not in self.closest_players_idx):
            return key

    for i in range(1):
      for j in range(i, len(self.players)):
        distance = ((self.position[0] - self.players[j][0])**2 + (self.position[1] - self.players[j][1])**2)**0.5
        distances.append(distance)
        dictionary[j] = distance

      n_shortest = heapq.nsmallest(self.N_nearest, distances)

      self.closest_players_idx=[]
      for i in n_shortest:
        self.closest_players_idx.append(get_key(i))

      positions = []
      for i in self.closest_players_idx:
        self.closest_players_idx_coordinates = []
        positions.append(self.target)
        self.closest_players_idx_coordinates.append(self.target[0])
        self.closest_players_idx_coordinates.append(self.target[1])
        self.closest_n_players_per_frame.append(self.closest_players_idx_coordinates)

    # Teammates and Opponents movement------------------------------------------
    self.current_teammates_positions = []
    self.current_opponent_positions = []

    for ind in home_team.index:
      self.current_teammates_positions.append(np.array([home_team["x"][ind], home_team["y"][ind]]))
    self.teammates_positions = self.current_teammates_positions

    for ind in away_team.index:
      self.current_opponent_positions.append(np.array([away_team["x"][ind], away_team["y"][ind]]))
    self.opponents_positions = self.current_opponent_positions

    self.players = self.teammates_positions + self.opponents_positions

    # Centroids
    def Centroid(list_of_points):
      x_cord = sum(list(zip(*list_of_points))[0])
      y_cord = sum(list(zip(*list_of_points))[1])
      centroid = (x_cord/len(list_of_points),y_cord/len(list_of_points))
      return centroid

    self.centroid.append(Centroid(positions))
    distance_centroid = ((self.position[0] - Centroid(positions)[0])**2 + (self.position[1] - Centroid(positions)[1])**2)**0.5

    # Agent movement and Rewards------------------------------------------------
    # Movement (x,y) if scoring in the left endzone
    if self.direction == "right":

      # Distance before
      old_dist_to_tar = np.sqrt(np.sum((self.position - self.target)**2))

      # Forward
      if action == 0:
        self.position += (self.yards_step,0)

      # Backwards
      elif action == 1:
        self.position -= (self.yards_step,0)

      # Left
      elif action == 2:
        self.position += (0,self.yards_step)

      # Right
      elif action == 3:
        self.position -= (0,self.yards_step)

      # Forwards Left
      elif action == 4:
        self.position += (self.yards_step,self.yards_step)

      # Forwards Right
      elif action == 5:
        self.position += (self.yards_step,-(self.yards_step))

      # Backwards Left
      elif action == 6:
        self.position += (-(self.yards_step),self.yards_step)

      # Backwards Right
      elif action == 7:
        self.position -= (self.yards_step,self.yards_step)

      # Rewards taking into account out of bounds and TDs

      # Distances
      disance = np.sqrt(np.sum((self.position - self.target)**2))

      if distance < old_dist_to_tar:
        self.total_reward += 5
        reward = 5
      elif distance == old_dist_to_tar:
        self.total_reward -= 2
        reward = -2
      else:
        self.total_reward -= 10
        reward = -10

      if distance <= self.catch_radius:
        self.total_reward += 20
        reward = 20

      # Out of Bounds
      elif self.position[1] <= 0:
        self.total_reward -= 3
        reward = -3
      elif self.position[1] >= 53.3:
        self.total_reward -= 3
        reward = -3

    # Movement (x,y) if scoring in the right endzone
    elif self.direction == "left":

      # Distance before
      old_dist_to_tar = np.sqrt(np.sum((self.position - self.target)**2))

      # Forward
      if action == 0:
        self.position -= (self.yards_step,0)


      # Backwards
      elif action == 1:
        self.position += (self.yards_step,0)

      # Left
      elif action == 2:
        self.position -= (0,self.yards_step)

      # Right
      elif action == 3:
        self.position += (0,self.yards_step)

      # Forwards Left
      elif action == 4:
        self.position -= (self.yards_step,self.yards_step)


      # Forwards Right
      elif action == 5:
        self.position -= (self.yards_step,-(self.yards_step))


      # Backwards Left
      elif action == 6:
        self.position -= (-(self.yards_step),self.yards_step)

      # Backwards Right
      elif action == 7:
        self.position += (self.yards_step,self.yards_step)

      # Rewards taking into account out of bounds and TDs

      # Distances
      disance = np.sqrt(np.sum((self.position - self.target)**2))

      if distance < old_dist_to_tar:
        self.total_reward += 5
        reward = 5
      elif distance == old_dist_to_tar:
        self.total_reward -= 2
        reward = -2
      else:
        self.total_reward -= 10
        reward = -10

      if distance <= self.catch_radius:
        self.total_reward += 20
        reward = 20

      # Out of Bounds
      elif self.position[1] <= 0:
        self.total_reward -= 3
        reward = -3
      elif self.position[1] >= 53.3:
        self.total_reward -= 3
        reward = -3


    # Stopping Criteria---------------------------------------------------------
    # Time expires
    if self.play_length == 0:
      print("STOP: time expired")
      done = True
      positions.append(self.position)
      for i in self.players:
        blocker_position.append(list(i))
      return self.position, reward, done, info

    # Went out of bounds
    elif (self.position[1] <= 0):
      print("STOP: Went out of bounds")
      done = True
      self.position[1] = 0
      positions.append(self.position)
      for i in self.players:
        blocker_position.append(list(i))
      return self.position, reward, done, info

    # Went out of bounds
    elif (self.position[1] >= 53.3):
      print("STOP: Went out of bounds")
      done = True
      self.position[1] = 53.3
      positions.append(self.position)
      for i in self.players:
        blocker_position.append(list(i))

      return self.position, reward, done, info

    # Did we hit an opponent
    elif any((i == self.position).all() for i in self.players):
      print("STOP: Hit and opponent")
      done = True
      positions.append(self.position)
      for i in self.players:
        blocker_position.append(i)

      return self.position, reward, done, info

    # Did we catch the returner
    elif distance < 3:
      print("STOP: Tackle returner")
      done = True
      positions.append(self.position)
      for i in self.players:
        blocker_position.append(i)

      return self.position, reward, done, info

    return self.position, reward, done, info

#---------------------------------------------------------------------------
  def add_positions(self):
    """Record the current frame in the notebook-level trajectory buffers.

    Appends the agent's position to the global list `positions` and every
    entry of `self.players` to the global list `blocker_position`. Both lists
    must already exist at notebook level when this is called.
    """
    positions.append(self.position)
    blocker_position.extend(self.players)

# ------------------------------------------------------
  def animation(self, episode_list, mode='console', blocker_episodes=None):
    """Render one recorded episode on an NFL field, save it as MP4 and show it inline.

    Args:
      episode_list: list of episodes; only `episode_list[0]` is drawn. The
        episode is a sequence of (x, y) agent positions, one per frame.
      mode: unused; kept so existing calls remain valid.
      blocker_episodes: optional list of episodes with the flat (x, y) rows of
        all other players, `len(self.players)` rows per frame. When omitted,
        the notebook-level global `blocker_episode_list` is used.

    The centroid stored in `self.centroid` is not drawn.
    """
    if blocker_episodes is None:
      # Fall back to the notebook-level list filled by the episode loop.
      blocker_episodes = blocker_episode_list

    column_values = ['x', 'y']

    # Agent trajectory: one row per frame
    returner_df = pd.DataFrame(data=episode_list[0], columns=column_values)
    returner_df['frames'] = returner_df.index

    # Other players: rows are stored frame after frame
    players_per_frame = len(self.players)
    blocker_rows = blocker_episodes[0]
    recorded_frames = len(blocker_rows) // players_per_frame
    blocker_df = pd.DataFrame(data=blocker_rows, columns=column_values)
    blocker_df['frames'] = [row // players_per_frame for row in range(len(blocker_rows))]

    # Team labels follow the order of self.players (teammates first, then
    # opponents), derived from the actual team sizes instead of a fixed 10 + 11.
    n_teammates = len(self.teammates_positions)
    labels_per_frame = [0] * n_teammates + [1] * (players_per_frame - n_teammates)
    blocker_df['teams'] = labels_per_frame * recorded_frames

    # Link end points: self.N_nearest rows per frame
    distance_df = pd.DataFrame(data=self.closest_n_players_per_frame, columns=column_values)
    distance_df['frames'] = [row // self.N_nearest for row in range(len(distance_df))]

    # NFL Pitch
    fig, ax = generate_nfl_field()

    # Marker styles to tell the agent and the two teams apart
    marker_kwargs = {'marker': 'o', 'markeredgecolor': 'black', 'linestyle': 'None'}
    returner, = ax.plot([], [], ms=12, markerfacecolor='#00b4d8', **marker_kwargs)
    # NOTE(review): animated=True excludes an artist from regular redraws;
    # confirm that this link line actually shows up in the saved video.
    distance, = plt.plot([], [], '#FB8500', animated=True)
    fav, = ax.plot([], [], ms=10, markerfacecolor='#E01E38', **marker_kwargs)
    opp, = ax.plot([], [], ms=10, markerfacecolor='#EC4B27', **marker_kwargs)
    player_line, = ax.plot([], [], color='#48cae4')

    def visualize_play(frame):
      """Update every artist with the data recorded for `frame`."""
      player = returner_df[returner_df["frames"] == frame]
      agent_xy = player.iloc[0, [0, 1]].tolist()

      # Polyline agent -> point -> agent -> ... so every link starts at the agent.
      # Frames with fewer recorded links than N_nearest simply draw fewer links.
      connections = [agent_xy]
      frame_links = distance_df.loc[distance_df["frames"] == frame, column_values]
      for link_xy in frame_links.to_numpy()[:self.N_nearest].tolist():
        connections.extend([link_xy, agent_xy])
      xs, ys = zip(*connections)

      frame_blockers = blocker_df[blocker_df["frames"] == frame]
      local = frame_blockers[frame_blockers["teams"] == 0]
      visiting = frame_blockers[frame_blockers["teams"] == 1]

      returner.set_data(player['x'], player['y'])
      distance.set_data(xs, ys)
      # The full trajectory is drawn on every frame
      player_line.set_data(returner_df['x'], returner_df['y'])
      fav.set_data(local["x"], local["y"])
      opp.set_data(visiting["x"], visiting["y"])

    # One animation frame per recorded agent position
    anim = FuncAnimation(fig, visualize_play, frames=len(returner_df), interval=100)
    anim.save("/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/random-simulation/Random_Animation_Chaser_5.mp4")
    display.display(display.HTML(anim.to_html5_video()))
    plt.close(fig)

# ------------------------------------------------------
  def reset(self):
    """Rewind the environment to the first frame of the play.

    Returns:
      The agent's starting position, which serves as the initial observation.
    """
    # Per-episode bookkeeping
    self.frame = 0
    self.total_reward = 0
    self.play_length = self.time
    self.centroid = []
    self.closest_n_players_per_frame = []

    # Player layout at the start of the play (teammates first, then opponents)
    self.teammates_positions = self.start_teammates_positions
    self.opponents_positions = self.start_opponent_positions
    self.players = self.teammates_positions + self.opponents_positions

    self.position = self.start_position
    return self.position

Then we generate a trainable agent

In [None]:
class CustomEnvironment_Pursuer(gym.Env):
  """Trainable pursuer: an agent that moves one step per frame towards a tracked target.

  Each call to step() loads the recorded positions of the other players and of
  the target for the current frame (via `data_frames_chaser`), moves the agent
  according to the chosen action and rewards it for getting closer to the
  target.

  The environment uses the classic gym API: reset() returns the observation and
  step() returns (observation, reward, done, info). The observation is the
  agent's (x, y) position.

  Notebook coupling: step() and add_positions() append to the notebook-level
  lists `positions` and `blocker_position`, which the episode loop must create
  before stepping the environment.
  """

  # Field limits used for clipping, out-of-bounds checks and the observation space.
  FIELD_LENGTH = 120
  FIELD_WIDTH = 53.3

  # Index forwarded to `initial_positions_pursuer` and `data_frames_chaser`.
  # NOTE(review): presumably selects which pursuing player the agent replaces - confirm.
  PLAYER_IDX = 5

  # The episode ends once the agent is closer than this to the target.
  TACKLE_DISTANCE = 3

  # (dx, dy) per action id, in multiples of `yards_step`, for direction "right".
  # Every sign is flipped when the direction is "left".
  _ACTION_DELTAS = {
      0: (1, 0),    # Forward
      1: (-1, 0),   # Backwards
      2: (0, 1),    # Left
      3: (0, -1),   # Right
      4: (1, 1),    # Forward Left
      5: (1, -1),   # Forward Right
      6: (-1, 1),   # Backwards Left
      7: (-1, -1),  # Backwards Right
  }

#---------------------------------------------------------------------------
  def __init__(self, season, game_id, play_id):
    """Load the first frame of the play identified by (season, game_id, play_id)."""
    super(CustomEnvironment_Pursuer, self).__init__()

    # Keep the play identifiers on the instance so step() does not depend on
    # notebook-level globals with the same names.
    self.season = season
    self.game_id = game_id
    self.play_id = play_id

    # Home, Away, Returner, Time and Direction
    returner_initial_position, home_team_initial_position, away_team_initial_position, time, direction = initial_positions_pursuer(season, game_id, play_id, self.PLAYER_IDX)

    # Agent start position (copied so in-place moves can never alter the start)
    self.start_position = np.array([returner_initial_position["x"].unique()[0], returner_initial_position["y"].unique()[0]])
    self.position = self.start_position.copy()

    # Teammates and opponents at the first frame
    self.start_teammates_positions = self._xy_rows(home_team_initial_position)
    self.teammates_positions = list(self.start_teammates_positions)
    self.start_opponent_positions = self._xy_rows(away_team_initial_position)
    self.opponents_positions = list(self.start_opponent_positions)

    # All players but the agent (teammates first, then opponents)
    self.players = self.teammates_positions + self.opponents_positions

    self.total_reward = 0
    self.direction = direction

    # Length of the play: `time` is the budget, `play_length` the steps remaining
    self.time = time
    self.play_length = time

    # Index of the tracking frame loaded by the next step()
    self.frame = 0

    # Distance covered by one move along each axis
    self.yards_step = 1

    # Number of link end points recorded per frame for the animation
    self.N_nearest = 1

    # Being within this distance of the target earns the catch bonus
    self.catch_radius = 5

    # Per-frame history consumed by animation()
    self.closest_n_players_per_frame = []
    self.centroid = []
    self.threshold = 1

    # Possible actions, action space and observation space
    self.action_dictionary = {0: "Forward", 1: "Backwards", 2: "Left", 3: "Right", 4: "Forward Left", 5: "Forward Right", 6: "Backwards Left", 7: "Backwards Right"}
    self.action_space = gym.spaces.Discrete(8)
    self.observation_space = gym.spaces.Box(low = np.array([0,0]), high = np.array([120,53.3]), dtype=np.float64)

#---------------------------------------------------------------------------
  @staticmethod
  def _xy_rows(frame_df):
    """Return one np.array([x, y]) per row of `frame_df`."""
    return [np.array(xy) for xy in frame_df[["x", "y"]].to_numpy()]

#---------------------------------------------------------------------------
  def _distance_to_target(self):
    """Euclidean distance between the agent and the current target."""
    return np.sqrt(np.sum((self.position - self.target)**2))

#---------------------------------------------------------------------------
  def _reward_for_move(self, old_distance, new_distance):
    """Score one move and add it to `self.total_reward`.

    +5 / -2 / -10 for getting closer / staying level / falling behind. This is
    then replaced by +20 inside the catch radius, or by -3 on or beyond a
    sideline.

    NOTE(review): `total_reward` accumulates both components while the returned
    step reward is only the last one assigned - confirm this is intended.
    """
    if new_distance < old_distance:
      reward = 5
    elif new_distance == old_distance:
      reward = -2
    else:
      reward = -10
    self.total_reward += reward

    if new_distance <= self.catch_radius:
      reward = 20
      self.total_reward += reward
    elif self.position[1] <= 0 or self.position[1] >= self.FIELD_WIDTH:
      reward = -3
      self.total_reward += reward
    return reward

# ------------------------------------------------------
  def step(self, action):
    """Advance the play by one frame.

    Args:
      action: id in `self.action_dictionary`; unknown ids leave the agent in place.

    Returns:
      (observation, reward, done, info), where the observation is the agent's
      position after the move.
    """
    done = False
    info = {}

    # Home, Away and target data for the current frame
    home_team, away_team, target = data_frames_chaser(self.season, self.game_id, self.play_id, self.frame, self.PLAYER_IDX)
    self.target = np.array([target["x"].unique()[0], target["y"].unique()[0]])

    # One step of the play is consumed and the next frame is queued
    self.play_length -= 1
    self.frame += 1

    # Clip to the field. This also rebinds self.position to a new array, so
    # positions recorded earlier are not touched by the in-place move below.
    self.position = np.clip(self.position, (0, 0), (self.FIELD_LENGTH, self.FIELD_WIDTH))

    # Nearest players, measured against the previous frame's player positions.
    # Ties keep the lower index.
    player_distances = [
        ((self.position[0] - player[0])**2 + (self.position[1] - player[1])**2)**0.5
        for player in self.players
    ]
    self.closest_players_idx = heapq.nsmallest(self.N_nearest, range(len(self.players)), key=player_distances.__getitem__)

    # The pursuer tracks the target, so the target is what gets recorded as the
    # link end point, once per nearest slot.
    tracked_points = []
    for _ in self.closest_players_idx:
      tracked_points.append(self.target)
      self.closest_players_idx_coordinates = [self.target[0], self.target[1]]
      self.closest_n_players_per_frame.append(self.closest_players_idx_coordinates)

    # Teammates and opponents move to their recorded positions for this frame
    self.teammates_positions = self._xy_rows(home_team)
    self.opponents_positions = self._xy_rows(away_team)
    self.players = self.teammates_positions + self.opponents_positions

    # Centroid of the tracked points
    if tracked_points:
      self.centroid.append(tuple(sum(axis) / len(tracked_points) for axis in zip(*tracked_points)))

    # Agent movement and rewards
    old_dist_to_tar = self._distance_to_target()
    sign = {"right": 1, "left": -1}.get(self.direction)
    if sign is None:
      # Unknown direction: no movement and no reward
      distance = old_dist_to_tar
      reward = 0
    else:
      dx, dy = self._ACTION_DELTAS.get(int(action), (0, 0))
      self.position += (sign * dx * self.yards_step, sign * dy * self.yards_step)
      # Distance to the target after the move, used for the reward and the
      # tackle check.
      distance = self._distance_to_target()
      reward = self._reward_for_move(old_dist_to_tar, distance)

    # Stopping criteria, first match wins
    stop_message = None
    if self.play_length <= 0:
      stop_message = "STOP: time expired"
    elif self.position[1] <= 0:
      stop_message = "STOP: Went out of bounds"
      self.position[1] = 0
    elif self.position[1] >= self.FIELD_WIDTH:
      stop_message = "STOP: Went out of bounds"
      self.position[1] = self.FIELD_WIDTH
    elif any(np.array_equal(player, self.position) for player in self.players):
      stop_message = "STOP: Hit and opponent"
    elif distance < self.TACKLE_DISTANCE:
      stop_message = "STOP: Tackle returner"

    if stop_message is not None:
      print(stop_message)
      done = True
      # Record the terminal frame in the notebook-level trajectory buffers
      positions.append(self.position)
      blocker_position.extend(list(player) for player in self.players)

    return self.position, reward, done, info

#---------------------------------------------------------------------------
  def add_positions(self):
    """Append the current agent and player positions to the notebook-level
    lists `positions` and `blocker_position`."""
    positions.append(self.position.copy())
    blocker_position.extend(self.players)

# ------------------------------------------------------
  def animation(self, episode_list, mode='console', blocker_episodes=None):
    """Render one recorded episode on an NFL field, save it as MP4 and show it inline.

    Args:
      episode_list: list of episodes; only `episode_list[0]` is drawn. The
        episode is a sequence of (x, y) agent positions, one per frame.
      mode: unused; kept so existing calls remain valid.
      blocker_episodes: optional list of episodes with the flat (x, y) rows of
        all other players, `len(self.players)` rows per frame. When omitted,
        the notebook-level global `blocker_episode_list` is used.

    The centroid stored in `self.centroid` is not drawn.
    """
    if blocker_episodes is None:
      # Fall back to the notebook-level list filled by the episode loop.
      blocker_episodes = blocker_episode_list

    column_values = ['x', 'y']

    # Agent trajectory: one row per frame
    returner_df = pd.DataFrame(data=episode_list[0], columns=column_values)
    returner_df['frames'] = returner_df.index

    # Other players: rows are stored frame after frame
    players_per_frame = len(self.players)
    blocker_rows = blocker_episodes[0]
    recorded_frames = len(blocker_rows) // players_per_frame
    blocker_df = pd.DataFrame(data=blocker_rows, columns=column_values)
    blocker_df['frames'] = [row // players_per_frame for row in range(len(blocker_rows))]

    # Team labels follow the order of self.players (teammates first, then
    # opponents), derived from the actual team sizes instead of a fixed 10 + 11.
    n_teammates = len(self.teammates_positions)
    labels_per_frame = [0] * n_teammates + [1] * (players_per_frame - n_teammates)
    blocker_df['teams'] = labels_per_frame * recorded_frames

    # Link end points: self.N_nearest rows per frame
    distance_df = pd.DataFrame(data=self.closest_n_players_per_frame, columns=column_values)
    distance_df['frames'] = [row // self.N_nearest for row in range(len(distance_df))]

    # NFL Pitch
    fig, ax = generate_nfl_field()

    # Marker styles to tell the agent and the two teams apart
    marker_kwargs = {'marker': 'o', 'markeredgecolor': 'black', 'linestyle': 'None'}
    returner, = ax.plot([], [], ms=12, markerfacecolor='#00b4d8', **marker_kwargs)
    # NOTE(review): animated=True excludes an artist from regular redraws;
    # confirm that this link line actually shows up in the saved video.
    distance, = plt.plot([], [], '#FB8500', animated=True)
    fav, = ax.plot([], [], ms=10, markerfacecolor='#E01E38', **marker_kwargs)
    opp, = ax.plot([], [], ms=10, markerfacecolor='#EC4B27', **marker_kwargs)
    player_line, = ax.plot([], [], color='#48cae4')

    def visualize_play(frame):
      """Update every artist with the data recorded for `frame`."""
      player = returner_df[returner_df["frames"] == frame]
      agent_xy = player.iloc[0, [0, 1]].tolist()

      # Polyline agent -> point -> agent -> ... so every link starts at the agent.
      # Frames with fewer recorded links than N_nearest simply draw fewer links.
      connections = [agent_xy]
      frame_links = distance_df.loc[distance_df["frames"] == frame, column_values]
      for link_xy in frame_links.to_numpy()[:self.N_nearest].tolist():
        connections.extend([link_xy, agent_xy])
      xs, ys = zip(*connections)

      frame_blockers = blocker_df[blocker_df["frames"] == frame]
      local = frame_blockers[frame_blockers["teams"] == 0]
      visiting = frame_blockers[frame_blockers["teams"] == 1]

      returner.set_data(player['x'], player['y'])
      distance.set_data(xs, ys)
      # The full trajectory is drawn on every frame
      player_line.set_data(returner_df['x'], returner_df['y'])
      fav.set_data(local["x"], local["y"])
      opp.set_data(visiting["x"], visiting["y"])

    # One animation frame per recorded agent position
    anim = FuncAnimation(fig, visualize_play, frames=len(returner_df), interval=100)
    anim.save("/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/learned-simulation/Learned_Animation_Chaser_5.mp4")
    display.display(display.HTML(anim.to_html5_video()))
    plt.close(fig)

# ------------------------------------------------------
  def reset(self):
    """Rewind to the first frame of the play and return the starting observation."""
    # Copies keep the stored start state independent of anything done to the
    # live attributes during an episode.
    self.position = self.start_position.copy()
    self.teammates_positions = list(self.start_teammates_positions)
    self.opponents_positions = list(self.start_opponent_positions)
    self.players = self.teammates_positions + self.opponents_positions
    self.total_reward = 0
    self.play_length = self.time
    self.frame = 0
    self.closest_n_players_per_frame = []
    self.centroid = []
    return self.position

### Environment initialization

We create all the environments and run them

In [None]:
# Play under study: every environment below is built from the same randomly
# drawn identifiers. The names season / game_id / play_id stay at notebook level.
season, game_id, play_id = random_season, random_game, random_play

print("This is game:", game_id)
print("This is play:", play_id)
print("This is season:", season)

# Returner agent: random baseline and trainable environment
returner_env_random = CustomEnvironment_Returner_Random(season, game_id, play_id)
returner_env = CustomEnvironment_Returner(season, game_id, play_id)

# Pursuer agent: random baseline and trainable environment
pursuer_env_random = CustomEnvironment_pursuer_Random(season, game_id, play_id)
pursuer_env = CustomEnvironment_Pursuer(season, game_id, play_id)

In [None]:
# Sanity check of the returner environment's spaces.
print("A random observation:", returner_env.observation_space.sample())
print("This is the shape:", returner_env.observation_space.shape)
print("A random action:", returner_env.action_space.sample())
# `.n` is the number of discrete actions, not a shape.
print("This is the number of actions:", returner_env.action_space.n)

## Random Simulations

Let's look at the random movement of our agents.

In [None]:
# Visualization

# Roll out the random returner policy and animate each episode.
episodes = 1
for episode in range(1, episodes+1):
  obs = returner_env_random.reset()
  done = False
  score = 0
  # Notebook-level trajectory buffers. They are never passed to the environment,
  # so its add_positions()/step() presumably fill them by name.
  positions = []
  blocker_position = []

  while not done:
    returner_env_random.add_positions()
    action = returner_env_random.action_space.sample()
    obs, reward, done, info = returner_env_random.step(action)
    score += reward
  # Drop the last recorded agent position (presumably the terminal frame)
  positions.pop()
  print("Episode:{} Score:{}".format(episode,score))

  # `blocker_episode_list` is not passed to animation(), so it has to be set at
  # notebook level before the call.
  episode_list = [positions]
  blocker_episode_list = [blocker_position]
  returner_env_random.animation(episode_list)
episode_list = []
blocker_episode_list = []
returner_env_random.close()

In [None]:
# Visualization

# Roll out the random pursuer policy and animate each episode.
episodes = 1
for episode in range(1, episodes+1):
  obs = pursuer_env_random.reset()
  done = False
  score = 0
  # Notebook-level trajectory buffers that the environment's add_positions()
  # appends to by name.
  positions = []
  blocker_position = []

  while not done:
    pursuer_env_random.add_positions()
    action = pursuer_env_random.action_space.sample()
    obs, reward, done, info = pursuer_env_random.step(action)
    score += reward
  # Drop the last recorded agent position (presumably the terminal frame)
  positions.pop()
  print("Episode:{} Score:{}".format(episode,score))

  # animation() reads `blocker_episode_list` from notebook scope, so it has to
  # be set before the call.
  episode_list = [positions]
  blocker_episode_list = [blocker_position]
  pursuer_env_random.animation(episode_list)
episode_list = []
blocker_episode_list = []
pursuer_env_random.close()

## Neural Network

Let's create our model based on our environment. This model will use the **Proximal Policy Optimization** (PPO) algorithm, which combines ideas from A2C (having multiple workers) and TRPO (using a trust region to improve the actor).

The main idea is that, after an update, the new policy should not be too far from the old policy. To achieve this, PPO uses clipping to avoid overly large updates.

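Furthermore, for the policy we are using 'MlpPolicy', a Multi-Layer Perceptron (MLP). Its default architecture has 2 hidden layers of 64 nodes each; here we override it through `policy_kwargs` with three hidden layers of 50, 25 and 10 nodes for both the policy and the value network.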

In [None]:
# Separate policy (pi) and value (vf) networks, three hidden layers each.
# net_arch is passed as a plain dict: the list-wrapped form [dict(...)] is
# deprecated since stable-baselines3 1.8 and only triggers a warning.
policy_kwargs = dict(
    optimizer_class=optim.Adam,
    net_arch=dict(pi=[50, 25, 10], vf=[50, 25, 10]),
    activation_fn=nn.ReLU,
)

# PPO agent for the returner; training metrics go to the logs-returner folder.
model_returner = PPO(
    policy="MlpPolicy",
    env=returner_env,
    learning_rate=3e-4,
    n_steps = 1024,
    batch_size=256,
    tensorboard_log="/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/logs-returner/",
    policy_kwargs=policy_kwargs,
    verbose=1,
    device="cuda")

In [None]:
# Separate policy (pi) and value (vf) networks, three hidden layers each.
# net_arch is passed as a plain dict: the list-wrapped form [dict(...)] is
# deprecated since stable-baselines3 1.8 and only triggers a warning.
policy_kwargs = dict(
    optimizer_class=optim.Adam,
    net_arch=dict(pi=[50, 25, 10], vf=[50, 25, 10]),
    activation_fn=nn.ReLU,
)

# PPO agent for the chaser. It trains on `pursuer_env`, the environment created
# in the initialization cell; no `chaser_env` is ever defined.
model_chaser = PPO(
    policy="MlpPolicy",
    env=pursuer_env,
    learning_rate=3e-4,
    n_steps = 1024,
    batch_size=256,
    tensorboard_log="/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/logs-chaser/",
    policy_kwargs=policy_kwargs,
    verbose=1,
    device="cuda")

## Learning

### Agent 1: Returner

In [None]:
# Train the returner policy for a budget of 5000 environment steps, with a progress bar.
model_returner.learn(total_timesteps=5000, progress_bar=True) # Ran this already

In [None]:
# Persist the trained returner model as "Model_5" in the models-returner folder
# (the load cell below reads it back as Model_5.zip).
save_path = os.path.join("/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/models-returner", "Model_5")
model_returner.save(save_path)

In [None]:
# Remove the in-memory model; presumably so the evaluation below demonstrably
# runs from the saved checkpoint.
del model_returner

In [None]:
# Reload the saved returner model and attach it to the returner environment.
learned_model_returner = PPO.load("/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/models-returner/Model_5.zip", returner_env)

In [None]:
# Roll out the learned returner policy and animate each episode.
episodes = 1
for episode in range(1, episodes+1):
  obs = returner_env.reset()
  done = False
  score = 0
  # Notebook-level trajectory buffers. They are never passed to the environment,
  # so its add_positions()/step() presumably fill them by name.
  positions = []
  blocker_position = []

  while not done:
    returner_env.add_positions()
    action, _ = learned_model_returner.predict(obs)
    obs, reward, done, info = returner_env.step(action)
    score += reward
  # Drop the last recorded agent position (presumably the terminal frame)
  positions.pop()
  print("Episode:{} Score:{}".format(episode,score))

  # `blocker_episode_list` is not passed to animation(), so it has to be set at
  # notebook level before the call.
  episode_list = [positions]
  blocker_episode_list = [blocker_position]
  returner_env.animation(episode_list)
episode_list = []
blocker_episode_list = []
returner_env.close()

### Agent 2: Chaser

In [None]:
# Train the chaser policy for a budget of 5000 environment steps, with a progress bar.
model_chaser.learn(total_timesteps=5000, progress_bar=True) # Ran this already

In [None]:
# Persist the trained chaser model as "Model_5" in the models-chaser folder
# (the load cell below reads it back as Model_5.zip).
save_path = os.path.join("/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/models-chaser", "Model_5")
model_chaser.save(save_path)

In [None]:
# Remove the in-memory model; presumably so the evaluation below demonstrably
# runs from the saved checkpoint.
del model_chaser

In [None]:
# Reload the saved chaser model and attach it to `pursuer_env`, the pursuer
# environment created in the initialization cell (no `chaser_env` is defined).
learned_model_chaser = PPO.load("/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/models-chaser/Model_5.zip", pursuer_env)

In [None]:
# Roll out the learned chaser policy and animate each episode. The rollout uses
# `pursuer_env`, the environment created in the initialization cell; no
# `chaser_env` is ever defined.
episodes = 1
for episode in range(1, episodes+1):
  obs = pursuer_env.reset()
  done = False
  score = 0
  # Notebook-level trajectory buffers that the environment's add_positions()
  # and step() append to by name.
  positions = []
  blocker_position = []

  while not done:
    pursuer_env.add_positions()
    action, _ = learned_model_chaser.predict(obs)
    obs, reward, done, info = pursuer_env.step(action)
    score += reward
  # Drop the last recorded agent position (presumably the terminal frame)
  positions.pop()
  print("Episode:{} Score:{}".format(episode,score))

  # animation() reads `blocker_episode_list` from notebook scope, so it has to
  # be set before the call.
  episode_list = [positions]
  blocker_episode_list = [blocker_position]
  pursuer_env.animation(episode_list)
episode_list = []
blocker_episode_list = []
pursuer_env.close()

# Tensorboard

We look at mean reward values per episode to evaluate each agent

In [None]:
# Open TensorBoard on the returner training logs (port 8091).
%load_ext tensorboard
%tensorboard --logdir "/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/logs-returner/" --port 8091

In [None]:
# Open TensorBoard on the chaser training logs (port 8090, distinct from the returner's).
%load_ext tensorboard
%tensorboard --logdir "/content/drive/MyDrive/Enhancing Performance in Special Teams within the NFL through Reinforcement Learning: A Data-Driven Approach/logs-chaser/" --port 8090