In [1]:
# Import Libraries
import os
import sys
import time
import imageio
import logging
import numpy as np
import pandas as pd
import seaborn as sns
import networkx as nx
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from datetime import datetime, timedelta
from matplotlib.patches import Rectangle, Ellipse
from PIL import Image, UnidentifiedImageError


# Notebook-wide plotting configuration.
# Matplotlib defaults: 180 DPI and a 25 x 17 inch canvas for every new figure.
plt.rcParams['figure.dpi'] = 180
plt.rcParams["figure.figsize"] = (25, 17)
# seaborn's 'Set3' palette, kept in a module-level name.
colors = sns.color_palette('Set3')
# seaborn theme with rc overrides: white axes/figure backgrounds, Arial as the sans-serif font.
sns.set_theme(rc={
    'axes.facecolor': '#FFFFFF',
    'figure.facecolor': '#FFFFFF',
    'font.sans-serif': 'Arial',
    'font.family': 'sans-serif'
})

In [2]:
# Initialize the Logger
class ColorFormatter(logging.Formatter):
    """
    Formatter that wraps every formatted record in an ANSI colour sequence.

    The colour is looked up by the record's level name; records whose level
    name is not in ``COLORS`` are emitted with the reset sequence as prefix
    (i.e. uncoloured).
    """

    # ANSI escape sequence for each standard logging level name.
    COLORS = {
        "DEBUG"   : "\033[94m",
        "INFO"    : "\033[92m",
        "WARNING" : "\033[93m",
        "ERROR"   : "\033[91m",
        # Must match logging's level name exactly, otherwise the lookup in
        # format() silently falls back to RESET.
        "CRITICAL": "\033[41m",
    }
    RESET = '\033[0m'

    def format(self, record):
        """
        Format ``record`` with the parent formatter and colour the result.

        Parameters:
            record (logging.LogRecord): The record to format.

        Returns:
            str: The formatted message surrounded by colour and reset codes.
        """
        log_message = super().format(record)
        color = self.COLORS.get(record.levelname, self.RESET)
        return f"{color}{log_message}{self.RESET}"

class Logger:
    """
    Thin wrapper that returns a named ``logging.Logger`` with a coloured
    stream handler attached.

    The handler is attached once per logger *name*: constructing ``Logger``
    repeatedly for the same name never stacks duplicate handlers, while a
    different name still gets its own handler.
    """

    # Kept for backward compatibility: True once any Logger instance has run.
    _handlers_added = False

    def __init__(self, name=__name__, level=logging.INFO, stream=sys.stdout):
        """
        Parameters:
            name (str): Name of the underlying logger.
            level (int): Level set on the underlying logger.
            stream: Stream the handler writes to (used only when the handler
                is created, i.e. the first time a given name is configured).
        """
        self.logger = logging.getLogger(name)
        self.logger.setLevel(level)

        if self.logger.handlers:
            # Already configured: reuse what is there so that
            # self.handler / self.formatter exist on every instance.
            self.handler = self.logger.handlers[0]
            self.formatter = self.handler.formatter
        else:
            self.handler = logging.StreamHandler(stream)
            self.formatter = ColorFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            self.handler.setFormatter(self.formatter)
            self.logger.addHandler(self.handler)

        Logger._handlers_added = True

    def get_logger(self):
        """Returns the logger instance."""
        return self.logger

In [3]:

# Initialize the module-level logger used by NFLDataLoader's methods
logger = Logger().get_logger()

class NFLDataLoader:
    path = "assets/nfl-big-data-bowl-2025/"
    save_offense_path = "assets/offesnse-data/" 
    save_defense_path = "assets/defense-data/"
    
    def __init__(self):
        """Initializes the NFLDataLoader with empty data attributes."""
        self.games = None
        self.players = None
        self.plays = None
        self.tracking = None
        self.player_play = None
        logger.info("NFLDataLoader initialized.")

    def downcast_memory_usage(self, df, df_name, verbose=True):
        """
        Reduces the memory usage of a DataFrame by downcasting numerical columns.
        
        Parameters:
            df (pd.DataFrame): The DataFrame to downcast.
            df_name (str): Name of the DataFrame for logging purposes.
            verbose (bool): Whether to log the compression percentage.
        
        Returns:
            pd.DataFrame: The downcasted DataFrame.
        """
        try:
            start_mem = df.memory_usage().sum() / 1024**2
            logger.info(f"Starting memory usage for {df_name}: {start_mem:.2f} MB")

            for col in df.columns:
                dtype_name = df[col].dtype.name
                if dtype_name == 'object':
                    logger.debug(f"Column '{col}' is of type object; skipping downcast.")
                    pass
                elif dtype_name == 'bool':
                    df[col] = df[col].astype('int8')
                    logger.debug(f"Column '{col}' downcasted to int8.")
                elif dtype_name.startswith('int') or (df[col].round() == df[col]).all():
                    df[col] = pd.to_numeric(df[col], downcast='integer')
                    logger.debug(f"Column '{col}' downcasted to integer.")
                else:
                    df[col] = pd.to_numeric(df[col], downcast='float')
                    logger.debug(f"Column '{col}' downcasted to float.")

            end_mem = df.memory_usage().sum() / 1024**2
            compression = 100 * (start_mem - end_mem) / start_mem
            if verbose:
                logger.info(f"{df_name}: Compressed by {compression:.1f}%")

            return df

        except Exception as e:
            logger.error(f"Error in downcasting {df_name}: {e}")
            raise

    def load_data(self, file_name):
        """
        Loads a CSV file into a DataFrame.
        
        Parameters:
            file_name (str): The name of the CSV file to load.
        
        Returns:
            pd.DataFrame: The loaded DataFrame.
        """
        try:
            file_path = os.path.join(self.path, file_name)
            logger.info(f"Loading data from {file_path}")
            df = pd.read_csv(file_path)
            logger.info(f"Loaded {len(df)} records from {file_name}")
            return df

        except FileNotFoundError:
            logger.error(f"File {file_name} not found in path {self.path}.")
            raise
        except pd.errors.EmptyDataError:
            logger.error(f"File {file_name} is empty.")
            raise
        except Exception as e:
            logger.error(f"Error loading {file_name}: {e}")
            raise

    def load_all_data(self):
        """
        Loads all required datasets with memory optimization.
        """
        try:
            logger.info("Starting to load all datasets.")
            self.games = self.downcast_memory_usage(self.load_data("games.csv"), "Games Dataset")
            self.players = self.downcast_memory_usage(self.load_data("players.csv"), "Players Dataset")
            self.plays = self.downcast_memory_usage(self.load_data("plays.csv"), "Plays Dataset")
            self.player_play = self.downcast_memory_usage(self.load_data("player_play.csv"), "Player Play Dataset")
            logger.info("All data loaded and downcasted successfully.")

        except Exception as e:
            logger.error(f"Error loading all data: {e}")
            raise

    def load_tracking_for_game_play(self, game_id, play_id):
        """
        Loads tracking data for a specific game and play.
        
        Parameters:
            game_id (int): The ID of the game.
            play_id (int): The ID of the play.
        
        Returns:
            pd.DataFrame: The tracking data for the specified game and play.
        """
        try:
            tracking_files = [f"tracking_week_{week_num}.csv" for week_num in range(1, 10)]
            filtered_chunks = []

            logger.info(f"Loading tracking data for gameId {game_id} and playId {play_id}.")

            for file_name in tracking_files:
                file_path = os.path.join(self.path, file_name)
                logger.debug(f"Processing file {file_path}")
                for chunk in pd.read_csv(file_path, chunksize=10000):
                    filtered_chunk = chunk[(chunk['gameId'] == game_id) & (chunk['playId'] == play_id)]
                    if not filtered_chunk.empty:
                        filtered_chunks.append(filtered_chunk)
                        logger.debug(f"Found matching chunk in {file_name} with {len(filtered_chunk)} records.")

            if filtered_chunks:
                tracking_data = pd.concat(filtered_chunks, ignore_index=True)
                logger.info(f"Tracking data loaded for gameId {game_id} and playId {play_id}.")
                return tracking_data
            else:
                error_msg = f"No tracking data found for gameId {game_id} and playId {play_id}."
                logger.warning(error_msg)
                raise ValueError(error_msg)

        except FileNotFoundError as fnf_error:
            logger.error(f"Tracking file not found: {fnf_error}")
            raise
        except pd.errors.EmptyDataError:
            logger.error("One of the tracking files is empty.")
            raise
        except Exception as e:
            logger.error(f"Error loading tracking data for gameId {game_id} and playId {play_id}: {e}")
            raise

    def get_specific_game_play_data(self, game_id, play_id):
        """
        Retrieves specific game and play data, merged with player and tracking data.
        
        Parameters:
            game_id (int): The ID of the game.
            play_id (int): The ID of the play.
        
        Returns:
            pd.DataFrame: The merged data for the specific game and play.
        """
        try:
            logger.info(f"Retrieving data for gameId {game_id} and playId {play_id}.")
            tracking_data = self.load_tracking_for_game_play(game_id, play_id)
            games_and_play_df = pd.merge(self.games, self.plays, on=['gameId'], how='inner')
            logger.debug("Merged games and plays data.")
            
            plays_with_tracking = pd.merge(tracking_data, games_and_play_df, on=['gameId', 'playId'], how='inner')
            logger.debug("Merged tracking data with games and plays.")
            
            players_plays_with_tracking = pd.merge(
                plays_with_tracking, self.player_play, on=['gameId', 'playId', 'nflId'], how='left'
            )
            logger.debug("Merged player play data.")
            
            merged_data = pd.merge(
                players_plays_with_tracking, self.players, on=['nflId', 'displayName'], how='left'
            )
            logger.debug("Merged players data.")
            
            merged_data['nflId'].fillna(999999, inplace=True)
            merged_data['jerseyNumber'] = merged_data['jerseyNumber'].astype(object)
            merged_data['jerseyNumber'].fillna("", inplace=True)
            merged_data.rename(columns={'club': 'Team'}, inplace=True)
            
            logger.info(f"Data merged for gameId {game_id} and playId {play_id}.")
            return merged_data

        except Exception as e:
            logger.error(f"Error retrieving game play data for gameId {game_id} and playId {play_id}: {e}")
            raise

    def get_possession_team_data(self, possession_team, save=True):
        """
        Retrieves and optionally saves data for the possession team.
        
        Parameters:
            possession_team (str): The name of the possession team.
            save (bool): Whether to save the retrieved data to a CSV file.
        
        Returns:
            pd.DataFrame: The merged data for the possession team.
        """
        try:
            logger.info(f"Retrieving possession team data for '{possession_team}'.")
            games_and_play_df = pd.merge(self.games, self.plays, on=['gameId'], how='inner')
            filtered_plays = games_and_play_df[games_and_play_df['possessionTeam'] == possession_team]

            if filtered_plays.empty:
                error_msg = f"No plays found for possession team '{possession_team}'."
                logger.warning(error_msg)
                raise ValueError(error_msg)

            merged_chunks = []
            tracking_files = [f"tracking_week_{week_num}.csv" for week_num in range(1, 10)]

            for file_name in tracking_files:
                file_path = os.path.join(self.path, file_name)
                logger.debug(f"Processing tracking file {file_path}")
                for chunk in pd.read_csv(file_path, chunksize=10000):
                    plays_with_tracking = pd.merge(chunk, filtered_plays, on=['gameId', 'playId'], how='inner')

                    if plays_with_tracking.empty:
                        logger.debug(f"No matching plays in chunk from {file_name}.")
                        continue 

                    players_plays_with_tracking = pd.merge(
                        plays_with_tracking, self.player_play, on=['gameId', 'playId', 'nflId'], how='left'
                    )
                    logger.debug("Merged player play data.")
                    
                    merged_data = pd.merge(
                        players_plays_with_tracking, self.players, on=['nflId', 'displayName'], how='left'
                    )
                    logger.debug("Merged players data.")
                    
                    merged_data['nflId'].fillna(999999, inplace=True)
                    merged_data['jerseyNumber'] = merged_data['jerseyNumber'].astype(object)
                    merged_data['jerseyNumber'].fillna("", inplace=True)
                    merged_data.rename(columns={'club': 'Team'}, inplace=True)
                    
                    merged_chunks.append(merged_data)
                    logger.debug(f"Appended merged data chunk with {len(merged_data)} records.")

            if merged_chunks:
                full_merged_data = pd.concat(merged_chunks, ignore_index=True)
                logger.info(f"Data retrieved for possession team '{possession_team}'.")
            else:
                error_msg = f"No tracking data found for possession team '{possession_team}'."
                logger.warning(error_msg)
                raise ValueError(error_msg)

            if save:
                try:
                    dir_name = f"{possession_team}_offense_data"
                    os.makedirs(self.save_offense_path, exist_ok=True)
                    file_path = os.path.join(self.save_offense_path, f"{dir_name}.csv")
                    full_merged_data.to_csv(file_path, index=False)
                    logger.info(f"Data for possession team '{possession_team}' saved to '{file_path}'.")
                except Exception as e:
                    logger.error(f"Error saving data for possession team '{possession_team}': {e}")
                    raise

            return full_merged_data

        except Exception as e:
            logger.error(f"Error retrieving possession team data: {e}")
            raise

    def get_defense_team_data(self, defense_team, save=True):
        """
        Retrieves and optionally saves data for the defense team.
        
        Parameters:
            defense_team (str): The name of the defense team.
            save (bool): Whether to save the retrieved data to a CSV file.
        
        Returns:
            pd.DataFrame: The merged data for the defense team.
        """
        try:
            logger.info(f"Retrieving defense team data for '{defense_team}'.")
            games_and_play_df = pd.merge(self.games, self.plays, on=['gameId'], how='inner')
            filtered_plays = games_and_play_df[games_and_play_df['defensiveTeam'] == defense_team]

            if filtered_plays.empty:
                error_msg = f"No plays found for defense team '{defense_team}'."
                logger.warning(error_msg)
                raise ValueError(error_msg)

            merged_chunks = []
            tracking_files = [f"tracking_week_{week_num}.csv" for week_num in range(1, 10)]

            for file_name in tracking_files:
                file_path = os.path.join(self.path, file_name)
                logger.debug(f"Processing tracking file {file_path}")
                for chunk in pd.read_csv(file_path, chunksize=10000):
                    plays_with_tracking = pd.merge(chunk, filtered_plays, on=['gameId', 'playId'], how='inner')

                    if plays_with_tracking.empty:
                        logger.debug(f"No matching plays in chunk from {file_name}.")
                        continue 

                    players_plays_with_tracking = pd.merge(
                        plays_with_tracking, self.player_play, on=['gameId', 'playId', 'nflId'], how='left'
                    )
                    logger.debug("Merged player play data.")
                    
                    merged_data = pd.merge(
                        players_plays_with_tracking, self.players, on=['nflId', 'displayName'], how='left'
                    )
                    logger.debug("Merged players data.")
                    
                    merged_data['nflId'].fillna(999999, inplace=True)  
                    merged_data['jerseyNumber'] = merged_data['jerseyNumber'].astype(object)
                    merged_data['jerseyNumber'].fillna("", inplace=True)
                    merged_data.rename(columns={'club': 'Team'}, inplace=True)
                    
                    merged_chunks.append(merged_data)
                    logger.debug(f"Appended merged data chunk with {len(merged_data)} records.")

            if merged_chunks:
                full_merged_data = pd.concat(merged_chunks, ignore_index=True)
                logger.info(f"Data retrieved for defense team '{defense_team}'.")
            else:
                error_msg = f"No tracking data found for defense team '{defense_team}'."
                logger.warning(error_msg)
                raise ValueError(error_msg)

            if save:
                try:
                    dir_name = f"{defense_team}_defense_data"
                    os.makedirs(self.save_defense_path, exist_ok=True)
                    file_path = os.path.join(self.save_defense_path, f"{dir_name}.csv")
                    full_merged_data.to_csv(file_path, index=False)
                    logger.info(f"Data for defense team '{defense_team}' saved to '{file_path}'.")
                except Exception as e:
                    logger.error(f"Error saving data for defense team '{defense_team}': {e}")
                    raise

            return full_merged_data

        except Exception as e:
            logger.error(f"Error retrieving defense team data: {e}")
            raise

    def get_overall_plays_with_tracking_in_chunks(self, chunk_size=100000):
        """
        Retrieves all plays with tracking data in chunks.
        
        Parameters:
            chunk_size (int): The number of rows per chunk when reading tracking files.
        
        Returns:
            pd.DataFrame: The concatenated merged data for all plays.
        """
        try:
            logger.info("Retrieving overall plays with tracking data in chunks.")
            merged_chunks = []
            tracking_files = [f"tracking_week_{week_num}.csv" for week_num in range(1, 10)]
            
            for file_name in tracking_files:
                file_path = os.path.join(self.path, file_name)
                logger.debug(f"Processing tracking file {file_path}")
                for chunk in pd.read_csv(file_path, chunksize=chunk_size):
                    plays_with_tracking = pd.merge(chunk, self.plays, on=['gameId', 'playId'], how='inner')
                    logger.debug(f"Merged chunk with plays: {len(plays_with_tracking)} records.")
                    
                    players_plays_with_tracking = pd.merge(
                        plays_with_tracking, self.player_play, on=['gameId', 'playId', 'nflId'], how='left'
                    )
                    logger.debug("Merged player play data.")
                    
                    merged_data = pd.merge(
                        players_plays_with_tracking, self.players, on=['nflId', 'displayName'], how='left'
                    )
                    logger.debug("Merged players data.")
                    
                    merged_data['nflId'].fillna(999999, inplace=True)
                    merged_data['jerseyNumber'].fillna("", inplace=True)
                    merged_data.rename(columns={'club': 'Team'}, inplace=True)
                    
                    merged_chunks.append(merged_data)
                    logger.debug(f"Appended merged data chunk with {len(merged_data)} records.")

            if merged_chunks:
                full_merged_data = pd.concat(merged_chunks, ignore_index=True)
                logger.info("Successfully retrieved and merged all plays with tracking data.")
                return full_merged_data
            else:
                logger.warning("No tracking data found across all tracking files.")
                return pd.DataFrame()  # Return empty DataFrame if no data found

        except FileNotFoundError as fnf_error:
            logger.error(f"Tracking file not found: {fnf_error}")
            raise
        except pd.errors.EmptyDataError:
            logger.error("One of the tracking files is empty.")
            raise
        except Exception as e:
            logger.error(f"Error retrieving overall plays with tracking data: {e}")
            raise

    def basic_summary(self, data_frame, data_set_name):
        """
        Generates a basic summary of a DataFrame, including data types, null counts, unique counts, and sample values.
        
        Parameters:
            data_frame (pd.DataFrame): The DataFrame to summarize.
            data_set_name (str): The name of the dataset for logging purposes.
        
        Returns:
            pd.DataFrame: A summary DataFrame.
        """
        try:
            logger.info(f"Generating basic summary for dataset '{data_set_name}'.")
            summary = pd.DataFrame(data_frame.dtypes, columns=['Data Type'])
            summary = summary.reset_index()
            summary = summary.rename(columns={'index': 'Feature'})
            summary['Num of Nulls'] = data_frame.isnull().sum().values
            summary['Num of Unique'] = data_frame.nunique().values
            summary['First Value'] = data_frame.iloc[0].values
            summary['Second Value'] = data_frame.iloc[1].values
            summary['Third Value'] = data_frame.iloc[2].values
            summary['Fourth Value'] = data_frame.iloc[3].values
            logger.info(f"Basic summary generated for dataset '{data_set_name}'.")
            return summary

        except IndexError:
            logger.warning(f"DataFrame '{data_set_name}' does not have enough rows to generate sample values.")
            # Handle cases where the DataFrame has fewer than 4 rows
            summary = pd.DataFrame(data_frame.dtypes, columns=['Data Type'])
            summary = summary.reset_index()
            summary = summary.rename(columns={'index': 'Feature'})
            summary['Num of Nulls'] = data_frame.isnull().sum().values
            summary['Num of Unique'] = data_frame.nunique().values
            for i in range(4):
                column_name = f'Value {i+1}'
                if i < len(data_frame):
                    summary[column_name] = data_frame.iloc[i].values
                else:
                    summary[column_name] = None
            logger.info(f"Basic summary generated for dataset '{data_set_name}' with limited sample values.")
            return summary
        except Exception as e:
            logger.error(f"Error generating basic summary for dataset '{data_set_name}': {e}")
            raise




In [4]:

class NFLFieldVertical:
    """
    This class creates a vertical football field visualization, complete with team colors,
    end zones, yard markers, and optional logos. It supports customization of the field’s dimensions
    and the home and visitor team information.

    Attributes:
        width (float): Width of the football field in yards (default 53.3).
        height (float): Height of the football field in yards (default 120).
        home_team (str): Abbreviation of the home team.
        home_team_color (str): Color representing the home team.
        visitor_team (str): Abbreviation of the visiting team.
        visitor_team_color (str): Color representing the visitor team.
        logo_abbr (str): Abbreviation of the team logo to load (optional).
        fig, ax: Matplotlib figure and axis used for plotting the field.
    """

    def __init__(self, width=53.3, height=120, home_team="", home_team_color="", 
                 visitor_team="", visitor_team_color="", logo_abbr=""):
        """Initializes the NFL field with team details, dimensions, and colors."""
        from utils.helpers import Logger
        self.logger = Logger().get_logger()
        self.width = width
        self.height = height
        self.home_team = home_team
        self.home_team_color = home_team_color
        self.visitor_team = visitor_team
        self.visitor_team_color = visitor_team_color

        # Dictionary mapping team abbreviations to their full names.
        self.team_names = {
            'LA': 'Los Angeles Rams', 'ATL': 'Atlanta Falcons', 'CAR': 'Carolina Panthers',
            'CHI': 'Chicago Bears', 'CIN': 'Cincinnati Bengals', 'DET': 'Detroit Lions',
            'HOU': 'Houston Texans', 'MIA': 'Miami Dolphins', 'NYJ': 'New York Jets',
            'WAS': 'Washington Commanders', 'ARI': 'Arizona Cardinals',
            'LAC': 'Los Angeles Chargers', 'MIN': 'Minnesota Vikings', 
            'TEN': 'Tennessee Titans', 'DAL': 'Dallas Cowboys', 'SEA': 'Seattle Seahawks',
            'KC': 'Kansas City Chiefs', 'BAL': 'Baltimore Ravens', 'CLE': 'Cleveland Browns',
            'JAX': 'Jacksonville Jaguars', 'NO': 'New Orleans Saints', 'NYG': 'New York Giants',
            'PIT': 'Pittsburgh Steelers', 'SF': 'San Francisco 49ers', 'DEN': 'Denver Broncos',
            'LV': 'Las Vegas Raiders', 'GB': 'Green Bay Packers', 'BUF': 'Buffalo Bills',
            'PHI': 'Philadelphia Eagles', 'IND': 'Indianapolis Colts', 'NE': 'New England Patriots',
            'TB': 'Tampa Bay Buccaneers'
        }

        # Create the football field visualization.
        self.fig, self.ax = self.create_pitch()

    def create_pitch(self):
        """
        Creates the football field visualization, including end zones, yard markers, and optional logos.

        The field is drawn vertically: the visitor end zone occupies y in
        [0, 10], the home end zone the top 10 yards. The logo is loaded from
        `assets/logo/<abbr>.png`, where `<abbr>` is `logo_abbr` when one was
        given and the home team abbreviation otherwise. A missing or
        unreadable logo is logged, not raised.

        Returns:
            fig, ax: Matplotlib figure and axis for further customization or saving.
        """
        fig, ax = plt.subplots()
        ax.set_xlim(0, self.width)
        ax.set_ylim(0, self.height + 1)
        ax.axis('off')  # Remove axis labels.

        # Draw the field background.
        background = Rectangle((0, 0), self.width, self.height, linewidth=1,
                               facecolor='#97BC62FF', edgecolor='black', capstyle='round')
        ax.add_patch(background)

        # Draw horizontal yard lines, alternating between solid and dashed.
        for i in range(21):
            style = '--' if i % 2 != 0 else '-'
            ax.plot([0, self.width], [10 + 5 * i] * 2, c="white", linestyle=style, lw=1, alpha=0.8)

        # Draw yard numbers on both sides of the field (10..50..10).
        for units in range(10, 100, 10):
            units_text = units if units <= 50 else 100 - units
            ax.text(self.width - 7.5, 10 + units - 1.1, units_text, size=18, c="white", weight="bold", alpha=0.8)
            ax.text(7.5, 10 + units - 1.1, units_text, size=18, c="white", weight="bold", alpha=0.8)

        # Draw small tick marks along the sidelines (one per yard between the 5-yard lines).
        for x in range(20):
            for j in range(1, 5):
                ax.plot([1, 3], [10 + x * 5 + j] * 2, color="white", lw=1, alpha=0.8)
                ax.plot([self.width - 1, self.width - 3], [10 + x * 5 + j] * 2, color="white", lw=1, alpha=0.8)

        # Draw tick marks near the center of the field.
        gap = 5
        center_x1, center_x2 = (self.width / 2) - gap, (self.width / 2) + gap
        for y in range(20):
            for j in range(1, 5):
                ax.plot([center_x1, center_x1 + 1], [10 + y * 5 + j] * 2, color="white", lw=1, alpha=0.8)
                ax.plot([center_x2, center_x2 - 1], [10 + y * 5 + j] * 2, color="white", lw=1, alpha=0.8)

        # Add team names and end zones. An empty colour string is not a valid
        # matplotlib colour, so fall back to a transparent end zone instead of failing.
        visitor_full_name = self.team_names.get(self.visitor_team, self.visitor_team)
        ax.text(self.width / 2, 5.5, self.visitor_team, size=30, c="white", weight="bold", ha='center')
        ax.text(self.width / 2, 1, visitor_full_name, size=30, c="white", weight="bold", ha='center')
        ax.add_patch(Rectangle((0, 0), self.width, 10, ec="black",
                               fc=self.visitor_team_color or 'none', lw=1))

        home_full_name = self.team_names.get(self.home_team, self.home_team)
        ax.text(self.width / 2, self.height - 5, self.home_team, size=30, c="white", weight="bold", ha='center')
        ax.text(self.width / 2, self.height - 9, home_full_name, size=30, c="white", weight="bold", ha='center')
        ax.add_patch(Rectangle((0, self.height - 10), self.width, 10, ec="black",
                               fc=self.home_team_color or 'none', lw=1))

        # Add the team logo at the center (if available). Honour an explicit
        # logo abbreviation when the instance carries one; otherwise use the home team.
        logo_team = getattr(self, 'logo_abbr', '') or self.home_team
        logo_path = os.path.join('assets/logo', f'{logo_team}.png')
        if os.path.exists(logo_path):
            try:
                self._add_logo(ax, logo_path)
            except (UnidentifiedImageError, OSError) as e:
                self.logger.error(f"Error loading logo '{logo_path}': {e}")
        else:
            self.logger.warning(f"Logo file not found: {logo_path}")

        return fig, ax

    def _add_logo(self, ax, logo_path):
        """
        Draw the logo image, rotated by 90 degrees and padded to a square,
        on a 10 x 10 yard patch centred on the field.
        """
        rotated = Image.open(logo_path).rotate(90, expand=True)
        logo_w, logo_h = rotated.size

        # Transparent square canvas large enough for the rotated logo.
        side = max(logo_w, logo_h)
        canvas = Image.new('RGBA', (side, side), (255, 255, 255, 0))
        offset = ((side - logo_w) // 2, (side - logo_h) // 2)
        canvas.paste(rotated, offset)

        # Extent is [left, right, bottom, top] in field coordinates.
        left, right = (self.width - 10) / 2, (self.width + 10) / 2
        bottom, top = (self.height - 10) / 2, (self.height + 10) / 2
        ax.imshow(np.array(canvas), extent=[left, right, bottom, top],
                  aspect='auto', zorder=10)

    def save_pitch(self, folder_path, filename='pitch.png'):
        """
        Saves the football field visualization to the specified folder.
        Args:
            folder_path (str): Directory to save the pitch image.
            filename (str): Name of the saved image file (default 'pitch.png').
        """
        try:
            if not os.path.exists(folder_path):
                os.makedirs(folder_path)
            file_path = os.path.join(folder_path, filename)
            self.fig.savefig(file_path, bbox_inches='tight')
            self.logger.info(f"Pitch saved successfully at: {file_path}")
        except Exception as e:
            self.logger.error(f"Failed to save pitch: {e}")
        finally:
            plt.close(self.fig)



In [5]:
class SingleGamePlayExtractor:
    """Class to extract and analyze data for a specific game play."""

    # Named logger for this module. The module-level logging.info()/error()
    # helpers write to the root logger and implicitly call basicConfig() when
    # it has no handlers, which drops INFO messages and duplicates the output
    # of other loggers that propagate to the root.
    _logger = logging.getLogger(__name__)

    def __init__(self, df):
        """
        Initialize the extractor with the provided DataFrame.

        Parameters:
        df (pd.DataFrame): DataFrame containing game and play data.
        """
        self.df = df
        self._logger.info("SingleGamePlayExtractor initialized with provided DataFrame.")

    def extract_game_play_data(self, gameId, playId):
        """
        Extract data for a specific game and play based on gameId and playId.

        Scalar entries of the returned dictionary are taken from the first
        matching row; 'unique_frame_ids' and 'time' hold the unique values of
        the 'frameId' and 'time' columns over all matching rows.

        Parameters:
        gameId (int): ID of the game.
        playId (int): ID of the play.

        Returns:
        tuple: A DataFrame with the specific game-play data and a dictionary of play information.

        Raises:
        ValueError: If no data is available for the provided gameId and playId.
        """
        self._logger.info(f"Extracting data for gameId: {gameId} and playId: {playId}.")
        game_play_df = self.df[(self.df['gameId'] == gameId) & (self.df['playId'] == playId)]

        if game_play_df.empty:
            self._logger.error(f"No data found for gameId: {gameId} and playId: {playId}.")
            raise ValueError("No data available for the provided gameId and playId.")

        def first(column):
            # Value of `column` in the first matching row.
            return game_play_df[column].values[0]

        play_info = {
            'defensive_team': first('defensiveTeam'),
            'possession_team': first('possessionTeam'),
            'unique_frame_ids': game_play_df['frameId'].unique(),
            'play_description': first('playDescription'),
            'offense_formation': first('offenseFormation'),
            'line_of_scrimmage': first('absoluteYardlineNumber'),
            'down': first('down'),
            'quarter': first('quarter'),
            'play_direction': first('playDirection'),
            'yards_to_go': first('yardsToGo'),
            'pre_snap_home_score': first('preSnapHomeScore'),
            'pre_snap_visitor_score': first('preSnapVisitorScore'),
            'home_team_abbr': first('homeTeamAbbr'),
            'visitor_team_abbr': first('visitorTeamAbbr'),
            # 'game_lock' is the historical (misspelt) key and is kept for
            # existing callers; 'game_clock' carries the same value.
            'game_lock': first('gameClock'),
            'game_clock': first('gameClock'),
            'time': game_play_df['time'].unique()
        }

        self._logger.info(f"Successfully extracted data for gameId: {gameId} and playId: {playId}.")
        return game_play_df, play_info

    def determine_first_down_marker(self, play_info):
        """
        Determine the position of the first down marker based on play direction.

        For a play moving "left" the marker is the line of scrimmage minus the
        yards to go; for any other direction it is the line of scrimmage plus
        the yards to go.

        Parameters:
        play_info (dict): Dictionary containing play information.

        Returns:
        int: The position of the first down marker.
        """
        self._logger.info(f"Calculating first down marker for play with direction: {play_info['play_direction']}.")

        if play_info['play_direction'] == "left":
            marker = play_info['line_of_scrimmage'] - play_info['yards_to_go']
        else:
            marker = play_info['line_of_scrimmage'] + play_info['yards_to_go']

        self._logger.info(f"First down marker calculated as: {marker}.")
        return marker



In [6]:

class NFLPlotVisualizeUtils:
    """
    Stateless geometry, kinematics and image helpers shared by the play visualizers.

    Every member is a static method and all angles are expressed in degrees.
    The helpers that wrap their computation in try/except are best-effort: on an
    unexpected error they log it and return None (or (None, None)) instead of
    raising.
    """

    @staticmethod
    def _log_error(message):
        """Report `message` at ERROR level through the notebook's Logger wrapper."""
        Logger().get_logger().error(message)

    @staticmethod
    def calculate_distance(x1, y1, x2, y2):
        """
        Calculate the Euclidean distance between two points.

        Parameters:
            x1 (float): X-coordinate of the first point.
            y1 (float): Y-coordinate of the first point.
            x2 (float): X-coordinate of the second point.
            y2 (float): Y-coordinate of the second point.

        Returns:
            float: The Euclidean distance between the two points, or None if the
            computation failed (the error is logged).
        """
        try:
            return np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
        except Exception as e:
            NFLPlotVisualizeUtils._log_error(f"Error calculating distance: {e}")
            return None

    @staticmethod
    def angle_with_x_axis(x, y):
        """
        Calculate the angle of the line connecting the origin to the point (x, y) with respect to the x-axis.

        Parameters:
            x (float): X-coordinate of the point.
            y (float): Y-coordinate of the point.

        Returns:
            float: Angle in degrees with respect to the x-axis, in (-180, 180]
            as produced by arctan2, or None if the computation failed.
        """
        try:
            return np.degrees(np.arctan2(y, x))
        except Exception as e:
            NFLPlotVisualizeUtils._log_error(f"Error calculating angle with x-axis: {e}")
            return None

    @staticmethod
    def angle_in_32_segments(angle):
        """
        Convert an angle to the nearest of 32 segments (11.25 degrees each).

        The angle is first wrapped into [0, 360). Segment k is centred on
        k * 11.25 degrees, so angles within 5.625 degrees below 360 belong to
        segment 0 again.

        Parameters:
            angle (float): Angle in degrees.

        Returns:
            int: Segment number in the range 0..31; 0 when `angle` is NaN.
        """
        if np.isnan(angle):
            return 0
        # Rounding alone yields 32 for angles >= 354.375; wrap it back onto 0 so
        # the result always stays inside the documented 0..31 range.
        return int(round((angle % 360) / 11.25)) % 32

    @staticmethod
    def assign_direction(angle):
        """
        Assign a 32-point compass direction based on the angle.

        The angle is wrapped into [0, 360) and mapped onto 11.25 degree wide
        bins starting at 0: [0, 11.25) is "North", [11.25, 22.5) is
        "North by East", and so on clockwise.

        Parameters:
            angle (float): Angle in degrees. Values outside [0, 360) (for
                example 360 or negative angles) are wrapped around.

        Returns:
            str: Compass direction (e.g., "North", "Southwest"), or None when
            `angle` is NaN or infinite.
        """
        directions = [
            "North", "North by East", "North-Northeast", "Northeast by North",
            "Northeast", "Northeast by East", "East-Northeast", "East by North",
            "East", "East by South", "East-Southeast", "Southeast by East",
            "Southeast", "Southeast by South", "South-Southeast", "South by East",
            "South", "South by West", "South-Southwest", "Southwest by South",
            "Southwest", "Southwest by West", "West-Southwest", "West by South",
            "West", "West by North", "West-Northwest", "Northwest by West",
            "Northwest", "Northwest by North", "North-Northwest", "North by West",
        ]

        if not np.isfinite(angle):
            return None

        # The extra modulo guards against floating-point wrap-around where a tiny
        # negative angle becomes exactly 360.0 after `% 360`.
        bin_index = int((angle % 360) // 11.25) % len(directions)
        return directions[bin_index]

    @staticmethod
    def calculate_dx_dy_arrow(x, y, angle, speed, multiplier):
        """
        Calculate the change in x and y (dx, dy) for an arrow based on its angle.

        The angle is treated as a bearing: 0 degrees points along +y and the
        angle grows towards +x, i.e. dx = sin(angle) * length and
        dy = cos(angle) * length. The arrow length is 0.5 * multiplier.

        Parameters:
            x (float): X-coordinate of the starting point (not used in the result).
            y (float): Y-coordinate of the starting point (not used in the result).
            angle (float): Angle in degrees.
            speed (float): Speed (accepted for interface compatibility; not used).
            multiplier (float): Multiplier for adjusting arrow length.

        Returns:
            tuple: (dx, dy) change in coordinates for the arrow, or (None, None)
            if the trigonometric step failed.
        """
        fixed_length = 0.5 * multiplier  # Adjust arrow length by multiplier
        angle_radians = np.radians(angle)

        try:
            # One formula for every quadrant. The former per-quadrant branches
            # reduced to exactly this in three quadrants but swapped the sin/cos
            # magnitudes for 90 < angle <= 180, making the arrow jump at 90.
            dx = np.sin(angle_radians) * fixed_length
            dy = np.cos(angle_radians) * fixed_length
            return dx, dy
        except Exception as e:
            NFLPlotVisualizeUtils._log_error(f"Error calculating dx and dy: {e}")
            return None, None

    @staticmethod
    def calculate_relative_velocity(speed1, speed2, dir1, dir2):
        """
        Calculate the relative velocity between two moving entities based on their speeds and directions.

        Parameters:
            speed1 (float): Speed of the first entity.
            speed2 (float): Speed of the second entity.
            dir1 (float): Direction of the first entity in degrees.
            dir2 (float): Direction of the second entity in degrees.

        Returns:
            float: Magnitude of the relative velocity, or None if the
            computation failed.
        """
        try:
            # Convert directions to radians
            theta1 = np.radians(dir1)
            theta2 = np.radians(dir2)

            # Difference of the two velocity vectors, component by component
            rel_vx = speed1 * np.cos(theta1) - speed2 * np.cos(theta2)
            rel_vy = speed1 * np.sin(theta1) - speed2 * np.sin(theta2)

            # Magnitude of the relative velocity
            return np.sqrt(rel_vx**2 + rel_vy**2)
        except Exception as e:
            NFLPlotVisualizeUtils._log_error(f"Error calculating relative velocity: {e}")
            return None

    @staticmethod
    def calculate_fixed_arrow(angle, length=0.38):
        """
        Calculate the change in coordinates (dx, dy) for a fixed-length arrow based on its angle.

        Here dx = cos(angle) * length and dy = sin(angle) * length, i.e. the
        angle is measured from the first returned axis.

        Parameters:
            angle (float): Angle in degrees.
            length (float): Length of the arrow.

        Returns:
            tuple: (dx, dy) change in coordinates for the arrow, or (None, None)
            if the computation failed.
        """
        try:
            angle_radians = np.radians(angle)
            return np.cos(angle_radians) * length, np.sin(angle_radians) * length
        except Exception as e:
            NFLPlotVisualizeUtils._log_error(f"Error calculating fixed arrow: {e}")
            return None, None

    @staticmethod
    def calculate_time_to_contact(distance, rel_velocity):
        """
        Calculate the time to contact based on the distance and relative velocity.

        Parameters:
            distance (float): Distance to the target.
            rel_velocity (float): Relative velocity towards the target.

        Returns:
            float: Time to contact (distance / rel_velocity), np.inf if the
            relative velocity is not greater than zero, or None if the
            computation failed.
        """
        try:
            return distance / rel_velocity if rel_velocity > 0 else np.inf
        except Exception as e:
            NFLPlotVisualizeUtils._log_error(f"Error calculating time to contact: {e}")
            return None

    @staticmethod
    def calculate_angle_of_approach(dir1, dir2):
        """
        Calculate the angle of approach between two directions.

        Parameters:
            dir1 (float): Direction of the first entity in degrees.
            dir2 (float): Direction of the second entity in degrees.

        Returns:
            float: Angle in degrees from the first direction to the second,
            wrapped into [0, 360), or None if the computation failed.
        """
        try:
            # Convert directions to unit vectors
            vector1 = np.array([np.cos(np.radians(dir1)), np.sin(np.radians(dir1))])
            vector2 = np.array([np.cos(np.radians(dir2)), np.sin(np.radians(dir2))])

            # Difference of the two vector headings
            angle = np.degrees(np.arctan2(vector2[1], vector2[0]) - np.arctan2(vector1[1], vector1[0]))
            return angle % 360
        except Exception as e:
            NFLPlotVisualizeUtils._log_error(f"Error calculating angle of approach: {e}")
            return None

    @staticmethod
    def resize_for_video(image, target_size):
        """
        Resize an image to the specified target size for video presentation.

        Parameters:
            image (PIL.Image.Image): The image to be resized.
            target_size (tuple): A tuple representing the target size (width, height) in pixels.

        Returns:
            PIL.Image.Image: The resized image (LANCZOS resampling).
        """
        return image.resize(target_size, Image.LANCZOS)

    @staticmethod
    def initialize_base_plot(home_team_abbr, home_team_color, visitor_team_abbr, visitor_team_color):
        """
        Render the vertical NFL field for the given teams and save it as an image.

        The field is written to 'assets/vertical_pitch.png' through
        NFLFieldVertical.save_pitch, after which execution pauses for 7 seconds.

        Parameters:
            home_team_abbr (str): The abbreviation of the home team (e.g., "NE" for New England Patriots).
            home_team_color (str): The color code for the home team (e.g., "#FF0000" for red).
            visitor_team_abbr (str): The abbreviation of the visitor team (e.g., "KC" for Kansas City Chiefs).
            visitor_team_color (str): The color code for the visitor team (e.g., "#0000FF" for blue).

        Returns:
            None
        """
        # Create an NFL field pitch with the specified dimensions and team details
        pitch = NFLFieldVertical(
            width=53.3,  # Field width (presumably yards; plotted distances are labelled 'yd')
            height=120,  # Field length in the same unit
            home_team=home_team_abbr,
            home_team_color=home_team_color,
            visitor_team=visitor_team_abbr,
            visitor_team_color=visitor_team_color
        )

        # Save the pitch as an image in the specified folder
        pitch.save_pitch(folder_path='assets', filename='vertical_pitch.png')

        # Fixed pause kept from the original implementation to make sure the image
        # is on disk before it is read back.
        # NOTE(review): if save_pitch is synchronous this wait is unnecessary - confirm.
        time.sleep(7)
    
    

In [7]:

class NFLPlotVisualizer:
    """
    Matplotlib drawing helpers for rendering a play on top of a pitch image.

    Drawing methods swap the incoming coordinates: a value passed as `x` is
    placed on the vertical plot axis and a value passed as `y` on the
    horizontal one (calls are of the form `ax.scatter(y, x, ...)`).
    Every method logs a failure and re-raises the original exception.
    """

    # Module logger (the one the notebook's Logger wrapper configures). The
    # root-level logging.info()/logging.error() helpers are avoided on purpose:
    # they drop INFO records and implicitly install a root handler, which makes
    # messages of the configured logger show up twice.
    _log = logging.getLogger(__name__)

    def __init__(self, pitch_image_path):
        """
        Initialize the NFLPlotVisualizer with the path to the pitch image.

        Parameters:
            pitch_image_path (str): The file path to the image of the pitch.

        Raises:
            Exception: Whatever Image.open raises for the path (logged, then re-raised).
        """
        self.pitch_image_path = pitch_image_path
        try:
            self.pitch_img = Image.open(self.pitch_image_path)
        except Exception as e:
            self._log.error(f"Error loading image at {self.pitch_image_path}: {e}")
            raise

    def initialize_plot(self, line_of_scrimmage, first_down_marker):
        """
        Create a new plot with the NFL pitch image and draw the line of scrimmage and first down marker.

        The image is stretched over the extent x in [0, 53.3], y in [0, 120].

        Parameters:
            line_of_scrimmage (float): The y-coordinate of the line of scrimmage.
            first_down_marker (float): The y-coordinate of the first down marker.

        Returns:
            tuple: A tuple containing the figure and axes objects for further customization.
        """
        try:
            fig, ax = plt.subplots()
            ax.imshow(self.pitch_img, extent=[0, 53.3, 0, 120], aspect='auto')
            ax.axhline(y=line_of_scrimmage, color='#00539CFF', linestyle='-', linewidth=4)
            ax.axhline(y=first_down_marker, color='#FDD20EFF', linestyle='-', linewidth=4)
            return fig, ax
        except Exception as e:
            self._log.error(f"Error initializing plot: {e}")
            raise

    def process_frames(self, frames):
        """
        Resize a list of image frames to ensure they are all the same size for video processing.

        The target size is the size of the first frame with width and height
        each rounded up to the next multiple of 16.

        Parameters:
            frames (list): A list of PIL Image objects representing video frames.

        Returns:
            list: A list of resized PIL Image objects.

        Raises:
            IndexError: If `frames` is empty (logged, then re-raised).
        """
        try:
            first_width, first_height = frames[0].size
            target_size = (
                (first_width + 15) // 16 * 16,
                (first_height + 15) // 16 * 16,
            )
            return [NFLPlotVisualizeUtils.resize_for_video(frame, target_size) for frame in frames]
        except Exception as e:
            self._log.error(f"Error processing frames: {e}")
            raise

    def add_player_scatter(self, ax, x, y, jersey_number, team_color, label_prefix=''):
        """
        Add a scatter marker for a player with the jersey number displayed on top.

        Parameters:
            ax (matplotlib.axes.Axes): The axes on which to draw the scatter plot.
            x (float): Player coordinate drawn on the vertical plot axis.
            y (float): Player coordinate drawn on the horizontal plot axis.
            jersey_number (int): The jersey number of the player.
            team_color (str): The color to use for the player's marker.
            label_prefix (str, optional): Legend label of the marker. Defaults to ''.

        Returns:
            None
        """
        try:
            ax.scatter(y, x, color=team_color, s=1000, edgecolors='k', label=f"{label_prefix}")
            ax.text(y, x, str(jersey_number), color='white', ha='center', va='center', fontsize=12, weight='bold')
        except Exception as e:
            self._log.error(f"Error adding player scatter: {e}")
            raise

    def add_player_distance_line(self, ax, distance, x1, x2, y1, y2):
        """
        Draw a labelled line between two players, varying the line style based on distance.

        Distances below 5 get a dashed line, distances from 5 up to (but not
        including) 7 a solid line; nothing is drawn for 7 or more. The label at
        the midpoint shows the distance truncated to a whole number plus ' yd'.

        Parameters:
            ax (matplotlib.axes.Axes): The axes on which to draw the line.
            distance (float): The distance between the two points.
            x1 (float): First point's coordinate on the vertical plot axis.
            x2 (float): Second point's coordinate on the vertical plot axis.
            y1 (float): First point's coordinate on the horizontal plot axis.
            y2 (float): Second point's coordinate on the horizontal plot axis.

        Returns:
            None
        """
        try:
            # `not <` (instead of `>=`) also skips NaN distances.
            if not distance < 7:
                return

            if distance < 5:
                ax.plot([y1, y2], [x1, x2], color='k', linestyle='--', linewidth=2.5, alpha=0.6)
            else:
                # Includes distance == 5 exactly, which previously fell between
                # the two strict comparisons and was not drawn at all.
                ax.plot([y1, y2], [x1, x2], color='k', linestyle='-', linewidth=3, alpha=0.9)

            mid_x = (x1 + x2) / 2
            mid_y = (y1 + y2) / 2
            ax.text(mid_y, mid_x, f'{int(distance)} yd',
                    fontsize=15, ha='center', va='center',
                    bbox=dict(facecolor='white', alpha=0.5, boxstyle='round,pad=0.3'))
        except Exception as e:
            self._log.error(f"Error adding player distance line: {e}")
            raise

    def add_player_moving_looking_direction(self, ax, x, y, dx, dy, color):
        """
        Draw an arrow to represent a player's movement direction on the plot.

        Parameters:
            ax (matplotlib.axes.Axes): The axes on which to draw the arrow.
            x (float): Arrow origin on the vertical plot axis.
            y (float): Arrow origin on the horizontal plot axis.
            dx (float): Arrow extent along the horizontal plot axis.
            dy (float): Arrow extent along the vertical plot axis.
            color (str): The color of the arrow.

        Returns:
            None
        """
        try:
            ax.arrow(y, x, dx, dy, color=color, ec='black', width=0.25, head_width=0.4, head_length=0.25, shape='full', alpha=0.7)
        except Exception as e:
            self._log.error(f"Error adding player moving direction arrow: {e}")
            raise

    def add_player_scatter_with_arrows(self, ax, x, y, jersey_number, team_color, move_color, look_color, moving_angle, looking_angle, length=0.32, label_prefix=''):
        """
        Add a scatter marker for a player with arrows indicating movement and looking direction.

        Both arrows start at the player's position; their offsets come from
        NFLPlotVisualizeUtils.calculate_fixed_arrow. The looking arrow is drawn
        first, then the movement arrow, then the marker and jersey number.

        Parameters:
            ax (matplotlib.axes.Axes): The axes on which to draw the scatter plot and arrows.
            x (float): Player coordinate drawn on the vertical plot axis.
            y (float): Player coordinate drawn on the horizontal plot axis.
            jersey_number (int): The jersey number of the player.
            team_color (str): The color of the player's marker.
            move_color (str): The color of the movement arrow.
            look_color (str): The color of the looking direction arrow.
            moving_angle (float): The angle of movement in degrees.
            looking_angle (float): The angle of looking direction in degrees.
            length (float, optional): The length of the arrows. Defaults to 0.32.
            label_prefix (str, optional): Legend label of the marker. Defaults to ''.

        Returns:
            None
        """
        try:
            dx_m, dy_m = NFLPlotVisualizeUtils.calculate_fixed_arrow(moving_angle, length=length)
            dx_l, dy_l = NFLPlotVisualizeUtils.calculate_fixed_arrow(looking_angle, length=length)

            ax.arrow(y, x, dx_l, dy_l, color=look_color, width=0.1, head_width=0.5, head_length=0.5, alpha=0.8, ec='black', overhang=0.1)
            ax.arrow(y, x, dx_m, dy_m, color=move_color, width=0.1, head_width=0.5, head_length=0.5, alpha=0.8, ec='black', overhang=0.1)

            ax.scatter(y, x, color=team_color, s=1100, edgecolors='black', label=f"{label_prefix}")
            ax.text(y, x, str(jersey_number), color='white', ha='center', va='center', fontsize=12, weight='bold')
        except Exception as e:
            self._log.error(f"Error adding player scatter with arrows: {e}")
            raise

    def get_down_suffix(self, down):
        """
        Get the ordinal suffix for a given down (e.g., 1st, 2nd, 3rd, 4th).

        Parameters:
            down (int): The down number.

        Returns:
            str: "st", "nd" or "rd" for 1, 2 or 3; "th" for every other value.
        """
        try:
            if down == 1:
                return "st"
            if down == 2:
                return "nd"
            if down == 3:
                return "rd"
            return "th"
        except Exception as e:
            self._log.error(f"Error getting down suffix for {down}: {e}")
            raise

    def add_legends(self, ax, top_handles, top_labels):
        """
        Add the "Team and Game Situation" legend to the right of the plot.

        The legend is additionally registered as an artist on the axes.

        Parameters:
            ax (matplotlib.axes.Axes): The axes on which to add the legend.
            top_handles (list): A list of handles for the legend entries.
            top_labels (list): A list of labels corresponding to the handles.

        Returns:
            None
        """
        try:
            top_legend = ax.legend(
                title="Team and Game Situation",
                handles=top_handles,
                labels=top_labels,
                loc='center left',
                bbox_to_anchor=(1, 0.94),
                prop={'size': 16},
                ncol=2,
                title_fontsize=22
            )
            ax.add_artist(top_legend)
            self._log.info("Legend added successfully.")
        except Exception as e:
            self._log.error(f"Error adding legend: {e}")
            raise

    def plot_network_graph(self, defensive_players_df, node_color):
        """
        Plot a network graph of defensive players based on their positions,
        adding edges between players that are closer than 5 units to each other.

        Every simple cycle of the graph is emphasised: its edges are redrawn in
        black and a translucent polygon is laid over its nodes. Drawing targets
        the current matplotlib axes.

        Parameters:
            defensive_players_df (DataFrame): A DataFrame containing player information
                                                with columns 'displayName', 'x', and 'y'.
            node_color (str): The color to use for the nodes in the network graph.

        Returns:
            None
        """
        try:
            # Initialize a new graph
            G = nx.Graph()

            # One node per player, keyed by display name; the position is stored
            # as (y, x) because the plot axes are swapped.
            for _, row in defensive_players_df.iterrows():
                G.add_node(row['displayName'], pos=(row['y'], row['x']))

            # Connect each unordered pair of players closer than 5 units.
            players = list(G.nodes)
            for index, node1 in enumerate(players):
                x1, y1 = G.nodes[node1]['pos']
                for node2 in players[index + 1:]:
                    x2, y2 = G.nodes[node2]['pos']
                    distance = NFLPlotVisualizeUtils.calculate_distance(x1, y1, x2, y2)
                    if distance < 5:
                        # Inverse-distance weight; co-located players get an
                        # infinite weight without a division-by-zero warning.
                        weight = 1 / distance if distance > 0 else float('inf')
                        G.add_edge(node1, node2, weight=weight)

            # Get positions of nodes for plotting
            pos = nx.get_node_attributes(G, 'pos')

            # Draw the network graph without labels, with specified node size and color
            nx.draw_networkx(G, pos, with_labels=False, node_size=1100, font_size=12, font_weight='bold', node_color=node_color)

            # Identify cycles in the graph for additional visual representation
            cycles = list(nx.simple_cycles(G))
            nx.draw_networkx_nodes(G, pos, node_size=1100, node_color=node_color, edgecolors="black", alpha=0.2)

            # `edgelist` expects (u, v) pairs. Passing the cycles themselves (lists
            # of nodes) only ever drew the first edge of each cycle, so expand
            # every cycle into its consecutive node pairs, closing edge included.
            cycle_edges = [
                (u, v)
                for cycle in cycles
                for u, v in zip(cycle, cycle[1:] + cycle[:1])
            ]
            if cycle_edges:
                nx.draw_networkx_edges(G, pos, edgelist=cycle_edges, edge_color='black', width=2, alpha=0.4)

            # Draw a filled polygon around each cycle for visual emphasis
            for cycle in cycles:
                cycle_nodes = cycle + [cycle[0]]  # Close the cycle
                cycle_pos = np.array([pos[node] for node in cycle_nodes])
                polygon = plt.Polygon(cycle_pos, closed=True, fill=True, color='#FFCCCB', ec="black", alpha=0.1)
                plt.gca().add_patch(polygon)

            self._log.info("Network graph plotted successfully.")
        except Exception as e:
            self._log.error(f"Error plotting network graph: {e}")
            raise

    def save_plot_to_image(self, fig, save_path):
        """
        Save the matplotlib figure to the specified path as a PNG file and close it.

        The figure is closed whether or not saving succeeds, so a failed save
        does not leave an open figure behind.

        Parameters:
            fig (matplotlib.figure.Figure): The figure object to be saved.
            save_path (str): The file path where the image will be saved.

        Returns:
            None
        """
        try:
            fig.savefig(save_path, format='png', bbox_inches='tight')
            self._log.info(f"Figure saved to {save_path} successfully.")
        except Exception as e:
            self._log.error(f"Error saving figure to {save_path}: {e}")
            raise
        finally:
            plt.close(fig)

        

In [8]:
class GamePlayVisualizer:    
    def __init__(self, df, pitch_image_path, assets_dir='assets/game-play/'):
        """
        Set up the GamePlayVisualizer and the helper objects it delegates to.

        Parameters:
        - df (DataFrame): DataFrame containing game play data; it is kept on the
          instance and also handed to SingleGamePlayExtractor.
        - pitch_image_path (str): Path to the pitch image, handed to NFLPlotVisualizer.
        - assets_dir (str): Directory path for storing generated assets. Default is 'assets/game-play/'.
        """
        # Plain attribute storage first ...
        self.df, self.assets_dir = df, assets_dir

        # ... then the collaborators, built in the same order as before.
        utils = NFLPlotVisualizeUtils()
        extractor = SingleGamePlayExtractor(df)
        plotter = NFLPlotVisualizer(pitch_image_path)

        self.utils = utils
        self.data_processor = extractor
        self.visualizer = plotter
    

    def create_directory(self, gameId, playId):
        """
        Ensure the output folder for one play exists and return its path.

        The folder is `<assets_dir>/game_<gameId>/play_<playId>`; missing parent
        folders are created and an already existing folder is not an error.

        Parameters:
        - gameId (int): The unique ID for the game.
        - playId (int): The unique ID for the play within the game.

        Returns:
        - str: The path to the created directory.
        """
        game_folder = f"game_{gameId}"
        play_folder = f"play_{playId}"
        target = os.path.join(self.assets_dir, game_folder, play_folder)
        os.makedirs(target, exist_ok=True)
        return target

    def get_game_clock_at(self, timestamp, start_time, game_clock_timedelta):
        """
        Work out what the game clock shows at a given timestamp.

        The time elapsed between `start_time` and `timestamp` is subtracted from
        `game_clock_timedelta`; a negative result is clamped to zero and
        fractional seconds are truncated.

        Parameters:
        - timestamp (str): The current timestamp in ISO format.
        - start_time (datetime): The moment at which the clock showed `game_clock_timedelta`.
        - game_clock_timedelta (timedelta): The clock value at `start_time`.

        Returns:
        - str: The remaining game time formatted as MM:SS.
        """
        elapsed = datetime.fromisoformat(timestamp) - start_time
        seconds_left = (game_clock_timedelta - elapsed).total_seconds()

        # Never show a negative clock; drop any fractional part.
        whole_seconds = int(seconds_left) if seconds_left >= 0 else 0

        mins, secs = divmod(whole_seconds, 60)
        return f"{mins:02}:{secs:02}"

    def plot_game_in_matplotlib(self, gameId, playId):
        """
        Plots the game play based on the provided gameId and playId using Matplotlib.

        Parameters:
        - gameId (int): The unique ID for the game to be visualized.
        - playId (int): The unique ID for the play within the game.

        Raises:
        - ValueError: If no data is available for the provided gameId and playId.
        """
        home_visitor_team_colors = {
            'LA': "#B3995D", 'ATL': "#A5ACAF", 'CAR': "#0085CA", 'CHI': "#DD4814", 
            'CIN': "#FB4F14", 'DET': "#0076B6", 'HOU': "#03202F", 'MIA': "#008E97", 
            'NYJ': "#203731", 'WAS': "#773141", 'ARI': "#97233F", 'LAC': "#FFB81C", 
            'MIN': "#4F2683", 'TEN': "#0C2340", 'DAL': "#869397", 'SEA': "#69BE28", 
            'KC': "#E31837", 'BAL': "#241773", 'CLE': "#FF3C00", 'JAX': "#006778", 
            'NO': "#D3BC8D", 'NYG': "#0B2265", 'PIT': "#FFB81C", 'SF': "#B3995D", 
            'DEN': "#FB4F14", 'LV': "#000000", 'GB': "#203731", 'BUF': "#00338D", 
            'PHI': "#004C54", 'IND': "#002C5F", 'NE': "#002244", 'TB': "#D50A0A"
        }

        team_colors = {
            'LA': "#B3995D", 'ATL': "#A5ACAF", 'CAR': "#0085CA", 'CHI': "#DD4814", 
            'CIN': "#FB4F14", 'DET': "#0076B6", 'HOU': "#03202F", 'MIA': "#008E97", 
            'NYJ': "#203731", 'WAS': "#773141", 'ARI': "#97233F", 'LAC': "#FFB81C", 
            'MIN': "#4F2683", 'TEN': "#0C2340", 'DAL': "#869397", 'SEA': "#69BE28", 
            'KC': "#E31837", 'BAL': "#241773", 'CLE': "#FF3C00", 'JAX': "#006778", 
            'NO': "#D3BC8D", 'NYG': "#0B2265", 'PIT': "#FFB81C", 'SF': "#B3995D", 
            'DEN': "#FB4F14", 'LV': "#000000", 'GB': "#203731", 'BUF': "#00338D", 
            'PHI': "#004C54", 'IND': "#002C5F", 'NE': "#002244", 'TB': "#D50A0A"
        }
        move_colors = {
            'LA': "#002244", 'ATL': "#000000", 'CAR': "#101820", 'CHI': "#0B162A", 
            'CIN': "#000000", 'DET': "#B0B7BC", 'HOU': "#A71930", 'MIA': "#F58220", 
            'NYJ': "#000000", 'WAS': "#FFB612", 'ARI': "#000000", 'LAC': "#FFB81C", 
            'MIN': "#FFC62F", 'TEN': "#4B92DB", 'DAL': "#041E42", 'SEA': "#A5ACAF", 
            'KC': "#FFB81C", 'BAL': "#000000", 'CLE': "#311D00", 'JAX': "#9F792C", 
            'NO': "#000000", 'NYG': "#A71930", 'PIT': "#101820", 'SF': "#AA0000", 
            'DEN': "#002244", 'LV': "#A5ACAF", 'GB': "#FFB81C", 'BUF': "#C60C30", 
            'PHI': "#A5ACAF", 'IND': "#A5ACAF", 'NE': "#C60C30", 'TB': "#FF7900"
        }
        look_colors = {
            'LA': "#FFFFFF", 'ATL': "#A71930", 'CAR': "#BFC0BF", 'CHI': "#FFFFFF", 
            'CIN': "#FFFFFF", 'DET': "#FFFFFF", 'HOU': "#FFFFFF", 'MIA': "#FFFFFF", 
            'NYJ': "#000000", 'WAS': "#FFFFFF", 'ARI': "#9F792C", 'LAC': "#FFB81C", 
            'MIN': "#FFFFFF", 'TEN': "#FFFFFF", 'DAL': "#FFFFFF", 'SEA': "#FFFFFF", 
            'KC': "#FFFFFF", 'BAL': "#9E7C0C", 'CLE': "#FFFFFF", 'JAX': "#FFFFFF", 
            'NO': "#FFFFFF", 'NYG': "#A5ACAF", 'PIT': "#FFFFFF", 'SF': "#FFFFFF", 
            'DEN': "#FFFFFF", 'LV': "#FFFFFF", 'GB': "#FFFFFF", 'BUF': "#FFFFFF", 
            'PHI': "#FFFFFF", 'IND': "#FFFFFF", 'NE': "#FFFFFF", 'TB': "#FFFFFF"
        }
        is_clock_active = False
        ball_snap_timestamp = None 

        if not ((self.df['gameId'] == gameId) & (self.df['playId'] == playId)).any():
            raise ValueError("No data available for the provided gameId and playId.")
        
        try:
            game_play_df, play_info = self.data_processor.extract_game_play_data(gameId, playId)
        except Exception as e:
            raise ValueError(f"Error extracting game data: {str(e)}")
        
        # Extract relevant information from play_info
        first_down_marker = self.data_processor.determine_first_down_marker(play_info)
        unique_frame_ids = play_info['unique_frame_ids']
        defensive_team = play_info['defensive_team']
        possession_team = play_info['possession_team']
        play_description = play_info['play_description']
        offense_formation = play_info['offense_formation']
        line_of_scrimmage = play_info['line_of_scrimmage']
        down = play_info['down']
        quarter = play_info['quarter']
        yards_to_go = play_info['yards_to_go']
        pre_snap_home_score = play_info['pre_snap_home_score']
        pre_snap_visitor_score = play_info['pre_snap_visitor_score']
        home_team_abbr = play_info['home_team_abbr']
        visitor_team_abbr = play_info['visitor_team_abbr']
        game_clock = play_info["game_lock"]
        clock_time_list = play_info["time"]
        current_game_clock = game_clock

        # Parse the game clock
        game_clock_minutes, game_clock_seconds = map(int, game_clock.split(':'))
        game_clock_timedelta = timedelta(minutes=game_clock_minutes, seconds=game_clock_seconds)
        clock_time_list = sorted(clock_time_list, key=lambda t: datetime.fromisoformat(t))
   
        # Determine team colors
        home_team_color = home_visitor_team_colors.get(home_team_abbr, "black") 
        visitor_team_color = home_visitor_team_colors.get(visitor_team_abbr, "black") 
        defense_team_color = team_colors.get(defensive_team, "black") 
        offense_team_color = team_colors.get(possession_team, "black")

        # Initialize movement and look colors
        defense_move_color = move_colors.get(defensive_team, "black")
        offense_move_color = move_colors.get(possession_team, "black")
        defense_look_color = look_colors.get(defensive_team, "black")
        offense_look_color = look_colors.get(possession_team, "black")
        
        # Create directory for saving images and videos
        save_directory = self.create_directory(gameId, playId)

        # Initialize the base plot
        self.utils.initialize_base_plot(home_team_abbr, home_team_color, visitor_team_abbr, visitor_team_color)
        
        frames = []   
        # Iterate through unique frame IDs to plot each frame     
        for frameId in unique_frame_ids:
            frame_data = game_play_df[game_play_df['frameId'] == frameId]
            ball_data = frame_data[frame_data['Team'] == 'football']
            defensive_players_data = frame_data[frame_data['Team'] == defensive_team]
            # Extract defense formation
            extract_defense_formation = defensive_players_data['position'].value_counts()
            defense_formation = ', '.join([f"{count}- {position}" for position, count in extract_defense_formation.items()])
            offense_player_data = frame_data[frame_data['Team'] == possession_team]
            events = frame_data['event'].astype(str).unique()          

            # Extract ball and player positions
            ball_x = ball_data['x'].values[0]
            ball_y = ball_data['y'].values[0]
            offense_x = offense_player_data['x']
            offense_y = offense_player_data['y']
            defensive_x = defensive_players_data['x']
            defensive_y = defensive_players_data['y']

            # Calculate min and max for plot limits
            min_x = min(min(offense_y), min(defensive_y), ball_y) - 5  
            max_x = max(max(offense_y), max(defensive_y), ball_y) + 5
            min_y = min(min(offense_x), min(defensive_x), ball_x) - 5 
            max_y = max(max(offense_x), max(defensive_x), ball_x) + 5
      
            # Initialize the plot for the current frame
            fig, ax = self.visualizer.initialize_plot(line_of_scrimmage, first_down_marker)
            self.visualizer.plot_network_graph(defensive_players_data,node_color=defense_team_color)
            
            # Plot defensive players
            for _, player in defensive_players_data.iterrows():
                x1, y1 = player['x'], player['y']
                x2, y2 = ball_x, ball_y
                distance = self.utils.calculate_distance(x1, y1, x2, y2)
                jersey_number = int(player['jerseyNumber'])
                self.visualizer.add_player_scatter_with_arrows(ax, x1, y1, jersey_number, defense_team_color, defense_move_color, defense_look_color, player['dir'], player['o'],label_prefix=f"{player['position']}: {jersey_number:02} - {player['displayName']}")
                ax.margins(0.1)

            # Plot offensive players
            for _, off_row in offense_player_data.iterrows():
                x1, y1 = off_row['x'], off_row['y']
                x2, y2 = ball_x, ball_y
                offense_jersey_number = int(off_row['jerseyNumber'])
                distance = self.utils.calculate_distance(x1, y1, x2, y2)
                self.visualizer.add_player_scatter_with_arrows(ax, off_row['x'], off_row['y'], offense_jersey_number, offense_team_color, offense_move_color, offense_look_color, off_row['dir'], off_row['o'],label_prefix=f"{off_row['position']}: {offense_jersey_number:02} - {off_row['displayName']}")
                self.visualizer.add_player_distance_line(ax, distance, x1, x2, y1, y2)

            # Plot the ball
            ax.add_artist(Ellipse((ball_y, ball_x), 0.55, 0.5, facecolor="#755139FF", ec="k", lw=2)) 
            ax.axes.get_yaxis().set_visible(False)
            ax.tick_params(axis='y', which='both', left=False, right=False, labelleft=False, labelright=False)
            ax.set_axis_on()
            ax.axes.get_xaxis().set_visible(True)
            ax.set_xlabel(None)
    
            # Format and set plot title
            words = play_description.split()
            formatted_play_description = '\n'.join(' '.join(words[i:i+20]) for i in range(0, len(words), 20))
            title_str = (
                f'FRAME: {frameId}      ✤ Play Description: {formatted_play_description}\n'
                f'✤ Offense Formation: {offense_formation}     ✤ Defense Formation: {defense_formation}'
            )
            ax.set_title(title_str, x=0.6, y=1, fontweight='bold', fontsize=18)

            # Prepare legends and score display
            down_suffix = self.visualizer.get_down_suffix(down) 
            quarter_suffix = self.visualizer.get_down_suffix(quarter) 
            top_handles = [
                Line2D([0], [0], marker='o', color='w', label=defensive_team, markersize=28, 
                       markerfacecolor=defense_team_color, markeredgecolor='k'),
                Line2D([0], [0], marker='o', color='w', label=possession_team, markersize=28, 
                       markerfacecolor=offense_team_color, markeredgecolor='k'),
                Line2D([0], [0], marker='|', color='#00539CFF', label=f'LOS: {first_down_marker}', linestyle='None',
                       markersize=28, markeredgewidth=4),
                Line2D([0], [0], marker='|', color='#FDD20EFF', label=f'Down: {down}', linestyle='None',
                       markersize=28, markeredgewidth=4),
            ]
            top_labels = [
                f'Defense: {defensive_team}', f'Offense: {possession_team}', 
                f'LOS: {first_down_marker} yds', f'Down: {down}'
            ]
            self.visualizer.add_legends(ax, top_handles, top_labels)

            # Add arrows legend
            arrow_handles = [
                Line2D([0], [0], label='Defense Player Moving', marker='>', markersize=20, markeredgecolor="black", 
                       markerfacecolor=defense_move_color, linestyle='-', color="white", linewidth=3),
                Line2D([0], [0], label='Defense Player Facing', marker='>', markersize=20, markeredgecolor='black', 
                       markerfacecolor=defense_look_color, linestyle='-', color="white", linewidth=3),
                Line2D([0], [0], label='Offense Player Moving', marker='>', markersize=20, markeredgecolor="black", 
                       markerfacecolor=offense_move_color, linestyle='-', color="white", linewidth=3),
                Line2D([0], [0], label='Defense Player Facing', marker='>', markersize=20, markeredgecolor='black', 
                markerfacecolor=offense_look_color, linestyle='-', color="white", linewidth=3),
            ]
            arrow_legend = ax.legend(title="Players Direction",handles=arrow_handles, loc='center left', bbox_to_anchor=(1, 0.07), fontsize='x-large',title_fontsize=22,prop={'size': 19})
            ax.add_artist(arrow_legend)

            # Add player legend
            ax.legend(title="Players Roster", loc='center left', bbox_to_anchor=(1, 0.52),fontsize='x-large', title_fontsize=22, prop={'size': 19})

            ax.set_xlim(min_x, max_x)
            ax.set_ylim(min_y, max_y)
            ax.axis('off')

            # Add score board       
            if 'ball_snap' in events and not is_clock_active:
                is_clock_active = True
                ball_snap_timestamp = frame_data.iloc[0]['time'] 

            if is_clock_active:
                current_game_clock = self.get_game_clock_at(
                    frame_data.iloc[0]['time'],  
                    datetime.fromisoformat(ball_snap_timestamp),  
                    game_clock_timedelta  
                )

            scoreboard_str = (
                f"{home_team_abbr}: {pre_snap_home_score}   |   "
                f"{visitor_team_abbr}: {pre_snap_visitor_score}   |   "
                f"{down}{down_suffix} & {yards_to_go}   |   "
                f"{quarter}{quarter_suffix}      {current_game_clock}"
            )

            fig.text(0.5, 0.1, scoreboard_str, ha='center', va='top', fontsize=45, fontweight='bold')

            # Create and save the image from frames
            image_filename = f"{gameId}_{playId}_{frameId:04d}.png"
            image_path = os.path.join(save_directory, image_filename)
            self.visualizer.save_plot_to_image(fig, image_path)
            frames.append(Image.open(image_path).copy()) 
            logger.info(f"Frame {frameId} saved at {image_path}")

        # Create and save the gif from frames
        gif_path = os.path.join(save_directory, f"{gameId}_{playId}_animation.gif")
        frames[0].save(gif_path, format='GIF', save_all=True, append_images=frames[1:], loop=0, duration=120)
        logger.info(f"Video saved at {gif_path}")

        # Create and save the video from frames
        video_path = os.path.join(save_directory, f"{gameId}_{playId}_animation.mp4")
        processed_frames = self.visualizer.process_frames(frames)
        with imageio.get_writer(video_path, format='ffmpeg', fps=6) as writer:
            for resized_frame in processed_frames:
                writer.append_data(np.array(resized_frame))
        logger.info(f"Video saved at {video_path}")
        time.sleep(10)

In [9]:
class NFLProcessor:
    """
    NFLProcessor drives data retrieval and play visualization.

    The processor holds a data loader, asks it for per-team defense and
    offense data (printing a summary of each), and can render a single play
    by delegating to ``GamePlayVisualizer.plot_game_in_matplotlib``.

    Attributes:
        data_loader (NFLDataLoader): Loader used for every data request.
        team_names (list): Team abbreviations iterated by ``process_teams``.
    """

    # Default team abbreviations processed when no explicit list is supplied.
    DEFAULT_TEAM_NAMES = (
        'LA', 'ATL', 'CAR', 'CHI',
        'CIN', 'DET', 'HOU', 'MIA',
        'NYJ', 'WAS', 'ARI', 'LAC',
        'MIN', 'TEN', 'DAL', 'SEA',
        'KC', 'BAL', 'CLE', 'JAX', 'NO',
        'NYG', 'PIT', 'SF', 'DEN', 'LV',
        'GB', 'BUF', 'PHI', 'IND', 'NE', 'TB',
    )

    def __init__(self, data_loader=None, team_names=None):
        """
        Set up the data loader and the list of teams to process.

        Args:
            data_loader: Optional loader to reuse. When omitted, a new
                ``NFLDataLoader`` is created and ``load_all_data()`` is called
                on it. A loader passed in explicitly is used as-is and
                ``load_all_data()`` is NOT called again.
            team_names: Optional iterable of team abbreviations. When omitted,
                ``DEFAULT_TEAM_NAMES`` is used.
        """
        if data_loader is None:
            data_loader = NFLDataLoader()
            data_loader.load_all_data()
        self.data_loader = data_loader

        # Always store a fresh list so callers can mutate it without touching
        # the class-level default or their own input sequence.
        selected = self.DEFAULT_TEAM_NAMES if team_names is None else team_names
        self.team_names = list(selected)

    def process_teams(self):
        """
        Retrieve, save and summarize defense and offense data for each team.

        Each side is handled independently: a ``ValueError`` raised while
        processing one side of a team is logged and does not prevent the other
        side (or the remaining teams) from being processed.
        """
        for team_name in self.team_names:
            # The lambdas are invoked immediately inside this iteration, so the
            # late binding of ``team_name`` is harmless.
            sides = (
                ("defense", lambda: self.data_loader.get_defense_team_data(
                    defense_team=team_name, save=True)),
                ("offense", lambda: self.data_loader.get_possession_team_data(
                    possession_team=team_name, save=True)),
            )
            for side, fetch in sides:
                try:
                    side_data = fetch()
                    logger.info(f"Retrieved {side} data for {team_name}.")
                    print(self.data_loader.basic_summary(
                        side_data, f"{side.capitalize()} {team_name} Data"))
                except ValueError as e:
                    # Logged once; the message names the side that failed.
                    logger.error(f"Error for team '{team_name}' ({side}): {e}")

    def visualize_game_play(self, game_id, play_id, pitch_image_path):
        """
        Visualize the game play for the specified game ID and play ID.

        Any exception raised while loading or rendering is logged together
        with its traceback and then suppressed, so this method never raises.

        Args:
            game_id (int): The ID of the game to visualize.
            play_id (int): The ID of the play to visualize.
            pitch_image_path (str): The file path to the pitch image.
        """
        try:
            df = self.data_loader.get_specific_game_play_data(game_id, play_id)
            game_visualizer = GamePlayVisualizer(df, pitch_image_path)
            game_visualizer.plot_game_in_matplotlib(game_id, play_id)
        except Exception as e:
            # logger.exception records the traceback in addition to the
            # message, which a plain logger.error(str(e)) would discard.
            logger.exception(f"Error visualizing game play for Game ID: {game_id}, Play ID: {play_id}: {e}")
        else:
            logger.info(f"Successfully visualized game play for Game ID: {game_id}, Play ID: {play_id}.")



In [None]:
# Build the processor; by default its constructor creates the data loader
# and loads all data.
processor = NFLProcessor()
# Optional: uncomment to retrieve, save and summarize per-team data.
# processor.process_teams()

# Play to render: game/play identifiers and the path to the pitch image.
game_id, play_id = 2022091110, 3797
pitch_image_path = 'assets/vertical_pitch.png'
processor.visualize_game_play(
    game_id=game_id,
    play_id=play_id,
    pitch_image_path=pitch_image_path,
)