NFL Big Data Bowl 2025 Data Ingestion 

In [2]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType  

# Session settings, kept as module-level names.
appName = "NFLBigData2025"
master = "local"

# Build (or reuse) the SparkSession. The jar listed under `spark.jars` is the
# PostgreSQL JDBC driver referenced by the confirmation message below.
spark = (
    SparkSession.builder
    .appName(appName)
    .master(master)
    .config("spark.driver.host", "127.0.0.1")
    .config("spark.driver.memory", "8g")
    .config("spark.executor.memory", "4g")
    .config("spark.jars", "/home/koza/Documents/NFL Big Data Bowl 2025/nfl-big-data-bowl-2025/postgresql-42.6.2.jar")
    .getOrCreate()
)
print("Spark session started successfully with PostgreSQL driver")

24/12/27 11:34:42 WARN Utils: Your hostname, user1 resolves to a loopback address: 127.0.1.1; using 10.0.0.36 instead (on interface wlp13s0)
24/12/27 11:34:42 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
24/12/27 11:34:42 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


Spark session started successfully with PostgreSQL driver


games.csv Data Ingestion

In [None]:
from pyspark.sql.functions import col, isnan, when, count
# Columns of games.csv as (name, Spark type), in schema order; all nullable.
_games_fields = [
    ("gameId", IntegerType()),
    ("season", IntegerType()),
    ("week", IntegerType()),
    ("gameDate", StringType()),
    ("gameTimeEastern", StringType()),
    ("homeTeamAbbr", StringType()),
    ("visitorTeamAbbr", StringType()),
    ("homeFinalScore", IntegerType()),
    ("visitorFinalScore", IntegerType()),
]
games_schema = StructType(
    [StructField(field_name, field_type, True) for field_name, field_type in _games_fields]
)

# Read the file with the explicit schema instead of letting Spark infer types.
games_df = spark.read.csv(
    "/home/koza/Documents/NFL Big Data Bowl 2025/nfl-big-data-bowl-2025/Data/games.csv",
    header=True,
    schema=games_schema,
)
# Uncomment to inspect the loaded frame:
# games_df.printSchema()
# games_df.show(5)

# Every games.csv column is fully populated (the saved output of an earlier
# run shows 0.0 missing for each column), so nothing needs to be dropped.
# The cleaning steps below are therefore disabled by wrapping them in a
# triple-quoted string literal: none of them execute, and `games_df_cleaned`
# is NOT defined by this cell.
# Note: a bare string literal as the final statement of a cell is shown as
# the cell's result when the cell is run.
'''
total_rows = games_df.count()

missing_percentage = games_df.select([
    (count(when(col(c).isNull() | isnan(c), c)) / total_rows).alias(c) for c in games_df.columns
])

print("Missing Percentage per Column:")
missing_percentage.show()

columns_to_drop = [c for c in missing_percentage.columns if missing_percentage.collect()[0][c] > 0.75]
print(columns_to_drop)
games_df_cleaned = games_df.drop(*columns_to_drop)
games_df_cleaned = games_df_cleaned.fillna({
    "gameDate": "Unknown",
    "gameTimeEastern": "00:00:00",
    "homeTeamAbbr": "Unknown",
    "visitorTeamAbbr": "Unknown",
    "homeFinalScore": 0,
    "visitorFinalScore": 0
})
'''


Missing Percentage per Column:
+------+------+----+--------+---------------+------------+---------------+--------------+-----------------+
|gameId|season|week|gameDate|gameTimeEastern|homeTeamAbbr|visitorTeamAbbr|homeFinalScore|visitorFinalScore|
+------+------+----+--------+---------------+------------+---------------+--------------+-----------------+
|   0.0|   0.0| 0.0|     0.0|            0.0|         0.0|            0.0|           0.0|              0.0|
+------+------+----+--------+---------------+------------+---------------+--------------+-----------------+

[]
+----------+------+----+---------+---------------+------------+---------------+--------------+-----------------+
|    gameId|season|week| gameDate|gameTimeEastern|homeTeamAbbr|visitorTeamAbbr|homeFinalScore|visitorFinalScore|
+----------+------+----+---------+---------------+------------+---------------+--------------+-----------------+
|2022090800|  2022|   1| 9/8/2022|       20:20:00|          LA|            BUF|       

plays.csv Data Ingestion

In [10]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType, BooleanType
from pyspark.sql.functions import col, isnan, when, count

# Columns of plays.csv as (name, Spark type), in schema order; all nullable.
# Names are kept exactly as declared (including "visitorTeamWinProbilityAdded").
_plays_fields = [
    ("gameId", IntegerType()),
    ("playId", IntegerType()),
    ("playDescription", StringType()),
    ("quarter", IntegerType()),
    ("down", IntegerType()),
    ("yardsToGo", IntegerType()),
    ("possessionTeam", StringType()),
    ("defensiveTeam", StringType()),
    ("yardlineSide", StringType()),
    ("yardlineNumber", IntegerType()),
    ("gameClock", StringType()),
    ("preSnapHomeScore", IntegerType()),
    ("preSnapVisitorScore", IntegerType()),
    ("playNullifiedByPenalty", BooleanType()),
    ("absoluteYardlineNumber", IntegerType()),
    ("preSnapHomeTeamWinProbability", FloatType()),
    ("preSnapVisitorTeamWinProbability", FloatType()),
    ("expectedPoints", FloatType()),
    ("offenseFormation", StringType()),
    ("receiverAlignment", StringType()),
    ("playClockAtSnap", StringType()),
    ("passResult", StringType()),
    ("passLength", FloatType()),
    ("targetX", FloatType()),
    ("targetY", FloatType()),
    ("playAction", BooleanType()),
    ("dropbackType", StringType()),
    ("dropbackDistance", FloatType()),
    ("passLocationType", StringType()),
    ("timeToThrow", FloatType()),
    ("timeInTackleBox", FloatType()),
    ("timeToSack", FloatType()),
    ("passTippedAtLine", BooleanType()),
    ("unblockedPressure", BooleanType()),
    ("qbSpike", BooleanType()),
    ("qbKneel", BooleanType()),
    ("qbSneak", BooleanType()),
    ("rushLocationType", StringType()),
    ("penaltyYards", IntegerType()),
    ("prePenaltyYardsGained", IntegerType()),
    ("yardsGained", IntegerType()),
    ("homeTeamWinProbabilityAdded", FloatType()),
    ("visitorTeamWinProbilityAdded", FloatType()),
    ("expectedPointsAdded", FloatType()),
    ("isDropback", BooleanType()),
    ("pff_runConceptPrimary", StringType()),
    ("pff_runConceptSecondary", StringType()),
    ("pff_runPassOption", BooleanType()),
    ("pff_passCoverage", StringType()),
    ("pff_manZone", StringType()),
]
plays_schema = StructType(
    [StructField(field_name, field_type, True) for field_name, field_type in _plays_fields]
)

# Load plays.csv with the explicit schema declared above.
plays_df = spark.read.csv("/home/koza/Documents/NFL Big Data Bowl 2025/nfl-big-data-bowl-2025/Data/plays.csv", header=True, schema=plays_schema)
#plays_df.printSchema()
#plays_df.show(5)


# Drop columns with more than 75% missing data.

# Keep pff_runPassOption only where its string form is "true"/"false"; any
# other value becomes null.
# NOTE(review): plays_schema already declares this column as BooleanType, so
# this round-trip looks like it leaves the values unchanged -- confirm whether
# it is still needed.
plays_df = plays_df.withColumn(
    "pff_runPassOption",
    when(
        col("pff_runPassOption").cast("string").isin("true", "false"),
        col("pff_runPassOption").cast(BooleanType())
    ).otherwise(None)
)
total_rows = plays_df.count()

# One-row DataFrame holding, per column, the fraction of missing values.
# A value counts as missing when it is null; FloatType columns also count NaN.
missing_percentage = plays_df.select([
    (
        count(when(col(c).isNull(), c)) / total_rows
        if plays_df.schema[c].dataType != FloatType()
        else count(when(col(c).isNull() | isnan(c), c)) / total_rows
    ).alias(c) for c in plays_df.columns
])
missing_percentage.show()

# Collect the single result row ONCE. Calling collect() inside the
# comprehension would trigger a separate Spark job (a full re-aggregation of
# plays_df) for every column.
missing_row = missing_percentage.collect()[0]
columns_to_drop = [
    c for c in missing_percentage.columns if missing_row[c] > 0.75
]
print(f"Number of columns dropped: {len(columns_to_drop)}")
print(f"Columns dropped: {columns_to_drop}")
plays_df_cleaned = plays_df.drop(*columns_to_drop)
# Columns that survived the sparse-column drop.
remaining_columns = plays_df_cleaned.columns

# Replacement value for nulls in each plays column (keyed by column name).
default_values = dict(
    gameId=-1,
    playId=-1,
    playDescription="No Description",
    quarter=0,
    down=0,
    yardsToGo=0,
    possessionTeam="Unknown",
    defensiveTeam="Unknown",
    yardlineSide="N/A",
    yardlineNumber=0,
    gameClock="00:00",
    preSnapHomeScore=0,
    preSnapVisitorScore=0,
    playNullifiedByPenalty=False,
    absoluteYardlineNumber=0,
    preSnapHomeTeamWinProbability=0.5,
    preSnapVisitorTeamWinProbability=0.5,
    expectedPoints=0.0,
    offenseFormation="Unknown",
    receiverAlignment="Unknown",
    playClockAtSnap="00:00",
    passResult="Unknown",
    passLength=0.0,
    targetX=0.0,
    targetY=0.0,
    playAction=False,
    dropbackType="Unknown",
    dropbackDistance=0.0,
    passLocationType="Unknown",
    timeToThrow=0.0,
    timeInTackleBox=0.0,
    timeToSack=0.0,
    passTippedAtLine=False,
    unblockedPressure=False,
    qbSpike=False,
    qbKneel=False,
    qbSneak=False,
    rushLocationType="Unknown",
    penaltyYards=0,
    prePenaltyYardsGained=0,
    yardsGained=0,
    homeTeamWinProbabilityAdded=0.0,
    visitorTeamWinProbilityAdded=0.0,
    expectedPointsAdded=0.0,
    isDropback=False,
    pff_runConceptPrimary="Unknown",
    pff_runConceptSecondary="Unknown",
    pff_runPassOption=False,
    pff_passCoverage="Unknown",
    pff_manZone="Unknown",
)

# Only pass defaults for columns that still exist in the cleaned frame.
default_values_filtered = {
    column_name: default_values[column_name]
    for column_name in default_values
    if column_name in remaining_columns
}
plays_df_cleaned = plays_df_cleaned.fillna(default_values_filtered)


+------+------+---------------+-------+----+---------+--------------+-------------+------------+--------------+---------+----------------+-------------------+----------------------+----------------------+-----------------------------+--------------------------------+--------------+----------------+-----------------+---------------+-------------------+------------------+-------------------+-------------------+----------+------------+-------------------+----------------+-------------------+------------------+------------------+------------------+-------------------+------------------+-------+------------------+----------------+------------------+---------------------+-----------+---------------------------+----------------------------+-------------------+----------+---------------------+-----------------------+-----------------+----------------+-----------+
|gameId|playId|playDescription|quarter|down|yardsToGo|possessionTeam|defensiveTeam|yardlineSide|yardlineNumber|gameClock|preSnapHome

player_play.csv Data Ingestion

This dataset contains mostly blank or zero values, so it most likely will not be used at all.

In [None]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, BooleanType, FloatType
from pyspark.sql.functions import col, isnan, when, count

# Columns of player_play.csv as (name, Spark type), in schema order; all nullable.
_playerplay_fields = [
    ("gameId", IntegerType()),
    ("playId", IntegerType()),
    ("nflId", IntegerType()),
    ("teamAbbr", StringType()),
    ("hadRushAttempt", IntegerType()),
    ("rushingYards", FloatType()),
    ("hadDropback", IntegerType()),
    ("passingYards", FloatType()),
    ("sackYardsAsOffense", FloatType()),
    ("hadPassReception", IntegerType()),
    ("receivingYards", FloatType()),
    ("wasTargettedReceiver", IntegerType()),
    ("yardageGainedAfterTheCatch", FloatType()),
    ("fumbles", IntegerType()),
    ("fumbleLost", IntegerType()),
    ("fumbleOutOfBounds", IntegerType()),
    ("assistedTackle", IntegerType()),
    ("forcedFumbleAsDefense", IntegerType()),
    ("halfSackYardsAsDefense", FloatType()),
    ("passDefensed", IntegerType()),
    ("quarterbackHit", IntegerType()),
    ("sackYardsAsDefense", FloatType()),
    ("safetyAsDefense", IntegerType()),
    ("soloTackle", IntegerType()),
    ("tackleAssist", IntegerType()),
    ("tackleForALoss", IntegerType()),
    ("tackleForALossYardage", FloatType()),
    ("hadInterception", IntegerType()),
    ("interceptionYards", FloatType()),
    ("fumbleRecoveries", IntegerType()),
    ("fumbleRecoveryYards", FloatType()),
    ("penaltyYards", IntegerType()),
    ("penaltyNames", StringType()),
    ("wasInitialPassRusher", IntegerType()),
    ("causedPressure", BooleanType()),
    ("timeToPressureAsPassRusher", FloatType()),
    ("getOffTimeAsPassRusher", FloatType()),
    ("inMotionAtBallSnap", BooleanType()),
    ("shiftSinceLineset", BooleanType()),
    ("motionSinceLineset", BooleanType()),
    ("wasRunningRoute", BooleanType()),
    ("routeRan", StringType()),
    ("blockedPlayerNFLId1", IntegerType()),
    ("blockedPlayerNFLId2", IntegerType()),
    ("blockedPlayerNFLId3", IntegerType()),
    ("pressureAllowedAsBlocker", IntegerType()),
    ("timeToPressureAllowedAsBlocker", FloatType()),
    ("pff_defensiveCoverageAssignment", StringType()),
    ("pff_primaryDefensiveCoverageMatchupNflId", IntegerType()),
    ("pff_secondaryDefensiveCoverageMatchupNflId", IntegerType()),
]
playerplay_schema = StructType(
    [StructField(field_name, field_type, True) for field_name, field_type in _playerplay_fields]
)

# Load player_play.csv with the explicit schema declared above.
playerplay_data_df = spark.read.csv("/home/koza/Documents/NFL Big Data Bowl 2025/nfl-big-data-bowl-2025/Data/player_play.csv", header=True, schema=playerplay_schema)

total_rows = playerplay_data_df.count()

# Drop columns with more than 50% missing data.
# NOTE(review): the other ingestion cells use a 0.75 threshold; this cell uses
# 0.5 -- confirm that the stricter cut-off is intentional.

# One-row DataFrame holding, per column, the fraction of missing values.
# A value counts as missing when it is null; FloatType columns also count NaN.
missing_percentage = playerplay_data_df.select([
    (
        count(when(col(c).isNull(), c)) / total_rows
        if playerplay_data_df.schema[c].dataType != FloatType()
        else count(when(col(c).isNull() | isnan(c), c)) / total_rows
    ).alias(c) for c in playerplay_data_df.columns
])

missing_percentage.show()

# Collect the single result row ONCE. Calling collect() inside the
# comprehension would trigger a separate Spark job (a full re-aggregation of
# the DataFrame) for every column.
missing_row = missing_percentage.collect()[0]
columns_to_drop = [
    c for c in missing_percentage.columns if missing_row[c] > 0.5
]

print(f"Number of columns dropped: {len(columns_to_drop)}")
print(f"Columns dropped: {columns_to_drop}")

playerplay_data_df_cleaned = playerplay_data_df.drop(*columns_to_drop)

# Columns that survived the sparse-column drop.
remaining_columns = playerplay_data_df_cleaned.columns

# Replacement value for nulls in each player_play column (keyed by column name).
default_values = dict(
    gameId=-1,
    playId=-1,
    nflId=-1,
    teamAbbr="Unknown",
    hadRushAttempt=0,
    rushingYards=0.0,
    hadDropback=0,
    passingYards=0.0,
    sackYardsAsOffense=0.0,
    hadPassReception=0,
    receivingYards=0.0,
    wasTargettedReceiver=0,
    yardageGainedAfterTheCatch=0.0,
    fumbles=0,
    fumbleLost=0,
    fumbleOutOfBounds=0,
    assistedTackle=0,
    forcedFumbleAsDefense=0,
    halfSackYardsAsDefense=0.0,
    passDefensed=0,
    quarterbackHit=0,
    sackYardsAsDefense=0.0,
    safetyAsDefense=0,
    soloTackle=0,
    tackleAssist=0,
    tackleForALoss=0,
    tackleForALossYardage=0.0,
    hadInterception=0,
    interceptionYards=0.0,
    fumbleRecoveries=0,
    fumbleRecoveryYards=0.0,
    penaltyYards=0,
    penaltyNames="No Penalty",
    wasInitialPassRusher=0,
    causedPressure=False,
    timeToPressureAsPassRusher=0.0,
    getOffTimeAsPassRusher=0.0,
    inMotionAtBallSnap=False,
    shiftSinceLineset=False,
    motionSinceLineset=False,
    wasRunningRoute=False,
    routeRan="Unknown",
    blockedPlayerNFLId1=-1,
    blockedPlayerNFLId2=-1,
    blockedPlayerNFLId3=-1,
    pressureAllowedAsBlocker=0,
    timeToPressureAllowedAsBlocker=0.0,
    pff_defensiveCoverageAssignment="Unknown",
    pff_primaryDefensiveCoverageMatchupNflId=-1,
    pff_secondaryDefensiveCoverageMatchupNflId=-1,
)

# Only pass defaults for columns that still exist in the cleaned frame.
default_values_filtered = {
    column_name: default_values[column_name]
    for column_name in default_values
    if column_name in remaining_columns
}

playerplay_data_df_cleaned = playerplay_data_df_cleaned.fillna(default_values_filtered)

# Preview the first rows of the cleaned frame.
playerplay_data_df_cleaned.show(5)


                                                                                

+------+------+-----+--------+--------------+------------+-----------+------------+------------------+----------------+--------------+--------------------+--------------------------+-------+----------+-----------------+--------------+---------------------+----------------------+------------+--------------+------------------+---------------+----------+------------+--------------+---------------------+---------------+-----------------+----------------+-------------------+------------+------------+--------------------+--------------+--------------------------+----------------------+------------------+-----------------+------------------+---------------+--------+-------------------+-------------------+-------------------+------------------------+------------------------------+-------------------------------+----------------------------------------+------------------------------------------+
|gameId|playId|nflId|teamAbbr|hadRushAttempt|rushingYards|hadDropback|passingYards|sackYardsAsOffens

                                                                                

Number of columns dropped: 14
Columns dropped: ['wasInitialPassRusher', 'timeToPressureAsPassRusher', 'getOffTimeAsPassRusher', 'inMotionAtBallSnap', 'shiftSinceLineset', 'motionSinceLineset', 'wasRunningRoute', 'blockedPlayerNFLId1', 'blockedPlayerNFLId2', 'blockedPlayerNFLId3', 'pressureAllowedAsBlocker', 'timeToPressureAllowedAsBlocker', 'pff_primaryDefensiveCoverageMatchupNflId', 'pff_secondaryDefensiveCoverageMatchupNflId']
+----------+------+-----+--------+--------------+------------+-----------+------------+------------------+----------------+--------------+--------------------+--------------------------+-------+----------+-----------------+--------------+---------------------+----------------------+------------+--------------+------------------+---------------+----------+------------+--------------+---------------------+---------------+-----------------+----------------+-------------------+------------+------------+--------------+--------+-------------------------------+
|    g

players.csv Data Ingestion

In [13]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
from pyspark.sql.functions import col, isnan, when, count

# Columns of players.csv as (name, Spark type), in schema order; all nullable.
_players_fields = [
    ("nflId", IntegerType()),
    ("height", StringType()),
    ("weight", IntegerType()),
    ("birthDate", StringType()),
    ("collegeName", StringType()),
    ("position", StringType()),
    ("displayName", StringType()),
]
players_schema = StructType(
    [StructField(field_name, field_type, True) for field_name, field_type in _players_fields]
)

# Load players.csv with the explicit schema declared above.
players_data_df = spark.read.csv("/home/koza/Documents/NFL Big Data Bowl 2025/nfl-big-data-bowl-2025/Data/players.csv", header=True, schema=players_schema)

total_rows = players_data_df.count()

# Drop columns with more than 75% missing data.

# One-row DataFrame holding, per column, the fraction of missing values.
# A value counts as missing when it is null; StringType columns also count
# the empty string.
missing_percentage = players_data_df.select([
    (
        count(when(col(c).isNull(), c)) / total_rows
        if players_data_df.schema[c].dataType != StringType()
        else count(when(col(c).isNull() | (col(c) == ""), c)) / total_rows
    ).alias(c) for c in players_data_df.columns
])

missing_percentage.show()

# Collect the single result row ONCE. Calling collect() inside the
# comprehension would trigger a separate Spark job (a full re-aggregation of
# the DataFrame) for every column.
missing_row = missing_percentage.collect()[0]
columns_to_drop = [
    c for c in missing_percentage.columns if missing_row[c] > 0.75
]

print(f"Number of columns dropped: {len(columns_to_drop)}")
print(f"Columns dropped: {columns_to_drop}")

players_data_df_cleaned = players_data_df.drop(*columns_to_drop)

# Columns that survived the sparse-column drop.
remaining_columns = players_data_df_cleaned.columns

# Replacement value for nulls in each players column (keyed by column name).
default_values = dict(
    nflId=-1,
    height="N/A",
    weight=0,
    birthDate="N/A",
    collegeName="Unknown",
    position="Unknown",
    displayName="Unknown",
)

# Only pass defaults for columns that still exist in the cleaned frame.
default_values_filtered = {
    column_name: default_values[column_name]
    for column_name in default_values
    if column_name in remaining_columns
}

players_data_df_cleaned = players_data_df_cleaned.fillna(default_values_filtered)

# Preview the first rows of the cleaned frame.
players_data_df_cleaned.show(5)


+-----+------+------+---------+-----------+--------+-----------+
|nflId|height|weight|birthDate|collegeName|position|displayName|
+-----+------+------+---------+-----------+--------+-----------+
|  0.0|   0.0|   0.0|      0.0|        0.0|     0.0|        0.0|
+-----+------+------+---------+-----------+--------+-----------+

Number of columns dropped: 0
Columns dropped: []
+-----+------+------+----------+--------------+--------+--------------+
|nflId|height|weight| birthDate|   collegeName|position|   displayName|
+-----+------+------+----------+--------------+--------+--------------+
|25511|   6-4|   225|1977-08-03|      Michigan|      QB|     Tom Brady|
|29550|   6-4|   328|1982-01-22|      Arkansas|       T|  Jason Peters|
|29851|   6-2|   225|1983-12-02|    California|      QB| Aaron Rodgers|
|30842|   6-6|   267|1984-05-19|          UCLA|      TE|Marcedes Lewis|
|33084|   6-4|   217|1985-05-17|Boston College|      QB|     Matt Ryan|
+-----+------+------+----------+--------------+--

tracking_week_1.csv to tracking_week_9.csv Data Ingestion (combined)

In [17]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType, TimestampType
from pyspark.sql.functions import lit, col, isnan, when, count

# Columns of the tracking files as (name, Spark type), in schema order; all
# nullable. The trailing "week" field is overwritten with the file's week
# number when each file is loaded.
_tracking_fields = [
    ("gameId", IntegerType()),
    ("playId", IntegerType()),
    ("nflId", IntegerType()),
    ("displayName", StringType()),
    ("frameId", IntegerType()),
    ("frameType", StringType()),
    ("time", TimestampType()),
    ("jerseyNumber", IntegerType()),
    ("club", StringType()),
    ("playDirection", StringType()),
    ("x", FloatType()),
    ("y", FloatType()),
    ("s", FloatType()),
    ("a", FloatType()),
    ("dis", FloatType()),
    ("o", FloatType()),
    ("dir", FloatType()),
    ("event", StringType()),
    ("week", IntegerType()),
]
tracking_schema = StructType(
    [StructField(field_name, field_type, True) for field_name, field_type in _tracking_fields]
)

# Load tracking_week_1.csv .. tracking_week_9.csv, tag each with its week
# number, and union them into a single DataFrame.
tracking_df_with_week = None
for i in range(1, 10):
    tracking_file_path = f"/home/koza/Documents/NFL Big Data Bowl 2025/nfl-big-data-bowl-2025/Data/tracking_week_{i}.csv"
    # withColumn replaces the schema's "week" field with the literal week number.
    df = spark.read.csv(tracking_file_path, header=True, schema=tracking_schema).withColumn("week", lit(i))
    tracking_df_with_week = df if tracking_df_with_week is None else tracking_df_with_week.union(df)

total_rows = tracking_df_with_week.count()

# Drop columns with more than 75% missing data.

# One-row DataFrame holding, per column, the fraction of missing values.
# A value counts as missing when it is null; FloatType columns also count NaN.
# NOTE(review): the saved output of this notebook shows the literal string
# "NA" in `event`. No `nullValue` option is passed to the reader, so such
# strings are not nulls and are not counted as missing here -- confirm whether
# that is intended.
missing_percentage = tracking_df_with_week.select([
    (
        count(when(col(c).isNull(), c)) / total_rows
        if tracking_df_with_week.schema[c].dataType != FloatType()
        else count(when(col(c).isNull() | isnan(c), c)) / total_rows
    ).alias(c) for c in tracking_df_with_week.columns
])

missing_percentage.show()

# Collect the single result row ONCE. Calling collect() inside the
# comprehension would trigger a separate Spark job (re-reading and
# re-aggregating all nine tracking files) for every column.
missing_row = missing_percentage.collect()[0]
columns_to_drop = [
    c for c in missing_percentage.columns if missing_row[c] > 0.75
]

print(f"Number of columns dropped: {len(columns_to_drop)}")
print(f"Columns dropped: {columns_to_drop}")

tracking_df_cleaned = tracking_df_with_week.drop(*columns_to_drop)

# Columns that survived the sparse-column drop.
remaining_columns = tracking_df_cleaned.columns

# Replacement value for nulls in each tracking column (keyed by column name).
default_values = dict(
    gameId=-1,
    playId=-1,
    nflId=-1,
    displayName="Unknown",
    frameId=0,
    frameType="Unknown",
    time="1970-01-01 00:00:00",
    jerseyNumber=0,
    club="Unknown",
    playDirection="Unknown",
    x=0.0,
    y=0.0,
    s=0.0,
    a=0.0,
    dis=0.0,
    o=0.0,
    dir=0.0,
    event="Unknown",
    week=-1,
)

# Only pass defaults for columns that still exist in the cleaned frame.
default_values_filtered = {
    column_name: default_values[column_name]
    for column_name in default_values
    if column_name in remaining_columns
}

tracking_df_cleaned = tracking_df_cleaned.fillna(default_values_filtered)

# Preview the first rows of the cleaned frame.
tracking_df_cleaned.show(5)


                                                                                

+------+------+--------------------+-----------+-------+---------+----+--------------------+----+-------------+---+---+---+---+---+-------------------+-------------------+-----+----+
|gameId|playId|               nflId|displayName|frameId|frameType|time|        jerseyNumber|club|playDirection|  x|  y|  s|  a|dis|                  o|                dir|event|week|
+------+------+--------------------+-----------+-------+---------+----+--------------------+----+-------------+---+---+---+---+---+-------------------+-------------------+-----+----+
|   0.0|   0.0|0.043478260869565216|        0.0|    0.0|      0.0| 0.0|0.043478260869565216| 0.0|          0.0|0.0|0.0|0.0|0.0|0.0|0.04343996488770875|0.04343996488770875|  0.0| 0.0|
+------+------+--------------------+-----------+-------+---------+----+--------------------+----+-------------+---+---+---+---+---+-------------------+-------------------+-----+----+



                                                                                

Number of columns dropped: 0
Columns dropped: []
+----------+------+-----+--------------+-------+-----------+--------------------+------------+----+-------------+-----+-----+----+----+----+------+-----+--------------------+----+
|    gameId|playId|nflId|   displayName|frameId|  frameType|                time|jerseyNumber|club|playDirection|    x|    y|   s|   a| dis|     o|  dir|               event|week|
+----------+------+-----+--------------+-------+-----------+--------------------+------------+----+-------------+-----+-----+----+----+----+------+-----+--------------------+----+
|2022091200|    64|35459|Kareem Jackson|      1|BEFORE_SNAP|2022-09-13 00:16:...|          22| DEN|        right|51.06|28.55|0.72|0.37|0.07|246.17|68.34|huddle_break_offense|   1|
|2022091200|    64|35459|Kareem Jackson|      2|BEFORE_SNAP|2022-09-13 00:16:...|          22| DEN|        right|51.13|28.57|0.71|0.36|0.07|245.41|71.21|                  NA|   1|
|2022091200|    64|35459|Kareem Jackson|      3|BEF