In [1]:
from QuantitativeCleaner import *

In [None]:
# Read path: optimized tracking data (input to the commented-out process_file call below)
optimized_path = "F:/Data/Processing_data/OptimizedTrackData.parquet"

# Write paths
# group_dir holds the per-group "group_*" parquet files that tracking_injuries reads;
# main_dir is where the combined TrackingInjuries.parquet is written.
group_dir = "F:/Data/Processing_data/injury_output"
main_dir = "F:/Data/Processing_data/"

In [None]:
# optimize_injury_data()

In [None]:
# process_file(optimized_path, group_dir)

In [None]:
#### Tracking Output
def tracking_injuries(group_dir, main_dir):
    """
    Collect the tracking rows that belong to injury plays into a single parquet file.

    Every file in `group_dir` whose name starts with "group_" is read as parquet, reduced to the
    rows whose PlayKey appears in the injury table (inner join), and the filtered pieces are
    concatenated and written to "<main_dir>/TrackingInjuries.parquet".

    Parameters
    ----------
    group_dir : str
        Directory containing the "group_*" parquet files.
    main_dir : str
        Directory that receives "TrackingInjuries.parquet".

    Raises
    ------
    ValueError
        If `group_dir` contains no "group_*" files (nothing to concatenate).
    """
    from DataHandler import data_loader
    import os
    import polars as pl # type: ignore
    pl.enable_string_cache()

    # Unique PlayKeys from the injury file, cast to Utf8 so the join key dtype matches the tracking data
    injuryPlayKeys = data_loader('injuries')
    PlayKeys = injuryPlayKeys.select("PlayKey").unique().with_columns([
            pl.col("PlayKey").cast(pl.Utf8)
            ])

    # Sorted so the row order of the output does not depend on the directory listing order
    group_files = sorted(file for file in os.listdir(group_dir) if file.startswith("group_"))
    if not group_files:
        # pl.concat would fail on an empty list anyway; fail early with a message that names the cause
        raise ValueError(f"No files starting with 'group_' were found in {group_dir}")

    filtered_dfs = []
    for file in group_files:
        file_path = os.path.join(group_dir, file)

        # Read the Parquet file and make sure PlayKey is Utf8 before joining
        df = pl.read_parquet(file_path).with_columns(pl.col("PlayKey").cast(pl.Utf8))

        # Inner join with the injury PlayKeys keeps only rows from injury plays
        filtered_dfs.append(df.join(PlayKeys, on="PlayKey", how="inner"))

    # Concatenate all filtered dataframes
    final_df = pl.concat(filtered_dfs)

    # Save the concatenated dataframe
    final_df.write_parquet(os.path.join(main_dir, "TrackingInjuries.parquet"))

    print("Processing complete. Filtered summary dataframe saved as 'TrackingInjuries.parquet'")

In [None]:
tracking_injuries(group_dir, main_dir)

In [None]:
from QualitativeCleaner import *
from DataHandler import parquet_writer, data_shrinker

In [None]:
# Interactive preview of the injury cleaning steps; clean_injury_qual() below runs the
# same sequence and writes the result to disk.
df = table_joiner('injury')
df = injury_interpolator(df, 'injury')
df = stadium_cleaner(df)
df = weather_cleaner(df)
# data_shrinker returns the frame plus a second value that is not used here
df, schema = data_shrinker(df)


df.head()

In [None]:
##### Primary Injury Cleaning Function #####
def clean_injury_qual():
    """
    Runs the qualitative cleaning pipeline for the injury data and writes the result
    to 'QualitativeInjuries.parquet' as a Parquet file.

    Steps, in order: table_joiner, injury_interpolator, stadium_cleaner, weather_cleaner,
    then data_shrinker before the write. Nothing is returned; the output is the file on disk.
    """
    from DataHandler import data_shrinker

    analysis = "injury"
    injury_qual_path = "F:/Data/Processing_data/QualitativeInjuries.parquet"

    df = table_joiner(analysis)
    df = injury_interpolator(df, analysis)
    df = stadium_cleaner(df)
    df = weather_cleaner(df)
    # data_shrinker also returns a second value, which is not needed here
    df, _schema = data_shrinker(df)
    df.write_parquet(injury_qual_path)

    print('Injuries have been cleaned and dressed.')


In [None]:
clean_injury_qual()

In [None]:
##### Primary Concussion Cleaning Function #####
def clean_concussions():
    """
    Runs the qualitative cleaning pipeline for the concussion data and writes the result
    to 'QualitativeConcussions.parquet' as a Parquet file.

    Steps, in order: table_joiner, injury_interpolator, stadium_cleaner, weather_cleaner,
    turf_cleaner, cancellation_cleaner, score_splitter, then data_shrinker before the write.
    Nothing is returned; the output is the file on disk.
    """
    from DataHandler import data_shrinker

    analysis = "concussion"
    concussion_qual_path = "F:/Data/Processing_data/QualitativeConcussions.parquet"

    df = table_joiner(analysis)
    df = injury_interpolator(df, analysis)
    df = stadium_cleaner(df)
    df = weather_cleaner(df)
    df = turf_cleaner(df)
    df = cancellation_cleaner(df)
    df = score_splitter(df)
    # data_shrinker also returns a second value, which is not needed here
    df, _schema = data_shrinker(df)
    df.write_parquet(concussion_qual_path)

    print('Concussions have been assessed and cleared for play.')

In [None]:
clean_concussions()

In [None]:
from QuantitativeCleaner import *

In [None]:
injury_summary_maker(group_dir = "F:/Data/Processing_data/injury_output")

In [None]:
import polars as pl

len(pl.read_parquet("F:/Data/Processing_data/Full_Summary_Injuries.parquet"))

# Concussion Data

In [None]:
df = pl.read_csv("F:/Data/NFL-Punt-Analytics-Competition/NGS-2017-post.csv")

In [None]:
df.head()

In [None]:
pl.read_parquet("F:/Data/Processing_data/TrackingInjuries.parquet").head()

In [None]:
from QuantitativeCleaner import *
import polars as pl
import os


# Directory holding the raw punt-analytics CSV files (NGS tracking, video review, player data)
source_dir = "F:/Data/NFL-Punt-Analytics-Competition/"
# Directory for the processed concussion tracking parquet files
concussion_dir = "F:/Data/Processing_data/concussion_output"

# Create the output directory if it does not exist yet
os.makedirs(concussion_dir, exist_ok=True)

In [None]:
df_test, schema = data_shrinker(df)
df_test.head()

In [None]:
def column_corrector(df):
    """
    Reshape raw NGS tracking rows into the column layout used by the injury tracking data.

    Builds a 'PlayKey' of the form "<GSISID>-<GameKey>-<PlayID>", keeps only PlayKey, x, y, o,
    dir and GSISID, and adds a 'time' column that acts like the 'time' column did in the injury
    dataset: within each PlayKey the rows are ordered by their 'Time' stamp and numbered
    0.0, 0.1, 0.2, ... (one 0.1 step per record).

    Parameters
    ----------
    df : polars.DataFrame
        Must contain the columns GSISID, GameKey, PlayID, Time, x, y, o and dir.

    Returns
    -------
    polars.DataFrame
        Columns PlayKey, x, y, o, dir, GSISID (Int32) and time (Float32), sorted by PlayKey.
    """
    import polars as pl # type: ignore

    df = df.with_columns([
        pl.concat_str([
            pl.col('GSISID').cast(pl.Int32).cast(pl.Utf8)
            , pl.lit('-')
            , pl.col('GameKey').cast(pl.Utf8)
            , pl.lit('-')
            , pl.col('PlayID').cast(pl.Utf8)
        ]).alias('PlayKey')
    ])

    df = df.select([
        'PlayKey'
        , 'Time'
        , 'x'
        , 'y'
        , 'o'
        , 'dir'
        , 'GSISID'
        ]).rename({"Time":"DateTime"})

    # Order the records inside each play so the running counter below follows the timestamps.
    # NOTE(review): presumably 'Time' sorts chronologically in its current dtype - confirm.
    df = df.sort(['PlayKey', 'DateTime'])

    # Per-PlayKey row index scaled to 0.1 steps; the cast to Float32 happens after the window
    df = df.with_columns(
        (pl.arange(0, pl.len()) * 0.1).over("PlayKey").cast(pl.Float32).alias("time")
        ).with_columns([pl.col('GSISID').cast(pl.Int32)])

    # The timestamp was only needed for ordering
    df = df.drop(['DateTime'])

    return df

In [None]:
def body_builder_conc(df):
    """
    Attach per-position body measurements to the concussion tracking rows.

    A small reference table of height (m), weight (kg) and chest radius (m) per position is
    joined onto the player list from player_punt_data.csv, and that is joined onto `df` by
    GSISID. Rows whose player has no Position are dropped. The measurements are stored as
    Float32 and Position as Categorical.

    Returns the enriched DataFrame, or None (after printing the error) if anything fails.
    """
    import polars as pl # type: ignore

    # Enable global string cache
    pl.enable_string_cache()

    try:
        # (position, height in m, weight in kg)
        position_stats = [
            ("QB", 1.91, 102.1),
            ("RB", 1.79, 95.3),
            ("FB", 1.85, 111.1),
            ("WR", 1.88, 90.7),
            ("TE", 1.96, 114.6),
            ("T", 1.97, 140.6),
            ("G", 1.90, 141.8),
            ("C", 1.87, 136.1),
            ("DE", 1.97, 120.2),
            ("DT", 1.92, 141.8),
            ("NT", 1.88, 152.0),
            ("LB", 1.90, 110.0),
            ("OLB", 1.90, 108.9),
            ("MLB", 1.87, 113.4),
            ("CB", 1.82, 87.4),
            ("S", 1.84, 95.9),
            ("K", 1.83, 92.08),
            ("P", 1.88, 97.52),
            ("SS", 1.84, 95.9),
            ("ILB", 1.90, 110.0),
            ("FS", 1.84, 95.9),
            ("LS", 1.88, 108.86),
            ("DB", 1.82, 87.4),
        ]
        # The same chest radius is used for every position
        chest_radius_m = 0.191

        body_data = pl.DataFrame({
            "Position": [name for name, _, _ in position_stats],
            "Height_m": [height for _, height, _ in position_stats],
            "Weight_kg": [weight for _, _, weight in position_stats],
            "Chest_rad_m": [chest_radius_m] * len(position_stats),
        }).with_columns([
            pl.col("Height_m").cast(pl.Float32),
            pl.col("Weight_kg").cast(pl.Float32),
            pl.col("Chest_rad_m").cast(pl.Float32),
            pl.col("Position").cast(pl.Categorical),
        ])

        Player_path = "F:/Data/NFL-Punt-Analytics-Competition/player_punt_data.csv"
        players = pl.read_csv(Player_path).select(["GSISID", "Position"])
        players = players.with_columns([
            pl.col("GSISID").cast(pl.Int32),
            pl.col("Position").cast(pl.Categorical),
        ])

        # Player -> position -> measurements
        players_with_body = players.join(body_data, on='Position', how='left')

        # Match the join key dtype, then attach the player data to every tracking row
        tracking = df.with_columns([pl.col("GSISID").cast(pl.Int32)])
        enriched = tracking.join(players_with_body, on='GSISID', how='left')

        return enriched.filter(pl.col('Position').is_not_null())

    except Exception as e:
        print(f"An error occurred during body_builder: {e}")
        return None

In [None]:
review = pl.read_csv("F:/Data/NFL-Punt-Analytics-Competition/video_review.csv")

In [None]:
def clean_review():
    """
    Load video_review.csv, shrink it with data_shrinker, and key it by PlayKey.

    The PlayKey is "<GSISID>-<GameKey>-<PlayID>". The identifier columns used to build it,
    along with Season_Year, Turnover_Related and Friendly_Fire, are dropped from the result.
    """
    raw_review = pl.read_csv("F:/Data/NFL-Punt-Analytics-Competition/video_review.csv")
    shrunk_review, _schema = data_shrinker(raw_review)

    play_key = pl.concat_str([
        pl.col('GSISID').cast(pl.Int32).cast(pl.Utf8),
        pl.lit('-'),
        pl.col('GameKey').cast(pl.Utf8),
        pl.lit('-'),
        pl.col('PlayID').cast(pl.Utf8),
    ]).alias('PlayKey')

    columns_to_drop = ['Season_Year', 'GameKey', 'PlayID', 'GSISID', 'Turnover_Related', 'Friendly_Fire']

    return shrunk_review.with_columns([play_key]).drop(columns_to_drop)

In [None]:
review = clean_review()

In [None]:
review.head()

In [None]:
# Left-join the cleaned review columns onto the tracking preview, keyed by PlayKey.
# NOTE(review): 'PlayKey' is created by column_corrector; if df_test still comes straight
# from data_shrinker(df) it may not have that column yet - confirm the cell order.
df_testes = df_test.join(
    review
    , on="PlayKey"
    , how="left"
)

In [None]:
def add_review_data(df):
    """
    Left-join the cleaned video-review table onto `df` by PlayKey.

    The review table is loaded fresh through clean_review() on every call; every row of `df`
    is kept, with nulls in the review columns where there is no match.
    """
    review_table = clean_review()
    return df.join(review_table, on="PlayKey", how="left")

In [None]:
# Step-by-step run of the concussion tracking pipeline on the loaded NGS frame `df`,
# reassigning df_test after each stage so intermediate results can be inspected.
df_test, schema = data_shrinker(df)
df_test = column_corrector(df_test)
df_test = angle_corrector(df_test)
df_test = body_builder_conc(df_test)
df_test = velocity_calculator(df_test)
df_test = impulse_calculator(df_test)
df_test = add_review_data(df_test)
df_test.head()

In [None]:
# Same stages as the step-by-step cell above, expressed as a single .pipe() chain
group_df, schema = data_shrinker(df)
group_df = (group_df
                .pipe(column_corrector)
                .pipe(angle_corrector)
                .pipe(body_builder_conc)
                .pipe(velocity_calculator)                
                .pipe(impulse_calculator)
                .pipe(add_review_data))

In [None]:
group_df.head()

In [None]:
# for file in os.listdir(source_dir):
#     if file.startswith("NGS-"):
#         file_path = os.path.join(source_dir, file)
#         output_dir = "F:/Data/Processing_data/concussion_output/"

#         # Read the CSV into polars DF
#         df = pl.read_csv(file_path)
#         df = (df
#               .pipe(column_corrector)
#                 .pipe(angle_corrector)
#                 .pipe(body_builder_conc)
#                 .pipe(velocity_calculator)                
#                 .pipe(impulse_calculator)
#                 .pipe(add_review_data))
        
#         output_file_path = os.path.join(output_dir, file.replace(".csv", ".parquet"))

#         df.write_parquet(output_file_path)

#         print(f"Processed and saved: {output_file_path}")

# print("For fuck's sake that took a while. Finally done processing and saving the concussion files.")

In [None]:
# def process_and_save_concussion_data():

#     for file in os.listdir(source_dir):
#         if file.startswith("NGS-"):
#             file_path = os.path.join(source_dir, file)
#             output_dir = "F:/Data/Processing_data/concussion_output/"

#             # Read the CSV into polars DF

#             df = pl.read_csv(file_path)
#             df = (df
#                 .pipe(column_corrector)
#                     .pipe(angle_corrector)
#                     .pipe(body_builder_conc)
#                     .pipe(velocity_calculator)                
#                     .pipe(impulse_calculator))
            
#             output_file_path = os.path.join(output_dir, file.replace(".csv", ".parquet"))

#             df.write_parquet(output_file_path)

#             print(f"Processed and saved: {output_file_path}")

#     print("For fuck's sake that took a while. Finally done processing and saving the concussion files.")

In [1]:
from QuantitativeCleaner import * 



In [2]:

process_and_save_concussion_data()

Memory usage of dataframe is 87.43 MB
Memory usage after optimization is: 35.77 MB
Decreased by 59.1%


  df = df.with_columns(pl.col(col).cast(pl.Categorical))


Processed and saved: F:/Data/Processing_data/concussion_output/NGS-2016-post.parquet
Memory usage of dataframe is 95.19 MB
Memory usage after optimization is: 38.49 MB
Decreased by 59.6%
Processed and saved: F:/Data/Processing_data/concussion_output/NGS-2016-pre.parquet
Memory usage of dataframe is 792.55 MB
Memory usage after optimization is: 328.82 MB
Decreased by 58.5%
Processed and saved: F:/Data/Processing_data/concussion_output/NGS-2016-reg-wk1-6.parquet
Memory usage of dataframe is 690.97 MB
Memory usage after optimization is: 285.94 MB
Decreased by 58.6%
Processed and saved: F:/Data/Processing_data/concussion_output/NGS-2016-reg-wk13-17.parquet
Memory usage of dataframe is 760.78 MB
Memory usage after optimization is: 314.28 MB
Decreased by 58.7%
Processed and saved: F:/Data/Processing_data/concussion_output/NGS-2016-reg-wk7-12.parquet
Memory usage of dataframe is 94.12 MB
Memory usage after optimization is: 38.95 MB
Decreased by 58.6%
Processed and saved: F:/Data/Processing_da

# Combine Tracking Data

To do this, the columns must be the same in both the concussion and the injury tracking data. I will create a separate processing step for this so that the two outputs share the same columns.

In [3]:
import polars as pl
df = pl.read_parquet("F:/Data/Processing_data/concussion_output/NGS-2017-reg-wk13-17.parquet").head(1000)

In [4]:
df.head()

PlayKey,x,y,o,dir,GSISID,time,Angle_Diff,Position,Height_m,Weight_kg,Chest_rad_m,Displacement,Speed,vx,vy,omega_dir,omega_o,omega_diff,px,py,moment,moment_upper,p_magnitude,L_dir,L_diff,Jx,Jy,J_magnitude,torque,torque_internal
str,f32,f32,f32,f32,i32,f32,f32,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
"""19714-585-120""",68.160004,10.81,-36.779999,-109.649994,19714,0.0,72.870003,"""P""",1.88,97.519997,0.191,,,,,,,,,,0.296469,0.207528,,,,,,,,
"""19714-585-120""",68.07,10.87,-39.009995,-111.0,19714,0.1,71.989998,"""P""",1.88,97.519997,0.191,0.10817,1.081695,-0.90004,0.599995,-0.235621,-0.389208,0.153587,-87.771866,58.511478,0.296469,0.207528,105.486931,-0.069854,0.031874,,,,,
"""19714-585-120""",68.040001,10.89,-40.610001,-109.609985,19714,0.2,69.0,"""P""",1.88,97.519997,0.191,0.036055,0.360547,-0.299988,0.200005,0.242603,-0.279254,0.521857,-29.254808,19.504446,0.296469,0.207528,35.160591,0.071924,0.1083,58.517059,-39.007034,70.326347,1.417784,0.764264
"""19714-585-120""",68.010002,10.91,-42.110001,-107.549988,19714,0.3,65.440002,"""P""",1.88,97.519997,0.191,0.036054,0.360542,-0.299988,0.199995,0.359538,-0.261799,0.621337,-29.254808,19.503515,0.296469,0.207528,35.160076,0.106592,0.128945,0.0,-0.000931,0.000931,0.346675,0.206449
"""19714-585-120""",68.129997,10.83,-36.800003,-110.670013,19714,0.4,73.870003,"""P""",1.88,97.519997,0.191,0.144218,1.442179,1.199951,-0.799999,-0.544547,0.926769,1.471316,117.019234,-78.015923,0.296469,0.207528,140.641327,-0.161441,0.30534,146.274048,-97.51944,175.801422,-2.68033,1.763946


In [7]:
review = clean_review()

Memory usage of dataframe is 0.00 MB
Memory usage after optimization is: 0.00 MB
Decreased by 36.9%


In [10]:

def add_review_data(df, review):
    """
    Inner-join an already-cleaned review table onto `df` by PlayKey.

    Only rows of `df` whose PlayKey also appears in `review` are kept. This definition
    replaces the earlier one-argument add_review_data in this notebook, which loaded the
    review table itself and used a left join.
    """
    reviewed_rows = df.join(review, on="PlayKey", how="inner")
    return reviewed_rows

In [11]:
add_review_data(df, review)

PlayKey,x,y,o,dir,GSISID,time,Angle_Diff,Position,Height_m,Weight_kg,Chest_rad_m,Displacement,Speed,vx,vy,omega_dir,omega_o,omega_diff,px,py,moment,moment_upper,p_magnitude,L_dir,L_diff,Jx,Jy,J_magnitude,torque,torque_internal,Player_Activity_Derived,Primary_Impact_Type,Primary_Partner_GSISID,Primary_Partner_Activity_Derived
str,f32,f32,f32,f32,i32,i32,f32,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,cat,cat,str,cat


In [12]:
len(review)

37

In [38]:
def create_concussion_review_df(review, ngs_dir="F:/Data/Processing_data/concussion_output/"):
    """
    Create a new DataFrame from multiple Parquet files, including only rows where the PlayKey
    matches a reviewed (concussed) player or that player's primary partner on the same play.

    Parameters
    ----------
    review : polars.DataFrame
        Cleaned review table with at least the string columns 'PlayKey' and
        'Primary_Partner_GSISID'.
    ngs_dir : str, optional
        Directory containing the processed "NGS-*" parquet files. Defaults to the
        concussion output directory.

    Returns
    -------
    polars.DataFrame
        Tracking rows for the selected PlayKeys, left-joined with the review columns plus the
        added 'OpponentKey'. Rows belonging to the partner carry nulls in the review columns
        because the review table is keyed by the reviewed player's PlayKey.
    """
    import polars as pl # type: ignore
    import os

    # OpponentKey = partner GSISID + the "-GameKey-PlayID" tail of the player's own PlayKey.
    # str.slice(5) drops the first five characters, i.e. it assumes a five-character GSISID
    # prefix in PlayKey - TODO confirm for all players.
    review = review.with_columns(
        (pl.col("Primary_Partner_GSISID") + pl.col("PlayKey").str.slice(5)).alias("OpponentKey")
        )

    playkey_list = review["PlayKey"].to_list()

    # Keep an OpponentKey only when the partner GSISID is numeric. Non-numeric partners
    # (e.g. "Unknown") would yield a nonsense PlayKey. The key length is not a reliable test:
    # valid keys grow with the number of digits in GameKey and PlayID.
    partner_ids = review["Primary_Partner_GSISID"].to_list()
    opponent_keys = review["OpponentKey"].to_list()
    opponentkey_list = [
        key
        for partner, key in zip(partner_ids, opponent_keys)
        if key is not None and partner is not None and str(partner).strip().isdigit()
    ]

    # Combine both lists
    combined_keys = playkey_list + opponentkey_list

    # Collect the matching rows from every NGS parquet file in the directory
    dataframes = []
    for file in os.listdir(ngs_dir):
        if file.startswith("NGS-"):
            file_path = os.path.join(ngs_dir, file)

            df = pl.read_parquet(file_path)

            # Filter based on matching PlayKey values
            dataframes.append(df.filter(pl.col('PlayKey').is_in(combined_keys)))

    combined_df = pl.concat(dataframes)

    combined_df = combined_df.join(
        review
        , on='PlayKey'
        , how = 'left'
    )

    return combined_df




In [36]:
review = clean_review()

Memory usage of dataframe is 0.00 MB
Memory usage after optimization is: 0.00 MB
Decreased by 36.9%


In [39]:
cf = create_concussion_review_df(review)

In [40]:
cf.head()

PlayKey,x,y,o,dir,GSISID,time,Angle_Diff,Position,Height_m,Weight_kg,Chest_rad_m,Displacement,Speed,vx,vy,omega_dir,omega_o,omega_diff,px,py,moment,moment_upper,p_magnitude,L_dir,L_diff,Jx,Jy,J_magnitude,torque,torque_internal,Player_Activity_Derived,Primary_Impact_Type,Primary_Partner_GSISID,Primary_Partner_Activity_Derived,OpponentKey
str,f32,f32,f32,f32,i32,i32,f32,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,cat,cat,str,cat,str
"""31023-29-538""",32.950001,38.700001,-117.579987,85.380005,31023,0,157.039993,"""WR""",1.88,90.699997,0.191,,,,,,,,,,0.275736,0.193015,,,,,,,,,"""Tackling""","""Helmet-to-body""","""31941""","""Tackled""","""31941-29-538"""
"""31023-29-538""",32.98,38.720001,-119.970001,105.070007,31023,0,134.960007,"""WR""",1.88,90.699997,0.191,0.036055,0.360547,0.299988,0.200005,3.436552,-0.417137,3.853689,27.208893,18.140415,0.275736,0.193015,32.70166,0.94758,0.743819,,,,,,"""Tackling""","""Helmet-to-body""","""31941""","""Tackled""","""31941-29-538"""
"""31023-29-538""",32.98,38.709999,-122.320007,90.539978,31023,0,147.139999,"""WR""",1.88,90.699997,0.191,0.010002,0.100021,0.0,-0.100021,-2.535969,-0.410154,2.125815,0.0,-9.071938,0.275736,0.193015,9.071938,-0.699257,0.410314,-27.208893,-27.212353,38.481632,-16.468365,-3.335054,"""Tackling""","""Helmet-to-body""","""31941""","""Tackled""","""31941-29-538"""
"""31023-29-538""",32.93,38.669998,-124.540009,48.880005,31023,0,173.419998,"""WR""",1.88,90.699997,0.191,0.064031,0.640312,-0.499992,-0.400009,-7.271037,-0.387464,6.883573,-45.349308,-36.28083,0.275736,0.193015,58.076317,-2.004883,1.328632,-45.349308,-27.208893,52.885574,-13.056264,9.183182,"""Tackling""","""Helmet-to-body""","""31941""","""Tackled""","""31941-29-538"""
"""31023-29-538""",32.900002,38.619999,-126.799988,40.029999,31023,0,166.830002,"""WR""",1.88,90.699997,0.191,0.058308,0.583082,-0.299988,-0.499992,-1.544617,-0.39444,1.150178,-27.208893,-45.349308,0.275736,0.193015,52.885574,-0.425906,0.222001,18.140415,-9.068478,20.280827,15.789774,-11.066308,"""Tackling""","""Helmet-to-body""","""31941""","""Tackled""","""31941-29-538"""


In [42]:
cf.filter(pl.col("PlayKey")=="31941-29-538")

PlayKey,x,y,o,dir,GSISID,time,Angle_Diff,Position,Height_m,Weight_kg,Chest_rad_m,Displacement,Speed,vx,vy,omega_dir,omega_o,omega_diff,px,py,moment,moment_upper,p_magnitude,L_dir,L_diff,Jx,Jy,J_magnitude,torque,torque_internal,Player_Activity_Derived,Primary_Impact_Type,Primary_Partner_GSISID,Primary_Partner_Activity_Derived,OpponentKey
str,f32,f32,f32,f32,i32,i32,f32,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,cat,cat,str,cat,str
"""31941-29-538""",18.77,38.830002,-7.649994,60.259995,31941,0,67.910004,"""WR""",1.88,90.699997,0.191,,,,,,,,,,0.275736,0.193015,,,,,,,,,,,,,
"""31941-29-538""",18.83,38.700001,-8.690002,60.110001,31941,0,68.800003,"""WR""",1.88,90.699997,0.191,0.143179,1.43179,0.599995,-1.300011,-0.02618,-0.181516,0.155336,54.419514,-117.910965,0.275736,0.193015,129.863312,-0.007219,0.029982,,,,,,,,,,
"""31941-29-538""",18.9,38.59,-9.540009,59.820007,31941,0,69.360001,"""WR""",1.88,90.699997,0.191,0.130384,1.303844,0.699997,-1.100006,-0.050613,-0.148354,0.097741,63.489719,-99.770554,0.275736,0.193015,118.258644,-0.013956,0.018866,9.070206,18.140411,20.281595,-0.067371,-0.111166,,,,,
"""31941-29-538""",19.0,38.48,-10.449997,59.550003,31941,0,70.0,"""WR""",1.88,90.699997,0.191,0.148661,1.486614,1.000004,-1.100006,-0.047125,-0.158823,0.111699,90.70034,-99.770554,0.275736,0.193015,134.835876,-0.012994,0.021559,27.210621,0.0,27.210621,0.009618,0.026939,,,,,
"""31941-29-538""",19.09,38.380001,-11.399994,58.889999,31941,0,70.290001,"""WR""",1.88,90.699997,0.191,0.134535,1.345352,0.900002,-0.999985,-0.115193,-0.165806,0.050612,81.630135,-90.698616,0.275736,0.193015,122.023438,-0.031763,0.009769,-9.070206,9.071938,12.828432,-0.187689,-0.117905,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""31941-29-538""",35.98,33.360001,-64.160004,-112.910004,31941,32,48.75,"""WR""",1.88,90.699997,0.191,1.392049,13.920492,-5.699997,12.700005,-2.771585,2.961823,5.733408,-516.989685,1151.890381,0.275736,0.193015,1262.588501,-0.764225,1.106633,-11954.257812,-2648.439941,12244.121094,69.097771,-18.235016,,,,,
"""31941-29-538""",21.18,42.93,105.580017,-169.769989,31941,32,84.650002,"""WR""",1.88,90.699997,0.191,17.624554,176.245544,-148.0,95.699997,-9.923939,29.625225,39.549164,-13423.599609,8679.989258,0.275736,0.193015,15985.469727,-2.736383,7.633578,-12906.610352,7528.098633,14941.648438,-19.721582,65.269455,,,,,
"""31941-29-538""",20.01,42.91,120.039978,176.529999,31941,32,56.490002,"""WR""",1.88,90.699997,0.191,1.170171,11.70171,-11.700001,-0.200005,60.44075,2.523739,57.917011,-1061.190063,-18.140415,0.275736,0.193015,1061.345093,16.665665,11.178846,12362.40918,-8698.129883,15115.774414,194.020477,35.452682,,,,,
"""31941-29-538""",36.740002,31.18,-62.479996,-165.119995,31941,32,102.639999,"""WR""",1.88,90.699997,0.191,20.43247,204.324707,167.300018,-117.299995,-59.629177,-31.855745,27.773432,15174.111328,-10639.109375,0.275736,0.193015,18532.25,-16.441885,5.360686,16235.301758,-10620.96875,19400.773438,-331.0755,-58.181602,,,,,


# Concussion Opponent Tracking Data with Player Data 

In [5]:
from QuantitativeCleaner import *

In [6]:
review = clean_review()

Memory usage of dataframe is 0.00 MB
Memory usage after optimization is: 0.00 MB
Decreased by 36.9%


In [7]:
create_concussion_review_df(review)

Processed and saved: F:/Data/Processing_data/OpponentPlays.parquet


In [5]:
import polars as pl

In [8]:
pl.read_parquet("F:/Data/Processing_data/OpponentPlays.parquet").head()

PlayKey,x,y,o,dir,GSISID,time,Angle_Diff,Position,Height_m,Weight_kg,Chest_rad_m,Displacement,Speed,vx,vy,omega_dir,omega_o,omega_diff,px,py,moment,moment_upper,p_magnitude,L_dir,L_diff,Jx,Jy,J_magnitude,torque,torque_internal,Player_Activity_Derived,Primary_Impact_Type,Primary_Partner_GSISID,Primary_Partner_Activity_Derived,OpponentKey
str,f32,f32,f32,f32,i32,f32,f32,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,cat,cat,str,cat,str
"""31023-29-538""",32.950001,38.700001,-117.579987,85.380005,31023,0.0,157.039993,"""WR""",1.88,90.699997,0.191,,,,,,,,,,0.275736,0.193015,,,,,,,,,"""Tackling""","""Helmet-to-body""","""31941""","""Tackled""","""31941-29-538"""
"""31023-29-538""",32.98,38.720001,-119.970001,105.070007,31023,0.1,134.960007,"""WR""",1.88,90.699997,0.191,0.036055,0.360547,0.299988,0.200005,3.436552,-0.417137,3.853689,27.208893,18.140415,0.275736,0.193015,32.70166,0.94758,0.743819,,,,,,"""Tackling""","""Helmet-to-body""","""31941""","""Tackled""","""31941-29-538"""
"""31023-29-538""",32.98,38.709999,-122.320007,90.539978,31023,0.2,147.139999,"""WR""",1.88,90.699997,0.191,0.010002,0.100021,0.0,-0.100021,-2.535969,-0.410154,2.125815,0.0,-9.071938,0.275736,0.193015,9.071938,-0.699257,0.410314,-27.208893,-27.212353,38.481632,-16.468365,-3.335054,"""Tackling""","""Helmet-to-body""","""31941""","""Tackled""","""31941-29-538"""
"""31023-29-538""",32.93,38.669998,-124.540009,48.880005,31023,0.3,173.419998,"""WR""",1.88,90.699997,0.191,0.064031,0.640312,-0.499992,-0.400009,-7.271037,-0.387464,6.883573,-45.349308,-36.28083,0.275736,0.193015,58.076317,-2.004883,1.328632,-45.349308,-27.208893,52.885574,-13.056264,9.183182,"""Tackling""","""Helmet-to-body""","""31941""","""Tackled""","""31941-29-538"""
"""31023-29-538""",32.900002,38.619999,-126.799988,40.029999,31023,0.4,166.830002,"""WR""",1.88,90.699997,0.191,0.058308,0.583082,-0.299988,-0.499992,-1.544617,-0.39444,1.150178,-27.208893,-45.349308,0.275736,0.193015,52.885574,-0.425906,0.222001,18.140415,-9.068478,20.280827,15.789774,-11.066308,"""Tackling""","""Helmet-to-body""","""31941""","""Tackled""","""31941-29-538"""


# Concussion Summary 

In [7]:
qual_path = "F:/Data/Processing_data/QualitativeConcussions.parquet"
qual = pl.read_parquet(qual_path)
qual.head()

PlayKey,GSISID,GameKey,PlayID,Position,Number,Role,Game_Date,YardLine,Quarter,Play_Type,Poss_Team,Game_Site,Start_Time,HomeTeamCode,VisitTeamCode,StadiumType,FieldType,Weather,Temperature,Player_Activity_Derived,Primary_Impact_Type,Primary_Partner_Activity_Derived,Primary_Partner_GSISID,OpponentKey,IsInjured,Home_Score,Visiting_Score,Score_Difference
str,i32,i16,i16,cat,cat,cat,cat,cat,i8,cat,cat,cat,cat,cat,cat,cat,cat,cat,f32,cat,cat,cat,i32,cat,i8,i8,i8,i8
"""32189-12-3632""",32189,12,3632,"""LB""","""97""","""PDR2""","""08/11/2016""","""NE 24""",4,"""Punt""","""NE""","""Foxborough""","""19:30""","""NE""","""NO""","""Outdoor""","""Synthetic""","""Hazy/Fog""",84.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,24,22,2
"""31208-59-574""",31208,59,574,"""DE""","""57""","""PDL3""","""09/01/2016""","""BLT 11""",1,"""Punt""","""BLT""","""New Orleans""","""19:00""","""NO""","""BLT""","""Indoor""","""Synthetic""","""Cloudy""",92.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,3,0,3
"""28932-619-3026""",28932,619,3026,"""ILB""","""54""","""PLG""","""12/17/2017""","""PIT 34""",4,"""Punt""","""PIT""","""Pittsburgh""","""16:25""","""PIT""","""NE""","""Outdoor""","""Natural""","""Cloudy""",42.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,24,16,8
"""31940-286-2097""",31940,286,2097,"""DE""","""96""","""PDL4""","""12/18/2016""","""DEN 33""",3,"""Punt""","""DEN""","""Denver""","""14:25""","""DEN""","""NE""","""Outdoor""","""Natural""","""Clear""",18.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,3,10,-7
"""32059-533-2862""",32059,533,2862,"""CB""","""26""","""VL""","""11/12/2017""","""BUF 30""",3,"""Punt""","""BUF""","""Orchard Park""","""13:00""","""BUF""","""NO""","""Outdoor""","""Synthetic""","""Cloudy""",42.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,3,30,-27


Need to drop GSISID, GameKey, PlayID, Number, Game_Date, YardLine, Quarter, Start_Time from Qualitative when merging into Summary

In [9]:
qual.drop(['GSISID', 'GameKey', 'PlayID', 'Number', 'Game_Date', 'YardLine', 'Quarter', 'Start_Time']).head()

PlayKey,Position,Role,Play_Type,Poss_Team,Game_Site,HomeTeamCode,VisitTeamCode,StadiumType,FieldType,Weather,Temperature,Player_Activity_Derived,Primary_Impact_Type,Primary_Partner_Activity_Derived,Primary_Partner_GSISID,OpponentKey,IsInjured,Home_Score,Visiting_Score,Score_Difference
str,cat,cat,cat,cat,cat,cat,cat,cat,cat,cat,f32,cat,cat,cat,i32,cat,i8,i8,i8,i8
"""32189-12-3632""","""LB""","""PDR2""","""Punt""","""NE""","""Foxborough""","""NE""","""NO""","""Outdoor""","""Synthetic""","""Hazy/Fog""",84.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,24,22,2
"""31208-59-574""","""DE""","""PDL3""","""Punt""","""BLT""","""New Orleans""","""NO""","""BLT""","""Indoor""","""Synthetic""","""Cloudy""",92.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,3,0,3
"""28932-619-3026""","""ILB""","""PLG""","""Punt""","""PIT""","""Pittsburgh""","""PIT""","""NE""","""Outdoor""","""Natural""","""Cloudy""",42.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,24,16,8
"""31940-286-2097""","""DE""","""PDL4""","""Punt""","""DEN""","""Denver""","""DEN""","""NE""","""Outdoor""","""Natural""","""Clear""",18.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,3,10,-7
"""32059-533-2862""","""CB""","""VL""","""Punt""","""BUF""","""Orchard Park""","""BUF""","""NO""","""Outdoor""","""Synthetic""","""Cloudy""",42.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,3,30,-27


In [15]:
def collect_concussion_summaries(group_dir="F:/Data/Processing_data/concussion_output"):
    """
    Build one summary table from every "NGS-*" parquet file in `group_dir`.

    Each file is read and passed through summary_calculator; the per-file summaries are then
    concatenated into a single DataFrame, which is returned.
    """
    import polars as pl

    import os

    # Paths of the tracking files to summarise, in directory-listing order
    ngs_paths = [
        os.path.join(group_dir, name)
        for name in os.listdir(group_dir)
        if name.startswith("NGS-")
    ]

    # Read and summarise one file at a time
    per_file_summaries = [summary_calculator(pl.read_parquet(path)) for path in ngs_paths]

    return pl.concat(per_file_summaries)

In [16]:
def concussion_summary_maker(group_dir="F:/Data/Processing_data/concussion_output"):
    """
    Combine the qualitative concussion table with the quantitative tracking summaries.

    The summaries come from collect_concussion_summaries(group_dir); the qualitative table is
    read from QualitativeConcussions.parquet with its identifier and date/time columns removed.
    The two are inner-joined on PlayKey and written to Full_Summary_Concussions.parquet.
    """
    import polars as pl
    pl.enable_string_cache()

    # Input (qualitative table) and output (joined summary) locations
    qual_path = "F:/Data/Processing_data/QualitativeConcussions.parquet"
    qual_quant_path = "F:/Data/Processing_data/Full_Summary_Concussions.parquet"

    # Columns of the qualitative table that are not carried into the summary
    columns_to_drop = ['GSISID', 'GameKey', 'PlayID', 'Number', 'Game_Date', 'YardLine', 'Quarter', 'Start_Time']

    quant = collect_concussion_summaries(group_dir)
    quals = pl.read_parquet(qual_path).drop(columns_to_drop)

    # Only PlayKeys present in both tables survive the inner join
    quals.join(quant, on="PlayKey", how="inner").write_parquet(qual_quant_path)
    print(f"Saved the full summary with qualitative and quantitative features at {qual_quant_path}")    

In [17]:
def summary_calculator(df):
    """
    Collects distance and displacement, plus the maximum and mean of each tracked parameter,
    for every PlayKey and outputs a quantitative summary table that can be joined to the
    qualitative table for machine learning.

    Parameters
    ----------
    df : pl.DataFrame
        Tracking rows containing PlayKey, Position, Displacement, x, y, Angle_Diff, Speed,
        J_magnitude, torque and torque_internal. The start and end coordinates are taken from
        the first and last row of each PlayKey, so rows are assumed to already be in time
        order within a play (TODO confirm against the upstream writer).

    Returns
    -------
    pl.DataFrame
        One row per PlayKey, sorted by PlayKey, with Position, Distance (summed per-step
        Displacement), Displacement (straight-line start-to-end length), Path_Diff
        (Distance - Displacement) and the Max_/Mean_ columns for angle difference, speed,
        impulse, torque and internal torque.
    """
    import polars as pl # type: ignore

    # Aggregate once per play instead of evaluating a window over every tracking row and
    # de-duplicating afterwards. This also pins Position to the play's first row rather than
    # to whichever row unique() happened to keep.
    result = df.group_by("PlayKey").agg([
        pl.col("Position").first()
        , pl.col("Displacement").sum().alias("Distance")
        , pl.col("x").first().alias("start_x")
        , pl.col("y").first().alias("start_y")
        , pl.col("x").last().alias("end_x")
        , pl.col("y").last().alias("end_y")
        , pl.col("Angle_Diff").max().alias("Max_Angle_Diff")
        , pl.col("Angle_Diff").mean().alias("Mean_Angle_Diff")
        , pl.col("Speed").max().alias("Max_Speed")
        , pl.col("Speed").mean().alias("Mean_Speed")
        , pl.col("J_magnitude").max().alias("Max_Impulse")
        , pl.col("J_magnitude").mean().alias("Mean_Impulse")
        , pl.col("torque").max().alias("Max_Torque")
        , pl.col("torque").mean().alias("Mean_Torque")
        , pl.col("torque_internal").max().alias("Max_Int_Torque")
        , pl.col("torque_internal").mean().alias("Mean_Int_Torque")
        ])


    # Calculate the displacement (straight-line distance from start to end point),
    # then how much longer the travelled path was than that straight line
    result = result.with_columns([
        (((pl.col("end_x") - pl.col("start_x"))**2 + 
          (pl.col("end_y") - pl.col("start_y"))**2)**0.5)
        .alias("Displacement")
        ]).with_columns([
            (pl.col("Distance") - pl.col("Displacement")).alias("Path_Diff")
        ])

     
    # Select only the required columns (drops the start/end helper coordinates)
    result = result.select([
        'PlayKey'
        , 'Position'
        , 'Distance'
        , 'Displacement'
        , 'Path_Diff'
        , 'Max_Angle_Diff'
        , 'Mean_Angle_Diff'
        , 'Max_Speed'
        , 'Mean_Speed'
        , 'Max_Impulse'
        , 'Mean_Impulse'
        , 'Max_Torque'
        , 'Mean_Torque'
        , 'Max_Int_Torque'
        , 'Mean_Int_Torque'
      
    ]).sort("PlayKey")


    return result

In [18]:
# Summarise the concussion tracking output and join it to the qualitative table.
# NOTE: this rebinds the notebook-level group_dir, which was set to the injury_output
# directory near the top of the notebook.
group_dir="F:/Data/Processing_data/concussion_output"

concussion_summary_maker(group_dir)

Saved the full summary with qualitative and quantitative features at F:/Data/Processing_data/Full_Summary_Concussions.parquet


In [19]:
# Preview the saved summary. Position_right is the Position column carried over from the
# quantitative side of the join.
pl.read_parquet("F:/Data/Processing_data/Full_Summary_Concussions.parquet").head()

PlayKey,Position,Role,Play_Type,Poss_Team,Game_Site,HomeTeamCode,VisitTeamCode,StadiumType,FieldType,Weather,Temperature,Player_Activity_Derived,Primary_Impact_Type,Primary_Partner_Activity_Derived,Primary_Partner_GSISID,OpponentKey,IsInjured,Home_Score,Visiting_Score,Score_Difference,Position_right,Distance,Displacement,Path_Diff,Max_Angle_Diff,Mean_Angle_Diff,Max_Speed,Mean_Speed,Max_Impulse,Mean_Impulse,Max_Torque,Mean_Torque,Max_Int_Torque,Mean_Int_Torque
str,cat,cat,cat,cat,cat,cat,cat,cat,cat,cat,f32,cat,cat,cat,i32,cat,i8,i8,i8,i8,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
"""28932-619-3026""","""ILB""","""PLG""","""Punt""","""PIT""","""Pittsburgh""","""PIT""","""NE""","""Outdoor""","""Natural""","""Cloudy""",42.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,24,16,8,"""ILB""",1059.546265,4.110486,1055.435791,179.690002,81.73243,438.940918,33.743496,87072.65625,7072.790527,181.551453,-0.000373,136.719818,0.00154
"""31940-286-2097""","""DE""","""PDL4""","""Punt""","""DEN""","""Denver""","""DEN""","""NE""","""Outdoor""","""Natural""","""Clear""",18.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,3,10,-7,"""DE""",657.908997,40.652538,617.25647,179.880005,88.194229,433.154724,11.481838,52065.191406,2765.052979,229.050446,0.0,188.242416,0.0
"""32059-533-2862""","""CB""","""VL""","""Punt""","""BUF""","""Orchard Park""","""BUF""","""NO""","""Outdoor""","""Synthetic""","""Cloudy""",42.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,3,30,-27,"""CB""",794.471741,39.591827,754.879883,177.380005,93.184357,415.684631,21.647732,36330.835938,3794.362793,161.900818,0.0,122.014114,0.0
"""32012-337-411""","""WR""","""PDR3""","""Punt""","""BUF""","""Orchard Park""","""BUF""","""MIN""","""Outdoor""","""Synthetic""","""Cloudy""",82.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,0,0,0,"""WR""",1515.785522,42.873726,1472.911743,179.979996,53.64418,479.613007,54.721508,43526.503906,9383.631836,337.861084,0.012729,146.058716,0.007385
"""32725-550-2460""","""TE""","""PRW""","""Punt""","""HST""","""Houston""","""HST""","""ARZ""","""Indoor""","""Synthetic""","""Clear""",60.0,"""No_Injury""","""No_Injury""","""No_Injury""",0,"""None""",0,17,14,3,"""TE""",2147.256348,26.946266,2120.310059,179.970001,83.138863,532.389038,38.275501,106859.875,8476.088867,434.9646,0.101166,179.966187,0.034888


# Create a tracking dataset for Tableau for paths and injuries

In [9]:
# Show the OpponentPlays tracking rows that have no OpponentKey.
pl.read_parquet("F:/Data/Processing_data/OpponentPlays.parquet").filter(pl.col("OpponentKey").is_null())
# cf.filter(pl.col("PlayKey")=="31941-29-538")

PlayKey,x,y,o,dir,GSISID,time,Angle_Diff,Position,Height_m,Weight_kg,Chest_rad_m,Displacement,Speed,vx,vy,omega_dir,omega_o,omega_diff,px,py,moment,moment_upper,p_magnitude,L_dir,L_diff,Jx,Jy,J_magnitude,torque,torque_internal,Player_Activity_Derived,Primary_Impact_Type,Primary_Partner_GSISID,Primary_Partner_Activity_Derived,OpponentKey
str,f32,f32,f32,f32,i32,f32,f32,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,cat,cat,str,cat,str
"""31941-29-538""",18.77,38.830002,-7.649994,60.259995,31941,0.0,67.910004,"""WR""",1.88,90.699997,0.191,,,,,,,,,,0.275736,0.193015,,,,,,,,,,,,,
"""31941-29-538""",18.83,38.700001,-8.690002,60.110001,31941,0.1,68.800003,"""WR""",1.88,90.699997,0.191,0.143179,1.43179,0.599995,-1.300011,-0.02618,-0.181516,0.155336,54.419514,-117.910965,0.275736,0.193015,129.863312,-0.007219,0.029982,,,,,,,,,,
"""31941-29-538""",18.9,38.59,-9.540009,59.820007,31941,0.2,69.360001,"""WR""",1.88,90.699997,0.191,0.130384,1.303844,0.699997,-1.100006,-0.050613,-0.148354,0.097741,63.489719,-99.770554,0.275736,0.193015,118.258644,-0.013956,0.018866,9.070206,18.140411,20.281595,-0.067371,-0.111166,,,,,
"""31941-29-538""",19.0,38.48,-10.449997,59.550003,31941,0.3,70.0,"""WR""",1.88,90.699997,0.191,0.148661,1.486614,1.000004,-1.100006,-0.047125,-0.158823,0.111699,90.70034,-99.770554,0.275736,0.193015,134.835876,-0.012994,0.021559,27.210621,0.0,27.210621,0.009618,0.026939,,,,,
"""31941-29-538""",19.09,38.380001,-11.399994,58.889999,31941,0.4,70.290001,"""WR""",1.88,90.699997,0.191,0.134535,1.345352,0.900002,-0.999985,-0.115193,-0.165806,0.050612,81.630135,-90.698616,0.275736,0.193015,122.023438,-0.031763,0.009769,-9.070206,9.071938,12.828432,-0.187689,-0.117905,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""27060-506-1988""",34.84,30.34,-91.309998,179.299988,27060,44.900002,89.389999,"""FS""",1.84,95.900002,0.191,0.926121,9.261209,8.400002,-3.899994,6.834705,-0.205948,7.040653,805.560181,-374.00943,0.291544,0.204081,888.149902,1.992617,1.436862,7269.22168,16418.082031,17955.361328,15.631561,-83.280373,"""Tackled""","""Helmet-to-helmet""",,"""Tackling""",
"""27060-506-1988""",35.369999,30.59,-89.260002,-43.720001,27060,45.0,45.540001,"""FS""",1.84,95.900002,0.191,0.586002,5.860023,5.299988,2.5,-38.924332,0.357792,39.282124,508.268829,239.75,0.291544,0.204081,561.976196,-11.348155,8.016727,-297.291351,613.759399,681.969788,-133.40773,65.798653,"""Tackled""","""Helmet-to-helmet""",,"""Tackling""",
"""27060-506-1988""",38.360001,39.779999,-125.679993,-127.579987,27060,45.099998,1.9,"""FS""",1.84,95.900002,0.191,9.66417,96.641701,29.900017,91.899986,-14.636331,-6.356488,8.279842,2867.411621,8813.208984,0.291544,0.204081,9267.939453,-4.267134,1.689757,2359.142822,8573.458984,8892.117188,70.810211,-63.269707,"""Tackled""","""Helmet-to-helmet""",,"""Tackling""",
"""27060-506-1988""",36.759998,42.990002,-126.98999,-78.389999,27060,45.200001,48.599998,"""FS""",1.84,95.900002,0.191,3.586659,35.866592,-16.000023,32.100029,8.585273,-0.228636,8.81391,-1534.402222,3078.392822,0.291544,0.204081,3439.606445,2.502985,1.79875,-4401.813965,-5734.816406,7229.390625,67.701187,1.089929,"""Tackled""","""Helmet-to-helmet""",,"""Tackling""",


In [10]:
# Preview the injury tracking rows written by tracking_injuries.
pl.read_parquet("F:/Data/Processing_data/TrackingInjuries.parquet").head(3)

PlayKey,time,x,y,dir,o,Angle_Diff,Displacement,Speed,vx,vy,omega_dir,omega_o,omega_diff,Position,Height_m,Weight_kg,Chest_rad_m,px,py,moment,moment_upper,p_magnitude,L_dir,L_diff,Jx,Jy,J_magnitude,torque,torque_internal
str,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
"""39678-2-1""",0.0,70.110001,30.780001,21.980011,-9.050003,31.030001,,,,,,,,"""DE""",1.97,120.199997,0.191,,,0.365418,0.255793,,,,,,,,
"""39678-2-1""",0.1,70.139999,30.76,26.519989,-15.600006,42.119999,0.036055,0.360547,0.299988,-0.200005,0.792376,-1.143191,1.935567,"""DE""",1.97,120.199997,0.191,36.058533,-24.04055,0.365418,0.255793,43.337811,0.289548,0.495104,,,,,
"""39678-2-1""",0.2,70.18,30.73,30.559998,-21.419998,51.98,0.050001,0.500011,0.400009,-0.300007,0.705114,-1.01578,1.720895,"""DE""",1.97,120.199997,0.191,48.0811,-36.060825,0.365418,0.255793,60.101376,0.257661,0.440192,12.022568,-12.020275,17.000856,-0.318868,-0.549116


Next steps:
1. Reorder the columns in both tables
2. Add missing columns to the second table
3. Concatenate the tables

In [11]:
# Load the injury and concussion tracking tables into notebook-level frames for ad-hoc inspection.
injuries = pl.read_parquet("F:/Data/Processing_data/TrackingInjuries.parquet")
concussions = pl.read_parquet("F:/Data/Processing_data/OpponentPlays.parquet")

In [24]:
# Input paths for track_all_quant (defined below), which carries out the reorder /
# add-missing-columns / concatenate steps listed under "Next steps".
trackinginjuries_path = "F:/Data/Processing_data/TrackingInjuries.parquet"
opponentplays_path = "F:/Data/Processing_data/OpponentPlays.parquet"

def track_all_quant(trackinginjuries_path, opponentplays_path, output_path="F:/Data/Processing_data/All_Tracking.parquet"):
    """ 
    Concatenates the two tracking dataframes for Viz creation with all injuries. 
    This additionally adds an InjuryType column for concussions and other. 

    Parameters
    ----------
    trackinginjuries_path : str
        Parquet file with the injury tracking rows.
    opponentplays_path : str
        Parquet file with the concussion tracking rows.
    output_path : str
        Parquet file the combined table is written to.

    Returns
    -------
    None
        The combined table, sorted by PlayKey and time, is written to `output_path`
        and a confirmation is printed.
    """
    from DataHandler import data_loader
    import polars as pl #type: ignore

    # The two inputs come from separately written files and both hold categorical columns.
    # Enable the shared string cache here, as the other functions in this notebook do,
    # rather than relying on an earlier cell having switched it on.
    pl.enable_string_cache()

    body_part = data_loader('injuries').select(['PlayKey', 'BodyPart']).filter(pl.col("PlayKey").is_not_null())
    concussions = pl.read_parquet(opponentplays_path)
    injuries = pl.read_parquet(trackinginjuries_path)


    # Concussion rows are labelled by whether an opponent play is attached
    concussions = concussions.with_columns(
        pl.when(pl.col("OpponentKey").is_not_null())
            .then(pl.lit("Concussion"))
            .otherwise(pl.lit("No Injury"))
            .alias("InjuryType")
            )

    # Injury rows take the injured body part as their InjuryType.
    # NOTE(review): a PlayKey listed more than once in the injury file (e.g. one play with
    # two body parts) matches several rows here, so its tracking rows are repeated once per
    # body part. Confirm that this is wanted for the visualisation.
    injuries = injuries.join(
        body_part
        , on='PlayKey'
        , how='left' 
        ).rename({"BodyPart": "InjuryType"})

    common_columns = ["PlayKey"
                    , "time"
                    , "x"
                    , "y"
                    , "dir"
                    , "o"
                    , "Angle_Diff"
                    , "Displacement"
                    , "Speed"
                    , "vx"
                    , "vy"
                    , "omega_dir"
                    , "omega_o"
                    , "omega_diff"
                    , "Position"
                    , "Height_m"
                    , "Weight_kg"
                    , "Chest_rad_m"
                    , "px"
                    , "py"
                    , "moment"
                    , "moment_upper"
                    , "p_magnitude"
                    , "L_dir"
                    , "L_diff"
                    , "Jx"
                    , "Jy"
                    , "J_magnitude"
                    , "torque"
                    , "torque_internal"
                    , "InjuryType"
                    ]

    # Columns that exist only in the concussion table
    additional_columns = ["GSISID"
                        , "Player_Activity_Derived"
                        , "Primary_Impact_Type"
                        , "Primary_Partner_GSISID"
                        , "Primary_Partner_Activity_Derived"
                        , "OpponentKey"
                        ]

    concussions = concussions.select(common_columns + additional_columns)

    # Add the concussion-only columns to the injury table as nulls, cast to the concussion
    # dtypes so both frames have matching schemas for the vertical concat
    concussion_schema = concussions.schema
    missing_columns = [col for col in additional_columns if col not in injuries.columns]
    if missing_columns:
        injuries = injuries.with_columns([
            pl.lit(None).cast(concussion_schema[col]).alias(col)
            for col in missing_columns
        ])

    injuries = injuries.select(common_columns + additional_columns)

    combined_df = pl.concat([concussions, injuries])
    combined_df = combined_df.sort(["PlayKey", "time"])

    combined_df.write_parquet(output_path)

    print(f"Concatenated all Injury and Concussion tracking data to {output_path}")

In [25]:
# Build All_Tracking.parquet from the two tracking files.
track_all_quant(trackinginjuries_path, opponentplays_path)

Concatenated all Injury and Concussion tracking data to F:/Data/Processing_data/All_Tracking.parquet


In [26]:
# Inspect the combined table; injury rows hold nulls in the concussion-only columns.
pl.read_parquet("F:/Data/Processing_data/All_Tracking.parquet")

PlayKey,time,x,y,dir,o,Angle_Diff,Displacement,Speed,vx,vy,omega_dir,omega_o,omega_diff,Position,Height_m,Weight_kg,Chest_rad_m,px,py,moment,moment_upper,p_magnitude,L_dir,L_diff,Jx,Jy,J_magnitude,torque,torque_internal,InjuryType,GSISID,Player_Activity_Derived,Primary_Impact_Type,Primary_Partner_GSISID,Primary_Partner_Activity_Derived,OpponentKey
str,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,str,i32,cat,cat,str,cat,str
"""23564-266-2902""",0.0,87.07,12.2,-77.860001,-142.519989,64.660004,,,,,,,,"""RB""",1.79,95.300003,0.191,,,0.28972,0.202804,,,,,,,,,"""Concussion""",23564,"""Tackled""","""Helmet-to-body""","""31844""","""Tackling""","""31844-266-2902"""
"""23564-266-2902""",0.1,87.07,12.2,-77.739998,-146.390015,68.650002,0.0,0.0,0.0,0.0,0.020944,-0.675447,0.696391,"""RB""",1.79,95.300003,0.191,0.0,0.0,0.28972,0.202804,0.0,0.006068,0.141231,,,,,,"""Concussion""",23564,"""Tackled""","""Helmet-to-body""","""31844""","""Tackling""","""31844-266-2902"""
"""23564-266-2902""",0.2,87.07,12.2,-78.099998,-135.769989,57.669998,0.0,0.0,0.0,0.0,-0.062832,1.853545,1.916376,"""RB""",1.79,95.300003,0.191,0.0,0.0,0.28972,0.202804,0.0,-0.018204,0.388649,0.0,0.0,0.0,-0.242714,2.474179,"""Concussion""",23564,"""Tackled""","""Helmet-to-body""","""31844""","""Tackling""","""31844-266-2902"""
"""23564-266-2902""",0.3,87.07,12.2,-77.979996,-138.929993,60.950001,0.0,0.0,0.0,0.0,0.020945,-0.551524,0.572469,"""RB""",1.79,95.300003,0.191,0.0,0.0,0.28972,0.202804,0.0,0.006068,0.116099,0.0,0.0,0.0,0.242718,-2.725497,"""Concussion""",23564,"""Tackled""","""Helmet-to-body""","""31844""","""Tackling""","""31844-266-2902"""
"""23564-266-2902""",0.4,87.199997,12.3,-161.570007,0.0,161.570007,0.16401,1.640103,1.299973,1.000004,-14.58921,24.247858,38.837067,"""RB""",1.79,95.300003,0.191,123.88739,95.300369,0.28972,0.202804,156.301773,-4.226785,7.876311,123.88739,95.300369,156.301773,-42.328533,77.602127,"""Concussion""",23564,"""Tackled""","""Helmet-to-body""","""31844""","""Tackling""","""31844-266-2902"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""47813-8-19""",19.6,76.400002,25.01,-95.679993,-102.23999,6.56,0.372157,3.721568,-0.400009,3.700008,-0.167551,-0.289725,0.122174,"""CB""",1.82,87.400002,0.191,-34.9608,323.380737,0.265703,0.185992,325.265045,-0.044519,0.022723,-8.741867,8.741852,12.362856,-0.023182,0.162306,"""Ankle""",,,,,,
"""47813-8-19""",19.700001,76.360001,25.389999,-96.179993,-104.089996,7.91,0.382099,3.820987,-0.400009,3.799992,-0.087265,-0.322887,0.235622,"""CB""",1.82,87.400002,0.191,-34.9608,332.119263,0.265703,0.185992,333.954285,-0.023187,0.043824,0.0,8.738525,8.738525,0.213323,0.211004,"""Ankle""",,,,,,
"""47813-8-19""",19.799999,76.32,25.76,-96.230011,-104.089996,7.86,0.372157,3.721568,-0.400009,3.700008,-0.00873,0.0,0.00873,"""CB""",1.82,87.400002,0.191,-34.9608,323.380737,0.265703,0.185992,325.265045,-0.00232,0.001624,0.0,-8.738525,8.738525,0.20867,-0.422002,"""Ankle""",,,,,,
"""47813-8-19""",19.9,76.290001,26.15,-95.929993,-100.769989,4.84,0.391151,3.911514,-0.299988,3.899994,0.052363,0.57945,0.527087,"""CB""",1.82,87.400002,0.191,-26.218933,340.859467,0.265703,0.185992,341.866364,0.013913,0.098034,8.741867,17.478729,19.542933,0.162324,0.964106,"""Ankle""",,,,,,


In [30]:
# Load the full injury table to check how many records are tied to a specific play.
from DataHandler import data_loader
body_part = data_loader('injuries')

In [34]:
# Number of injury records that have a PlayKey.
len(body_part.filter(pl.col("PlayKey").is_not_null()))

77

In [49]:
# Keep only the join key and body part, for records with a known play.
body_part = body_part.select(['PlayKey', 'BodyPart']).filter(pl.col("PlayKey").is_not_null())

In [50]:
# Preview the PlayKey / BodyPart lookup.
body_part.head()

PlayKey,BodyPart
str,str
"""39873-4-32""","""Knee"""
"""46074-7-26""","""Knee"""
"""36557-1-70""","""Ankle"""
"""46646-3-30""","""Ankle"""
"""43532-5-69""","""Ankle"""


In [38]:
# Read the raw injury records straight from the source CSV.
inj = pl.read_csv("F:/Data/nfl-playing-surface-analytics/InjuryRecord.csv")

In [39]:
# Preview the raw injury records.
inj.head()

PlayerKey,GameID,PlayKey,BodyPart,Surface,DM_M1,DM_M7,DM_M28,DM_M42
i64,str,str,str,str,i64,i64,i64,i64
39873,"""39873-4""","""39873-4-32""","""Knee""","""Synthetic""",1,1,1,1
46074,"""46074-7""","""46074-7-26""","""Knee""","""Natural""",1,1,0,0
36557,"""36557-1""","""36557-1-70""","""Ankle""","""Synthetic""",1,1,1,1
46646,"""46646-3""","""46646-3-30""","""Ankle""","""Natural""",1,0,0,0
43532,"""43532-5""","""43532-5-69""","""Ankle""","""Synthetic""",1,1,1,1


In [42]:
# List the distinct GameIDs in sorted order.
inj["GameID"].unique().sort()

GameID
str
"""31070-3"""
"""31933-20"""
"""33337-2"""
"""33337-8"""
"""33474-19"""
…
"""47287-4"""
"""47307-10"""
"""47334-8"""
"""47382-3"""


In [43]:
# Find the games that appear more than once in the raw injury records (inj).

# Step 1: Count occurrences of each GameID
game_id_counts = inj.group_by("GameID").agg(pl.count("GameID").alias("count"))

# Step 2: Filter for GameIDs that appear more than once
non_unique_game_ids = game_id_counts.filter(pl.col("count") > 1)["GameID"]

# Step 3: Select rows from the original DataFrame where GameID is in the non-unique list
result = inj.filter(pl.col("GameID").is_in(non_unique_game_ids))

print(result)

shape: (2, 9)
┌───────────┬──────────┬─────────────┬──────────┬───┬───────┬───────┬────────┬────────┐
│ PlayerKey ┆ GameID   ┆ PlayKey     ┆ BodyPart ┆ … ┆ DM_M1 ┆ DM_M7 ┆ DM_M28 ┆ DM_M42 │
│ ---       ┆ ---      ┆ ---         ┆ ---      ┆   ┆ ---   ┆ ---   ┆ ---    ┆ ---    │
│ i64       ┆ str      ┆ str         ┆ str      ┆   ┆ i64   ┆ i64   ┆ i64    ┆ i64    │
╞═══════════╪══════════╪═════════════╪══════════╪═══╪═══════╪═══════╪════════╪════════╡
│ 47307     ┆ 47307-10 ┆ 47307-10-18 ┆ Knee     ┆ … ┆ 1     ┆ 1     ┆ 0      ┆ 0      │
│ 47307     ┆ 47307-10 ┆ 47307-10-18 ┆ Ankle    ┆ … ┆ 1     ┆ 1     ┆ 0      ┆ 0      │
└───────────┴──────────┴─────────────┴──────────┴───┴───────┴───────┴────────┴────────┘


In [48]:
# Number of injury records without a PlayKey.
# NOTE: the execution counts show this ran (In [48]) before body_part was narrowed to
# non-null PlayKeys (In [49]); run top to bottom after that cell, the difference is 0.
len(body_part) - len(body_part.filter(pl.col('PlayKey').is_not_null()))

28

There is one play where the player was injured in two places. In addition, 28 injuries were recorded for which the exact play during the game is unknown. The rest of the qualitative conditions are known for these injuries, but we won't know the path, since we don't know the play. These can be removed from this list, but I will need to create a PlayKey value by appending '-0' to the null PlayKeys in Injury. I'll have to fix the join by extracting the GameID for the qualitative data. 

In [46]:
# Flag PlayKeys that end in "-0", the placeholder suffix proposed for injuries whose play is unknown.
# str.slice(-1) keeps only the final character, which can never equal the two-character "-0",
# so the suffix is tested with str.ends_with instead.
mask = pl.col("PlayKey").str.ends_with("-0")

# Filter the DataFrame to show only rows whose PlayKey ends with "-0"
result = injuries.filter(mask)

print(result)

# Alternatively, count how many rows have a PlayKey ending with "-0"
count = injuries.filter(mask).shape[0]
print(f"Number of rows where PlayKey ends with 0: {count}")

shape: (0, 30)
┌─────────┬──────┬─────┬─────┬───┬─────┬─────────────┬────────┬─────────────────┐
│ PlayKey ┆ time ┆ x   ┆ y   ┆ … ┆ Jy  ┆ J_magnitude ┆ torque ┆ torque_internal │
│ ---     ┆ ---  ┆ --- ┆ --- ┆   ┆ --- ┆ ---         ┆ ---    ┆ ---             │
│ str     ┆ f32  ┆ f32 ┆ f32 ┆   ┆ f32 ┆ f32         ┆ f32    ┆ f32             │
╞═════════╪══════╪═════╪═════╪═══╪═════╪═════════════╪════════╪═════════════════╡
└─────────┴──────┴─────┴─────┴───┴─────┴─────────────┴────────┴─────────────────┘
Number of rows where PlayKey ends with 0: 0


In [61]:
# Re-run the body-part join from track_all_quant at notebook level to inspect the result.
# NOTE: this rebinds the notebook-level body_part, concussions and injuries frames.
trackinginjuries_path = "F:/Data/Processing_data/TrackingInjuries.parquet"
opponentplays_path = "F:/Data/Processing_data/OpponentPlays.parquet"

body_part = data_loader('injuries').select(['PlayKey', 'BodyPart']).filter(pl.col("PlayKey").is_not_null())
concussions = pl.read_parquet(opponentplays_path)
injuries = pl.read_parquet(trackinginjuries_path)

# Left join keeps every tracking row; BodyPart becomes the InjuryType label
injuries = injuries.join(
        body_part
        , on='PlayKey'
        , how='left' 
        ).rename({"BodyPart": "InjuryType"})

In [62]:
# Preview the joined injury rows; InjuryType holds the BodyPart value.
injuries.head()

PlayKey,time,x,y,dir,o,Angle_Diff,Displacement,Speed,vx,vy,omega_dir,omega_o,omega_diff,Position,Height_m,Weight_kg,Chest_rad_m,px,py,moment,moment_upper,p_magnitude,L_dir,L_diff,Jx,Jy,J_magnitude,torque,torque_internal,InjuryType
str,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,cat,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,str
"""39678-2-1""",0.0,70.110001,30.780001,21.980011,-9.050003,31.030001,,,,,,,,"""DE""",1.97,120.199997,0.191,,,0.365418,0.255793,,,,,,,,,"""Ankle"""
"""39678-2-1""",0.1,70.139999,30.76,26.519989,-15.600006,42.119999,0.036055,0.360547,0.299988,-0.200005,0.792376,-1.143191,1.935567,"""DE""",1.97,120.199997,0.191,36.058533,-24.04055,0.365418,0.255793,43.337811,0.289548,0.495104,,,,,,"""Ankle"""
"""39678-2-1""",0.2,70.18,30.73,30.559998,-21.419998,51.98,0.050001,0.500011,0.400009,-0.300007,0.705114,-1.01578,1.720895,"""DE""",1.97,120.199997,0.191,48.0811,-36.060825,0.365418,0.255793,60.101376,0.257661,0.440192,12.022568,-12.020275,17.000856,-0.318868,-0.549116,"""Ankle"""
"""39678-2-1""",0.3,70.239998,30.700001,33.76001,-25.839996,59.599998,0.067079,0.670793,0.599976,-0.299988,0.558508,-0.771435,1.329943,"""DE""",1.97,120.199997,0.191,72.117065,-36.058533,0.365418,0.255793,80.629333,0.204089,0.34019,24.035965,0.002293,24.035965,-0.535726,-1.000025,"""Ankle"""
"""39678-2-1""",0.4,70.279999,30.66,35.440002,-29.179993,64.620003,0.05657,0.565698,0.400009,-0.400009,0.293214,-0.58294,0.876153,"""DE""",1.97,120.199997,0.191,48.0811,-48.0811,0.365418,0.255793,67.996948,0.107146,0.224114,-24.035965,-12.022568,26.875076,-0.969433,-1.160761,"""Ankle"""
