# NGS ETL

In [1]:
import polars as pl
import sqlalchemy as db
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
import psycopg2

from CleaningFunctions import *
from DataHandler import data_loader, data_shrinker


def calculate_angle_difference(angle1, angle2):
    import numpy as np
    """
    Calculate the smallest angle difference between two angles 
    using trigonometric functions, accounting for edge cases.
    """
    sin_diff = np.sin(np.radians(angle2 - angle1))
    cos_diff = np.cos(np.radians(angle2 - angle1))
    return np.degrees(np.arctan2(sin_diff, cos_diff))

def angle_corrector(df):
    import polars as pl
    """
    Make corrections to angles to reduce fringe errors at 360
    """
    df = df.with_columns([
        ((pl.col("dir") + 180) % 360 - 180).alias("dir")
        , ((pl.col("o") + 180) % 360 - 180).alias("o")
    ]).with_columns(
        (calculate_angle_difference(pl.col("dir"), pl.col("o"))).abs().round(2).alias("Angle_Diff")
        )
    
    return df



def path_calculator(df):
    import polars as pl
    # This provides a summary table that can be integrated with the qualitative data

    # Calculate total distance and displacement for each PlayKey
    # Calculate total distance and displacement for each PlayKey
    result = df.select([
        "PlayKey"
        , pl.col("Displacement").sum().over("PlayKey").alias("Distance")
        , pl.col("x").first().over("PlayKey").alias("start_x")
        , pl.col("y").first().over("PlayKey").alias("start_y")
        , pl.col("x").last().over("PlayKey").alias("end_x")
        , pl.col("y").last().over("PlayKey").alias("end_y")
        , pl.col("Angle_Diff").max().over("PlayKey").alias("Max_Angle_Diff")
        , pl.col("Angle_Diff").mean().over("PlayKey").alias("Mean_Angle_Diff")
        , pl.col("Speed").max().over("PlayKey").alias("Max_Speed")
        , pl.col("Speed").mean().over("PlayKey").alias("Mean_Speed")
        , pl.col("J_magnitude").max().over("PlayKey").alias("Max_Impulse")
        , pl.col("J_magnitude").mean().over("PlayKey").alias("Mean_Impulse")
        , pl.col("torque").max().over("PlayKey").alias("Max_Torque")
        , pl.col("torque").mean().over("PlayKey").alias("Mean_Torque")
        , pl.col("torque_internal").max().over("PlayKey").alias("Max_Int_Torque")
        , pl.col("torque_internal").mean().over("PlayKey").alias("Mean_Int_Torque")

        ]).unique(subset=["PlayKey"])


    # Calculate the displacement
    result = result.with_columns([
        (((pl.col("end_x") - pl.col("start_x"))**2 + 
          (pl.col("end_y") - pl.col("start_y"))**2)**0.5)
        .alias("Displacement")
        ]).with_columns([
            (pl.col("Distance") - pl.col("Displacement")).alias("Path_Diff")
        ])

     
    # Select only the required columns
    result = result.select([
        'PlayKey'
        , 'Distance'
        , 'Displacement'
        , 'Path_Diff'
        , 'Max_Angle_Diff'
        , 'Mean_Angle_Diff'
        , 'Max_Speed'
        , 'Mean_Speed'
        , 'Max_Impulse'
        , 'Mean_Impulse'
        , 'Max_Torque'
        , 'Mean_Torque'
        , 'Max_Int_Torque'
        , 'Mean_Int_Torque'
      
    ]).sort("PlayKey")


    return result

def column_corrector(df):
    import polars as pl
    """
    Add a Play_Time column that acts like the 'time' column did in the injury dataset. 
    Each PlayKey will start at 0.0 and increase by 0.1 for each subsequent record.
    """
    df = df.with_columns([
        pl.concat_str([
            pl.col('gsisid').cast(pl.Int32).cast(pl.Utf8)
            , pl.lit('-')
            , pl.col('gamekey').cast(pl.Utf8)
            , pl.lit('-')
            , pl.col('playid').cast(pl.Utf8)
        ]).alias('PlayKey')
    ])
     
    
    df = df.select([
        'PlayKey'
        , 'time'
        , 'x'
        , 'y'
        , 'o'
        , 'dir'
        , 'gsisid'
        ]).rename({"time":"datetime"})

    df = df.sort(['PlayKey', 'datetime'])

    df = df.with_columns(
        (pl.arange(0, pl.len()) * 0.1).over("PlayKey").alias("time")
        ).with_columns([pl.col('gsisid').cast(pl.Int32)])  
    
    return df



def body_builder(df, df_name):
    body_data = pl.DataFrame({
        "position": ["QB", "RB", "FB", "WR", "TE", "T", "G", "C", "DE", "DT", "NT", "LB", "OLB", "MLB", "CB", "S", "K", "P", "SS", "ILB", "FS", "LS", "DB"]
        # , "Position_Name": ["Quarterback", "Running Back", "Fullback", "Wide Receiver", "Tight End", "Tackle", "Guard", "Center", "Defensive End", "Defensive Tackle", "Nose Tackle", "Linebacker", "Outside Linebacker", "Middle Linebacker", "Cornerback", "Safety", "Kicker", "Punter", "Strong Safety", "Inside Linebacker", "Free Safety", "Long Snapper", "Defensive Back"]
        , "Height_m": [1.91, 1.79, 1.85, 1.88, 1.96, 1.97, 1.90, 1.87, 1.97, 1.92, 1.88, 1.90, 1.90, 1.87, 1.82, 1.84, 1.83, 1.88, 1.84, 1.90, 1.84, 1.88, 1.82]
        , "Weight_kg": [102.1, 95.3, 111.1, 90.7, 114.6, 140.6, 141.8, 136.1, 120.2, 141.8, 152.0, 110.0, 108.9, 113.4, 87.4, 95.9, 92.08, 97.52, 95.9, 110.0, 95.9, 108.86, 87.4]
        , "Chest_rad_m": [0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191]
        })

    valid_df_names = ['ngs_data', 'tracking']
    if df_name not in valid_df_names:
        raise ValueError(f"Invalid dataframe name '{df_name}'. Valid options are: {valid_df_names}")

    try: 
        if df_name == 'ngs_data':
            position = data_loader(dataset='positions', database='nfl_concussion')
            position = position.join(
                body_data
                , left_on='position'
                , right_on='position'
                , how='left'
                )
            
            df = df.join(
                position
                , on='gsisid'
                , how='left'
                ).drop_nulls(subset=['position'])
            

        elif df_name == 'tracking':
            position = data_loader(dataset='play_positions', database='nfl_surface')
            position = position.join(
                body_data
                , left_on='position'
                , right_on='position'
                , how='left'
                )

            df = df.join(
                position
                , left_on='PlayKey'
                , right_on='playkey'
                , how='left'
            ).drop_nulls(subset=['position']).drop(['event'])

            

        return df    
    
    except Exception as e: 
        print(f"An error occurred while loading the dataframe '{df_name}': {e}")
        return None




def velocity_calculator(df):
    import numpy as np
    import polars as pl
    """
    Using the (X,Y) and time columns, perform calculations based on the 
    difference between two rows to find displacement, speed, direction 
    of motion, velocity in x and y components, and the angular velocities 
    of the direction of motion and orientations 
    """
    
    return df.with_columns([
        # Convert 'o' and 'dir' to radians
        (pl.col("o") * np.pi / 180).alias("o_rad"),
        (pl.col("dir") * np.pi / 180).alias("dir_rad")
    ]).with_columns([
        # Pre-calculate shifted values
        pl.col("x").shift(1).over("PlayKey").alias("prev_x")
        , pl.col("y").shift(1).over("PlayKey").alias("prev_y")
        # , pl.col("time").shift(1).over("PlayKey").alias("prev_time")
        , pl.col("dir_rad").shift(1).over("PlayKey").alias("prev_dir")
        , pl.col("o_rad").shift(1).over("PlayKey").alias("prev_o")
    ]).with_columns([
        # Calculate the component displacements 
          (pl.col("x") - pl.col("prev_x")).alias("dx")
        , (pl.col("y") - pl.col("prev_y")).alias("dy")
    ]).with_columns([
        # Calculate displacement
        ((pl.col("dx")**2 + pl.col("dy")**2)**0.5).alias("Displacement")
    ]).with_columns([
        # Calculate speed
        (pl.col("Displacement") / 0.1).alias("Speed")
        # Calculate direction
        , (np.degrees(np.arctan2(pl.col("dx"), pl.col("dy")))).alias("Direction")
        # Calculate velocity components
        , (pl.col("dx") / 0.1).alias("vx")
        , (pl.col("dy") / 0.1).alias("vy")
        # Calculate angular velocities
        , ((pl.col("dir_rad") - pl.col("prev_dir")) / 0.1).alias("omega_dir")
        , ((pl.col("o_rad") - pl.col("prev_o")) / 0.1).alias("omega_o")
    ]).with_columns([
        ((pl.col("omega_dir") - pl.col("omega_o")).abs()).alias("omega_diff")
    ]).drop([
        "prev_x", "prev_y", "prev_dir", "prev_o", "dx", "dy", "o_rad", "dir_rad"
    ])



def impulse_calculator(df):
    import numpy as np
    import polars as pl
    """
    Using the (X,Y) and time columns, perform calculations based on the velocities and changes 
    in velocites along with player mass to get the momentum and impulse, a measure that can 
    be assessed along with medical data related to concussions and injuries
    """
    
    return df.with_columns([
        # Calculate the linear momentum for each instant
        (pl.col('vx') * pl.col('Weight_kg')).alias('px')
        , (pl.col('vy') * pl.col('Weight_kg')).alias('py')

        # Calculate the moment of inertia of a rotating upright body (1/12 mr^2)
        , (1/12 * pl.col('Weight_kg') * (pl.col('Chest_rad_m')**2)).alias('moment')
        
        # Calculate the moment of inertia of the upper body turning upright with respect to waist (70% mass)
        , (1/12 * (pl.col('Weight_kg')*0.7) * (pl.col('Chest_rad_m')**2)).alias('moment_upper')
    
    ]).with_columns([
          # Calculate the magnitude of linear momentum
        ((pl.col("px")**2 + pl.col("py")**2)**0.5).alias("p_magnitude")
        
        # Calculate the angular momentum for the direction
        , (pl.col('omega_dir')*pl.col('moment')).alias('L_dir')

        # Calculate the angular momentum of the upper body with respect to lower
        , (pl.col('omega_diff')*pl.col('moment_upper')).alias('L_diff')


    ]).with_columns([
        # Pre-calculate shifted values for linear and angular momenta
        pl.col("px").shift(1).over("PlayKey").alias("prev_px")
        , pl.col("py").shift(1).over("PlayKey").alias("prev_py")
        , pl.col("L_dir").shift(1).over("PlayKey").alias("prev_L_dir")
        , pl.col("L_diff").shift(1).over("PlayKey").alias("prev_L_diff")
        
    ]).with_columns([
        # Calculate impulse, J, which is the change in linear momentum 
        ((pl.col("px") - pl.col("prev_px"))).alias("Jx")
        , ((pl.col("py") - pl.col("prev_py"))).alias("Jy")
        
    ]).with_columns([
          # Calculate the magnitude of linear momentum
        ((pl.col("Jx")**2 + pl.col("Jy")**2)**0.5).alias("J_magnitude")

        # Calculate torque as the change in angular momentum L over the change in time
        , (((pl.col("L_dir") - pl.col("prev_L_dir"))) / 0.1).alias("torque")
        , (((pl.col("L_diff") - pl.col("prev_L_diff"))) / 0.1).alias("torque_internal")

    ]).drop([
        "prev_L_dir", "prev_px", "prev_py", "prev_L_diff"
    ])

    

In [10]:
quant = data_loader(dataset='tracking', database='nfl_surface')
quant = data_shrinker(quant)
quant = angle_corrector(quant)
quant = body_builder(quant, 'tracking')
quant = velocity_calculator(quant)
quant = impulse_calculator(quant)
summary = path_calculator(quant)

quant.head()

Memory usage of dataframe is 0.63 MB
Memory usage after optimization is: 0.40 MB
Decreased by 36.5%


PlayKey,time,x,y,dir,dis,o,s,Angle_Diff,position,Height_m,Weight_kg,Chest_rad_m,Displacement,Speed,Direction,vx,vy,omega_dir,omega_o,omega_diff,px,py,moment,moment_upper,p_magnitude,L_dir,L_diff,Jx,Jy,J_magnitude,torque,torque_internal
str,f32,f32,f32,f32,f32,f32,f32,f32,str,f64,f64,f64,f32,f32,f32,f32,f32,f32,f32,f32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""26624-1-1""",0.0,87.459999,28.93,-71.76001,0.01,-97.670013,0.13,25.91,"""QB""",1.91,102.1,0.191,,,,,,,,,,,0.310393,0.217275,,,,,,,,
"""26624-1-1""",0.1,87.449997,28.92,-76.089996,0.01,-98.309998,0.12,22.219999,"""QB""",1.91,102.1,0.191,0.014144,0.141438,-134.994522,-0.100021,-0.100002,-0.755726,-0.111697,0.644029,-10.212181,-10.210234,0.310393,0.217275,14.440828,-0.234572,0.139931,,,,,
"""26624-1-1""",0.2,87.440002,28.92,-79.600006,0.01,-98.829987,0.12,19.23,"""QB""",1.91,102.1,0.191,0.009995,0.099945,-90.0,-0.099945,0.0,-0.612612,-0.090756,0.521855,-10.204391,0.0,0.310393,0.217275,10.204391,-0.19015,0.113386,0.00779,10.210234,10.210237,0.444216,-0.265453
"""26624-1-1""",0.3,87.440002,28.92,-81.209991,0.01,-99.339996,0.1,18.129999,"""QB""",1.91,102.1,0.191,0.0,0.0,0.0,0.0,0.0,-0.280997,-0.089014,0.191983,0.0,0.0,0.310393,0.217275,0.0,-0.087219,0.041713,10.204391,0.0,10.204391,1.029309,-0.716729
"""26624-1-1""",0.4,87.440002,28.92,-84.559998,0.01,-99.730011,0.09,15.17,"""QB""",1.91,102.1,0.191,0.0,0.0,0.0,0.0,0.0,-0.584686,-0.068069,0.516617,0.0,0.0,0.310393,0.217275,0.0,-0.181482,0.112248,0.0,0.0,0.0,-0.942629,0.705348


In [7]:
summary.head()

PlayKey,Distance,Displacement,Path_Diff,Max_Angle_Diff,Mean_Angle_Diff,Max_Speed,Mean_Speed,Max_Impulse,Mean_Impulse,Max_Torque,Mean_Torque,Max_Int_Torque,Mean_Int_Torque
str,f32,f32,f32,f32,f32,f32,f32,f64,f64,f64,f64,f64,f64
"""26624-1-1""",16.944927,5.918718,11.026209,178.550003,81.749474,4.826006,0.568622,225.539622,13.937695,195.09577,0.000128,135.78963,0.000817
"""26624-1-10""",17.096617,0.586941,16.509676,179.649994,87.230263,4.472136,0.621695,237.477908,14.586659,200.46984,-0.001167,135.876893,0.005232
"""26624-1-11""",31.267174,6.434011,24.833164,179.240005,75.704842,3.488564,0.849651,102.100779,15.416403,196.574734,-4.4e-05,139.104002,0.007068
"""26624-1-12""",15.000895,0.837736,14.163158,179.800003,100.228218,4.601078,0.421373,245.254338,14.509133,195.209563,-0.001282,150.83316,0.002713
"""26624-1-13""",13.263436,1.133579,12.129857,175.880005,95.547943,4.925422,0.520135,235.715729,22.204451,179.55341,-0.012946,136.2599,-0.00109


In [8]:
track = data_loader(dataset='ngs_data', database='nfl_concussion')
track = data_shrinker(track)
track = column_corrector(track)
track = angle_corrector(track)
track = body_builder(track, 'ngs_data')
track = velocity_calculator(track)
track = impulse_calculator(track)
summary = path_calculator(track)
track.head()

Memory usage of dataframe is 0.42 MB
Memory usage after optimization is: 0.37 MB
Decreased by 11.6%


PlayKey,datetime,x,y,o,dir,gsisid,time,Angle_Diff,position,Height_m,Weight_kg,Chest_rad_m,Displacement,Speed,Direction,vx,vy,omega_dir,omega_o,omega_diff,px,py,moment,moment_upper,p_magnitude,L_dir,L_diff,Jx,Jy,J_magnitude,torque,torque_internal
str,datetime[ns],f32,f32,f32,f32,i32,f64,f32,str,f64,f64,f64,f32,f32,f32,f32,f32,f32,f32,f32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""22824-4-2795""",2016-08-13 02:13:02.800,45.029999,24.1,1.970001,61.830002,22824,0.0,59.860001,"""P""",1.88,97.52,0.191,,,,,,,,,,,0.296469,0.207528,,,,,,,,
"""22824-4-2795""",2016-08-13 02:13:02.800,45.029999,24.1,1.970001,61.830002,22824,0.0,59.860001,"""P""",1.88,97.52,0.191,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.296469,0.207528,0.0,0.0,0.0,,,,,
"""22824-4-2795""",2016-08-13 02:13:02.900,45.110001,24.120001,3.910004,61.709991,22824,0.1,57.799999,"""P""",1.88,97.52,0.191,0.082464,0.82464,75.963753,0.800018,0.200005,-0.020946,0.338594,0.359541,78.017786,19.504446,0.296469,0.207528,80.418893,-0.00621,0.074615,78.017786,19.504446,80.418893,-0.062099,0.746148
"""22824-4-2795""",2016-08-13 02:13:02.900,45.110001,24.120001,3.910004,61.709991,22824,0.1,57.799999,"""P""",1.88,97.52,0.191,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.296469,0.207528,0.0,0.0,0.0,-78.017786,-19.504446,80.418893,0.062099,-0.746148
"""22824-4-2795""",2016-08-13 02:13:03,45.18,24.129999,5.169998,62.440002,22824,0.2,57.27,"""P""",1.88,97.52,0.191,0.07071,0.707101,81.871208,0.699997,0.099983,0.127411,0.219911,0.0925,68.263702,9.750363,0.296469,0.207528,68.956527,0.037773,0.019196,68.263702,9.750363,68.956527,0.377734,0.191963


In [None]:
summary.head()

In [2]:
quals = data_loader('clean_quals', 'nfl_concussion')
quals.head()

Playkey,Position,Role,Game_Date,Yardline,Quarter,Play_Type,Poss_Team,Game_Site,Start_Time,Home_Team_Code,Visit_Team_Code,Stadium_Type,Field_Type,Weather,Temperature,Player_Activity_Derived,Primary_Impact_Type,Primary_Partner_Activity_Derived,Primary_Partner_Gsisid,Home_Score,Visiting_Score,Score_Difference
str,str,str,datetime[ns],str,i32,str,str,str,str,str,str,str,str,str,f64,str,str,str,str,i32,i32,i32
"""19714-119-1829""","""P""","""P""",2016-10-02 00:00:00,"""HST 44""",2,"""Punt""","""HST""","""Houston""","""12:00""","""HST""","""TEN""","""Indoor""","""Synthetic""","""Cloudy""",82.0,,,,,17,17,0
"""19714-119-3233""","""P""","""P""",2016-10-02 00:00:00,"""HST 29""",4,"""Punt""","""HST""","""Houston""","""12:00""","""HST""","""TEN""","""Indoor""","""Synthetic""","""Cloudy""",82.0,,,,,27,20,7
"""19714-119-4021""","""P""","""P""",2016-10-02 00:00:00,"""HST 34""",4,"""Punt""","""HST""","""Houston""","""12:00""","""HST""","""TEN""","""Indoor""","""Synthetic""","""Cloudy""",82.0,,,,,27,20,7
"""19714-119-963""","""P""","""P""",2016-10-02 00:00:00,"""TEN 40""",2,"""Punt""","""HST""","""Houston""","""12:00""","""HST""","""TEN""","""Indoor""","""Synthetic""","""Cloudy""",82.0,,,,,14,3,11
"""19714-135-1488""","""P""","""P""",2016-10-09 00:00:00,"""HST 22""",2,"""Punt""","""HST""","""Minneapolis""","""12:00""","""MIN""","""HST""","""Indoor""","""Synthetic""","""Indoor""",,,,,,23,0,23


In [None]:
# def velocity_calculator(df):
#     import numpy as np
#     import polars as pl
#     """
#     Using the (X,Y) and time columns, perform calculations based on the 
#     difference between two rows to find displacement, speed, direction 
#     of motion, velocity in x and y components, and the angular velocities 
#     of the direction of motion and orientations 
#     """
    
#     return df.with_columns([
#         # Convert 'o' and 'dir' to radians
#         (pl.col("o") * np.pi / 180).alias("o_rad"),
#         (pl.col("dir") * np.pi / 180).alias("dir_rad")
#     ]).with_columns([
#         # Pre-calculate shifted values
#         pl.col("x").shift(1).over("PlayKey").alias("prev_x")
#         , pl.col("y").shift(1).over("PlayKey").alias("prev_y")
#         # , pl.col("time").shift(1).over("PlayKey").alias("prev_time")
#         , pl.col("dir_rad").shift(1).over("PlayKey").alias("prev_dir")
#         , pl.col("o_rad").shift(1).over("PlayKey").alias("prev_o")
#     ]).with_columns([
#         # Calculate the component displacements 
#           (pl.col("x") - pl.col("prev_x")).alias("dx")
#         , (pl.col("y") - pl.col("prev_y")).alias("dy")
#     ]).with_columns([
#         # Calculate displacement
#         ((pl.col("dx")**2 + pl.col("dy")**2)**0.5).alias("Displacement")
#     ]).with_columns([
#         # Calculate speed
#         (pl.col("Displacement") / 0.1).alias("Speed")
#         # Calculate direction
#         , (np.degrees(np.arctan2(pl.col("dx"), pl.col("dy")))).alias("Direction")
#         # Calculate velocity components
#         , (pl.col("dx") / 0.1).alias("vx")
#         , (pl.col("dy") / 0.1).alias("vy")
#         # Calculate angular velocities
#         , ((pl.col("dir_rad") - pl.col("prev_dir")) / 0.1).alias("omega_dir")
#         , ((pl.col("o_rad") - pl.col("prev_o")) / 0.1).alias("omega_o")
#     ]).with_columns([
#         ((pl.col("omega_dir") - pl.col("omega_o")).abs()).alias("omega_diff")
#     ]).drop([
#         "prev_x", "prev_y", "prev_dir", "prev_o", "dx", "dy", "o_rad", "dir_rad"
#     ])

#     # return df

In [None]:
track = velocity_calculator(track)
track.head()

In [None]:
len(track)

## Include Momentum and Impulse 
It's not necessary to actually calculate the force, because we can assess these measurements for Impulse, J, as the change in either linear or angular momentum. 
I will need to include the weights of the players to get the momentum of each. 


Measures I would like to consider: Average chest circumference for NFL players seems to be between 46 and 49 inches - so I'll go with 1.2 M (120 cm)


In [None]:

# def impulse_calculator(df):
#     import numpy as np
#     import polars as pl
#     """
#     Using the (X,Y) and time columns, perform calculations based on the velocities and changes 
#     in velocites along with player mass to get the momentum and impulse, a measure that can 
#     be assessed along with medical data related to concussions and injuries
#     """
    
#     return df.with_columns([
#         # Calculate the linear momentum for each instant
#         (pl.col('vx') * pl.col('Weight_kg')).alias('px')
#         , (pl.col('vy') * pl.col('Weight_kg')).alias('py')

#         # Calculate the moment of inertia of a rotating upright body (1/12 mr^2)
#         , (1/12 * pl.col('Weight_kg') * (pl.col('Chest_rad_m')**2)).alias('moment')
        
#         # Calculate the moment of inertia of the upper body turning upright with respect to waist (70% mass)
#         , (1/12 * (pl.col('Weight_kg')*0.7) * (pl.col('Chest_rad_m')**2)).alias('moment_upper')
    
#     ]).with_columns([
#           # Calculate the magnitude of linear momentum
#         ((pl.col("px")**2 + pl.col("py")**2)**0.5).alias("p_magnitude")
        
#         # Calculate the angular momentum for the direction
#         , (pl.col('omega_dir')*pl.col('moment')).alias('L_dir')

#         # Calculate the angular momentum of the upper body with respect to lower
#         , (pl.col('omega_diff')*pl.col('moment_upper')).alias('L_diff')


#     ]).with_columns([
#         # Pre-calculate shifted values for linear and angular momenta
#         pl.col("px").shift(1).over("PlayKey").alias("prev_px")
#         , pl.col("py").shift(1).over("PlayKey").alias("prev_py")
#         , pl.col("L_dir").shift(1).over("PlayKey").alias("prev_L_dir")
#         , pl.col("L_diff").shift(1).over("PlayKey").alias("prev_L_diff")
        
#     ]).with_columns([
#         # Calculate impulse, J, which is the change in linear momentum 
#         ((pl.col("px") - pl.col("prev_px"))).alias("Jx")
#         , ((pl.col("py") - pl.col("prev_py"))).alias("Jy")
        
#     ]).with_columns([
#           # Calculate the magnitude of linear momentum
#         ((pl.col("Jx")**2 + pl.col("Jy")**2)**0.5).alias("J_magnitude")

#         # Calculate torque as the change in angular momentum L over the change in time
#         , (((pl.col("L_dir") - pl.col("prev_L_dir"))) / 0.1).alias("torque")
#         , (((pl.col("L_diff") - pl.col("prev_L_diff"))) / 0.1).alias("torque_internal")

#     ]).drop([
#         "prev_L_dir", "prev_px", "prev_py", "prev_L_diff"
#     ])

#     # return df

In [None]:
tracks = impulse_calculator(track)
tracks.head()

In [None]:
len(track)

In [None]:
# def path_calculator(df):
#     import polars as pl
#     # This provides a summary table that can be integrated with the qualitative data

#     # Calculate total distance and displacement for each PlayKey
#     # Calculate total distance and displacement for each PlayKey
#     result = df.select([
#         "PlayKey"
#         , pl.col("Displacement").sum().over("PlayKey").alias("Distance")
#         , pl.col("x").first().over("PlayKey").alias("start_x")
#         , pl.col("y").first().over("PlayKey").alias("start_y")
#         , pl.col("x").last().over("PlayKey").alias("end_x")
#         , pl.col("y").last().over("PlayKey").alias("end_y")
#         , pl.col("Angle_Diff").max().over("PlayKey").alias("Max_Angle_Diff")
#         , pl.col("Angle_Diff").mean().over("PlayKey").alias("Mean_Angle_Diff")
#         , pl.col("Speed").max().over("PlayKey").alias("Max_Speed")
#         , pl.col("Speed").mean().over("PlayKey").alias("Mean_Speed")
#         , pl.col("J_magnitude").max().over("PlayKey").alias("Max_Impulse")
#         , pl.col("J_magnitude").mean().over("PlayKey").alias("Mean_Impulse")
#         , pl.col("torque").max().over("PlayKey").alias("Max_Torque")
#         , pl.col("torque").mean().over("PlayKey").alias("Mean_Torque")
#         , pl.col("torque_internal").max().over("PlayKey").alias("Max_Int_Torque")
#         , pl.col("torque_internal").mean().over("PlayKey").alias("Mean_Int_Torque")

#         ]).unique(subset=["PlayKey"])


#     # Calculate the displacement
#     result = result.with_columns([
#         (((pl.col("end_x") - pl.col("start_x"))**2 + 
#           (pl.col("end_y") - pl.col("start_y"))**2)**0.5)
#         .alias("Displacement")
#         ]).with_columns([
#             (pl.col("Distance") - pl.col("Displacement")).alias("Path_Diff")
#         ])

     
#     # Select only the required columns
#     result = result.select([
#         'PlayKey'
#         , 'Distance'
#         , 'Displacement'
#         , 'Path_Diff'
#         , 'Max_Angle_Diff'
#         , 'Mean_Angle_Diff'
#         , 'Max_Speed'
#         , 'Mean_Speed'
#         , 'Max_Impulse'
#         , 'Mean_Impulse'
#         , 'Max_Torque'
#         , 'Mean_Torque'
#         , 'Max_Int_Torque'
#         , 'Mean_Int_Torque'
      
#     ]).sort("PlayKey")


#     return result

In [None]:
summary = path_calculator(tracks)
summary.head()

In [None]:
len(summary)

## Setting up the full function path

In [None]:
track = data_loader(dataset='ngs_data', database='nfl_concussion')
track = data_shrinker(track)
track = column_corrector(track)
track = angle_corrector(track)
track = weight_gainer(track)
track = velocity_calculator(track)
track = impulse_calculator(track)
summary = path_calculator(track)


In [None]:
track.head()

In [None]:
summary.head()

In [None]:
len(track)

In [None]:
len(summary)