### ETL for both Concussion and Surface Data

In [1]:
import polars as pl
import sqlalchemy as db
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
import psycopg2

from CleaningFunctions import *
from TransformFunctions import *
from DataHandler import *

This is the set of functions from the concussion sets. Most of these overlap with the surface injury sets.

In [2]:
clean_injuries()

Play and Injury Data has been cleaned and uploaded as qualitative


In [2]:
clean_concussions()

Concussion Data has been cleaned and uploaded as clean_data


In [2]:
transform_injury_data('summary')

Memory usage of dataframe is 0.63 MB
Memory usage after optimization is: 0.40 MB
Decreased by 36.5%
Memory usage of dataframe is 36.66 MB
Memory usage after optimization is: 23.92 MB
Decreased by 34.7%
Writing all quantitative and qualitative summary data to the database as summary_data. Wait.
Data has been uploaded to the database. Probably.


In [3]:
transform_concussion_data('summary')

Memory usage of dataframe is 0.42 MB
Memory usage after optimization is: 0.37 MB
Decreased by 11.6%
Memory usage of dataframe is 25.36 MB
Memory usage after optimization is: 20.68 MB
Decreased by 18.4%
Writing all quantitative and qualitative summary data to the database as summary_data. Wait.
Data has been uploaded to the database. Probably.


In [2]:
transform_injury_data('tracking')

Memory usage of dataframe is 0.63 MB
Memory usage after optimization is: 0.40 MB
Decreased by 36.5%
Writing the transformed table with physical parameters to the database as quantitative
Data has been uploaded to the database. Go celebrate!


In [3]:
transform_concussion_data('tracking')

Memory usage of dataframe is 0.42 MB
Memory usage after optimization is: 0.37 MB
Decreased by 11.6%
Writing the transformed table with physical parameters to the database as quantitative
Data has been uploaded to the database. Good for you!


In [4]:
# # Cleaning Functions
# # This sets up the code for all cleaning functions: clean_injuries() cleans the play and injury data for surface injuries, and clean_concussions() cleans the concussion data.
# # TO RUN: 
#     # from CleaningFunctions import *
#     # clean_injuries()


# ##### Primary Cleaning Functions #####
# def clean_injuries():
#     from DataHandler import data_writer
#     database = "nfl_surface"
#     quals = table_joiner() 
#     quals = column_capitalizer(quals, df_name='quals')
#     quals = stadium_cleaner(quals, df_name='quals')
#     quals = weather_cleaner(quals)
#     quals = injury_cleaner(quals)
#     data_writer(quals, database, "qualitative")
#     print("Play and Injury Data has been cleaned and uploaded as qualitative")
#     del quals


# def clean_concussions():
#     from DataHandler import data_loader, data_writer
#     import polars as pl
#     database = 'nfl_concussion'

#     df = data_loader(database='nfl_concussion', dataset='concussion')
#     df = column_capitalizer(df, 'concussion')
#     df = stadium_cleaner(df, 'concussion')
#     df = weather_cleaner(df)
#     df = turf_cleaner(df)
#     df = df.filter(pl.col("Game_Date").is_not_null())
#     df = score_splitter(df)
#     data_writer(df, database, "clean_data")
#     print("Concussion Data has been cleaned and uploaded as clean_data")
#     # return df



# ######################################################################################
# # Extracts and joins the necessary columns from the Injuries and Plays tables
# def table_joiner():
#     import polars as pl
#     from DataHandler import data_loader

#     plays = data_loader('plays', 'nfl_surface')
#     injuries = data_loader('injuries', 'nfl_surface')

#     quals = (
#         plays.join(injuries, on="playkey", how='left')
#         .select([
#             pl.col("playkey")
#             , pl.col("rosterposition")
#             , pl.col("stadiumtype")
#             , pl.col("fieldtype")
#             , pl.col("temperature")
#             , pl.col("weather")
#             , pl.col("playtype")
#             , pl.col("bodypart")
#             , pl.col("dm_m1")
#             , pl.col("dm_m7")
#             , pl.col("dm_m28")
#             , pl.col("dm_m42")

#         ])
#     )
#     return quals


# # Renames the all-lower-case column headers to capitalized, underscore-separated names (e.g. 'stadiumtype' -> 'Stadium_Type')
# def column_capitalizer(df, df_name):          
#     if df_name == 'quals':
#         columns = {
#         'playkey': "PlayKey"
#         , 'position': 'Position'
#         , 'stadiumtype': 'Stadium_Type'
#         , 'fieldtype': 'Field_Type'
#         , 'temperature': 'Temperature'
#         , 'weather': 'Weather'
#         , 'playtype': 'Play_Type'
#         , 'bodypart': 'Body_Part'
#         , 'dm_m1': 'DM_1'
#         , 'dm_m7': 'DM_7'
#         , 'dm_m28': 'DM_28'
#         , 'dm_m42': 'DM_42'
#         }

#     elif df_name == 'concussion':
#         columns = {
#         'playkey': 'Playkey'
#         , 'position': 'Position'
#         , 'role': 'Role'
#         , 'game_date': 'Game_Date'
#         , 'yardline': 'Yardline'
#         , 'quarter': 'Quarter'
#         , 'play_type': 'Play_Type'
#         , 'poss_team': 'Poss_Team'
#         , 'score_home_visiting': 'Score_Home_Visiting'
#         , 'game_site': 'Game_Site'
#         , 'start_time': 'Start_Time'
#         , 'hometeamcode': 'Home_Team_Code'
#         , 'visitteamcode': 'Visit_Team_Code'
#         , 'stadiumtype': 'Stadium_Type'
#         , 'turf': 'Field_Type'
#         , 'gameweather': 'Weather'
#         , 'temperature': 'Temperature'
#         , 'player_activity_derived': 'Player_Activity_Derived'
#         , 'primary_impact_type': 'Primary_Impact_Type'
#         , 'primary_partner_activity_derived': 'Primary_Partner_Activity_Derived'
#         , 'primary_partner_gsisid': 'Primary_Partner_Gsisid'
#         }


#     df = df.rename(columns)
#     return df

# # Standardizes Stadium_Type to either Indoor or Outdoor per game records. Some of the domed stadiums have a roof that can open; if the roof is open, the game is considered outdoor.
# def stadium_cleaner(df, df_name):
#     import polars as pl 

#     if df_name == 'quals':       
#         stadium_dict = {
#             'Outdoor': 'Outdoor'
#             , 'Indoors': 'Indoor'
#             , 'Oudoor': 'Outdoor'
#             , 'Outdoors': 'Outdoor'
#             , 'Open': 'Outdoor'
#             , 'Closed Dome': 'Indoor'
#             , 'Domed, closed': 'Indoor'
#             , 'Dome': 'Indoor'
#             , 'Indoor': 'Indoor'
#             , 'Domed': 'Indoor'
#             , 'Retr. Roof-Closed': 'Indoor'
#             , 'Outdoor Retr Roof-Open': 'Outdoor'
#             , 'Retractable Roof': 'Indoor'
#             , 'Ourdoor': 'Outdoor'
#             , 'Indoor, Roof Closed': 'Indoor'
#             , 'Retr. Roof - Closed': 'Indoor'
#             , 'Bowl': 'Outdoor'
#             , 'Outddors': 'Outdoor'
#             , 'Retr. Roof-Open': 'Outdoor'
#             , 'Dome, closed': 'Indoor'
#             , 'Indoor, Open Roof': 'Outdoor'
#             , 'Domed, Open': 'Outdoor'
#             , 'Domed, open': 'Outdoor'
#             , 'Heinz Field': 'Outdoor'
#             , 'Cloudy': 'Outdoor'
#             , 'Retr. Roof - Open': 'Outdoor'
#             , 'Retr. Roof Closed': 'Indoor'
#             , 'Outdor': 'Outdoor'
#             , 'Outside': 'Outdoor'
#         }


#         df = df.with_columns(pl.col("Stadium_Type").fill_null("Outdoor")) # Since most stadiums are outdoor and the percentage of games played indoor is already met by the known indoor games those seasons, all unknown games were set to outdoor


#     elif df_name == 'concussion':
#         stadium_dict = {
#             'Outdoor': 'Outdoor'
#             , 'outdoor': 'Outdoor'
#             , 'Indoors': 'Indoor'
#             , 'Indoors (Domed)': 'Indoor'
#             , 'Oudoor': 'Outdoor'
#             , 'Outdoors': 'Outdoor'
#             , 'Outdoors ': 'Outdoor'
#             , 'Open': 'Outdoor'
#             , 'Closed Dome': 'Indoor'
#             , 'Domed, closed': 'Indoor'
#             , 'Dome': 'Indoor'
#             , 'Indoor': 'Indoor'
#             , 'Domed': 'Indoor'
#             , 'Retr. Roof-Closed': 'Indoor'
#             , 'Outdoor Retr Roof-Open': 'Outdoor'
#             , 'Retractable Roof': 'Indoor'
#             , 'Ourdoor': 'Outdoor'
#             , 'Indoor, Roof Closed': 'Indoor'
#             , 'Retr. Roof - Closed': 'Indoor'
#             , 'Bowl': 'Outdoor'
#             , 'Outddors': 'Outdoor'
#             , 'Retr. Roof-Open': 'Outdoor'
#             , 'Dome, closed': 'Indoor'
#             , 'Indoor, Open Roof': 'Outdoor'
#             , 'Domed, Open': 'Outdoor'
#             , 'Domed, open': 'Outdoor'
#             , 'Heinz Field': 'Outdoor'
#             , 'Cloudy': 'Outdoor'
#             , 'Retr. Roof - Open': 'Outdoor'
#             , 'Retr. Roof Closed': 'Indoor'
#             , 'Outdor': 'Outdoor'
#             , 'Outside': 'Outdoor'
#             , 'Indoor, non-retractable roof': 'Indoor'
#             , 'Retr. roof - closed': 'Indoor'
#             , 'Indoor, fixed roof ': 'Indoor'
#             , 'Indoor, Non-Retractable Dome': 'Indoor'
#             , 'Indoor, Fixed Roof': 'Indoor'
#             , 'Indoor, fixed roof': 'Indoor'
#             , None: 'Outdoor'  # It was verified that all fields with null values are indeed outdoor
#         }


#     df = df.with_columns(pl.col("Stadium_Type").replace(stadium_dict)) # This uses the dict to assign naming conventions

#     return df

# # Consolidates the many different but similar weather descriptions into a few standard categories
# def weather_cleaner(df):
#      import polars as pl
     
#      weather_dict = {
#             'Clear and warm': 'Clear'
#             , 'Mostly Cloudy': 'Cloudy'
#             , 'Sunny': 'Clear'
#             , 'Clear': 'Clear'
#             , 'Cloudy': 'Cloudy'
#             , 'Cloudy, fog started developing in 2nd quarter': 'Hazy/Fog'
#             , 'Rain': 'Rain'
#             , 'Partly Cloudy': 'Cloudy'
#             , 'Mostly cloudy': 'Cloudy'
#             , 'Cloudy and cold': 'Cloudy'
#             , 'Cloudy and Cool': 'Cloudy'
#             , 'Rain Chance 40%': 'Rain'
#             , 'Controlled Climate': 'Indoor'
#             , 'Sunny and warm': 'Clear'
#             , 'Partly cloudy': 'Cloudy'
#             , 'Clear and Cool': 'Clear'
#             , 'Clear and cold': 'Clear'
#             , 'Sunny and cold': 'Clear'
#             , 'Indoor': 'Indoor'
#             , 'Partly Sunny': 'Clear'
#             , 'N/A (Indoors)': 'Indoor'
#             , 'Mostly Sunny': 'Clear'
#             , 'Indoors': 'Indoor'
#             , 'Clear Skies': 'Clear'
#             , 'Partly sunny': 'Clear'
#             , 'Showers': 'Rain'
#             , 'N/A Indoor': 'Indoor'
#             , 'Sunny and clear': 'Clear'
#             , 'Snow': 'Snow'
#             , 'Scattered Showers': 'Rain'
#             , 'Party Cloudy': 'Cloudy'
#             , 'Clear skies': 'Clear'
#             , 'Rain likely, temps in low 40s.': 'Rain'
#             , 'Hazy': 'Hazy/Fog'
#             , 'Partly Clouidy': 'Cloudy'
#             , 'Sunny Skies': 'Clear'
#             , 'Overcast': 'Cloudy'
#             , 'Cloudy, 50% change of rain': 'Cloudy'
#             , 'Fair': 'Clear'
#             , 'Light Rain': 'Rain'
#             , 'Partly clear': 'Clear'
#             , 'Mostly Coudy': 'Cloudy'
#             , '10% Chance of Rain': 'Cloudy'
#             , 'Cloudy, chance of rain': 'Cloudy'
#             , 'Heat Index 95': 'Clear'
#             , 'Sunny, highs to upper 80s': 'Clear'
#             , 'Sun & clouds': 'Cloudy'
#             , 'Heavy lake effect snow': 'Snow'
#             , 'Mostly sunny': 'Clear'
#             , 'Cloudy, Rain': 'Rain'
#             , 'Sunny, Windy': 'Windy'
#             , 'Mostly Sunny Skies': 'Clear'
#             , 'Rainy': 'Rain'
#             , '30% Chance of Rain': 'Rain'
#             , 'Cloudy, light snow accumulating 1-3"': 'Snow'
#             , 'cloudy': 'Cloudy'
#             , 'Clear and Sunny': 'Clear'
#             , 'Coudy': 'Cloudy'
#             , 'Clear and sunny': 'Clear'
#             , 'Clear to Partly Cloudy': 'Clear'
#             , 'Cloudy with periods of rain, thunder possible. Winds shifting to WNW, 10-20 mph.': 'Windy'
#             , 'Rain shower': 'Rain'
#             , 'Cold': 'Clear'
#             , 'Partly cloudy, lows to upper 50s.': 'Cloudy'
#             , 'Scattered thunderstorms': 'Rain'
#             , 'CLEAR': 'Clear'
#             , 'Partly CLoudy': 'Cloudy'
#             , 'Chance of Showers': 'Rain'
#             , 'Snow showers': 'Snow'
#             , 'Clear and Cold': 'Clear'
#             , 'Cloudy with rain': 'Rain'
#             , 'Sunny intervals': 'Clear'
#             , 'Clear and cool': 'Clear'
#             , 'Cloudy, Humid, Chance of Rain': 'Rain'
#             , 'Cloudy and Cold': 'Cloudy'
#             , 'Cloudy with patches of fog': 'Hazy/Fog'
#             , 'Controlled': 'Indoor'
#             , 'Sunny and Clear': 'Clear'
#             , 'Cloudy with Possible Stray Showers/Thundershowers': 'Rain'
#             , 'Suny': 'Clear'
#             , 'T-Storms': 'Rain'
#             , 'Sunny and cool': 'Clear'
#             , 'Cloudy, steady temps': 'Cloudy'
#             , 'Hazy, hot and humid': 'Hazy/Fog'
#             , 'Sunny Intervals': 'Clear'
#             , 'Partly Cloudy, Chance of Rain 80%': 'Rain'
#             , 'Mostly Clear. Gusting ot 14.': 'Windy'
#             , 'Mostly CLoudy': 'Cloudy'
#             , 'Snow Showers, 3 to 5 inches expected.': 'Snow'
#             }


#      df = df.with_columns(pl.col("Weather").replace(weather_dict)) # Standardizes the weather to a few main types

#      df = df.with_columns(             # Null handling - all null weather conditions for indoor stadiums are filled "indoor"
#                 pl.when(pl.col("Stadium_Type") == "Indoor")
#                 .then(pl.col("Weather").fill_null("Indoor"))
#                 .otherwise(pl.col("Weather"))
#                 .alias("Weather")
#                 )
     
#      # For the non-indoor games with null values for weather, temperature was used as a divider to maintain the percentage of games that were clear/cloudy: above 70 degrees is filled as Clear, and all remaining nulls are filled as Cloudy
#      df = df.with_columns(
#                 pl.when(pl.col("Temperature") > 70)
#                 .then(pl.col("Weather").fill_null("Clear"))
#                 .otherwise(pl.col("Weather"))
#                 .alias("Weather")
#                 )
#      df = df.with_columns(pl.col("Weather").fill_null("Cloudy"))

#      return df


# # Handles the nulls introduced by the left join of the plays and injuries tables
# def injury_cleaner(quals):
#     import polars as pl
#     quals = quals.filter(pl.col('Play_Type').is_not_null()) # 0.14% of rows did not have a play type, and ALL of these were non-injury plays, so they were removed

#     quals = quals.with_columns(pl.col("Body_Part").fill_null("No_Injury")) # This fills all null from the join with No Injury

#     quals = quals.with_columns(
#     pl.col(["DM_1", "DM_7", "DM_28", "DM_42"]).fill_null(0)) # This fills the nulls from the Join with 0s, since there were no injuries.

#     return quals


# # This will standardize the types of Turf for the FieldType to either natural or synthetic

# def turf_cleaner(df):
#     ''' 
#     Changes the many different types of turf listed into either natural or synthetic
#     '''
#     import polars as pl

#     turf_dict = {
#         'Grass': 'Natural'
#         , 'Field Turf': 'Synthetic'
#         , 'Natural Grass': 'Natural'
#         , 'grass': 'Natural'
#         , 'Artificial': 'Synthetic'
#         , 'FieldTurf': 'Synthetic'
#         , 'DD GrassMaster': 'Synthetic'
#         , 'A-Turf Titan': 'Synthetic'
#         , 'UBU Sports Speed S5-M': 'Synthetic'
#         , 'UBU Speed Series S5-M': 'Synthetic'
#         , 'Artifical': 'Synthetic'
#         , 'UBU Speed Series-S5-M': 'Synthetic'
#         , 'FieldTurf 360': 'Synthetic'
#         , 'Natural grass': 'Natural'
#         , 'Field turf': 'Synthetic'
#         , 'Natural': 'Natural'
#         , 'Natrual Grass': 'Natural'
#         , 'Synthetic': 'Synthetic'
#         , 'Natural Grass ': 'Natural'
#         , 'Naturall Grass': 'Natural'
#         , 'FieldTurf360': 'Synthetic'
#         , None: 'Natural'  # The only field with null values is Miami Gardens, which has Natural
#         }

    
#     df = df.with_columns(pl.col("Field_Type").replace(turf_dict))
#     return df

# def score_splitter(df):
#     ''' 
#     Splits the string column from Score_Home_Visiting into two numeric columns for each of the scores. It also creates a column that calculates the difference. 
#     '''
#     import polars as pl

#     df = df.with_columns([
#         pl.col("Score_Home_Visiting").str.extract(r"(\d+)\s*-\s*(\d+)", 1).cast(pl.Int16).alias("Home_Score")
#         , pl.col("Score_Home_Visiting").str.extract(r"(\d+)\s*-\s*(\d+)", 2).cast(pl.Int16).alias("Visiting_Score") # Find difference between scores
#         ])

#     df = df.with_columns([
#         (pl.col("Home_Score") - pl.col("Visiting_Score")).cast(pl.Int16).alias("Score_Difference")
#         ])
    
#     df = df.drop("Score_Home_Visiting")
    
#     return df

In [5]:
# def transform_injury_data(output):
#     from DataHandler import data_loader, data_shrinker, data_writer

#     valid_outputs = ['tracking', 'summary']
#     if output not in valid_outputs:
#         raise ValueError(f"Invalid output selection: '{output}'. Valid options are: '{valid_outputs}'")

#     try: 
#         # Transform the tracking data
#         quant = data_loader(dataset='tracking', database='nfl_surface')
#         quant = data_shrinker(quant)
#         quant = angle_corrector(quant)
#         quant = body_builder(quant, 'tracking')
#         quant = velocity_calculator(quant)
#         quant = impulse_calculator(quant)
    
#         if output == 'summary':
#             summary = path_calculator(quant)
#             del quant # remove the large table from memory
#             # Open and merge the qualitative data
#             quals = data_loader('qualitative', 'nfl_surface')
#             qual_quant = qual_quant_merger(quals, summary)
            
#             print("Writing all quantitative and qualitative summary data to the database as summary_data. Wait.")
#             data_writer(qual_quant, 'nfl_surface', 'summary_data')
#             print("Data has been uploaded to the database. Probably.")        

#         elif output == 'tracking':
#             # upload the physical data to the database for machine learning
#             print("Writing the transformed table with physical parameters to the database as quantitative")
#             data_writer(quant, 'nfl_surface', 'quantitative')
#             print("Data has been uploaded to the database. Go celebrate!")
        
#     except Exception as e:
#         print(f"An error occurred with your selection, '{output}': {e}")
#         return None


       
# def transform_concussion_data(output):
#     from DataHandler import data_loader, data_shrinker, data_writer

#     valid_outputs = ['tracking', 'summary']
#     if output not in valid_outputs:
#         raise ValueError(f"Invalid output selection: '{output}'. Valid options are: '{valid_outputs}'")

#     try: 
#         track = data_loader(dataset='ngs_data', database='nfl_concussion')
#         track = data_shrinker(track)
#         track = column_corrector(track)
#         track = angle_corrector(track)
#         track = body_builder(track, 'ngs_data')
#         track = velocity_calculator(track)
#         track = impulse_calculator(track)
    
#         if output == 'summary':
#             summary =  path_calculator(track)
#             del track # remove the large table from memory
#             # Open and merge the qualitative data
#             quals = data_loader('qualitative', 'nfl_concussion')
#             qual_quant = qual_quant_merger(quals, summary)
            
#             print("Writing all quantitative and qualitative summary data to the database as summary_data. Wait.")
#             data_writer(qual_quant, 'nfl_concussion', 'summary_data')
#             print("Data has been uploaded to the database. Probably.")        

#         elif output == 'tracking':
#             # upload the physical data to the database for machine learning
#             print("Writing the transformed table with physical parameters to the database as quantitative")
#             data_writer(track, 'nfl_concussion', 'quantitative')
#             print("Data has been uploaded to the database. Good for you!")
        
#     except Exception as e:
#         print(f"An error occurred with your selection, '{output}': {e}")
#         return None



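# # NOTE: earlier, parameterless draft of transform_concussion_data. If uncommented, it would replace the version above that takes `output`.
# def transform_concussion_data():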
#     from DataHandler import data_loader, data_shrinker, data_writer

#     track = data_loader(dataset='ngs_data', database='nfl_concussion')
#     track = data_shrinker(track)
#     track = column_corrector(track)
#     track = angle_corrector(track)
#     track = body_builder(track, 'ngs_data')
#     track = velocity_calculator(track)
#     track = impulse_calculator(track)
#     summary = path_calculator(track)
#     # transform the ngs_data







# #############################################
# def column_corrector(df):
#     import polars as pl
#     """
#     Build a PlayKey column (gsisid-gamekey-playid) and add a 'time' column that acts like the 'time' column did in the injury dataset. 
#     Within each PlayKey, 'time' will start at 0.0 and increase by 0.1 for each subsequent record.
#     """
#     df = df.with_columns([
#         pl.concat_str([
#             pl.col('gsisid').cast(pl.Int32).cast(pl.Utf8)
#             , pl.lit('-')
#             , pl.col('gamekey').cast(pl.Utf8)
#             , pl.lit('-')
#             , pl.col('playid').cast(pl.Utf8)
#         ]).alias('PlayKey')
#     ])
     
    
#     df = df.select([
#         'PlayKey'
#         , 'time'
#         , 'x'
#         , 'y'
#         , 'o'
#         , 'dir'
#         , 'gsisid'
#         ]).rename({"time":"datetime"})

#     df = df.sort(['PlayKey', 'datetime'])

#     df = df.with_columns(
#         (pl.arange(0, pl.len()) * 0.1).over("PlayKey").alias("time")
#         ).with_columns([pl.col('gsisid').cast(pl.Int32)])  
    
#     return df



# def calculate_angle_difference(angle1, angle2):
#     import numpy as np
#     """
#     Calculate the smallest angle difference between two angles 
#     using trigonometric functions, accounting for edge cases.
#     """
#     sin_diff = np.sin(np.radians(angle2 - angle1))
#     cos_diff = np.cos(np.radians(angle2 - angle1))
#     return np.degrees(np.arctan2(sin_diff, cos_diff))

# def angle_corrector(df):
#     import polars as pl
#     """
#     Wrap 'dir' and 'o' into the range [-180, 180) to reduce fringe errors at 360, 
#     and add Angle_Diff, the absolute smallest angle difference between them
#     """
#     df = df.with_columns([
#         ((pl.col("dir") + 180) % 360 - 180).alias("dir")
#         , ((pl.col("o") + 180) % 360 - 180).alias("o")
#     ]).with_columns(
#         (calculate_angle_difference(pl.col("dir"), pl.col("o"))).abs().round(2).alias("Angle_Diff")
#         )
    
#     return df


# def body_builder(df, df_name):
#     body_data = pl.DataFrame({
#         "position": ["QB", "RB", "FB", "WR", "TE", "T", "G", "C", "DE", "DT", "NT", "LB", "OLB", "MLB", "CB", "S", "K", "P", "SS", "ILB", "FS", "LS", "DB"]
#         # , "Position_Name": ["Quarterback", "Running Back", "Fullback", "Wide Receiver", "Tight End", "Tackle", "Guard", "Center", "Defensive End", "Defensive Tackle", "Nose Tackle", "Linebacker", "Outside Linebacker", "Middle Linebacker", "Cornerback", "Safety", "Kicker", "Punter", "Strong Safety", "Inside Linebacker", "Free Safety", "Long Snapper", "Defensive Back"]
#         , "Height_m": [1.91, 1.79, 1.85, 1.88, 1.96, 1.97, 1.90, 1.87, 1.97, 1.92, 1.88, 1.90, 1.90, 1.87, 1.82, 1.84, 1.83, 1.88, 1.84, 1.90, 1.84, 1.88, 1.82]
#         , "Weight_kg": [102.1, 95.3, 111.1, 90.7, 114.6, 140.6, 141.8, 136.1, 120.2, 141.8, 152.0, 110.0, 108.9, 113.4, 87.4, 95.9, 92.08, 97.52, 95.9, 110.0, 95.9, 108.86, 87.4]
#         , "Chest_rad_m": [0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191, 0.191]
#         })

#     valid_df_names = ['ngs_data', 'tracking']
#     if df_name not in valid_df_names:
#         raise ValueError(f"Invalid dataframe name '{df_name}'. Valid options are: {valid_df_names}")

#     try: 
#         if df_name == 'ngs_data':
#             position = data_loader(dataset='positions', database='nfl_concussion')
#             position = position.join(
#                 body_data
#                 , left_on='position'
#                 , right_on='position'
#                 , how='left'
#                 )
            
#             df = df.join(
#                 position
#                 , on='gsisid'
#                 , how='left'
#                 ).drop_nulls(subset=['position'])
            

#         elif df_name == 'tracking':
#             position = data_loader(dataset='play_positions', database='nfl_surface')
#             position = position.join(
#                 body_data
#                 , left_on='position'
#                 , right_on='position'
#                 , how='left'
#                 )

#             df = df.join(
#                 position
#                 , left_on='PlayKey'
#                 , right_on='playkey'
#                 , how='left'
#             ).drop_nulls(subset=['position']).drop(['event'])

            

#         return df    
    
#     except Exception as e: 
#         print(f"An error occurred while loading the dataframe '{df_name}': {e}")
#         return None




# def velocity_calculator(df):
#     import numpy as np
#     import polars as pl
#     """
#     Using the (X,Y) and time columns, perform calculations based on the 
#     difference between two rows to find displacement, speed, direction 
#     of motion, velocity in x and y components, and the angular velocities 
#     of the direction of motion and orientations 
#     """
    
#     return df.with_columns([
#         # Convert 'o' and 'dir' to radians
#         (pl.col("o") * np.pi / 180).alias("o_rad"),
#         (pl.col("dir") * np.pi / 180).alias("dir_rad")
#     ]).with_columns([
#         # Pre-calculate shifted values
#         pl.col("x").shift(1).over("PlayKey").alias("prev_x")
#         , pl.col("y").shift(1).over("PlayKey").alias("prev_y")
#         # , pl.col("time").shift(1).over("PlayKey").alias("prev_time")
#         , pl.col("dir_rad").shift(1).over("PlayKey").alias("prev_dir")
#         , pl.col("o_rad").shift(1).over("PlayKey").alias("prev_o")
#     ]).with_columns([
#         # Calculate the component displacements 
#           (pl.col("x") - pl.col("prev_x")).alias("dx")
#         , (pl.col("y") - pl.col("prev_y")).alias("dy")
#     ]).with_columns([
#         # Calculate displacement
#         ((pl.col("dx")**2 + pl.col("dy")**2)**0.5).alias("Displacement")
#     ]).with_columns([
#         # Calculate speed
#         (pl.col("Displacement") / 0.1).alias("Speed")
#         # Calculate direction
#         , (np.degrees(np.arctan2(pl.col("dx"), pl.col("dy")))).alias("Direction")
#         # Calculate velocity components
#         , (pl.col("dx") / 0.1).alias("vx")
#         , (pl.col("dy") / 0.1).alias("vy")
#         # Calculate angular velocities
#         , ((pl.col("dir_rad") - pl.col("prev_dir")) / 0.1).alias("omega_dir")
#         , ((pl.col("o_rad") - pl.col("prev_o")) / 0.1).alias("omega_o")
#     ]).with_columns([
#         ((pl.col("omega_dir") - pl.col("omega_o")).abs()).alias("omega_diff")
#     ]).drop([
#         "prev_x", "prev_y", "prev_dir", "prev_o", "dx", "dy", "o_rad", "dir_rad"
#     ])


# def impulse_calculator(df):
#     import numpy as np
#     import polars as pl
#     """
#     Using the velocity components and player mass, perform calculations based on the velocities and changes 
#     in velocities to get the momentum and impulse, a measure that can 
#     be assessed along with medical data related to concussions and injuries
#     """
    
#     return df.with_columns([
#         # Calculate the linear momentum for each instant
#         (pl.col('vx') * pl.col('Weight_kg')).alias('px')
#         , (pl.col('vy') * pl.col('Weight_kg')).alias('py')

#         # Calculate the moment of inertia of a rotating upright body (1/12 mr^2)
#         , (1/12 * pl.col('Weight_kg') * (pl.col('Chest_rad_m')**2)).alias('moment')
        
#         # Calculate the moment of inertia of the upper body turning upright with respect to waist (70% mass)
#         , (1/12 * (pl.col('Weight_kg')*0.7) * (pl.col('Chest_rad_m')**2)).alias('moment_upper')
    
#     ]).with_columns([
#           # Calculate the magnitude of linear momentum
#         ((pl.col("px")**2 + pl.col("py")**2)**0.5).alias("p_magnitude")
        
#         # Calculate the angular momentum for the direction
#         , (pl.col('omega_dir')*pl.col('moment')).alias('L_dir')

#         # Calculate the angular momentum of the upper body with respect to lower
#         , (pl.col('omega_diff')*pl.col('moment_upper')).alias('L_diff')


#     ]).with_columns([
#         # Pre-calculate shifted values for linear and angular momenta
#         pl.col("px").shift(1).over("PlayKey").alias("prev_px")
#         , pl.col("py").shift(1).over("PlayKey").alias("prev_py")
#         , pl.col("L_dir").shift(1).over("PlayKey").alias("prev_L_dir")
#         , pl.col("L_diff").shift(1).over("PlayKey").alias("prev_L_diff")
        
#     ]).with_columns([
#         # Calculate impulse, J, which is the change in linear momentum 
#         ((pl.col("px") - pl.col("prev_px"))).alias("Jx")
#         , ((pl.col("py") - pl.col("prev_py"))).alias("Jy")
        
#     ]).with_columns([
#           # Calculate the magnitude of the impulse
#         ((pl.col("Jx")**2 + pl.col("Jy")**2)**0.5).alias("J_magnitude")

#         # Calculate torque as the change in angular momentum L over the change in time
#         , (((pl.col("L_dir") - pl.col("prev_L_dir"))) / 0.1).alias("torque")
#         , (((pl.col("L_diff") - pl.col("prev_L_diff"))) / 0.1).alias("torque_internal")

#     ]).drop([
#         "prev_L_dir", "prev_px", "prev_py", "prev_L_diff"
#     ])


# def path_calculator(df):
#     import polars as pl
#     # This provides a summary table that can be integrated with the qualitative data

#     # Calculate the total distance, the start/end coordinates, and the max/mean
#     # of each physical measure for each PlayKey
#     result = df.select([
#         "PlayKey"
#         , pl.col("Displacement").sum().over("PlayKey").alias("Distance")
#         , pl.col("x").first().over("PlayKey").alias("start_x")
#         , pl.col("y").first().over("PlayKey").alias("start_y")
#         , pl.col("x").last().over("PlayKey").alias("end_x")
#         , pl.col("y").last().over("PlayKey").alias("end_y")
#         , pl.col("Angle_Diff").max().over("PlayKey").alias("Max_Angle_Diff")
#         , pl.col("Angle_Diff").mean().over("PlayKey").alias("Mean_Angle_Diff")
#         , pl.col("Speed").max().over("PlayKey").alias("Max_Speed")
#         , pl.col("Speed").mean().over("PlayKey").alias("Mean_Speed")
#         , pl.col("J_magnitude").max().over("PlayKey").alias("Max_Impulse")
#         , pl.col("J_magnitude").mean().over("PlayKey").alias("Mean_Impulse")
#         , pl.col("torque").max().over("PlayKey").alias("Max_Torque")
#         , pl.col("torque").mean().over("PlayKey").alias("Mean_Torque")
#         , pl.col("torque_internal").max().over("PlayKey").alias("Max_Int_Torque")
#         , pl.col("torque_internal").mean().over("PlayKey").alias("Mean_Int_Torque")

#         ]).unique(subset=["PlayKey"])


#     # Calculate the displacement
#     result = result.with_columns([
#         (((pl.col("end_x") - pl.col("start_x"))**2 + 
#           (pl.col("end_y") - pl.col("start_y"))**2)**0.5)
#         .alias("Displacement")
#         ]).with_columns([
#             (pl.col("Distance") - pl.col("Displacement")).alias("Path_Diff")
#         ])

     
#     # Select only the required columns
#     result = result.select([
#         'PlayKey'
#         , 'Distance'
#         , 'Displacement'
#         , 'Path_Diff'
#         , 'Max_Angle_Diff'
#         , 'Mean_Angle_Diff'
#         , 'Max_Speed'
#         , 'Mean_Speed'
#         , 'Max_Impulse'
#         , 'Mean_Impulse'
#         , 'Max_Torque'
#         , 'Mean_Torque'
#         , 'Max_Int_Torque'
#         , 'Mean_Int_Torque'
      
#     ]).sort("PlayKey")


#     return result

# # Join the Qualitative with the Quantitative to create Summary Table
# def qual_quant_merger(quals, quant):
#     from DataHandler import data_shrinker
#     qual_quant = quals.join(quant, on="PlayKey", how="left")
#     qual_quant = data_shrinker(qual_quant)

#     return qual_quant