In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the competition CSV files.
# All files are expected under DATA_DIR, relative to the notebook's working directory.
DATA_DIR = 'nfl-big-data-bowl-2024'

# Tracking files are named tracking_week_<N>.csv.
# Adjust the range based on the actual weeks in your dataset.
TRACKING_WEEKS = range(1, 10)

# Import Game data
games_data = pd.read_csv(f'{DATA_DIR}/games.csv')

# Import Play data
plays_data = pd.read_csv(f'{DATA_DIR}/plays.csv')

# Import Player data
players_data = pd.read_csv(f'{DATA_DIR}/players.csv')

# Import Tackles data
tackles_data = pd.read_csv(f'{DATA_DIR}/tackles.csv')

# Import all weeks of Tracking data into a single frame.
# ignore_index=True gives the combined frame a unique 0..n-1 index instead of
# repeating each weekly file's own row labels.
tracking_data_all_weeks = pd.concat(
    (pd.read_csv(f'{DATA_DIR}/tracking_week_{week}.csv') for week in TRACKING_WEEKS),
    ignore_index=True,
)

# Later cells refer to the tracking frame as `tracking_data`; bind that name here
# so they work after a kernel restart without relying on a later cell.
tracking_data = tracking_data_all_weeks

In [7]:
# Preview the first rows of the loaded frames.
# NOTE: a notebook cell renders only the value of its last expression, so only
# plays_data.head() appears in the output; the two earlier previews are
# evaluated and discarded.
games_data.head()
players_data.head()
plays_data.head()

Unnamed: 0,gameId,playId,ballCarrierId,ballCarrierDisplayName,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,...,preSnapHomeTeamWinProbability,preSnapVisitorTeamWinProbability,homeTeamWinProbabilityAdded,visitorTeamWinProbilityAdded,expectedPoints,expectedPointsAdded,foulName1,foulName2,foulNFLId1,foulNFLId2
0,2022100908,3537,48723,Parker Hesse,(7:52) (Shotgun) M.Mariota pass short middle t...,4,1,10,ATL,TB,...,0.976785,0.023215,-0.00611,0.00611,2.360609,0.981955,,,,
1,2022091103,3126,52457,Chase Claypool,(7:38) (Shotgun) C.Claypool right end to PIT 3...,4,1,10,PIT,CIN,...,0.160485,0.839515,-0.010865,0.010865,1.733344,-0.263424,,,,
2,2022091111,1148,42547,Darren Waller,(8:57) D.Carr pass short middle to D.Waller to...,2,2,5,LV,LAC,...,0.756661,0.243339,-0.037409,0.037409,1.312855,1.133666,,,,
3,2022100212,2007,46461,Mike Boone,(13:12) M.Boone left tackle to DEN 44 for 7 ya...,3,2,10,DEN,LV,...,0.620552,0.379448,-0.002451,0.002451,1.641006,-0.04358,,,,
4,2022091900,1372,47857,Devin Singletary,(8:33) D.Singletary right guard to TEN 32 for ...,2,1,10,BUF,TEN,...,0.83629,0.16371,0.001053,-0.001053,3.686428,-0.167903,,,,


In [None]:
def calculate_average_speed(tracking_data, nflId, frames_per_second=10):
    """Return a player's average speed while in motion, in distance units per second.

    The total of the player's per-frame ``dis`` values is divided by the time
    spent in motion. Time in motion is the number of frames with ``s > 0``
    (and a non-null ``time``) converted to seconds via ``frames_per_second``.

    Args:
        tracking_data: DataFrame with ``nflId``, ``dis``, ``s`` and ``time`` columns.
            ``dis`` is assumed to be yards travelled since the previous frame.
        nflId: Identifier of the player to evaluate.
        frames_per_second: Sampling rate of the tracking data. The default of 10
            assumes the 10 Hz tracking feed -- TODO confirm against the dataset
            description and override if the data is sampled differently.

    Returns:
        The average speed, or 0 when the player has no frames in motion
        (including when the player is absent from ``tracking_data``).
    """
    # Filter tracking data for the specific player
    player_tracking = tracking_data[tracking_data['nflId'] == nflId]

    # Total distance travelled by the player over all of their frames
    total_distance = player_tracking['dis'].sum()

    # Number of frames in which the player was moving
    frames_in_motion = player_tracking.loc[player_tracking['s'] > 0, 'time'].count()

    if frames_in_motion == 0:
        return 0

    # A frame count is not a duration: dividing by it directly would yield
    # distance per frame. Scaling by the frame rate turns it into distance per second.
    return total_distance * frames_per_second / frames_in_motion

# Example usage:
# Uses the tracking DataFrame loaded in the first cell ('tracking_data_all_weeks').

# 48723 is the ballCarrierId of the first row in the plays_data preview; it is
# assumed to be a valid nflId. Replace it with the nflId of the player you want
# to calculate the average speed for.
nflId_to_calculate = 48723

average_speed = calculate_average_speed(tracking_data_all_weeks, nflId_to_calculate)
print(f"Average speed for player with nflId {nflId_to_calculate}: {average_speed} yards/second")


In [None]:
def calculate_normalized_aggression_score(tackles_data, nflId):
    """Score a player's tackling record, normalized by their number of tackle rows.

    The score is ``(tackles + assists + forced fumbles - missed tackles)``
    divided by the number of rows the player has in ``tackles_data``.

    Args:
        tackles_data: DataFrame with ``nflId``, ``tackle``, ``assist``,
            ``forcedFumble`` and ``pff_missedTackle`` columns.
        nflId: Identifier of the player to score.

    Returns:
        The normalized score, or ``None`` (after printing a notice) when the
        player has no rows in ``tackles_data``.
    """
    rows = tackles_data.loc[tackles_data['nflId'] == nflId]
    row_count = len(rows)

    # Guard clause: nothing to score for a player with no tackle records.
    if row_count == 0:
        print(f"Player with nflId {nflId} not found in the tackles data.")
        return None

    # Column totals for each event type that feeds the score.
    totals = {
        column: rows[column].sum()
        for column in ('tackle', 'assist', 'forcedFumble', 'pff_missedTackle')
    }

    # Positive events add to the score, missed tackles subtract from it.
    net_events = (totals['tackle'] + totals['assist'] + totals['forcedFumble']
                  - totals['pff_missedTackle'])
    return net_events / max(1, row_count)


# Example usage:
# Uses the tackles DataFrame loaded in the first cell ('tackles_data').

# 48723 is the ballCarrierId of the first row in the plays_data preview; it is
# assumed to be a valid nflId. Replace it with the nflId of the player you want
# to calculate the normalized aggression score for.
nflId_to_calculate_aggression_score = 48723

player_aggression_score = calculate_normalized_aggression_score(
    tackles_data, nflId_to_calculate_aggression_score)
if player_aggression_score is not None:
    print(
        f"Normalized Aggression Score for player with nflId {nflId_to_calculate_aggression_score}: {player_aggression_score:.2f}"
    )

In [None]:
def convert_height_to_meters(height):
    """Convert a ``'feet-inches'`` string (for example ``'6-2'``) to meters."""
    parts = height.split('-')
    feet, inches = int(parts[0]), int(parts[1]) if len(parts) == 2 else map(int, parts)
    total_inches = feet * 12 + inches  # 1 foot = 12 inches
    return total_inches * 0.0254  # 1 inch = 0.0254 meters


def calculate_bmi(player_data, nflId):
    """Compute a player's body-mass index from their listed weight and height.

    Args:
        player_data: DataFrame with ``nflId``, ``weight`` (converted here from
            pounds) and ``height`` (a ``'feet-inches'`` string) columns.
        nflId: Identifier of the player to look up.

    Returns:
        BMI in kg/m^2 for the first matching row, or ``None`` (after printing
        a notice) when no row matches ``nflId``.
    """
    matches = player_data[player_data['nflId'] == nflId]

    if len(matches) == 0:
        print(f"Player with nflId {nflId} not found.")
        return None

    # Only the first matching row is used.
    weight_lb = matches['weight'].values[0]
    height_text = matches['height'].values[0]

    weight_kg = weight_lb * 0.453592  # pounds -> kilograms
    height_meters = convert_height_to_meters(height_text)

    return weight_kg / (height_meters ** 2)

# Example usage:
# Uses the player DataFrame loaded in the first cell ('players_data').

# 48723 is the ballCarrierId of the first row in the plays_data preview; it is
# assumed to be a valid nflId. Replace it with the nflId of the player you want
# to calculate the BMI for.
nflId_to_calculate_bmi = 48723

player_bmi = calculate_bmi(players_data, nflId_to_calculate_bmi)
if player_bmi is not None:
    print(f"BMI for player with nflId {nflId_to_calculate_bmi}: {player_bmi:.2f}")

In [None]:
def count_interceptions(player_data, play_data, ball_carrier_id=None):
    """Count plays with ``passResult == 'IN'`` where the player is the ball carrier.

    Two call shapes are accepted:

    * ``count_interceptions(play_data, ball_carrier_id)`` -- the form used by
      every caller in this notebook.
    * ``count_interceptions(player_data, play_data, ball_carrier_id)`` -- the
      original three-argument form; ``player_data`` is not used.

    Args:
        player_data: Unused in the three-argument form. In the two-argument
            form this position carries the play DataFrame.
        play_data: DataFrame with ``ballCarrierId`` and ``passResult`` columns
            (or the ball carrier id in the two-argument form).
        ball_carrier_id: Identifier of the player to count interceptions for.

    Returns:
        The number of matching plays.
    """
    # The leading `player_data` parameter was never used, yet it made every
    # two-argument call raise TypeError. Detect that call shape and shift the
    # arguments into their intended slots.
    if ball_carrier_id is None and not isinstance(play_data, pd.DataFrame):
        play_data, ball_carrier_id = player_data, play_data

    # Filter play data for interceptions where the specified player is the ball carrier
    is_carrier = play_data['ballCarrierId'] == ball_carrier_id
    is_interception = play_data['passResult'] == 'IN'

    return len(play_data[is_carrier & is_interception])

# Example usage:
# Uses the play DataFrame loaded in the first cell ('plays_data').

# 48723 is the ballCarrierId of the first row in the plays_data preview.
# Replace it with the ballCarrierId of the player you want to analyze.
ballCarrierId_to_analyze_interceptions = 48723

interceptions_count = count_interceptions(plays_data, ballCarrierId_to_analyze_interceptions)
print(f"Number of interceptions gotten by player with ballCarrierId {ballCarrierId_to_analyze_interceptions}: {interceptions_count}")


In [None]:
def count_passes_caught(play_data, ball_carrier_id, pass_result='C'):
    """Count plays where the player is the ball carrier and the pass result matches.

    Args:
        play_data: DataFrame with ``ballCarrierId`` and ``passResult`` columns.
        ball_carrier_id: Identifier of the player to count plays for.
        pass_result: ``passResult`` value to match; defaults to ``'C'``.

    Returns:
        The number of matching plays.
    """
    carried_by_player = play_data['ballCarrierId'] == ball_carrier_id
    has_requested_result = play_data['passResult'] == pass_result

    # Both conditions must hold on the same row.
    matching_plays = play_data[carried_by_player & has_requested_result]
    return len(matching_plays)

# Example usage:
# Uses the play DataFrame loaded in the first cell ('plays_data').

# 48723 is the ballCarrierId of the first row in the plays_data preview.
# Replace it with the ballCarrierId of the player you want to analyze.
ballCarrierId_to_analyze = 48723

passes_caught = count_passes_caught(plays_data, ballCarrierId_to_analyze)
print(f"Number of passes caught by player with ballCarrierId {ballCarrierId_to_analyze}: {passes_caught}")


In [None]:
def calculate_average_time_per_game(play_data, tracking_data, ball_carrier_id):
    """Average, over games, of the total tracked duration of a player's ball-carrying plays.

    For every play in which the player is the ball carrier, the play's duration
    is taken as the span between the earliest and latest ``time`` value among
    that play's tracking rows. Durations are summed per game and the per-game
    totals are averaged. This approximates carry time by the tracked length of
    each play.

    Args:
        play_data: DataFrame with ``gameId``, ``playId`` and ``ballCarrierId`` columns.
        tracking_data: DataFrame with ``gameId``, ``playId`` and ``time`` columns.
            ``time`` may be a timestamp (string or datetime) or a numeric value;
            numeric values are assumed to already be in seconds -- TODO confirm.
        ball_carrier_id: Identifier of the player to analyze.

    Returns:
        The average number of seconds per game, or ``None`` when the player has
        no plays in ``play_data`` (a notice is printed) or none of those plays
        has tracking rows.
    """
    # Filter play data for plays where the specified player is the ball carrier
    player_plays = play_data[play_data['ballCarrierId'] == ball_carrier_id]

    if len(player_plays) == 0:
        print(f"Player with nflId {ball_carrier_id} not found in the play data.")
        return None

    # Attach the tracking timestamps of each of the player's plays.
    play_keys = player_plays[['gameId', 'playId']].drop_duplicates()
    merged_data = pd.merge(
        play_keys,
        tracking_data[['gameId', 'playId', 'time']],
        on=['gameId', 'playId'],
        how='inner',
    )
    if merged_data.empty:
        return None

    # Taking max() of raw timestamps and averaging them does not measure a
    # duration (and fails outright on string timestamps), so parse the column
    # and work with per-play time spans instead.
    time_is_numeric = pd.api.types.is_numeric_dtype(merged_data['time'])
    if not time_is_numeric:
        merged_data = merged_data.assign(time=pd.to_datetime(merged_data['time']))

    play_bounds = merged_data.groupby(['gameId', 'playId'])['time'].agg(['min', 'max'])
    play_seconds = play_bounds['max'] - play_bounds['min']
    if not time_is_numeric:
        play_seconds = play_seconds.dt.total_seconds()

    # Total per game, then average across the games the player appeared in.
    seconds_per_game = play_seconds.groupby(level='gameId').sum()
    return seconds_per_game.mean()

# Example usage:
# Uses the play DataFrame ('plays_data') and the tracking DataFrame
# ('tracking_data_all_weeks') loaded in the first cell.

# 48723 is the ballCarrierId of the first row in the plays_data preview.
# Replace it with the nflId of the player you want to analyze.
nflId_to_analyze_average_time = 48723

average_time_per_game = calculate_average_time_per_game(plays_data, tracking_data_all_weeks, nflId_to_analyze_average_time)
if average_time_per_game is not None:
    print(f"Average time per game for player with nflId {nflId_to_analyze_average_time}: {average_time_per_game:.2f} seconds")


In [None]:
def count_passes_thrown(play_data, passer_id):
    """Count plays with ``passResult == 'C'`` whose ``passer_player_id`` is the player.

    Only completed passes are counted; plays with any other ``passResult``
    are ignored.

    Args:
        play_data: DataFrame with ``passResult`` and ``passer_player_id`` columns.
        passer_id: Identifier of the passer to count plays for.

    Returns:
        The number of matching plays.
    """
    is_completion = play_data['passResult'] == 'C'
    thrown_by_player = play_data['passer_player_id'] == passer_id

    # Both conditions must hold on the same row.
    matching_plays = play_data.loc[is_completion & thrown_by_player]
    return len(matching_plays)

# Example usage:
# Uses the play DataFrame loaded in the first cell ('plays_data').

# 48723 is the ballCarrierId of the first row in the plays_data preview, used
# here only so the cell runs. Replace it with the passer_player_id of the
# player you want to analyze.
passerId_to_analyze_passes_thrown = 48723

passes_thrown_count = count_passes_thrown(plays_data, passerId_to_analyze_passes_thrown)
print(f"Number of passes thrown by player with passer_player_id {passerId_to_analyze_passes_thrown}: {passes_thrown_count}")


In [None]:
# Data loading for the metrics table.
# Paths and tracking weeks match the first cell of this notebook, so both
# cells read the same files.
games_data = pd.read_csv('nfl-big-data-bowl-2024/games.csv')
plays_data = pd.read_csv('nfl-big-data-bowl-2024/plays.csv')
players_data = pd.read_csv('nfl-big-data-bowl-2024/players.csv')
tackles_data = pd.read_csv('nfl-big-data-bowl-2024/tackles.csv')
# ignore_index=True gives the combined frame a unique 0..n-1 index instead of
# repeating each weekly file's own row labels.
tracking_data = pd.concat(
    [pd.read_csv(f'nfl-big-data-bowl-2024/tracking_week_{week}.csv') for week in range(1, 10)],
    ignore_index=True,
)

# Functions for calculating metrics
#
# calculate_bmi, calculate_average_speed, calculate_normalized_aggression_score,
# count_passes_caught, count_interceptions, count_passes_thrown and
# calculate_average_time_per_game are defined in earlier cells and are reused
# as they are. Re-declaring them here with comment-only bodies is a syntax
# error, and giving those stubs a body would shadow the working versions.
# Only calculate_player_age has no earlier definition, so it is implemented here.
def calculate_player_age(player_data, nflId, reference_date=None):
    """Return a player's age in years on ``reference_date``.

    Args:
        player_data: DataFrame with an ``nflId`` column and a birth-date column.
            The birth date is assumed to live in a ``birthDate`` column --
            TODO confirm against players.csv.
        nflId: Identifier of the player to look up.
        reference_date: Date on which to evaluate the age (anything
            ``pd.Timestamp`` accepts). Defaults to the current date.

    Returns:
        Age in years as a float (days / 365.25), or ``None`` when the player is
        not found, the ``birthDate`` column is absent, or the birth date is
        missing or unparseable.
    """
    player_info = player_data[player_data['nflId'] == nflId]
    if len(player_info) == 0 or 'birthDate' not in player_info.columns:
        return None

    # errors='coerce' turns unparseable values into NaT instead of raising.
    birth_date = pd.to_datetime(player_info['birthDate'].iloc[0], errors='coerce')
    if pd.isna(birth_date):
        return None

    if reference_date is None:
        reference = pd.Timestamp.now()
    else:
        reference = pd.Timestamp(reference_date)

    return (reference - birth_date).days / 365.25

# Function to create a table of players with metrics
def create_players_table(players_data, plays_data, tackles_data, tracking_data):
    """Build one row of metrics per player in ``players_data``.

    Each metric is computed by the corresponding helper function for the
    player's ``nflId``; the player's position is copied through so the table
    can be used as a labeled dataset.

    Args:
        players_data: DataFrame with one row per player, including ``nflId``.
            The position label is read from a ``position`` column if present --
            TODO confirm the column name against players.csv.
        plays_data: Play DataFrame passed to the pass/interception/carry-time helpers.
        tackles_data: Tackle DataFrame passed to the aggression-score helper.
        tracking_data: Tracking DataFrame passed to the speed and carry-time helpers.

    Returns:
        DataFrame with columns ``nflId``, ``Position``, ``BMI``, ``Age``,
        ``Average Speed``, ``Aggression Score``, ``Passes Caught``,
        ``Interceptions``, ``Passes Thrown`` and ``Average Ball Carrying Time``.
    """
    player_metrics = []

    for _, player in players_data.iterrows():
        nflId = player['nflId']

        player_metrics.append({
            'nflId': nflId,
            # The modelling cell uses 'Position' as its target variable, so the
            # label has to travel with the metrics. None when the column is absent.
            'Position': player.get('position'),
            'BMI': calculate_bmi(players_data, nflId),
            'Age': calculate_player_age(players_data, nflId),
            'Average Speed': calculate_average_speed(tracking_data, nflId),
            'Aggression Score': calculate_normalized_aggression_score(tackles_data, nflId),
            'Passes Caught': count_passes_caught(plays_data, nflId),
            'Interceptions': count_interceptions(plays_data, nflId),
            'Passes Thrown': count_passes_thrown(plays_data, nflId),
            'Average Ball Carrying Time': calculate_average_time_per_game(plays_data, tracking_data, nflId),
        })

    players_table = pd.DataFrame(player_metrics)
    return players_table

# Example usage: build the metrics table from the loaded DataFrames and print it.
players_table = create_players_table(
    players_data=players_data,
    plays_data=plays_data,
    tackles_data=tackles_data,
    tracking_data=tracking_data,
)
print(players_table)


In [None]:
# Train and evaluate a decision tree that predicts a player's position from
# the metrics table. 'players_table' must contain the metric columns listed
# below plus 'Position' as the target variable.

# Keep only fully populated rows; any row with a missing value is discarded.
players_table = players_table.dropna(axis=0)

# Feature matrix (X) and target vector (y)
features = players_table.loc[:, [
    'BMI',
    'Age',
    'Average Speed',
    'Aggression Score',
    'Passes Caught',
    'Interceptions',
    'Passes Thrown',
    'Average Ball Carrying Time',
]]
target = players_table['Position']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Fit the decision tree on the training split (fit() returns the estimator itself).
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict positions for the held-out rows
y_pred = clf.predict(X_test)

# Overall accuracy on the test split
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision, recall and F1
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
