In [1]:
# Import dependencies
import math
from urllib.parse import quote

import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

In [2]:
# Prompt for the database credentials without echoing them to the notebook output.
# Nothing is read from the environment; both values are typed in interactively.
from getpass import getpass

dbUser = getpass(prompt='Enter username')
dbPassword = getpass(prompt='Enter database password')

Enter username········
Enter database password········


In [3]:
# Read data
# Play-level and player-level lookup tables.
plays, players = (
    pd.read_csv(f'../Resources/{table}.csv') for table in ('plays', 'players')
)

# One tracking file per week; each keeps its own name because later cells
# refer to week1 ... week8 individually.
week1, week2, week3, week4, week5, week6, week7, week8 = (
    pd.read_csv(f'../Resources/week{number}.csv') for number in range(1, 9)
)

FileNotFoundError: [Errno 2] No such file or directory: '../Resources/plays.csv'

In [None]:
# Clean and concatenate week data
weekly_frames = [week1, week2, week3, week4, week5, week6, week7, week8]

# Tag every weekly frame (in place) with the week it came from so the
# combined table can still be sliced per week later on.
for week_number, weekly_frame in enumerate(weekly_frames, start=1):
    weekly_frame['week'] = week_number

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement. Like append, it keeps each frame's original row index.
tracking = pd.concat(weekly_frames)

In [None]:
# Slice plays for Red Zone plays: keep rows whose absoluteYardlineNumber is at most 30.
# NOTE(review): the cutoff of 30 presumably means "20-yard line plus a 10-yard
# end zone" measured from one end of the field only - confirm against the data
# dictionary that this captures red zone plays in both play directions.
inside_cutoff = plays['absoluteYardlineNumber'].le(30)
red_zone_plays = plays.loc[inside_cutoff]

In [None]:
# Slice tracking for Red Zone plays and merge possessionTeam.
# No `on=` is given, so pandas joins on every column name the two frames share.
# The right join keeps every red zone play, including any without tracking rows.
red_zone_play_keys = red_zone_plays[['gameId', 'playId', 'possessionTeam']]
red_zone_tracking = pd.merge(tracking, red_zone_play_keys, how='right')

In [None]:
# Slice for players and merge officialPosition.
# The inner join (on the columns the two frames share) drops every tracking row
# that has no matching entry in `players`.
player_positions = players[['nflId', 'officialPosition']]
red_zone_player_tracking = pd.merge(red_zone_tracking, player_positions, how='inner')

In [None]:
# Find tracking possession: 1 when the tracked row's team is the team in
# possession on that play, 0 otherwise (including rows where either side is missing).
# The comparison is cast in one vectorized step rather than calling int() per row.
red_zone_player_tracking['possession'] = (
    red_zone_player_tracking['team'] == red_zone_player_tracking['possessionTeam']
).astype('int64')

In [None]:
# Slice for QB and other players: split the tracking rows on officialPosition.
position = red_zone_player_tracking['officialPosition']

red_zone_qb_tracking = red_zone_player_tracking[position.eq('QB')]
red_zone_other_player_tracking = red_zone_player_tracking[position.ne('QB')]

In [None]:
# Generate proximity table
#
# For every frame of every red zone play, pair each QB with every other tracked
# player on that frame and record how far away that player is and in which
# direction. The work is done one week at a time to bound the size of the
# intermediate merge.

# Column layout of the final proximity table
PROXIMITY_COLUMNS = [
    'gameId',
    'playId',
    'frameId',
    'nflId',
    'officialPosition',
    'nflId2',
    'officialPosition2',
    'matchupOpposing',
    'distance',
    'angle'
]


def extract_angle(x_diff, y_diff):
    """Return the direction of the offset (x_diff, y_diff) in degrees.

    The angle is measured clockwise from the positive y axis, so (0, 1) -> 0,
    (1, 0) -> 90, (0, -1) -> 180 and (-1, 0) -> 270. A zero-length offset
    (both differences equal to 0) is reported as 180.

    Accepts scalars or equally-shaped numpy arrays and returns a numpy array of
    the same shape (0-d for scalar input). NaN inputs produce NaN.
    """
    # arctan2 handles x_diff == 0 directly, so no division (and no exception
    # handling around a division by zero) is needed.
    bearing = np.degrees(np.arctan2(x_diff, y_diff)) % 360
    # arctan2(0, 0) is 0; force the coincident-points case to 180.
    return np.where((x_diff == 0) & (y_diff <= 0), 180.0, bearing)


# Collect the per-week results and concatenate once at the end;
# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every iteration.
weekly_proximity = []

# Perform merge on each week slice
for week in red_zone_other_player_tracking['week'].unique():
    # Slice DataFrames for the week
    week_tracking_other_player = red_zone_other_player_tracking[red_zone_other_player_tracking['week'] == week]
    week_tracking_qb = red_zone_qb_tracking[red_zone_qb_tracking['week'] == week]

    # Pair each QB row with every other player on the same frame; columns of
    # the other player get the suffix '2'
    proximity = week_tracking_qb.merge(
        week_tracking_other_player[['gameId', 'playId', 'frameId', 'nflId', 'team', 'officialPosition', 'x', 'y']],
        on=['gameId', 'playId', 'frameId'],
        how='inner',
        suffixes=('', '2')
    )

    # Remove comparisons to self
    proximity = proximity[proximity['nflId'] != proximity['nflId2']]

    # Offset from the QB to the other player, as plain arrays
    x_diff = proximity['x2'].to_numpy() - proximity['x'].to_numpy()
    y_diff = proximity['y2'].to_numpy() - proximity['y'].to_numpy()

    # Add the derived columns on a new frame (avoids assigning into a filtered
    # slice) and keep only the columns of the results schema
    proximity = proximity.assign(
        distance=np.hypot(x_diff, y_diff),
        angle=extract_angle(x_diff, y_diff),
        # 1 when the two players are on different teams, else 0
        matchupOpposing=(proximity['team'] != proximity['team2']).astype('int64'),
    )[PROXIMITY_COLUMNS]

    weekly_proximity.append(proximity)

    print(f"Week {week} has been processed. {len(proximity)} matchups were stored.")

# Fall back to an empty, correctly-shaped frame when there was nothing to process
proximity_results = (
    pd.concat(weekly_proximity)
    if weekly_proximity
    else pd.DataFrame(columns=PROXIMITY_COLUMNS)
)

In [None]:
# Write result to SQL table
#
# pandas manages the table itself: `if_exists='replace'` drops any existing
# `redzoneqbproximity` table and recreates it from the DataFrame's columns and
# dtypes, so no separate DROP TABLE / CREATE TABLE is issued beforehand (a
# hand-written table would be discarded by the replace anyway).

# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# username or password cannot corrupt the connection URL.
db = create_engine(
    f"postgresql://{quote(dbUser, safe='')}:{quote(dbPassword, safe='')}"
    '@34.72.136.99:5432/big-data-bowl'
)

try:
    # Engine.begin() commits when the block succeeds, rolls back when it raises,
    # and always returns the connection to the pool.
    with db.begin() as conn:
        proximity_results.to_sql(
            'redzoneqbproximity',
            conn,
            if_exists='replace',
            method='multi',
            # Send the rows in batches instead of one enormous INSERT statement
            chunksize=10_000,
            index=False,
        )
finally:
    # Close the pooled connections once the upload is done (or has failed)
    db.dispose()

In [None]:
 # Export the proximity table as CSV, without the row index
 proximity_results.to_csv(path_or_buf='../Resources/redZoneQBProximity.csv', index=False)