In [1]:
# Import dependencies
import math
from urllib.parse import quote

import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine
from sqlalchemy.types import Float, Integer, String

In [2]:
# Prompt for the database credentials at run time so they are never saved in the notebook.
# getpass hides what is typed, which is why both prompts render as dots in the output.
from getpass import getpass

dbUser = getpass(prompt='Enter username')
dbPassword = getpass(prompt='Enter database password')

Enter username········
Enter database password········


In [3]:
# Read data
# Every input file lives in the same directory, so its location is defined once.
RESOURCES_DIR = '../Resources'

# Play-level and player-level lookup tables
plays = pd.read_csv(f'{RESOURCES_DIR}/plays.csv')
players = pd.read_csv(f'{RESOURCES_DIR}/players.csv')

# One tracking file per week (week1.csv ... week8.csv). The frames are unpacked into
# week1 ... week8 because the next cell refers to them by those names.
week1, week2, week3, week4, week5, week6, week7, week8 = (
    pd.read_csv(f'{RESOURCES_DIR}/week{week_number}.csv') for week_number in range(1, 9)
)

In [4]:
# Clean and concatenate week data
weekly_tracking = [week1, week2, week3, week4, week5, week6, week7, week8]

# Tag every row with the week its file represents (1-based, in list order).
# The column is written onto the weekly frames themselves, so week1 ... week8
# carry a 'week' column after this cell runs.
for week_number, week_frame in enumerate(weekly_tracking, start=1):
    week_frame['week'] = week_number

# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
# pd.concat is the supported replacement; with default arguments it stacks the
# frames in order and keeps each frame's own row index.
tracking = pd.concat(weekly_tracking)

In [5]:
# Slice plays for Red Zone plays
# Keeps the plays whose absoluteYardlineNumber is at most 30.
# NOTE(review): 30 is presumably the 10-yard end zone plus the 20-yard red zone.
# Confirm that absoluteYardlineNumber is measured from the goal line the offense
# is attacking; if it depends on play direction, drives toward the other end
# zone are not selected here.
in_red_zone = plays['absoluteYardlineNumber'] <= 30
red_zone_plays = plays.loc[in_red_zone]

In [6]:
# Slice tracking for Red Zone plays and merge possessionTeam
# No `on=` is passed, so pandas joins on every column name the two frames share.
# how='right' keeps every row of the red-zone play list: tracking rows for other
# plays are dropped, and a red-zone play without tracking rows would still appear
# once with missing tracking values.
play_possession = red_zone_plays[['gameId', 'playId', 'possessionTeam']]
red_zone_tracking = pd.merge(tracking, play_possession, how='right')

In [7]:
# Slice for players and merge officialPosition
# Inner join on the column names shared by both frames: tracking rows that have
# no matching entry in the players table are dropped, and the remaining rows
# gain the player's officialPosition.
player_positions = players[['nflId', 'officialPosition']]
red_zone_player_tracking = pd.merge(red_zone_tracking, player_positions, how='inner')

In [8]:
# Find tracking possession
# 1 when the tracked player's team is the team in possession on that play, else 0.
# The comparison is cast in one vectorized step rather than calling int() on
# every row; rows where either value is missing compare unequal and get 0.
red_zone_player_tracking['possession'] = (
    red_zone_player_tracking['team'] == red_zone_player_tracking['possessionTeam']
).astype('int64')

In [9]:
# Slice for line players and offensive players
slice_positions = ['NT', 'DT', 'DE', 'OLB']

# Row masks over the same frame: one by official position, one by possession flag
is_line_position = red_zone_player_tracking['officialPosition'].isin(slice_positions)
is_on_offense = red_zone_player_tracking['possession'] == 1

red_zone_line_player_tracking = red_zone_player_tracking.loc[is_line_position]
red_zone_offensive_player_tracking = red_zone_player_tracking.loc[is_on_offense]

In [10]:
# Generate proximity table

# Columns (and column order) of the proximity table
PROXIMITY_COLUMNS = [
    'gameId',
    'playId',
    'frameId',
    'nflId',
    'officialPosition',
    'nflId2',
    'officialPosition2',
    'distance',
    'angle',
]


def extract_angle(x, y):
    """Return the clockwise bearing, in degrees, of the offset (x, y).

    0 points along +y, 90 along +x, 180 along -y and 270 along -x. Values
    normally fall in [0, 360).

    Parameters
    ----------
    x, y : float or array-like of float
        Offset components. Scalars and equally shaped arrays/Series are accepted.

    Returns
    -------
    numpy.ndarray
        Bearing per element. A zero offset (x == 0 and y == 0) has no direction
        and yields 180, the value the previous scalar implementation returned for
        that case. NaN inputs yield NaN.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # arctan2(x, y) measures the angle from the +y axis toward +x, which is the
    # clockwise bearing. It handles x == 0 without a division, so no exception
    # handling is needed (NumPy floats do not raise on division by zero).
    bearing = np.mod(np.degrees(np.arctan2(x, y)), 360.0)
    return np.where((x == 0) & (y == 0), 180.0, bearing)


# Per-week results are collected in a list and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0 and re-copied all rows on every call.
weekly_proximity = []

# Perform the merge one week at a time to limit the size of each pairwise join
for week in red_zone_line_player_tracking['week'].unique():
    # Slice both frames for the week
    week_tracking_line = red_zone_line_player_tracking[red_zone_line_player_tracking['week'] == week]
    week_tracking_offense = red_zone_offensive_player_tracking[red_zone_offensive_player_tracking['week'] == week]

    # Pair every line player with every offensive player tracked in the same frame.
    # Columns coming from the offensive side get the suffix '2'.
    proximity = week_tracking_line.merge(
        week_tracking_offense[['gameId', 'playId', 'frameId', 'nflId', 'officialPosition', 'x', 'y']],
        on=['gameId', 'playId', 'frameId'],
        how='inner',
        suffixes=('', '2')
    )

    # Remove comparisons to self. The copy makes the result an independent frame,
    # so adding columns below does not trigger SettingWithCopyWarning.
    proximity = proximity[proximity['nflId'] != proximity['nflId2']].copy()

    # Offset from the line player to the offensive player
    x_offset = proximity['x2'] - proximity['x']
    y_offset = proximity['y2'] - proximity['y']

    # Extract distance (Euclidean) and angle (bearing of the offset), vectorized
    proximity['distance'] = np.hypot(x_offset, y_offset)
    proximity['angle'] = extract_angle(x_offset, y_offset)

    # Clean proximity DataFrame to match results schema
    proximity = proximity[PROXIMITY_COLUMNS]

    weekly_proximity.append(proximity)

    print(f"Week {week} has been processed. {len(proximity)} matchups were stored.")

# Fall back to an empty frame with the right columns when there was nothing to process
proximity_results = (
    pd.concat(weekly_proximity) if weekly_proximity else pd.DataFrame(columns=PROXIMITY_COLUMNS)
)

Week 1 has been processed. 285450 matchups were stored.
Week 3 has been processed. 308605 matchups were stored.
Week 4 has been processed. 359502 matchups were stored.
Week 6 has been processed. 220869 matchups were stored.
Week 8 has been processed. 306130 matchups were stored.
Week 2 has been processed. 226644 matchups were stored.
Week 5 has been processed. 247016 matchups were stored.
Week 7 has been processed. 229020 matchups were stored.


In [None]:
# Write result to SQL table
DB_HOST = '34.72.136.99'
DB_PORT = 5432
DB_NAME = 'big-data-bowl'
TABLE_NAME = 'redzonelineplayerproximity'

# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# username or password cannot corrupt the connection URL.
db = create_engine(
    f"postgresql://{quote(dbUser, safe='')}:{quote(dbPassword, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

# Column types for the table. to_sql(if_exists='replace') drops and recreates the
# table itself, so a separate CREATE TABLE statement would be discarded; the
# intended schema is passed to to_sql through `dtype` instead.
column_types = {
    'gameId': Integer(),
    'playId': Integer(),
    'frameId': Integer(),
    'nflId': Integer(),
    'officialPosition': String(),
    'nflId2': Integer(),
    'officialPosition2': String(),
    'distance': Float(precision=53),  # double precision
    'angle': Float(precision=53),     # double precision
}

# Store identifiers as integers. The cast raises here, before the existing table
# is touched, if an identifier is missing.
id_columns = ['gameId', 'playId', 'frameId', 'nflId', 'nflId2']
table_rows = proximity_results.astype({column: 'int64' for column in id_columns})

try:
    # engine.begin() commits on success and rolls back on error, so a failed load
    # does not leave a half-written table; the connection is always released.
    with db.begin() as conn:
        table_rows.to_sql(
            TABLE_NAME,
            conn,
            if_exists='replace',
            index=False,
            dtype=column_types,
            method='multi',
            chunksize=10_000,  # bound the size of each multi-row INSERT
        )
finally:
    # Close all pooled connections held by the engine
    db.dispose()