In [13]:
# Import dependencies
import pandas as pd
import numpy as np
import math
import psycopg2
from sqlalchemy import create_engine

In [14]:
# Store environment variable
from getpass import getpass
dbUser = getpass('Enter username')
dbPassword = getpass('Enter database password')

Enter username········
Enter database password········


In [15]:
# Read data
plays = pd.read_csv('../Resources/plays.csv')
players = pd.read_csv('../Resources/players.csv')

week1 = pd.read_csv('../Resources/week1.csv')
week2 = pd.read_csv('../Resources/week2.csv')
week3 = pd.read_csv('../Resources/week3.csv')
week4 = pd.read_csv('../Resources/week4.csv')
week5 = pd.read_csv('../Resources/week5.csv')
week6 = pd.read_csv('../Resources/week6.csv')
week7 = pd.read_csv('../Resources/week7.csv')
week8 = pd.read_csv('../Resources/week8.csv')

In [16]:
# Clean and concatenate week data
week1['week'] = 1
week2['week'] = 2
week3['week'] = 3
week4['week'] = 4
week5['week'] = 5
week6['week'] = 6
week7['week'] = 7
week8['week'] = 8

tracking = week1.append([
    week2,
    week3,
    week4,
    week5,
    week6,
    week7,
    week8,
])

In [17]:
# Slice plays for Red Zone plays
red_zone_plays = plays[plays['absoluteYardlineNumber'] <= 30]

In [18]:
# Slice tracking for Red Zone plays and merge possessionTeam
red_zone_tracking = tracking.merge(red_zone_plays[['gameId', 'playId', 'possessionTeam']], how='right')

In [19]:
# Slice for players and merge officialPosition
red_zone_player_tracking = red_zone_tracking.merge(players[['nflId', 'officialPosition']], how='inner')

In [20]:
# Find tracking possession
red_zone_player_tracking['possession'] = red_zone_player_tracking['team'] == red_zone_player_tracking['possessionTeam']

In [22]:
# Slice for line players and offensive players
slice_positions = ['G','C','T']
red_zone_line_player_tracking = red_zone_player_tracking[
    (red_zone_player_tracking['officialPosition'].isin(slice_positions)) | \
    (red_zone_player_tracking['possession'])
]

In [28]:
# Generate proximity table

# Initialize DataFrame to house proximity results
proximity_results = pd.DataFrame(columns=[
    'gameId',
    'playId',
    'frameId',
    'nflId',
    'officialPosition',
    'nflId2',
    'officialPosition2',
    'possession',
    'possession2',
    'matchupOpposing',
    'distance',
    'angle'
])

# Perform merge on each game slice
for week in red_zone_line_player_tracking['week'].unique():
    # Slice DataFrame for week
    week_tracking = red_zone_line_player_tracking[red_zone_line_player_tracking['week'] == week]
    
    # Merge offense and defense
    proximity = week_tracking.merge(
        week_tracking[['gameId', 'playId', 'frameId', 'nflId', 'possession', 'officialPosition', 'x', 'y']],
        on=['gameId', 'playId', 'frameId'],
        how='inner',
        suffixes=('', '2')
    )
    
    proximity = proximity[proximity['nflId'] != proximity['nflId2']]
    
    # Extract distance metric
    proximity['distance'] = proximity.apply(
        lambda row : math.sqrt(((row['x'] - row['x2'])**2 + (row['y'] - row['y2'])**2)), axis=1)
    
    def extract_angle(x, y):
        try:
            if x > 0:
                if y > 0:
                    return 90 - math.degrees(math.atan(y / x))
                else:
                    return 90 + math.degrees(math.atan(y / x))
            else:
                if y > 0:
                    return 270 + math.degrees(math.atan(y / x))
                else:
                    return 270 - math.degrees(math.atan(y / x))
        except Exception as e:
            if y > 0:
                return 0
            else:
                return 180
    
    proximity['angle'] = proximity.apply(
        lambda row : extract_angle(row['x2'] - row['x'], row['y2'] - row['y']), axis=1)
    
    # Define opposing column
    proximity['matchupOpposing'] = proximity.apply(lambda row: int(row['possession'] != row['possession2']), axis=1)
    
    # Convert possession boolean to integer
    proximity['possession'] = proximity['possession'].apply(lambda x: int(x))
    
    # Clean distance proximity DataFrame to match results schema
    proximity = proximity[[
        'gameId',
        'playId',
        'frameId',
        'nflId',
        'officialPosition',
        'nflId2',
        'officialPosition2',
        'possession',
        'possession2',
        'matchupOpposing',
        'distance',
        'angle'
    ]]
    
    # Append to DataFrame
    proximity_results = proximity_results.append(proximity)
    
    print(f"Week {week} has been processed. {len(proximity)} matchups were stored.")
    break

Week 1 has been processed. 610060 matchups were stored.


In [29]:
proximity_results.matchupOpposing.mean()

0.0

In [24]:
# Filter out rows comparing to self
proximity_results = proximity_results[proximity_results['nflId'] != proximity_results['nflId2']]
proximity_results[proximity_results['possession'] == 0]

Unnamed: 0,gameId,playId,frameId,nflId,officialPosition,nflId2,officialPosition2,opposing,distance,angle,possession,matchupOpposing


In [12]:
# Write result to SQL table 
db = create_engine(f'postgresql://{dbUser}:{dbPassword}@34.72.136.99:5432/big-data-bowl')
conn = db.connect()
conn1 = psycopg2.connect(
    database="big-data-bowl",
    user=dbUser, 
    password=dbPassword, 
    host='34.72.136.99', 
    port= '5432'
)
  
conn1.autocommit = True
cursor = conn1.cursor()
  
# drop table if it already exists
cursor.execute('drop table if exists redzonelineplayerproximity')
    
sql_create = '''CREATE TABLE redzonelineplayerproximity(
    gameId integer,
    playId integer,
    frameId integer,
    nflId integer,
    officialPosition varchar,
    nflId2 integer,
    officialPosition2 varchar,
    possession integer,
    matchupOpposing integer,
    distance double precision,
    angle double precision
);'''
  
cursor.execute(sql_create)

# converting data to sql
proximity_results.to_sql('redzonelineplayerproximity', conn, if_exists= 'replace', method='multi', index=False)
  
conn1.commit()
conn1.close()