In [1]:
# Import dependencies
import math
from urllib.parse import quote

import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

In [2]:
# Prompt for the database credentials (username first, then password).
# getpass is used for both prompts, so neither value is echoed while typing.
from getpass import getpass
dbUser, dbPassword = (
    getpass(prompt) for prompt in ('Enter username', 'Enter database password')
)

Enter username········
Enter database password········


In [3]:
# Read data

# Play-level and player-level tables
plays = pd.read_csv('../Resources/plays.csv')
players = pd.read_csv('../Resources/players.csv')

# Weekly tracking files week1.csv .. week8.csv, read in order and bound to
# week1 .. week8 so that later cells can keep referring to them by name
week1, week2, week3, week4, week5, week6, week7, week8 = (
    pd.read_csv(f'../Resources/week{week_number}.csv')
    for week_number in range(1, 9)
)

In [4]:
# Concatenate week data
# pd.concat stacks the eight weekly frames in order. It replaces
# DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.
# ignore_index is left at its default, so each frame keeps its own row labels.
tracking = pd.concat([
    week1,
    week2,
    week3,
    week4,
    week5,
    week6,
    week7,
    week8,
])

In [5]:
# Slice for player tracking and merge officialPosition
# Rows whose team is 'football' are excluded. The merge is given no `on`
# argument, so pandas joins on the columns the two frames have in common.
player_tracking = (
    tracking
    .loc[tracking['team'].ne('football')]
    .merge(players[['nflId', 'officialPosition']])
)

In [6]:
# Slice plays for Red Zone
# Flag plays whose absoluteYardlineNumber is at most 30.
# NOTE(review): presumably 30 = 20 yards from the goal line plus a 10-yard
# end zone, and this only covers one end of the field - confirm against the
# data dictionary.
plays['redZone'] = plays['absoluteYardlineNumber'] <= 30

# Select the flagged plays and remove the helper column from the result.
# drop() returns a new DataFrame here; the earlier inplace drop operated on a
# slice of `plays` and raised SettingWithCopyWarning.
red_zone_plays = plays.loc[plays['redZone']].drop(columns='redZone')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [7]:
# Slice tracking for Red Zone plays
# how='inner' (rather than 'right') keeps only tracking rows that belong to a
# red-zone play. A right join would also emit one all-NaN tracking row for
# every red-zone play that has no tracking data, and those rows would flow
# into the proximity table.
player_tracking = player_tracking.merge(
    red_zone_plays[['gameId', 'playId']],
    on=['gameId', 'playId'],
    how='inner'
)

In [8]:
# Generate proximity table

# Columns of the proximity results, in output order
proximity_columns = [
    'gameId',
    'playId',
    'frameId',
    'nflId',
    'nflId2',
    'distance'
]

# Only the frame keys, the player id and the coordinates are needed for the
# pairing, so the other tracking columns are not carried through the self-join
positions = player_tracking[['gameId', 'playId', 'frameId', 'nflId', 'x', 'y']]

# Per-game results are collected here and concatenated once after the loop;
# growing a DataFrame inside the loop copies all previous rows on every pass
# (and DataFrame.append no longer exists in pandas 2.0)
game_results = []

# Perform merge on each game slice so a single self-join stays bounded in size.
# sort=False keeps the games in order of first appearance.
for gameId, game_tracking in positions.groupby('gameId', sort=False):
    # Self-join: pair every tracked row with every row of the same
    # game/play/frame. This includes each player paired with itself; the
    # second player's columns receive the suffix '2'.
    proximity = game_tracking.merge(
        game_tracking,
        on=['gameId', 'playId', 'frameId'],
        how='left',
        suffixes=('', '2')
    )

    # Euclidean distance between the two players, computed column-wise
    proximity['distance'] = np.sqrt(
        (proximity['x'] - proximity['x2'])**2
        + (proximity['y'] - proximity['y2'])**2
    )

    # Clean distance proximity DataFrame to match results schema
    proximity = proximity[proximity_columns]

    game_results.append(proximity)

    print(f"Game {gameId} has been processed. {len(proximity)} matchups were stored.")

# Combine all games; fall back to an empty frame with the results schema when
# there was nothing to process (pd.concat raises on an empty list)
if game_results:
    proximity_results = pd.concat(game_results)
else:
    proximity_results = pd.DataFrame(columns=proximity_columns)

Game 2021090900 has been processed. 108900 matchups were stored.
Game 2021091200 has been processed. 194084 matchups were stored.
Game 2021091201 has been processed. 107448 matchups were stored.
Game 2021091202 has been processed. 99220 matchups were stored.
Game 2021091203 has been processed. 113256 matchups were stored.
Game 2021091204 has been processed. 158752 matchups were stored.
Game 2021091205 has been processed. 151008 matchups were stored.
Game 2021091206 has been processed. 239580 matchups were stored.
Game 2021091207 has been processed. 216348 matchups were stored.
Game 2021091208 has been processed. 251680 matchups were stored.
Game 2021091209 has been processed. 147136 matchups were stored.
Game 2021091210 has been processed. 98252 matchups were stored.
Game 2021091211 has been processed. 182468 matchups were stored.
Game 2021091212 has been processed. 261360 matchups were stored.
Game 2021091213 has been processed. 150524 matchups were stored.
Game 2021091300 has been pr

In [9]:
# Discard the rows in which a player is paired with itself
is_distinct_pair = proximity_results['nflId'].ne(proximity_results['nflId2'])
proximity_results = proximity_results.loc[is_distinct_pair]

In [None]:
# Write result to SQL table

# Percent-encode the credentials so that characters such as '@', ':' or '/'
# in the username or password cannot corrupt the connection URL
db = create_engine(
    f"postgresql://{quote(dbUser, safe='')}:{quote(dbPassword, safe='')}"
    "@34.72.136.99:5432/big-data-bowl"
)

try:
    # if_exists='replace' drops any existing playerproximity table and
    # recreates it from the DataFrame, so a separate DROP/CREATE TABLE would
    # be discarded before the insert and is not issued here.
    # chunksize bounds each multi-row INSERT; without it, method='multi'
    # writes all rows in a single statement.
    proximity_results.to_sql(
        'playerproximity',
        db,
        if_exists='replace',
        method='multi',
        chunksize=10000,
        index=False
    )

    # Verify the load with a row count instead of fetching and printing every
    # stored row
    stored = pd.read_sql('select count(*) as row_count from playerproximity', db)
    print(f"{stored.loc[0, 'row_count']} of {len(proximity_results)} rows stored in playerproximity.")
finally:
    # Release the engine's pooled connections even if the load fails
    db.dispose()