# Post-Analysis - Turf Merges

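- This notebook uses the functions established in Turf_Data_Cleaning to acquire and clean the playlist and injuries data.
- It then merges the cleaned data into two datasets: one for machine learning (`ml`) and one for visualization (`vis`).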

---
# Dependencies

In [1]:
import pandas as pd
import numpy as np
from PostAnalysisCleaningFunctions import clean_and_merge

import sqlalchemy as db
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
import psycopg2


In [2]:
# Connect to the local Postgres database through SQLAlchemy.
from config import db_password

# Build the URL from its components instead of an f-string: URL.create
# escapes the password, so reserved characters such as '@', ':' or '/'
# cannot corrupt the connection string. The name `db_string` is kept
# because a later cell deletes it during cleanup.
db_string = db.engine.URL.create(
    drivername="postgresql",
    username="postgres",
    password=db_password,
    host="127.0.0.1",
    port=5433,
    database="NFL_Turf",
)
engine = db.create_engine(db_string)
conn = engine.connect()
metadata = db.MetaData()

# Drop the plaintext password from the notebook namespace once the engine exists.
del db_password

## Get and Clean the Playlist Data

This process is done with the visualization material first; then two functions are created to automate the entire acquisition and merge process for the visualization (`vis`) and machine-learning (`ml`) datasets.

In [3]:
# Reflect the 'playlist' table from the live schema. `autoload_with` alone
# triggers reflection; the separate `autoload=True` flag is deprecated in
# SQLAlchemy 1.4 and removed in 2.0.
table = db.Table('playlist', metadata, autoload_with=engine)
query = db.select(table)

# Take the column names from the result itself rather than from
# `Results[0].keys()`, so an empty table yields an empty, correctly
# labelled frame instead of raising IndexError.
cursor_result = conn.execute(query)
playlist_columns = list(cursor_result.keys())
Results = cursor_result.fetchall()

playlist = pd.DataFrame(Results, columns=playlist_columns)

# Only `playlist`, `table`, `query` and `Results` are left behind; the
# latter three are cleaned up by the injuries cell.
del cursor_result, playlist_columns

## Get and Clean the Injuries Data

In [4]:
# Read in the 'injuries' table - this is done on the same connection.
# `autoload_with` alone triggers reflection; the separate `autoload=True`
# flag is deprecated in SQLAlchemy 1.4 and removed in 2.0.
injuries_sql = db.Table('injuries', metadata, autoload_with=engine)
query = db.select(injuries_sql)

try:
    cursor_result = conn.execute(query)
    # Take the column names from the result, not from `Results[0]`, so an
    # empty table gives an empty labelled frame instead of an IndexError.
    injuries_columns = list(cursor_result.keys())
    Results = cursor_result.fetchall()
finally:
    # The connection is discarded after this cell; release it even if the
    # query failed.
    conn.close()

# Create the new dataframe with the column names attached
injuries = pd.DataFrame(Results, columns=injuries_columns)

# Remove the database handles and raw rows from the notebook namespace
del Results, metadata, conn, engine, query, table, db_string
del cursor_result, injuries_columns


In [5]:
# Preview the first five raw playlist rows as loaded from the database.
playlist.head(n=5)

Unnamed: 0,playerkey,gameid,playkey,rosterposition,playerday,playergame,stadiumtype,fieldtype,temperature,weather,playtype,playergameplay,position,postiongroup
0,26624,26624-1,26624-1-1,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,1,QB,QB
1,26624,26624-1,26624-1-2,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,2,QB,QB
2,26624,26624-1,26624-1-3,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Rush,3,QB,QB
3,26624,26624-1,26624-1-4,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Rush,4,QB,QB
4,26624,26624-1,26624-1-5,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,5,QB,QB


In [6]:
# Preview the first five raw injury records as loaded from the database.
injuries.head(n=5)

Unnamed: 0,playerkey,gameid,playkey,bodypart,fieldtype,dm_m1,dm_m7,dm_m28,dm_m42
0,39873,39873-4,39873-4-32,Knee,Synthetic,1,1,1,1
1,46074,46074-7,46074-7-26,Knee,Natural,1,1,0,0
2,36557,36557-1,36557-1-70,Ankle,Synthetic,1,1,1,1
3,46646,46646-3,46646-3-30,Ankle,Natural,1,0,0,0
4,43532,43532-5,43532-5-69,Ankle,Synthetic,1,1,1,1


# Perform the Outer Merge 

To maintain the non-injury baseline data, we have to perform an outer merge: plays with no matching injury record are kept rather than dropped.

In [8]:
# Build the merged play/injury table using the 'ml' processing mode,
# then preview its first five rows.
ml_play_injuries = clean_and_merge(
    playlist,
    injuries,
    process='ml',
)
ml_play_injuries.head(n=5)

Unnamed: 0,PlayKey,PlayerGame,PlayerGamePlay,SyntheticField,Outdoor,RosterPosition_Num,Position_Num,PlayCode,DaysPlayed,InjuryType,InjuryDuration,SevereInjury,IsInjured
0,26624-1-1,1,1,1,1,0,0,0.0,64,0.0,0.0,0.0,0
1,26624-1-2,1,2,1,1,0,0,0.0,64,0.0,0.0,0.0,0
2,26624-1-3,1,3,1,1,0,0,1.0,64,0.0,0.0,0.0,0
3,26624-1-4,1,4,1,1,0,0,1.0,64,0.0,0.0,0.0,0
4,26624-1-5,1,5,1,1,0,0,0.0,64,0.0,0.0,0.0,0


In [9]:
# Build the merged play/injury table using the 'vis' processing mode,
# then preview its first five rows.
vis_play_injuries = clean_and_merge(
    playlist,
    injuries,
    process='vis',
)
vis_play_injuries.head(n=5)

Unnamed: 0,PlayKey,RosterPosition,PlayerGame,StadiumType,FieldType,PlayType,PlayerGamePlay,Position,DaysPlayed,BodyPart,InjuryDuration,SevereInjury,IsInjured
0,26624-1-1,QB,1,Outdoor,Synthetic,Pass,1,QB,64,NoInjury,0.0,NoInjury,NoInjury
1,26624-1-2,QB,1,Outdoor,Synthetic,Pass,2,QB,64,NoInjury,0.0,NoInjury,NoInjury
2,26624-1-3,QB,1,Outdoor,Synthetic,Rush,3,QB,64,NoInjury,0.0,NoInjury,NoInjury
3,26624-1-4,QB,1,Outdoor,Synthetic,Rush,4,QB,64,NoInjury,0.0,NoInjury,NoInjury
4,26624-1-5,QB,1,Outdoor,Synthetic,Pass,5,QB,64,NoInjury,0.0,NoInjury,NoInjury
