# Post-Analysis Concussion

- This notebook cleans and transforms the concussion data from the punt datasets.

--- 
# Dependencies

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PostAnalysisCleaningFunctions import clean_punt
random_state=42

import sqlalchemy as db
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
import psycopg2

## Retrieve the Concussion Positive Data from the Database

Prior to this SQL call, the following files were merged in SQL:
- play_player_role
- play_info
- game_data
- player_punt_data
- reviews
- ngs data

In [None]:
# Load the pre-merged concussion NGS table from the NFL_Punt database.
# The password comes from the local `config` module.
from config import db_password
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5433/NFL_Punt"
engine = db.create_engine(db_string)
metadata = db.MetaData()

# Drop both credential-bearing names once the engine exists
# (the connection string embeds the password too).
del db_string, db_password

# Reflect the concussion_ngs table. `autoload_with` alone triggers reflection;
# the separate `autoload` flag is deprecated in SQLAlchemy 1.4 and removed in 2.0.
table = db.Table('concussion_ngs', metadata, autoload_with=engine)
query = db.select(table)

# The context manager closes the connection even if the query raises.
with engine.connect() as conn:
    result = conn.execute(query)
    # Take the column names from the result itself rather than from the first
    # row, so an empty table yields an empty frame instead of an IndexError.
    column_names = list(result.keys())
    rows = result.fetchall()

# Create the new dataframe with its column names set up front
concussion = pd.DataFrame(rows, columns=column_names)
del rows, column_names, result, query, table
concussion.head()

In [None]:
# Run the shared cleaner (imported from PostAnalysisCleaningFunctions) on the
# concussion frame with the 'vis' flag -- presumably the visualization-oriented
# output; confirm against clean_punt.
vis_concussion = clean_punt(concussion, 'vis')
vis_concussion.head()


In [None]:
# Run the shared cleaner (imported from PostAnalysisCleaningFunctions) on the
# concussion frame with the 'ml' flag -- presumably the model-ready output;
# confirm against clean_punt.
ml_concussion = clean_punt(concussion, 'ml')
ml_concussion.head()


In [None]:
# Count the distinct GamePlay_ID values present in the ML frame.
ml_concussion['GamePlay_ID'].nunique()


In [None]:
# Persist the cleaned concussion frames to the NFL_Injuries database.
# The password comes from the local `config` module.
from config import db_password
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5433/NFL_Injuries"
engine = db.create_engine(db_string)

# Drop the credential-bearing names once the engine exists.
del db_string, db_password

# Write tables to database.
# `to_sql` defaults to if_exists='fail', which raises ValueError when the
# notebook is re-run after the tables already exist. 'replace' makes this cell
# idempotent by dropping and recreating each table from the current frame.
vis_concussion.to_sql(name='vis_concussions', con=engine, index=False, if_exists='replace')
ml_concussion.to_sql(name='ml_concussions', con=engine, index=False, if_exists='replace')



At this point, I need to rethink how to get data for a model: sampling the NGS data only removes movement data from within individual plays, which is problematic.
- This will be much better to do in SQL
- Pull out data by position? 

# Import the SQL Sampled Concussion Control Group Data


In [2]:
# Load the SQL-sampled control-group NGS table from the NFL_Punt database.
# The password comes from the local `config` module.
from config import db_password
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5433/NFL_Punt"
engine = db.create_engine(db_string)
metadata = db.MetaData()

# Drop both credential-bearing names once the engine exists
# (the connection string embeds the password too).
del db_string, db_password

# Reflect the control_ngs table. `autoload_with` alone triggers reflection;
# the separate `autoload` flag is deprecated in SQLAlchemy 1.4 and removed in 2.0.
table = db.Table('control_ngs', metadata, autoload_with=engine)
query = db.select(table)

# The context manager closes the connection even if the query raises.
with engine.connect() as conn:
    result = conn.execute(query)
    # Take the column names from the result itself rather than from the first
    # row, so an empty table yields an empty frame instead of an IndexError.
    column_names = list(result.keys())
    rows = result.fetchall()

# Create the new dataframe with its column names set up front
control = pd.DataFrame(rows, columns=column_names)
del rows, column_names, result, query, table
control.head()


Unnamed: 0,gamekey,playid,gsisid,season_year,season_type,quarter,score_home_visiting,stadiumtype,turf,week,p_position,player_activity,turnover_related,impact_type,g_time,x,y,o,dir
0,332,3764,27647,2016,Post,4,28 - 20,Retractable Roof,Artificial,4,FS,,,,2017-02-06 02:53:01.100,66.790001,40.779999,159.289993,245.059998
1,332,3764,30558,2016,Post,4,28 - 20,Retractable Roof,Artificial,4,OLB,,,,2017-02-06 02:53:01.100,65.489998,27.959999,183.839996,331.609985
2,332,3764,30558,2016,Post,4,28 - 20,Retractable Roof,Artificial,4,OLB,,,,2017-02-06 02:53:01.100,65.489998,27.959999,183.839996,331.609985
3,332,3764,31856,2016,Post,4,28 - 20,Retractable Roof,Artificial,4,OLB,,,,2017-02-06 02:53:01.100,65.5,27.190001,174.899994,272.269989
4,332,3764,31856,2016,Post,4,28 - 20,Retractable Roof,Artificial,4,OLB,,,,2017-02-06 02:53:01.100,65.5,27.190001,174.899994,272.269989


In [3]:
# Run the shared cleaner (imported from PostAnalysisCleaningFunctions) on the
# control frame with the 'vis' flag -- presumably the visualization-oriented
# output; confirm against clean_punt.
vis_control = clean_punt(control, 'vis')
vis_control.head()


Unnamed: 0,Quarter,StadiumType,Week,Position,Player_Activity,Impact_Type,g_time,x,y,FieldType,HomeScore,Score_Difference,GamePlay,GamePlay_ID,Twist
0,4,Indoor,26,FS,,,2017-02-06 02:53:01.100,66.790001,40.779999,Synthetic,28,8,332-3764,332-3764-27647,44.349991
1,4,Indoor,26,OLB,,,2017-02-06 02:53:01.100,65.489998,27.959999,Synthetic,28,8,332-3764,332-3764-30558,147.769989
2,4,Indoor,26,OLB,,,2017-02-06 02:53:01.100,65.5,27.190001,Synthetic,28,8,332-3764,332-3764-31856,87.169983
3,4,Indoor,26,FS,,,2017-02-06 02:53:01.100,65.959999,26.190001,Synthetic,28,8,332-3764,332-3764-29570,53.419998
4,4,Indoor,26,OLB,,,2017-02-06 02:53:01.200,65.510002,23.09,Synthetic,28,8,332-3764,332-3764-29250,70.359985


In [4]:
# Run the shared cleaner (imported from PostAnalysisCleaningFunctions) on the
# control frame with the 'ml' flag -- presumably the model-ready output;
# confirm against clean_punt.
ml_control = clean_punt(control, 'ml')
ml_control.head()


Unnamed: 0,Quarter,Week,Position,Player_Activity,Impact_Type,x,y,SyntheticField,Outdoor,HomeScore,Score_Difference,GamePlay,GamePlay_ID,Twist
0,4,26,27,,,66.790001,40.779999,1.0,0,28,8,332-3764,332-3764-27647,44.349991
1,4,26,20,,,65.489998,27.959999,1.0,0,28,8,332-3764,332-3764-30558,147.769989
2,4,26,20,,,65.5,27.190001,1.0,0,28,8,332-3764,332-3764-31856,87.169983
3,4,26,27,,,65.959999,26.190001,1.0,0,28,8,332-3764,332-3764-29570,53.419998
4,4,26,20,,,65.510002,23.09,1.0,0,28,8,332-3764,332-3764-29250,70.359985


In [5]:
# Persist the cleaned control-group frames to the NFL_Injuries database.
# The password comes from the local `config` module.
from config import db_password
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5433/NFL_Injuries"
engine = db.create_engine(db_string)

# Drop the credential-bearing names once the engine exists.
del db_string, db_password

# Write tables to database.
# `to_sql` defaults to if_exists='fail', which raises ValueError when the
# notebook is re-run after the tables already exist. 'replace' makes this cell
# idempotent by dropping and recreating each table from the current frame.
vis_control.to_sql(name='vis_concussion_control', con=engine, index=False, if_exists='replace')
ml_control.to_sql(name='ml_concussion_control', con=engine, index=False, if_exists='replace')
