# NFL Concussion Study

A dataset analysis of the head injuries and concussions from seasons 2012 to 2014

Team Group 2 - Kelsey Brantner, Daniel Carrasco and Joel Pangilinan

![nfl-league-logo-300x300.png](attachment:nfl-league-logo-300x300.png)

# Read the clean dataset

In [149]:
# Import Dependencies
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

In [150]:
# Read the csv file
clean_df = pd.read_csv("clean_concussion_2012_2014.csv")
clean_df.head()

Unnamed: 0,Player,Team,Game,Date,Opposing Team,Position,Team Type,Pre-Season Injury?,Winning Team?,Week of Injury,Season,Weeks Injured,Games Missed,Unknown Injury?,Reported Injury Type,Total Snaps,Play Time After Injury,Average Playtime Before Injury
0,Aldrick Robinson,Washington Redskins,Washington Redskins vs. Tampa Bay Buccaneers (...,30/09/2012,Tampa Bay Buccaneers,Wide Receiver,offense,No,Yes,4,2012,1,1.0,No,Head,0,14 downs,37.00 downs
1,D.J. Fluker,San Diego Chargers,Tennessee Titans vs. San Diego Chargers (22/9/...,22/09/2013,Tennessee Titans,Offensive Tackle,offense,No,No,3,2013,1,1.0,No,Concussion,0,78 downs,73.50 downs
2,Marquise Goodwin,Buffalo Bills,Houston Texans vs. Buffalo Bills (28/9/2014),28/09/2014,Houston Texans,Wide Receiver,offense,No,No,4,2014,1,1.0,No,Concussion,0,25 downs,17.50 downs
3,Bryan Stork,New England Patriots,New England Patriots vs. Buffalo Bills (12/10/...,12/10/2014,Buffalo Bills,Center,offense,No,Yes,6,2014,1,1.0,No,Head,0,82 downs,41.50 downs
4,Lorenzo Booker,Chicago Bears,Chicago Bears vs. Indianapolis Colts (9/9/2012),9/09/2012,Indianapolis Colts,Running Back,offense,Yes,Yes,1,2012,0,,No,Head,0,Did not return from injury,


# Create a graph that shows the number of injuries per team

# Head Injury/Concussion from 2012 to 2014 by position

In [151]:
# Count the number of injuries per team and put it in a dataframe
team_injury_df = pd.DataFrame(clean_df.groupby("Team")["Reported Injury Type"].count())
team_injury_df = team_injury_df.rename(columns={"Reported Injury Type":"Number of Injuries"})
team_injury_df

Unnamed: 0_level_0,Number of Injuries
Team,Unnamed: 1_level_1
Arizona Cardinals,10
Atlanta Falcons,6
Baltimore Ravens,15
Buffalo Bills,5
Carolina Panthers,8
Chicago Bears,9
Cincinnati Bengals,21
Cleveland Browns,22
Dallas Cowboys,10
Denver Broncos,11


In [152]:
# Create the bar graph that shows the number of injuries per team
team_injury_df.plot(kind="bar", 
                    xlabel="NFL Teams", 
                    ylabel="Number of Injuries", 
                    title="Number of injuries per team (2012 - 2014)", 
                    position=0.5, 
                    color="red")

# Save the plot to a file
plt.savefig("Images/Injuries_per_team_2012-2014.png", bbox_inches="tight")

# Show the plot
plt.show()

<IPython.core.display.Javascript object>

# Breakdown on injuries by team (Offense and Defense)

In [153]:
# Create a dataframe with offensive positions only
offense_df = clean_df.loc[clean_df["Team Type"] == "offense"]
offense_df.head()

Unnamed: 0,Player,Team,Game,Date,Opposing Team,Position,Team Type,Pre-Season Injury?,Winning Team?,Week of Injury,Season,Weeks Injured,Games Missed,Unknown Injury?,Reported Injury Type,Total Snaps,Play Time After Injury,Average Playtime Before Injury
0,Aldrick Robinson,Washington Redskins,Washington Redskins vs. Tampa Bay Buccaneers (...,30/09/2012,Tampa Bay Buccaneers,Wide Receiver,offense,No,Yes,4,2012,1,1.0,No,Head,0,14 downs,37.00 downs
1,D.J. Fluker,San Diego Chargers,Tennessee Titans vs. San Diego Chargers (22/9/...,22/09/2013,Tennessee Titans,Offensive Tackle,offense,No,No,3,2013,1,1.0,No,Concussion,0,78 downs,73.50 downs
2,Marquise Goodwin,Buffalo Bills,Houston Texans vs. Buffalo Bills (28/9/2014),28/09/2014,Houston Texans,Wide Receiver,offense,No,No,4,2014,1,1.0,No,Concussion,0,25 downs,17.50 downs
3,Bryan Stork,New England Patriots,New England Patriots vs. Buffalo Bills (12/10/...,12/10/2014,Buffalo Bills,Center,offense,No,Yes,6,2014,1,1.0,No,Head,0,82 downs,41.50 downs
4,Lorenzo Booker,Chicago Bears,Chicago Bears vs. Indianapolis Colts (9/9/2012),9/09/2012,Indianapolis Colts,Running Back,offense,Yes,Yes,1,2012,0,,No,Head,0,Did not return from injury,


In [154]:
# Determine the number of head injuries and concussions per position on offense
offense_injuries= offense_df.groupby(["Position"]).apply(lambda x: x[x["Team Type"] == "offense"]["Weeks Injured"].count())
offense_injuries

Position
Center               7
Full Back            3
Guard               26
Offensive Tackle    22
Quarterback         14
Running Back        39
Tight End           39
Wide Receiver       56
dtype: int64

In [155]:
# Put it in a dataframe
offense_injuries_df = pd.DataFrame(offense_injuries)
offense_injuries_df = offense_injuries_df.reset_index()
offense_injuries_df = offense_injuries_df.rename(columns = {"Position":"Offensive Position", 0:"Number of Offensive Injuries"})
offense_injuries_df

Unnamed: 0,Offensive Position,Number of Offensive Injuries
0,Center,7
1,Full Back,3
2,Guard,26
3,Offensive Tackle,22
4,Quarterback,14
5,Running Back,39
6,Tight End,39
7,Wide Receiver,56


In [156]:
# Create a dataframe with defensive positions only
defense_df = clean_df.loc[clean_df["Team Type"] == "defense"]
defense_df.head()

Unnamed: 0,Player,Team,Game,Date,Opposing Team,Position,Team Type,Pre-Season Injury?,Winning Team?,Week of Injury,Season,Weeks Injured,Games Missed,Unknown Injury?,Reported Injury Type,Total Snaps,Play Time After Injury,Average Playtime Before Injury
9,Brandon Ghee,Cincinnati Bengals,Chicago Bears vs. Cincinnati Bengals (8/9/2013),8/09/2013,Chicago Bears,Cornerback,defense,Yes,No,1,2013,0,,No,Concussion,0,37 downs,
10,Pat Angerer,Indianapolis Colts,Indianapolis Colts vs. Oakland Raiders (8/9/2013),8/09/2013,Oakland Raiders,Linebacker,defense,Yes,Yes,1,2013,0,,No,Concussion,0,,
11,Darryl Sharpton,Houston Texans,Houston Texans vs. San Diego Chargers (9/9/2013),9/09/2013,San Diego Chargers,Linebacker,defense,Yes,Yes,1,2013,0,,No,Concussion,0,,
12,Demetri Goodson,Green Bay Packers,Seattle Seahawks vs. Green Bay Packers (4/9/2014),4/09/2014,Seattle Seahawks,Cornerback,defense,Yes,No,1,2014,0,,No,Concussion,0,2 downs,
14,Nick Roach,Oakland Raiders,New York Jets vs. Oakland Raiders (7/9/2014),7/09/2014,New York Jets,Linebacker,defense,Yes,No,1,2014,0,,No,Concussion,0,,


In [157]:
# Determine the number of head injuries and concussions per position on defense
defense_injuries= defense_df.groupby(["Position"]).apply(lambda x: x[x["Team Type"] == "defense"]["Weeks Injured"].count())
defense_injuries

Position
Cornerback          64
Defensive End       20
Defensive Tackle    13
Linebacker          37
Safety              51
dtype: int64

In [158]:
# Put it in a dataframe
defense_injuries_df = pd.DataFrame(defense_injuries)
defense_injuries_df = defense_injuries_df.reset_index()
defense_injuries_df = defense_injuries_df.rename(columns = {"Position":"Defensive Position", 0:"Number of Defensive Injuries"})
defense_injuries_df

Unnamed: 0,Defensive Position,Number of Defensive Injuries
0,Cornerback,64
1,Defensive End,20
2,Defensive Tackle,13
3,Linebacker,37
4,Safety,51


In [159]:
# Rename the columns
offense_injuries_df = offense_injuries_df.rename(columns={"Offensive Position":"Position"})
defense_injuries_df = defense_injuries_df.rename(columns={"Defensive Position":"Position"})

In [160]:
# Merge the offense and defense dataframes
bar_injuries_df = pd.merge(offense_injuries_df, defense_injuries_df, how="outer", on="Position")
bar_injuries_df

Unnamed: 0,Position,Number of Offensive Injuries,Number of Defensive Injuries
0,Center,7.0,
1,Full Back,3.0,
2,Guard,26.0,
3,Offensive Tackle,22.0,
4,Quarterback,14.0,
5,Running Back,39.0,
6,Tight End,39.0,
7,Wide Receiver,56.0,
8,Cornerback,,64.0
9,Defensive End,,20.0


In [162]:
# Create the bar graph
bar_injuries_df = bar_injuries_df.set_index("Position")
bar_injuries_df.plot(kind="bar", 
                     xlabel="Team Positions", 
                     ylabel="Number of Injuries", 
                     title="Total number of injuries per position (2012 - 2014)", 
                     position=0.5)

# Save the plot to a file
plt.savefig("Images/Total_injuries_on_2012-2014.png", bbox_inches="tight")

# Show the plot
plt.show()

KeyError: "None of ['Position'] are in the columns"

# Create a pie graph that shows the percentage of injuries between offensive and defensive teams

# Injury Percentage of Offense and Defense

In [132]:
# Create the dataframe that shows the total number of injuries per team type
number_injuries_df = pd.DataFrame(clean_df["Team Type"].value_counts())
number_injuries_df = number_injuries_df.rename(columns={"Team Type":"Number of Injuries"})
number_injuries_df

Unnamed: 0,Number of Injuries
offense,206
defense,185


In [133]:
# Determine the total number of injuries
total_number_of_injuries = len(clean_df["Player"])
total_number_of_injuries

392

In [134]:
# Get the percentage of offensive and defensive injuries
percent_offense_injury = round((number_injuries_df.iloc[0]/total_number_of_injuries)*100)
percent_defense_injury = round((number_injuries_df.iloc[1]/total_number_of_injuries)*100)
percent_offense_injury, percent_defense_injury

(Number of Injuries    53.0
 Name: offense, dtype: float64,
 Number of Injuries    47.0
 Name: defense, dtype: float64)

In [135]:
# Create the pie graph that will show the percentage of injury on offense and defense
labels = number_injuries_df.index
plt.pie(number_injuries_df["Number of Injuries"], labels=labels, autopct="%1.1f%%")
plt.ylabel("Number of Injuries")
plt.title("Injuries: Offense versus Defense (2012 - 2014)")

# Save the plot to a file
plt.savefig("Images/Percentage_of_injuries_between_offense_defense_2012-2014.png", bbox_inches="tight")

# Show the plot
plt.show()

# Injury trend from 2012 to 2014 (Offense and Defense)

In [136]:
# Create a dataframe showing the injuries per year and reset the index
injury_offense_per_year_df = pd.DataFrame(offense_df.groupby("Season")["Reported Injury Type"].count())
injury_offense_per_year_df = injury_offense_per_year_df.rename(columns={"Reported Injury Type":"Number of Injuries"})
injury_offense_per_year_df = injury_offense_per_year_df.reset_index()
injury_offense_per_year_df

Unnamed: 0,Season,Number of Injuries
0,2012,99
1,2013,79
2,2014,27


In [137]:
# Create a dataframe showing the injuries per year and reset the index
injury_defense_per_year_df = pd.DataFrame(defense_df.groupby("Season")["Reported Injury Type"].count())
injury_defense_per_year_df = injury_defense_per_year_df.rename(columns={"Reported Injury Type":"Number of Injuries"})
injury_defense_per_year_df = injury_defense_per_year_df.reset_index()
injury_defense_per_year_df

Unnamed: 0,Season,Number of Injuries
0,2012,71
1,2013,72
2,2014,40


In [138]:
# Rename the columns for number of injuries on injury_offense_per_year_df
injury_offense_per_year_df = injury_offense_per_year_df.rename(columns={"Number of Injuries":"Offensive Injuries"})
injury_offense_per_year_df

Unnamed: 0,Season,Offensive Injuries
0,2012,99
1,2013,79
2,2014,27


In [139]:
# Rename the columns for number of injuries on injury_defense_per_year_df
injury_defense_per_year_df = injury_defense_per_year_df.rename(columns={"Number of Injuries":"Defensive Injuries"})
injury_defense_per_year_df

Unnamed: 0,Season,Defensive Injuries
0,2012,71
1,2013,72
2,2014,40


In [140]:
# Merge the dataframe for offense and defense
injury_per_year_df = pd.merge(injury_offense_per_year_df, injury_defense_per_year_df, on="Season")
injury_per_year_df

Unnamed: 0,Season,Offensive Injuries,Defensive Injuries
0,2012,99,71
1,2013,79,72
2,2014,27,40


In [141]:
# Create the x-axis and y-axis
seasons = injury_per_year_df["Season"]
offensive_injury_number = injury_per_year_df["Offensive Injuries"]
defensive_injury_number = injury_per_year_df["Defensive Injuries"]
seasons, offensive_injury_number, defensive_injury_number

(0    2012
 1    2013
 2    2014
 Name: Season, dtype: int64,
 0    99
 1    79
 2    27
 Name: Offensive Injuries, dtype: int64,
 0    71
 1    72
 2    40
 Name: Defensive Injuries, dtype: int64)

In [142]:
# Create the line plot
plt.xlabel("NFL Seasons")
plt.ylabel("Number of Injuries")
plt.title("Number of Injuries Per NFL Season")
plt.xticks(seasons)
plt.plot(seasons, offensive_injury_number, label="Offense")
plt.plot(seasons, defensive_injury_number, label="Defense")
plt.legend()

# Save the plot
plt.savefig("Images/Injuries_per_year.png", bbox_inches="tight")

# Show the plot
plt.show()

# Break down of injuries per team and per year (2012 to 2014)

In [143]:
# Group the dataset per team and per season
nfl_injury_df = pd.DataFrame(clean_df.groupby(["Team", "Season"])["Reported Injury Type"].count())
nfl_injury_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Reported Injury Type
Team,Season,Unnamed: 2_level_1
Arizona Cardinals,2012,4
Arizona Cardinals,2013,2
Arizona Cardinals,2014,4
Atlanta Falcons,2012,2
Atlanta Falcons,2013,3
...,...,...
Tennessee Titans,2013,5
Tennessee Titans,2014,2
Washington Redskins,2012,6
Washington Redskins,2013,5


In [144]:
# Reset the index
nfl_injury_df = nfl_injury_df.reset_index()
nfl_injury_df = nfl_injury_df.rename(columns={"Reported Injury Type":"Number_of_Injuries"})
nfl_injury_df

Unnamed: 0,Team,Season,Number_of_Injuries
0,Arizona Cardinals,2012,4
1,Arizona Cardinals,2013,2
2,Arizona Cardinals,2014,4
3,Atlanta Falcons,2012,2
4,Atlanta Falcons,2013,3
...,...,...,...
86,Tennessee Titans,2013,5
87,Tennessee Titans,2014,2
88,Washington Redskins,2012,6
89,Washington Redskins,2013,5


In [145]:
# Create a crosstab dataframe
crosstab_df = pd.crosstab(nfl_injury_df.Team,nfl_injury_df.Season,values=nfl_injury_df.Number_of_Injuries,aggfunc=np.sum)
crosstab_df.head()

Season,2012,2013,2014
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Arizona Cardinals,4.0,2.0,4.0
Atlanta Falcons,2.0,3.0,1.0
Baltimore Ravens,9.0,3.0,3.0
Buffalo Bills,3.0,,2.0
Carolina Panthers,3.0,2.0,3.0


In [146]:
# Replace NaN values with 0
crosstab_df = crosstab_df.fillna(0)
crosstab_df.head()

Season,2012,2013,2014
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Arizona Cardinals,4.0,2.0,4.0
Atlanta Falcons,2.0,3.0,1.0
Baltimore Ravens,9.0,3.0,3.0
Buffalo Bills,3.0,0.0,2.0
Carolina Panthers,3.0,2.0,3.0


In [147]:
# %matplotlib notebook
# Create the 3D bar graph
# thickness of the bars
dx = 0.5
dy = 0.5

# prepare 3d axes
plt.figure(figsize=(20,12))
ax = plt.axes(projection="3d")

# set up positions for the bars 
xpos=np.arange(crosstab_df.shape[0])
ypos=np.arange(crosstab_df.shape[1])

# set the ticks in the middle of the bars
ax.set_xticks(xpos + dx/2)
ax.set_yticks(ypos + dy/2)

# create meshgrid 
xpos, ypos = np.meshgrid(xpos+0.5, ypos)
xpos = xpos.flatten()
ypos = ypos.flatten()

# the bars starts from 0 attitude
zpos=np.zeros(crosstab_df.shape).flatten()

# the bars height
dz = list(crosstab_df[2012]) + list(crosstab_df[2013]) + list(crosstab_df[2014])

# plot 
ax.bar3d(xpos,ypos,zpos,dx,dy,dz,shade=True)

# put the column / index labels
ax.w_yaxis.set_ticklabels(crosstab_df.columns)
ax.w_xaxis.set_ticklabels(crosstab_df.index,size=7,rotation=90)

# name the axes
ax.set_xlabel("NFL Teams", labelpad=40)
ax.set_ylabel("NFL Season", labelpad=10)
ax.set_zlabel("Number of Injuries", labelpad=0.1)
ax.set_title("NFL Injuries (2012 - 2014)")

# Save the plot
plt.savefig("Images/3D_graph.png", bbox_inches="tight")

# Show the plot
plt.show()

<IPython.core.display.Javascript object>

  ax.w_yaxis.set_ticklabels(crosstab_df.columns)
  ax.w_xaxis.set_ticklabels(crosstab_df.index,size=7,rotation=90)
