In [4]:
import pandas as pd
import numpy as np
import matplotlib as plt

dataFrame = pd.read_csv("NFLDraftDataCleaned.csv")

In [5]:
#Produce a summary statistics table about all the numerical values in the dataFrame
statisticalTable = dataFrame.describe().to_excel(r'Data Visualisation/Summary Statistics Table.xlsx', index=None, header=True)

In [6]:
#Visualise data about forty yard dash
fortyYardData = dataFrame['fortyYardDash']

plt.figure(figsize=(20, 10))
plt.hist(fortyYardData, bins = 50)
plt.title("Overall Distribution of Forty Yard Dash Times")
plt.xlabel("Forty Yard Dash Time (seconds)")
plt.ylabel("Frequency")
plt.savefig("Data Visualisation/Forty Yard Dash Histogram.png")
plt.show

AttributeError: module 'matplotlib' has no attribute 'figure'

In [None]:
#Visualise data about the vertical jump
verticalJumpData = dataFrame['verticalJump']

plt.figure(figsize=(20, 10))
plt.hist(verticalJumpData, bins = 25)
plt.title("Overall Distribution of Vertical Jump Distance")
plt.xlabel("Vertical Jump Height (inches)")
plt.ylabel("Frequency")
plt.savefig("Data Visualisation/Vertical Jump Histogram.png")
plt.show

In [None]:
#Visualise data about the bench press
benchPressData = dataFrame['benchPress']

plt.figure(figsize=(20, 10))
plt.hist(benchPressData, bins = 25)
plt.title("Overall Distribution of Bench Press Reps")
plt.xlabel("Number of Bench Press Reps")
plt.ylabel("Frequency")
plt.savefig("Data Visualisation/Bench Press Histogram.png")
plt.show

In [None]:
#Visualise data about the broad jump
broadJumpData = dataFrame['broadJump']

plt.figure(figsize=(20, 10))
plt.hist(broadJumpData, bins = 25)
plt.title("Overall Distribution of Broad Jump Distance")
plt.xlabel("Broad Jump Distance (inches)")
plt.ylabel("Frequency")
plt.savefig("Data Visualisation/Broad Jump Histogram.png")
plt.show

In [None]:
#Visualise data about the three cone drill
threeConeData = dataFrame['threeCone']

plt.figure(figsize=(20, 10))
plt.hist(threeConeData, bins = 50)
plt.title("Overall Distribution of Three Cone Drill Times")
plt.xlabel("Three Cone Drill Time (seconds)")
plt.ylabel("Frequency")
plt.savefig("Data Visualisation/Three Cone Drill Histogram.png")
plt.show

In [None]:
#Visualise data about the shuttle drill
shuttleData = dataFrame['shuttle']

plt.figure(figsize=(20, 10))
plt.hist(threeConeData, bins = 50)
plt.title("Overall Distribution of Shuttle Drill Times")
plt.xlabel("Shuttle Drill Time (seconds)")
plt.ylabel("Frequency")
plt.savefig("Data Visualisation/Shuttle Drill Histogram.png")
plt.show

In [None]:
#Write a generic function which can calculate average data by position given a specific column heading
#Will return two arrays, one containing the x-axis data (i.e. all positions) and the other containing y-axis
#data (i.e. associated averages for each position)
def getAverageGroupedPosition(columnHeading):
    positionData = ["All Positions"]
    averageData = []
    
    #Get the overall average column value for all players in the data set
    average = dataFrame[columnHeading].mean()
    averageData.append(average)
    
    #Get the average for that column grouped by position
    positions = dataFrame.position.unique()
    for position in positions:
        tempDataFrame = dataFrame[dataFrame['position'] == position]
        average = tempDataFrame[columnHeading].mean()
        positionData.append(position)
        averageData.append(average)
    
    return(positionData, averageData)

#Write a function which can calculate standard deviation by position given a specific column heading
#Will return two arrays, one containing the x-axis data (i.e. all positions) and the other containing y-axis
#data (i.e. associated standard deviations for each position)
def getStandardDeviationGroupedPosition(columnHeading):
    positionData = ["All Positions"]
    deviationData = []
    
    #Get the overall average column value for all players in the data set
    deviation = dataFrame[columnHeading].std()
    deviationData.append(deviation)
    
    #Get the average for that column grouped by position
    positions = dataFrame.position.unique()
    for position in positions:
        tempDataFrame = dataFrame[dataFrame['position'] == position]
        deviation = tempDataFrame[columnHeading].std()
        positionData.append(position)
        deviationData.append(deviation)
        
    return(positionData, deviationData)

In [None]:
#Retrieve data on each positions average forty yard time
(positionData, averageFortyYardData) = getAverageGroupedPosition("fortyYardDash")
(positionData, deviationFortyYardData) = getStandardDeviationGroupedPosition("fortyYardDash")

#Plot the data on a bar chart
plt.figure(figsize = (20,10))
plt.bar(positionData, averageFortyYardData, yerr = deviationFortyYardData)
plt.ylim(bottom = 4, top = 5.5)
plt.yticks(np.arange(4, 5.5, step = 0.1))
plt.title("Average Forty Yard Dash Time by Position")
plt.xlabel("Position")
plt.ylabel("Forty Yard Dash Time (seconds)")
plt.savefig("Data Visualisation/Forty Yard Dash Bar Chart.png")
plt.show

In [None]:
#Retrieve data on each positions average vertical jump time
(positionData, averageVerticalJumpData) = getAverageGroupedPosition("verticalJump")
(positionData, deviationVerticalJumpData) = getStandardDeviationGroupedPosition("verticalJump")

#Plot the data on a bar chart
plt.figure(figsize = (20,10))
plt.bar(positionData, averageVerticalJumpData, yerr = deviationVerticalJumpData)
plt.ylim(bottom = 25, top = 40)
plt.yticks(np.arange(24, 40, step = 0.5))
plt.title("Average Vertical Jump Height by Position")
plt.xlabel("Position")
plt.ylabel("Vertical Jump (inches)")
plt.savefig("Data Visualisation/Vertical Jump Bar Chart.png")
plt.show

In [None]:
#Retrieve data on each positions average bench press
(positionData, averageBenchPressData) = getAverageGroupedPosition("benchPress")
(positionData, deviationBenchPressData) = getStandardDeviationGroupedPosition("benchPress")

#Plot the data on a bar chart
plt.figure(figsize = (20,10))
plt.bar(positionData, averageBenchPressData, yerr = deviationBenchPressData)
plt.ylim(bottom = 10, top = 27)
plt.yticks(np.arange(10, 27, step = 1))
plt.title("Average Number of Bench Press Reps per Position")
plt.xlabel("Position")
plt.ylabel("Number of Reps")
plt.savefig("Data Visualisation/Bench Press Bar Chart.png")
plt.show

In [None]:
#Retrieve data on each positions average broad jump distance
(positionData, averageBroadJumpData) = getAverageGroupedPosition("broadJump")
(positionData, deviationBroadJumpData) = getStandardDeviationGroupedPosition("broadJump")

#Plot the data on a bar chart
plt.figure(figsize = (20,10))
plt.bar(positionData, averageBroadJumpData, yerr = deviationBroadJumpData)
plt.ylim(bottom = 95, top = 135)
plt.yticks(np.arange(95, 135, step = 2.5))
plt.title("Average Broad Jump Distance per Position")
plt.xlabel("Position")
plt.ylabel("Broad Jump Distance (inches)")
plt.savefig("Data Visualisation/Broad Jump Bar Chart.png")
plt.show

In [None]:
#Retrieve data on each positions average three cone drill time
(positionData, averageThreeConeData) = getAverageGroupedPosition("threeCone")
(positionData, deviationThreeConeData) = getStandardDeviationGroupedPosition("threeCone")

#Plot the data on a bar chart
plt.figure(figsize = (20,10))
plt.bar(positionData, averageThreeConeData, yerr = deviationThreeConeData)
plt.ylim(bottom = 6, top = 8)
plt.yticks(np.arange(6, 8, step = 0.1))
plt.title("Average Three Cone Drill Speed per Position")
plt.xlabel("Position")
plt.ylabel("Three Cone Drill Time (seconds)")
plt.savefig("Data Visualisation/Three Cone Drill Bar Chart.png")
plt.show

In [None]:
#Retrieve data on each positions average forty yard time
(positionData, averageShuttleData) = getAverageGroupedPosition("shuttle")
(positionData, deviationShuttleData) = getStandardDeviationGroupedPosition("shuttle")

#Plot the data on a bar chart
plt.figure(figsize = (20,10))
plt.bar(positionData, averageShuttleData, yerr = deviationShuttleData)
plt.ylim(bottom = 3, top = 5)
plt.yticks(np.arange(3, 5, step = 0.1))
plt.title("Average Shuttle Drill Speed per Position")
plt.xlabel("Position")
plt.ylabel("Shuttle Drill Time (seconds)")
plt.savefig("Data Visualisation/Shuttle Drill Bar Chart.png")
plt.show