### Glossary  <br>

`Year` -- Year at time of season finale <br>
`Age` -- Player age at time of finale <br>
`Pos` -- Player position <br>
`GP` -- Number of Games Played in season <br>
`G` -- Goals <br>
`A` -- Assists <br>
`PTS` -- Points <br>
`PS` -- Point Shares; an estimate of the number of points contributed by a player. <br>
`PP` -- Power Play Goals <br>
`PP.1` -- Power Play Assists <br>
`S` -- Shots on Goal <br>
`S%` -- Shooting Percentage (total goals / total shots on goal) <br>
`TOI` -- Total Time on Ice (in minutes) <br>
`ATOI` -- Average Time on Ice per game <br>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
from scipy.stats import ttest_ind
import glob

We are going to add a 'Year' column to each .csv file (each season's data) one-by-one before compiling them all into one large dataframe. <br>


In [2]:
# # Reading csv file
# nhl2006= pd.read_csv("C:/Users/joshm/Documents/BrainStation/Capstone Project/hockey-reference.com data/nhl2006.csv", index_col=1)

# # Inserting a new 'Year' column into df
# nhl2006.insert(loc=1, column='Year', value=2006)

# # saving our modified .csv file
# nhl2006.to_csv("C:/Users/joshm/Documents/BrainStation/Capstone Project/hockey-reference.com data/nhl2006.csv")

In [None]:
# # Sort by any column
# sorted_nhl2018 = nhl2018.sort_values(by='G', ascending=False)
# sorted_nhl2018.head(30)

In [None]:
# # applying a filter (in this case gp >= 30) and then sorting.
# filtered_nhl23 = nhl23[nhl23['GP'] >= 30]
# filtered_nhl23.sort_values(by='Age', ascending=True).head(30)

In [7]:
# Combining multiple .csv files into one dataframe

# Get a list of all CSV files in the directory
csv_files = glob.glob("C:/Users/joshm/Documents/BrainStation/Capstone Project/hockey-reference.com data/*.csv")

# Initialize an empty list to store DataFrames
dfs = []

# Iterate over each CSV file
for csv_file in csv_files:
    # Read the CSV file into a DataFrame
    combined_df = pd.read_csv(csv_file, index_col=0)
    # Append the DataFrame to the list
    dfs.append(combined_df)

# Concatenate all DataFrames in the list into one DataFrame
nhl_df = pd.concat(dfs, ignore_index=False)

nhl_df

Unnamed: 0_level_0,Rk,Year,Age,Tm,Pos,GP,G,A,PTS,+/-,...,S,S%,TOI,ATOI,-9999,BLK,HIT,FOW,FOL,FO%
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Joe Thornton,1,2006,26.0,TOT,C,81,29,96,125,31,...,195,14.9,1728,21:20,thornjo01,,,,,
Jaromír Jágr,2,2006,33.0,NYR,RW,82,54,69,123,34,...,368,14.7,1810,22:05,jagrja01,,,,,
Alex Ovechkin,3,2006,20.0,WSH,LW,81,52,54,106,2,...,425,12.2,1751,21:37,ovechal01,,,,,
Daniel Alfredsson*,4,2006,33.0,OTT,RW,77,43,60,103,29,...,249,17.3,1670,21:41,alfreda01,,,,,
Dany Heatley,5,2006,25.0,OTT,LW,82,50,53,103,29,...,300,16.7,1734,21:09,heatlda01,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Chris Wagner,865,2024,32.0,COL,C,4,0,0,0,0,...,2,0.0,28,6:54,wagnech01,0.0,11.0,7.0,3.0,70.0
Samuel Walker,866,2024,24.0,MIN,C,4,0,0,0,-3,...,3,0.0,37,9:22,walkesa01,1.0,2.0,0.0,1.0,0.0
Colin White,867,2024,27.0,TOT,C,13,0,0,0,-1,...,10,0.0,131,10:02,whiteco02,5.0,13.0,16.0,13.0,55.2
Ryan Winterton,868,2024,20.0,SEA,C,3,0,0,0,-1,...,1,0.0,25,8:12,wintery01,1.0,3.0,0.0,0.0,


In [8]:
nhl_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 16937 entries, Joe Thornton to Shane Wright
Data columns (total 29 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Rk      16937 non-null  int64  
 1   Year    16937 non-null  int64  
 2   Age     16935 non-null  float64
 3   Tm      16937 non-null  object 
 4   Pos     16937 non-null  object 
 5   GP      16937 non-null  int64  
 6   G       16937 non-null  int64  
 7   A       16937 non-null  int64  
 8   PTS     16937 non-null  int64  
 9   +/-     16937 non-null  int64  
 10  PIM     16937 non-null  int64  
 11  PS      16937 non-null  float64
 12  EV      16937 non-null  int64  
 13  PP      16937 non-null  int64  
 14  SH      16937 non-null  int64  
 15  GW      16937 non-null  int64  
 16  EV.1    16937 non-null  int64  
 17  PP.1    16937 non-null  int64  
 18  SH.1    16937 non-null  int64  
 19  S       16937 non-null  int64  
 20  S%      16430 non-null  float64
 21  TOI     16937 non-null

In [9]:
# Dropping unwanted columns
nhl_df.drop(columns=['Rk','Tm','+/-','PIM','EV','SH','GW','EV.1','SH.1','BLK','HIT','FOW','FOL','FO%','-9999'], inplace=True)
nhl_df

Unnamed: 0_level_0,Year,Age,Pos,GP,G,A,PTS,PS,PP,PP.1,S,S%,TOI,ATOI
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Joe Thornton,2006,26.0,C,81,29,96,125,13.3,11,40,195,14.9,1728,21:20
Jaromír Jágr,2006,33.0,RW,82,54,69,123,15.5,24,28,368,14.7,1810,22:05
Alex Ovechkin,2006,20.0,LW,81,52,54,106,12.7,21,31,425,12.2,1751,21:37
Daniel Alfredsson*,2006,33.0,RW,77,43,60,103,12.3,16,32,249,17.3,1670,21:41
Dany Heatley,2006,25.0,LW,82,50,53,103,12.9,23,20,300,16.7,1734,21:09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Chris Wagner,2024,32.0,C,4,0,0,0,0.0,0,0,2,0.0,28,6:54
Samuel Walker,2024,24.0,C,4,0,0,0,-0.2,0,0,3,0.0,37,9:22
Colin White,2024,27.0,C,13,0,0,0,-0.3,0,0,10,0.0,131,10:02
Ryan Winterton,2024,20.0,C,3,0,0,0,-0.1,0,0,1,0.0,25,8:12


In [10]:
# Renaming some columns
nhl_df.rename(columns={'PS':'PtSh','PP':'PPG','PP.1':'PPA', 'S':'Shots'}, inplace=True)
nhl_df

Unnamed: 0_level_0,Year,Age,Pos,GP,G,A,PTS,PtSh,PPG,PPA,Shots,S%,TOI,ATOI
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Joe Thornton,2006,26.0,C,81,29,96,125,13.3,11,40,195,14.9,1728,21:20
Jaromír Jágr,2006,33.0,RW,82,54,69,123,15.5,24,28,368,14.7,1810,22:05
Alex Ovechkin,2006,20.0,LW,81,52,54,106,12.7,21,31,425,12.2,1751,21:37
Daniel Alfredsson*,2006,33.0,RW,77,43,60,103,12.3,16,32,249,17.3,1670,21:41
Dany Heatley,2006,25.0,LW,82,50,53,103,12.9,23,20,300,16.7,1734,21:09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Chris Wagner,2024,32.0,C,4,0,0,0,0.0,0,0,2,0.0,28,6:54
Samuel Walker,2024,24.0,C,4,0,0,0,-0.2,0,0,3,0.0,37,9:22
Colin White,2024,27.0,C,13,0,0,0,-0.3,0,0,10,0.0,131,10:02
Ryan Winterton,2024,20.0,C,3,0,0,0,-0.1,0,0,1,0.0,25,8:12


In [11]:
nhl_df

Unnamed: 0_level_0,Year,Age,Pos,GP,G,A,PTS,PtSh,PPG,PPA,Shots,S%,TOI,ATOI
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Joe Thornton,2006,26.0,C,81,29,96,125,13.3,11,40,195,14.9,1728,21:20
Jaromír Jágr,2006,33.0,RW,82,54,69,123,15.5,24,28,368,14.7,1810,22:05
Alex Ovechkin,2006,20.0,LW,81,52,54,106,12.7,21,31,425,12.2,1751,21:37
Daniel Alfredsson*,2006,33.0,RW,77,43,60,103,12.3,16,32,249,17.3,1670,21:41
Dany Heatley,2006,25.0,LW,82,50,53,103,12.9,23,20,300,16.7,1734,21:09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Chris Wagner,2024,32.0,C,4,0,0,0,0.0,0,0,2,0.0,28,6:54
Samuel Walker,2024,24.0,C,4,0,0,0,-0.2,0,0,3,0.0,37,9:22
Colin White,2024,27.0,C,13,0,0,0,-0.3,0,0,10,0.0,131,10:02
Ryan Winterton,2024,20.0,C,3,0,0,0,-0.1,0,0,1,0.0,25,8:12


In [12]:
nhl_df.isnull().sum()

Year       0
Age        2
Pos        0
GP         0
G          0
A          0
PTS        0
PtSh       0
PPG        0
PPA        0
Shots      0
S%       507
TOI        0
ATOI       0
dtype: int64

In [13]:
# setting a new dataframe that excludes all players that have played less than 30 games in any given season

new_nhl_df = nhl_df[nhl_df['GP'] >= 30]
new_nhl_df.sort_values(by='PTS', ascending=False)

Unnamed: 0_level_0,Year,Age,Pos,GP,G,A,PTS,PtSh,PPG,PPA,Shots,S%,TOI,ATOI
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Connor McDavid,2023,26.0,C,82,64,89,153,18.2,21,50,352,18.2,1836,22:23
Nikita Kucherov,2019,25.0,RW,82,41,87,128,14.6,15,33,246,16.7,1637,19:58
Leon Draisaitl,2023,27.0,C,80,52,76,128,14.2,32,30,247,21.1,1739,21:44
Joe Thornton,2006,26.0,C,81,29,96,125,13.3,11,40,195,14.9,1728,21:20
Connor McDavid,2022,25.0,C,80,44,79,123,14.0,10,34,314,14.0,1765,22:04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Raitis Ivanāns,2010,31.0,LW,61,0,0,0,-0.9,0,0,18,0.0,299,4:54
Cam Janssen,2010,25.0,RW,43,0,0,0,-0.5,0,0,11,0.0,203,4:43
John Scott,2013,30.0,D,34,0,0,0,0.0,0,0,15,0.0,185,5:27
Zenon Konopka,2013,32.0,C,37,0,0,0,-0.7,0,0,18,0.0,312,8:26


In [14]:
new_nhl_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11722 entries, Joe Thornton to Oskar Steen
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Year    11722 non-null  int64  
 1   Age     11722 non-null  float64
 2   Pos     11722 non-null  object 
 3   GP      11722 non-null  int64  
 4   G       11722 non-null  int64  
 5   A       11722 non-null  int64  
 6   PTS     11722 non-null  int64  
 7   PtSh    11722 non-null  float64
 8   PPG     11722 non-null  int64  
 9   PPA     11722 non-null  int64  
 10  Shots   11722 non-null  int64  
 11  S%      11722 non-null  float64
 12  TOI     11722 non-null  int64  
 13  ATOI    11722 non-null  object 
dtypes: float64(3), int64(9), object(2)
memory usage: 1.3+ MB


In [15]:
# This is how we can look at the mean of a particular player's stats
new_nhl_df.groupby('Player').mean(numeric_only=True).round(2).sort_values(by='PTS', ascending=False).head(30)

Unnamed: 0_level_0,Year,Age,GP,G,A,PTS,PtSh,PPG,PPA,Shots,S%,TOI
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Connor McDavid,2020.0,23.0,69.11,36.0,68.33,104.33,12.23,8.44,27.11,236.11,15.1,1501.44
Sidney Crosby,2015.17,27.17,67.94,31.89,52.83,84.72,10.16,9.33,20.72,217.5,14.68,1409.83
Nikita Kucherov,2018.8,24.8,70.3,31.4,51.7,83.1,9.95,8.7,21.9,215.3,14.38,1313.8
Artemi Panarin,2020.0,28.0,72.11,27.67,54.56,82.22,9.4,7.22,17.89,192.0,14.49,1418.33
Leon Draisaitl,2019.5,23.5,69.3,33.4,48.1,81.5,9.39,13.9,14.6,183.5,16.82,1419.9
Alex Ovechkin,2015.0,29.0,73.68,44.11,36.26,80.37,10.57,16.11,13.89,344.37,12.81,1530.42
Marc Savard,2008.0,30.0,72.2,20.0,58.4,78.4,7.8,8.6,26.8,186.4,10.74,1444.6
Mitch Marner,2020.5,22.5,70.38,23.88,54.38,78.25,8.21,5.25,18.62,184.0,12.99,1395.88
Nathan MacKinnon,2019.0,23.0,69.82,28.91,48.82,77.73,8.9,8.18,16.82,277.18,10.2,1413.27
Martin St. Louis*,2010.5,34.5,77.0,28.2,49.2,77.4,8.43,7.2,18.0,213.8,13.39,1669.8


In [16]:
# This is how we can look at totals or means across the years
new_nhl_df.groupby('Year').mean(numeric_only=True).round(2).sort_values(by='Year', ascending=False).head(30)

Unnamed: 0_level_0,Age,GP,G,A,PTS,PtSh,PPG,PPA,Shots,S%,TOI
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024,27.79,50.33,9.12,15.25,24.37,2.71,1.93,3.66,88.45,9.29,850.64
2023,27.4,67.56,12.19,20.55,32.73,3.6,2.57,4.89,119.88,9.22,1131.57
2022,27.18,65.66,11.83,19.93,31.76,3.52,2.29,4.42,118.58,9.08,1104.37
2021,26.97,48.05,8.25,13.83,22.09,2.65,1.65,3.19,83.38,9.24,814.96
2020,26.83,59.09,10.26,17.05,27.32,3.18,2.08,3.99,106.58,8.9,997.02
2019,26.53,66.75,11.5,19.3,30.8,3.54,2.25,4.28,119.8,8.64,1120.4
2018,26.61,67.34,11.31,19.05,30.36,3.56,2.39,4.58,122.12,8.52,1127.73
2017,26.81,67.43,10.59,17.68,28.27,3.6,2.25,4.28,115.92,8.4,1131.1
2016,27.01,67.09,10.36,17.31,27.67,3.58,2.29,4.34,113.84,8.25,1126.66
2015,27.21,66.55,10.18,17.42,27.6,3.53,2.2,4.2,112.94,8.1,1118.28


In [17]:
# Here is our proper code to determine points per game
new_nhl_df.loc[:, 'PTS/g'] = (new_nhl_df['PTS'] / new_nhl_df['GP']).round(2)
new_nhl_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_nhl_df.loc[:, 'PTS/g'] = (new_nhl_df['PTS'] / new_nhl_df['GP']).round(2)


Unnamed: 0_level_0,Year,Age,Pos,GP,G,A,PTS,PtSh,PPG,PPA,Shots,S%,TOI,ATOI,PTS/g
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Joe Thornton,2006,26.0,C,81,29,96,125,13.3,11,40,195,14.9,1728,21:20,1.54
Jaromír Jágr,2006,33.0,RW,82,54,69,123,15.5,24,28,368,14.7,1810,22:05,1.50
Alex Ovechkin,2006,20.0,LW,81,52,54,106,12.7,21,31,425,12.2,1751,21:37,1.31
Daniel Alfredsson*,2006,33.0,RW,77,43,60,103,12.3,16,32,249,17.3,1670,21:41,1.34
Dany Heatley,2006,25.0,LW,82,50,53,103,12.9,23,20,300,16.7,1734,21:09,1.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Mark Kastelic,2024,24.0,C,38,2,1,3,-0.2,0,0,33,6.1,274,7:13,0.08
Matt Martin,2024,34.0,LW,35,2,1,3,0.1,0,0,29,6.9,323,9:13,0.09
Ryan Reaves,2024,37.0,RW,30,2,1,3,-0.4,0,0,14,14.3,239,7:57,0.10
Benoit-Olivier Groulx,2024,23.0,C,32,0,2,2,-0.6,0,0,27,0.0,406,12:41,0.06


In [18]:
columns = list(new_nhl_df.columns)

# Remove the column to move from its current position
columns.remove('PTS/g')

# Insert the column at the new position
columns.insert(7, 'PTS/g')

# Reassign the column order by selecting columns in the new order
new_nhl_df = new_nhl_df[columns]

# Display the DataFrame after moving the column
print(new_nhl_df)

                       Year   Age Pos  GP   G   A  PTS  PTS/g  PtSh  PPG  PPA  \
Player                                                                          
Joe Thornton           2006  26.0   C  81  29  96  125   1.54  13.3   11   40   
Jaromír Jágr           2006  33.0  RW  82  54  69  123   1.50  15.5   24   28   
Alex Ovechkin          2006  20.0  LW  81  52  54  106   1.31  12.7   21   31   
Daniel Alfredsson*     2006  33.0  RW  77  43  60  103   1.34  12.3   16   32   
Dany Heatley           2006  25.0  LW  82  50  53  103   1.26  12.9   23   20   
...                     ...   ...  ..  ..  ..  ..  ...    ...   ...  ...  ...   
Mark Kastelic          2024  24.0   C  38   2   1    3   0.08  -0.2    0    0   
Matt Martin            2024  34.0  LW  35   2   1    3   0.09   0.1    0    0   
Ryan Reaves            2024  37.0  RW  30   2   1    3   0.10  -0.4    0    0   
Benoit-Olivier Groulx  2024  23.0   C  32   0   2    2   0.06  -0.6    0    0   
Oskar Steen            2024 

In [19]:
new_nhl_df.head(30)

Unnamed: 0_level_0,Year,Age,Pos,GP,G,A,PTS,PTS/g,PtSh,PPG,PPA,Shots,S%,TOI,ATOI
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Joe Thornton,2006,26.0,C,81,29,96,125,1.54,13.3,11,40,195,14.9,1728,21:20
Jaromír Jágr,2006,33.0,RW,82,54,69,123,1.5,15.5,24,28,368,14.7,1810,22:05
Alex Ovechkin,2006,20.0,LW,81,52,54,106,1.31,12.7,21,31,425,12.2,1751,21:37
Daniel Alfredsson*,2006,33.0,RW,77,43,60,103,1.34,12.3,16,32,249,17.3,1670,21:41
Dany Heatley,2006,25.0,LW,82,50,53,103,1.26,12.9,23,20,300,16.7,1734,21:09
Sidney Crosby,2006,18.0,C,81,39,63,102,1.26,10.5,16,31,278,14.0,1630,20:08
Eric Staal,2006,21.0,C,82,45,55,100,1.22,11.1,19,21,279,16.1,1611,19:39
Ilya Kovalchuk,2006,22.0,LW,78,52,46,98,1.26,11.2,27,29,323,16.1,1731,22:11
Marc Savard,2006,28.0,C,82,28,69,97,1.18,9.3,14,36,212,13.2,1682,20:30
Jonathan Cheechoo,2006,25.0,RW,82,56,37,93,1.13,12.7,24,17,317,17.7,1637,19:57


In [20]:
new_nhl_df.groupby('Player').sum(numeric_only=True).sort_values(by='PTS/g', ascending=False).head(30)

Unnamed: 0_level_0,Year,Age,GP,G,A,PTS,PTS/g,PtSh,PPG,PPA,Shots,S%,TOI
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Sidney Crosby,36273,489.0,1223,574,951,1525,22.72,182.8,168,373,3915,264.3,25377
Alex Ovechkin,38285,551.0,1400,838,689,1527,20.6,200.8,306,264,6543,243.4,29078
Evgeni Malkin,36279,513.0,1118,487,786,1273,20.02,149.0,176,310,3630,237.0,22029
Patrick Kane,32248,424.0,1180,451,786,1237,16.86,138.2,124,278,3911,186.2,23900
Anže Kopitar,36279,495.0,1348,410,777,1187,15.81,129.2,118,298,3193,232.7,28202
Steven Stamkos,30247,382.0,1043,529,557,1086,15.59,135.4,204,200,3186,249.5,20042
Joe Thornton,34238,578.0,1205,270,848,1118,14.97,116.8,93,328,2020,228.4,22434
Ryan Getzlaf,34238,476.0,1157,282,737,1019,14.84,106.7,86,255,2559,184.0,22552
Nicklas Bäckström,32248,440.0,1097,271,761,1032,14.77,105.3,86,335,2252,192.8,21310
Claude Giroux,32264,456.0,1153,347,707,1054,14.5,110.8,95,292,3002,188.5,22565


In [21]:
new_nhl_df.groupby('Player').mean(numeric_only=True).round(2).sort_values(by='PTS/g', ascending=False).head(30)

Unnamed: 0_level_0,Year,Age,GP,G,A,PTS,PTS/g,PtSh,PPG,PPA,Shots,S%,TOI
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Connor McDavid,2020.0,23.0,69.11,36.0,68.33,104.33,1.51,12.23,8.44,27.11,236.11,15.1,1501.44
Sidney Crosby,2015.17,27.17,67.94,31.89,52.83,84.72,1.26,10.16,9.33,20.72,217.5,14.68,1409.83
Nikita Kucherov,2018.8,24.8,70.3,31.4,51.7,83.1,1.18,9.95,8.7,21.9,215.3,14.38,1313.8
Auston Matthews,2020.5,22.5,67.12,43.88,33.5,77.38,1.16,10.38,11.38,10.25,268.25,16.6,1325.62
Artemi Panarin,2020.0,28.0,72.11,27.67,54.56,82.22,1.16,9.4,7.22,17.89,192.0,14.49,1418.33
Kirill Kaprizov,2022.5,24.5,63.5,35.0,39.25,74.25,1.16,8.93,12.25,14.25,221.0,15.88,1260.0
Leon Draisaitl,2019.5,23.5,69.3,33.4,48.1,81.5,1.14,9.39,13.9,14.6,183.5,16.82,1419.9
Nathan MacKinnon,2019.0,23.0,69.82,28.91,48.82,77.73,1.14,8.9,8.18,16.82,277.18,10.2,1413.27
Mitch Marner,2020.5,22.5,70.38,23.88,54.38,78.25,1.13,8.21,5.25,18.62,184.0,12.99,1395.88
Evgeni Malkin,2015.5,28.5,62.11,27.06,43.67,70.72,1.11,8.28,9.78,17.22,201.67,13.17,1223.83


In [22]:
new_nhl_df.groupby('Player').sum(numeric_only=True).sort_values(by='PTS', ascending=False).head(30)

Unnamed: 0_level_0,Year,Age,GP,G,A,PTS,PTS/g,PtSh,PPG,PPA,Shots,S%,TOI
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Alex Ovechkin,38285,551.0,1400,838,689,1527,20.6,200.8,306,264,6543,243.4,29078
Sidney Crosby,36273,489.0,1223,574,951,1525,22.72,182.8,168,373,3915,264.3,25377
Evgeni Malkin,36279,513.0,1118,487,786,1273,20.02,149.0,176,310,3630,237.0,22029
Patrick Kane,32248,424.0,1180,451,786,1237,16.86,138.2,124,278,3911,186.2,23900
Anže Kopitar,36279,495.0,1348,410,777,1187,15.81,129.2,118,298,3193,232.7,28202
Joe Thornton,34238,578.0,1205,270,848,1118,14.97,116.8,93,328,2020,228.4,22434
Steven Stamkos,30247,382.0,1043,529,557,1086,15.59,135.4,204,200,3186,249.5,20042
Claude Giroux,32264,456.0,1153,347,707,1054,14.5,110.8,95,292,3002,188.5,22565
Joe Pavelski,36279,549.0,1309,469,581,1050,14.26,128.0,174,203,3656,235.7,24335
Eric Staal,34239,494.0,1284,444,588,1032,13.53,112.4,131,179,3877,194.2,24748
