# ESTIMATING PHYSICAL FATIGUE INDEX OF PLAYERS USING ICE HOCKEY METRICS

## Objective: To develop an application that enables coaches to input match data and obtain instant feedback on potential fatigue levels,based on the dataset's metrics.


In [60]:
# Import required Libraries
import pandas as pd

In [61]:
# # We change the delimiter to ; and save the file for ease of use
# df_lugano.to_csv("lugano.csv", sep=",", index=False)
# df_lugano.to_csv("zsc.csv", sep=",", index=False)

In [62]:
# Load the datasets into Pandas DataFrame
df_lugano = pd.read_csv("lugano.csv", encoding="ISO-8859-1")
# df_zsc = pd.read_csv("zsc.csv", encoding="ISO-8859-1")

In [63]:
# Print the first 5 rows of the datasets
df_lugano.head()

Unnamed: 0,Group name,League ID,Description,Session ID,Types,Session end (Local timezone),Session end date (Local timezone),Session end date (UTC),Session end time (Local timezone),Session end time (UTC),...,Sprints / min (medium),Sprints / min (high),Sprints / min (very high),Sprints / min (near max),Turns,Sharp Turns,Wide Turns,Turns / min,Sharp Turns / min,Wide Turns / min
0,Defense,,vs Lugano,194,Match,16.11.2022 22:09:47,16.11.2022,16.11.2022,22:09:47,21:09:47,...,35,6,0,0,33,10,23,191,58,133
1,Offense,,vs Lugano,194,Match,16.11.2022 22:09:47,16.11.2022,16.11.2022,22:09:47,21:09:47,...,104,7,17,0,56,9,47,488,78,409
2,Offense,,vs Lugano,194,Match,16.11.2022 22:09:47,16.11.2022,16.11.2022,22:09:47,21:09:47,...,58,32,26,0,48,6,42,309,39,27
3,Offense,,vs Lugano,194,Match,16.11.2022 22:09:47,16.11.2022,16.11.2022,22:09:47,21:09:47,...,86,4,12,0,58,12,46,334,69,265
4,Defense,,vs Lugano,194,Match,16.11.2022 22:09:47,16.11.2022,16.11.2022,22:09:47,21:09:47,...,196,0,0,0,2,0,2,131,0,131


In [64]:
# List of columns to keep in the dataset
columns_to_keep = [
    "Duration (s)",
    "Distance (m)",
    "Position",
    "Speed (max.) (km/h)",
    "Speed_Avg",
    "Time on Ice (s)",
    "Acceleration_max",
    "High Metabolic Power Distance (m)",
    "High Metabolic Power Distance / min (m)",
    "Mechanical Intensity",
    "Mechanical Load",
]

In [65]:
# Create a new DataFrame with only the recommended columns to keep
df_imp_lugano = df_lugano[columns_to_keep]
# print(df_imp_lugano)

In [66]:
# add a new column called player id to the dataframe and fill it with the player id starting from 1 using .loc as the first column
df_imp_lugano.insert(loc=0, column="Player ID", value=range(1, 1 + len(df_imp_lugano)))

In [67]:
df_imp_lugano.head()

Unnamed: 0,Player ID,Duration (s),Distance (m),Position,Speed (max.) (km/h),Speed_Avg,Time on Ice (s),Acceleration_max,High Metabolic Power Distance (m),High Metabolic Power Distance / min (m),Mechanical Intensity,Mechanical Load
0,1,8836,3163,D,2883,1097,1038,443,1287,74.0,4594,79465
1,2,8836,2881,F,3202,1505,689,364,1724,150.0,4285,49222
2,3,8836,3586,F,3377,1385,932,435,2090,134.0,4612,71661
3,4,8836,3881,F,314,1339,1043,414,1956,113.0,4438,77162
4,5,8836,354,D,2738,1385,92,349,179,117.0,4495,6885


In [68]:
# Check the data types of the columns
df_imp_lugano.dtypes

Player ID                                    int64
Duration (s)                                 int64
Distance (m)                                 int64
Position                                    object
Speed (max.) (km/h)                         object
Speed_Avg                                   object
Time on Ice (s)                              int64
Acceleration_max                            object
High Metabolic Power Distance (m)            int64
High Metabolic Power Distance / min (m)    float64
Mechanical Intensity                        object
Mechanical Load                             object
dtype: object

In [69]:
# Convert the data type of the column mechanical intensity from string to float

df_imp_lugano["Mechanical Intensity"] = (
    df_imp_lugano["Mechanical Intensity"].str.replace(",", ".").astype(float)
)

# Check the data types of the columns
df_imp_lugano.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_imp_lugano["Mechanical Intensity"] = (


Player ID                                    int64
Duration (s)                                 int64
Distance (m)                                 int64
Position                                    object
Speed (max.) (km/h)                         object
Speed_Avg                                   object
Time on Ice (s)                              int64
Acceleration_max                            object
High Metabolic Power Distance (m)            int64
High Metabolic Power Distance / min (m)    float64
Mechanical Intensity                       float64
Mechanical Load                             object
dtype: object

In [70]:
# As we want only unique player data, we select only the first 21 rows of the dataframe
# We infer that because an ice hockey team typically consists of 20 players, including 2 goaltenders and 18 skaters.
# and the the proportion of forwards to defensemen
df_imp_lugano_21 = df_imp_lugano.iloc[0:21, :]
df_imp_lugano_21

Unnamed: 0,Player ID,Duration (s),Distance (m),Position,Speed (max.) (km/h),Speed_Avg,Time on Ice (s),Acceleration_max,High Metabolic Power Distance (m),High Metabolic Power Distance / min (m),Mechanical Intensity,Mechanical Load
0,1,8836,3163,D,2883,1097,1038,443,1287,74.0,45.94,79465
1,2,8836,2881,F,3202,1505,689,364,1724,150.0,42.85,49222
2,3,8836,3586,F,3377,1385,932,435,2090,134.0,46.12,71661
3,4,8836,3881,F,314,1339,1043,414,1956,113.0,44.38,77162
4,5,8836,354,D,2738,1385,92,349,179,117.0,44.95,6885
5,6,8836,5087,D,3083,1347,1360,398,2501,110.0,41.27,93539
6,7,8836,1066,Goalkeeper,1299,96,4007,311,7,0.0,19.29,128788
7,8,8836,4683,F,3246,1451,1162,391,2758,142.0,44.56,86295
8,9,8836,4009,F,3351,1165,1239,416,2066,100.0,38.62,79727
9,10,8836,4383,D,3424,1208,1307,361,1913,88.0,45.25,98554


In [71]:
# Check if any NA in Each Row for the first 21 rows
df_imp_lugano_21.isnull().sum(axis=1)
# df_imp_lugano.isna().any(axis=1)

# We see there are no NA values in the first 21 rows of the dataframe

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
15    0
16    0
17    0
18    0
19    0
20    0
dtype: int64

In [72]:
# Calculate the ratio of forwards to defensemen using position column

df_imp_lugano_21["Position"].value_counts()

Position
F             14
D              6
Goalkeeper     1
Name: count, dtype: int64

In [73]:
# Calculate ratio of F to D
F = df_imp_lugano_21["Position"].value_counts()[0]
D = df_imp_lugano_21["Position"].value_counts()[1]
ratio = F / D

# print ratio as a fraction

print(ratio)

2.3333333333333335


  F = df_imp_lugano_21["Position"].value_counts()[0]
  D = df_imp_lugano_21["Position"].value_counts()[1]


## CREATING A FATIGUE LEVEL FORMULA

- It is a complex task that typically involves empirical research, biomechanics, and physiological considerations.
- However, we propose a simple, hypothetical formula to estimate fatigue.
- DISCLAIMER : This is a rough estimation and not based on scientific research.
- Using the important metrics as recommended by Professor Martin Rumo, we attempt to create a formula that can be used to estimate fatigue levels.


### Fatigue Index = [(Time on ice/ Match Duration) + (High Metabolic Power Distance / Total Distance) + (Mechanical Intensity)

- TIME FACTOR : Here, the idea is simple: the more time a player spends on the ice, the more fatigued they're likely to be.
- DISTANCE OR INTENSITY FACTOR : Percentage of the distance covered at high intensities. It's a straightforward indicator of how hard a player pushed themselves relative to their overall activity. It is the distance covered at high metabolic power divided by the total distance covered.
- MECHANICAL OR LOAD INTENSITY : How intense a player's session was concerning accelerations and decelerations.It models the intensity of a session or phase and provides important information about its impact on the lower limbs. It is the Mechanical Load divided by the total time in minutes.


In [74]:
# First we need to normalize the Mechanical Intensity column, so that the values are between 0 and 1
# This is to make sure that it can be added to TIME FACTOR, and DISTANCE FACTOR which are ratios in the next step
# With historical data, from the clubs, min and max intensities can be adjusted further in the future to normalize the intensity

min_val = df_imp_lugano_21["Mechanical Intensity"].min()
max_val = df_imp_lugano_21["Mechanical Intensity"].max()

df_imp_lugano_21["Normalized Mechanical Intensity"] = (
    df_imp_lugano_21["Mechanical Intensity"] - min_val
) / (max_val - min_val)

df_imp_lugano_21

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_imp_lugano_21["Normalized Mechanical Intensity"] = (


Unnamed: 0,Player ID,Duration (s),Distance (m),Position,Speed (max.) (km/h),Speed_Avg,Time on Ice (s),Acceleration_max,High Metabolic Power Distance (m),High Metabolic Power Distance / min (m),Mechanical Intensity,Mechanical Load,Normalized Mechanical Intensity
0,1,8836,3163,D,2883,1097,1038,443,1287,74.0,45.94,79465,0.961371
1,2,8836,2881,F,3202,1505,689,364,1724,150.0,42.85,49222,0.871624
2,3,8836,3586,F,3377,1385,932,435,2090,134.0,46.12,71661,0.966599
3,4,8836,3881,F,314,1339,1043,414,1956,113.0,44.38,77162,0.916062
4,5,8836,354,D,2738,1385,92,349,179,117.0,44.95,6885,0.932617
5,6,8836,5087,D,3083,1347,1360,398,2501,110.0,41.27,93539,0.825733
6,7,8836,1066,Goalkeeper,1299,96,4007,311,7,0.0,19.29,128788,0.187337
7,8,8836,4683,F,3246,1451,1162,391,2758,142.0,44.56,86295,0.92129
8,9,8836,4009,F,3351,1165,1239,416,2066,100.0,38.62,79727,0.748766
9,10,8836,4383,D,3424,1208,1307,361,1913,88.0,45.25,98554,0.94133


In [77]:
# Create a python function in pandas to calculate fatigue index


def fatigue_index(df):
    """This function calculates the fatigue index for each player"""
    return (
        (df["Time on Ice (s)"] / df["Duration (s)"])
        + df["High Metabolic Power Distance (m)"] / df["Distance (m)"]
    ) + df["Normalized Mechanical Intensity"]

In [79]:
fatigue_index(df_imp_lugano_21)

0     1.485737
1     1.548003
2     1.654899
3     1.538095
4     1.448679
5     1.471295
6     0.647389
7     1.641736
8     1.404328
9     1.525707
10    1.669477
11    0.951123
12    1.532141
13    0.001019
14    1.403397
15    1.548994
16    1.517963
17    1.519416
18    1.453339
19    1.443519
20    1.616113
dtype: float64

In [80]:
# 1. Compute the intermediate fatigue index for each row
df_imp_lugano_21["Intermediate Fatigue Index"] = df_imp_lugano_21.apply(
    fatigue_index, axis=1
)

# 2. Normalize this intermediate fatigue index
min_fatigue = df_imp_lugano_21["Intermediate Fatigue Index"].min()
max_fatigue = df_imp_lugano_21["Intermediate Fatigue Index"].max()

df_imp_lugano_21["Final Fatigue Index"] = (
    df_imp_lugano_21["Intermediate Fatigue Index"] - min_fatigue
) / (max_fatigue - min_fatigue)

df_imp_lugano_21

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_imp_lugano_21["Intermediate Fatigue Index"] = df_imp_lugano_21.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_imp_lugano_21["Final Fatigue Index"] = (


Unnamed: 0,Player ID,Duration (s),Distance (m),Position,Speed (max.) (km/h),Speed_Avg,Time on Ice (s),Acceleration_max,High Metabolic Power Distance (m),High Metabolic Power Distance / min (m),Mechanical Intensity,Mechanical Load,Normalized Mechanical Intensity,Intermediate Fatigue Index,Final Fatigue Index
0,1,8836,3163,D,2883,1097,1038,443,1287,74.0,45.94,79465,0.961371,1.485737,0.889874
1,2,8836,2881,F,3202,1505,689,364,1724,150.0,42.85,49222,0.871624,1.548003,0.927194
2,3,8836,3586,F,3377,1385,932,435,2090,134.0,46.12,71661,0.966599,1.654899,0.991262
3,4,8836,3881,F,314,1339,1043,414,1956,113.0,44.38,77162,0.916062,1.538095,0.921256
4,5,8836,354,D,2738,1385,92,349,179,117.0,44.95,6885,0.932617,1.448679,0.867663
5,6,8836,5087,D,3083,1347,1360,398,2501,110.0,41.27,93539,0.825733,1.471295,0.881218
6,7,8836,1066,Goalkeeper,1299,96,4007,311,7,0.0,19.29,128788,0.187337,0.647389,0.387406
7,8,8836,4683,F,3246,1451,1162,391,2758,142.0,44.56,86295,0.92129,1.641736,0.983373
8,9,8836,4009,F,3351,1165,1239,416,2066,100.0,38.62,79727,0.748766,1.404328,0.841081
9,10,8836,4383,D,3424,1208,1307,361,1913,88.0,45.25,98554,0.94133,1.525707,0.913831


In [82]:
df_imp_lugano_21_sorted = df_imp_lugano_21.sort_values(
    by="Final Fatigue Index", ascending=False
)

df_imp_lugano_21_sorted

Unnamed: 0,Player ID,Duration (s),Distance (m),Position,Speed (max.) (km/h),Speed_Avg,Time on Ice (s),Acceleration_max,High Metabolic Power Distance (m),High Metabolic Power Distance / min (m),Mechanical Intensity,Mechanical Load,Normalized Mechanical Intensity,Intermediate Fatigue Index,Final Fatigue Index
10,11,8836,4935,F,3287,166,1070,416,3228,181.0,43.63,77838,0.894278,1.669477,1.0
2,3,8836,3586,F,3377,1385,932,435,2090,134.0,46.12,71661,0.966599,1.654899,0.991262
7,8,8836,4683,F,3246,1451,1162,391,2758,142.0,44.56,86295,0.92129,1.641736,0.983373
20,21,8836,2278,F,3279,1504,545,454,1263,139.0,47.27,42953,1.0,1.616113,0.968016
15,16,8836,3174,F,3103,1389,823,39,1764,129.0,43.83,6011,0.900087,1.548994,0.927788
1,2,8836,2881,F,3202,1505,689,364,1724,150.0,42.85,49222,0.871624,1.548003,0.927194
3,4,8836,3881,F,314,1339,1043,414,1956,113.0,44.38,77162,0.916062,1.538095,0.921256
12,13,8836,3498,F,3251,1346,936,394,1689,108.0,45.32,70675,0.943363,1.532141,0.917687
9,10,8836,4383,D,3424,1208,1307,361,1913,88.0,45.25,98554,0.94133,1.525707,0.913831
17,18,8836,2621,F,341,1289,732,377,1279,105.0,45.5,55503,0.948591,1.519416,0.91006
