In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as mno

In [3]:
data=pd.read_csv("/content/imputed_data.csv")

Adding the 5 modalities as columns based on their dependencies

In [11]:
def calculate_sleep_score(data):
    """
    Calculate the sleep score based on the provided features.

    Args:
    - data: DataFrame containing the features associated with sleep quality.

    Returns:
    - sleep_score: Series containing the calculated sleep score for each row.
    """
    # Define weights for each feature based on empirical evidence from research studies
    weights = {
        'Hours.in.Bed': 0.20,                   # Adjusted based on studies indicating the importance of adequate sleep duration
        'Hours.of.Sleep': 0.15,                 # Adjusted based on research suggesting the relationship between sleep duration and quality
        'Sleep.Need': 0.10,                     # Adjusted based on evidence linking sleep need to overall sleep quality and performance
        'Sleep.Efficiency....': 0.15,               # Adjusted based on findings indicating the importance of sleep efficiency in sleep quality
        'Sleep.Disturbances': 0.10,             # Adjusted based on studies showing the impact of disturbances on sleep architecture
        'Latency..min.': 0.05,                  # Adjusted based on research suggesting the importance of sleep onset latency
        'Total.Cycle.Sleep.Time..hours.': 0.10, # Adjusted based on evidence linking sleep cycles to overall sleep quality
        'Restorative.Sleep..hours.': 0.10,      # Adjusted based on studies indicating the importance of deep and restorative sleep stages
        'Sleep.Consistency': 0.05,              # Adjusted based on evidence supporting the role of sleep consistency in sleep quality
        'Respiratory.Rate': 0.05               # Adjusted based on research suggesting the impact of respiratory rate on sleep quality
    }

    # Calculate weighted sum for each row
    weighted_sum = sum(data[feature] * weight for feature, weight in weights.items())

    # Normalize the weighted sum to a scale of 0 to 100 (optional)
    normalized_sum = weighted_sum / len(weights)

    return normalized_sum


def calculate_training_intensity(data):
    """
    Calculate the training intensity score based on the provided features.

    Args:
    - data: DataFrame containing the features associated with training load.

    Returns:
    - training_intensity: Series containing the calculated training intensity score for each row.
    """
    # Define weights for each feature based on empirical evidence from research studies
    weights = {
        'Training.load.score': 0.25,   # Adjusted based on studies indicating the importance of training load in performance monitoring
        'Cardio.load': 0.20,            # Adjusted based on evidence supporting the role of cardiovascular training in overall fitness
        'Duration': 0.15,               # Adjusted based on research suggesting the importance of training duration in adaptation
        'HR.min..bpm.': 0.15,           # Adjusted based on studies showing the relationship between heart rate and training intensity
        'HR.avg..bpm.': 0.15,           # Adjusted based on evidence supporting the role of average heart rate in training assessment
        'HR.max..bpm.': 0.10,           # Adjusted based on research suggesting the significance of maximum heart rate in effort assessment
        'RT.Volume.Load': 0.10          # Adjusted based on studies indicating the importance of resistance training volume in adaptation
    }

    # Calculate weighted sum for each row
    weighted_sum = sum(data[feature] * weight for feature, weight in weights.items())

    # Normalize the weighted sum to a scale of 0 to 100 (optional)
    normalized_sum = weighted_sum / len(weights)

    return normalized_sum


def calculate_cardiac_rhythm(data):
    """
    Calculate the cardiac rhythm score based on the provided features.

    Args:
    - data: DataFrame containing the features associated with cardiac rhythm.

    Returns:
    - cardiac_rhythm_score: Series containing the calculated cardiac rhythm score for each row.
    """
    # Define weights for each feature based on empirical evidence from research studies
    weights = {
        'RHR': 0.40,            # Adjusted based on studies indicating the significance of resting heart rate in cardiovascular health
        'HR.min..bpm.': 0.20,  # Adjusted based on evidence supporting the role of minimum heart rate in recovery assessment
        'HR.avg..bpm.': 0.20,  # Adjusted based on research suggesting the significance of average heart rate in overall fitness
        'HR.max..bpm.': 0.20   # Adjusted based on studies showing the relationship between maximum heart rate and cardiovascular capacity
    }

    # Calculate weighted sum for each row
    weighted_sum = sum(data[feature] * weight for feature, weight in weights.items())

    # Normalize the weighted sum to a scale of 0 to 100 (optional)
    normalized_sum = weighted_sum / len(weights)

    return normalized_sum

def calculate_jump(data):
    """
    Calculate the jump score based on the provided features.

    Args:
    - data: DataFrame containing the features 'Jump.Height', 'Peak.Power', 'Peak.Power.BM', and 'RSI'.

    Returns:
    - jump_score: Series containing the calculated jump score for each row.
    """
    # Define adjusted weights for each feature based on empirical evidence
    weights = {
        'Jump.Height': 0.45,   # Adjusted weight based on the significance of jump height in jump performance
        'Peak.Power': 0.30,    # Adjusted weight based on the importance of power output in jump performance
        'Peak.Power.BM': 0.15, # Adjusted weight based on the relationship between power and body mass
        'Body.Weight' : 0.10,
        'RSI': 0.10            # Adjusted weight based on the influence of strength on jump performance
    }

    # Calculate weighted sum for each row
    weighted_sum = sum(data[feature] * weight for feature, weight in weights.items())

    # Normalize the weighted sum to a scale of 0 to 100 (optional)
    normalized_sum = weighted_sum / len(weights)

    return normalized_sum



def calculate_cognitive_performance(data):
    """
    Calculate the cognitive performance score based on the provided features.

    Args:
    - data: DataFrame containing the features associated with cognitive performance.

    Returns:
    - cognitive_performance_score: Series containing the calculated cognitive performance score for each row.
    """
    # Define weights for each feature based on empirical evidence from research studies
    weights = {
        'Recovery': 0.20,                    # Adjusted based on studies indicating the importance of recovery in cognitive function
        'Sleep.Score': 0.20,                 # Adjusted based on evidence supporting the relationship between sleep quality and cognitive performance
        'Total.distance..m.': 0.15,          # Adjusted based on research suggesting the impact of physical activity on cognitive function
        'Distance...min..m.min.': 0.15,      # Adjusted based on studies showing the relationship between exercise duration and cognitive performance
        'Maximum.speed..km.h.': 0.10,        # Adjusted based on evidence supporting the role of exercise intensity in cognitive function
        'Average.speed..km.h.': 0.10,        # Adjusted based on research suggesting the significance of exercise intensity in cognitive function
        'Game.Score': 0.10                  # Adjusted based on studies indicating the impact of game performance on cognitive function
    }

    # Calculate weighted sum for each row
    weighted_sum = sum(data[feature] * weight for feature, weight in weights.items())

    # Normalize the weighted sum to a scale of 0 to 100 (optional)
    normalized_sum = weighted_sum / len(weights)

    return normalized_sum


In [12]:
# Apply the function to calculate the 'Sleep' column
data['Sleep'] = calculate_sleep_score(data)

data['Training'] = calculate_training_intensity(data)

data['Cardiac Rhythm'] = calculate_cardiac_rhythm(data)

data['Jump'] = calculate_jump(data)

data['Cognitive'] = calculate_cognitive_performance(data)

# Print the DataFrame to verify the new column
print(data)

      Athlete   RHR    HRV  Recovery  Sleep.Score  Hours.in.Bed  \
0           1  51.0  129.0      73.0        100.0          9.81   
1          10  57.0   76.0      53.0         50.0          4.35   
2          11  65.0  120.0      71.0         94.0          8.62   
3          12  52.0  107.0      83.0         85.0          8.14   
4          13  53.0   51.0      76.0         76.0          8.86   
...       ...   ...    ...       ...          ...           ...   
4938       21  51.0   90.0      71.0        100.0         10.37   
4939        5  64.0   82.0      26.0         45.0          4.59   
4940       15  64.0   53.0      14.0         80.0          6.69   
4941       21  52.0   88.0      66.0         89.0          8.10   
4942        5  62.0  113.0      72.0         95.0         10.72   

      Hours.of.Sleep  Sleep.Need  Sleep.Efficiency....  Wake.Periods  ...  \
0               9.05        9.06                  92.0          12.0  ...   
1               4.02        8.54         

In [13]:
#  save it as 'dataset.csv'
data.to_csv('dataset(modalitiesIncluded).csv', index=False)

In [14]:
from google.colab import files
data.to_csv('dataset.csv', index=False)
files.download('dataset.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>