In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

A. Prediction For "Jump"

In [None]:
# Predict "Jump" for one athlete's 2023 rows with a Random Forest trained on
# every athlete's 2021-2022 rows, then report MSE, MAE and R-squared.

# Load the dataset and parse the day-month-year date strings
data = pd.read_csv('/content/modified_dataset.csv')
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

# Athlete whose 2023 rows are held out for evaluation; change to analyse another athlete
athlete_id = 21

# Column to predict, plus the bookkeeping columns that are not used as features
target = 'Jump'
non_feature_cols = [target, 'Date', 'Athlete']

# Year-based split: training uses all athletes (2021 and 2022), testing uses
# only the selected athlete (2023).
# An earlier trial trained on this athlete's 2021 rows alone and tested on 2022.
row_year = data['Date'].dt.year
train_data = data[row_year.isin([2021, 2022])]
test_data = data[(data['Athlete'] == athlete_id) & (row_year == 2023)]

if train_data.empty:
    print("No data available for training for athlete", athlete_id)
elif test_data.empty:
    # scikit-learn rejects zero-sample inputs, so report the gap instead of crashing
    print("No data available for testing for athlete", athlete_id)
else:
    # Features are every remaining column; the label is the target column
    X_train = train_data.drop(columns=non_feature_cols)
    y_train = train_data[target]
    X_test = test_data.drop(columns=non_feature_cols)
    y_test = test_data[target]

    # Fixed seed so the fitted forest, and therefore the printed metrics,
    # are the same on every run
    regressor = RandomForestRegressor(random_state=42)
    regressor.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = regressor.predict(X_test)

    # Report the error metrics
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    mae = mean_absolute_error(y_test, y_pred)
    print("Mean Absolute Error:", mae)

    r2 = r2_score(y_test, y_pred)
    print("R-Squared Score:", r2)


Mean Squared Error: 0.028725732204984478
Mean Absolute Error: 0.1080635581395507
R-Squared Score: 0.9999605012915698


In [None]:
import matplotlib.pyplot as plt

In [None]:
# Line chart comparing the held-out "Jump" values with the model's predictions.
fig, ax = plt.subplots(figsize=(10, 6))

# Re-number the actual values from 0 so they share an x-axis with y_pred
actual_values = y_test.reset_index(drop=True)
ax.plot(actual_values, color='blue', label='Actual', marker='o', markersize=8)
ax.plot(y_pred, color='red', label='Predicted', linestyle='--', linewidth=2)

ax.set_xlabel('Sample', fontsize=14)
ax.set_ylabel('Jump', fontsize=14)
ax.set_title('Actual vs Predicted Values', fontsize=16)
ax.legend()
ax.grid(True)

plt.show()

In [None]:
# Grouped bar chart: one blue (actual) and one red (predicted) bar per test sample.
bar_width = 0.35
index = np.arange(len(y_test))

fig, ax = plt.subplots(figsize=(10, 6))

# The predicted bars are shifted right by one bar width so each pair sits side by side
ax.bar(index, y_test, bar_width, color='blue', label='Actual')
ax.bar(index + bar_width, y_pred, bar_width, color='red', label='Predicted')

ax.set_xlabel('Sample', fontsize=14)
ax.set_ylabel('Jump', fontsize=14)
ax.set_title('Actual vs Predicted Values (Bar Plot)', fontsize=16)

# Put each tick between the two bars of a pair and label it with the sample number
ax.set_xticks(index + bar_width / 2)
ax.set_xticklabels(index)

ax.legend()
plt.show()

B. Prediction For "Sleep"

In [None]:
# Predict "Sleep" for one athlete's 2023 rows with a Random Forest trained on
# every athlete's 2021-2022 rows, then report MSE, MAE and R-squared.

# Load the dataset and parse the day-month-year date strings
data = pd.read_csv('/content/modified_dataset.csv')
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

# Athlete whose 2023 rows are held out for evaluation; change to analyse another athlete
athlete_id = 21

# Column to predict, plus the bookkeeping columns that are not used as features
target = 'Sleep'
non_feature_cols = [target, 'Date', 'Athlete']

# Trials 02 - year-based split: training uses all athletes (2021 and 2022),
# testing uses only the selected athlete (2023).
# Trials 01 trained on this athlete's 2021 rows alone and tested on 2022.
row_year = data['Date'].dt.year
train_data = data[row_year.isin([2021, 2022])]
test_data = data[(data['Athlete'] == athlete_id) & (row_year == 2023)]

if train_data.empty:
    print("No data available for training for athlete", athlete_id)
elif test_data.empty:
    # scikit-learn rejects zero-sample inputs, so report the gap instead of crashing
    print("No data available for testing for athlete", athlete_id)
else:
    # Features are every remaining column; the label is the target column
    X_train = train_data.drop(columns=non_feature_cols)
    y_train = train_data[target]
    X_test = test_data.drop(columns=non_feature_cols)
    y_test = test_data[target]

    # Fixed seed so the fitted forest, and therefore the printed metrics,
    # are the same on every run
    regressor = RandomForestRegressor(random_state=42)
    regressor.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = regressor.predict(X_test)

    # Report the error metrics
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    mae = mean_absolute_error(y_test, y_pred)
    print("Mean Absolute Error:", mae)

    r2 = r2_score(y_test, y_pred)
    print("R-Squared Score:", r2)


Mean Squared Error: 0.0013844637124469437
Mean Absolute Error: 0.02954517087209305
R-Squared Score: 0.9125737152346292


In [None]:
# Predict "Training" for one athlete's 2023 rows with a Random Forest trained on
# every athlete's 2021-2022 rows, then report MSE, MAE and R-squared.

# Load the dataset and parse the day-month-year date strings
data = pd.read_csv('/content/modified_dataset.csv')
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

# Athlete whose 2023 rows are held out for evaluation; change to analyse another athlete
athlete_id = 21

# Column to predict, plus the bookkeeping columns that are not used as features
target = 'Training'
non_feature_cols = [target, 'Date', 'Athlete']

# Trials 02 - year-based split: training uses all athletes (2021 and 2022),
# testing uses only the selected athlete (2023).
# Trials 01 trained on this athlete's 2021 rows alone and tested on 2022.
row_year = data['Date'].dt.year
train_data = data[row_year.isin([2021, 2022])]
test_data = data[(data['Athlete'] == athlete_id) & (row_year == 2023)]

if train_data.empty:
    print("No data available for training for athlete", athlete_id)
elif test_data.empty:
    # scikit-learn rejects zero-sample inputs, so report the gap instead of crashing
    print("No data available for testing for athlete", athlete_id)
else:
    # Features are every remaining column; the label is the target column
    X_train = train_data.drop(columns=non_feature_cols)
    y_train = train_data[target]
    X_test = test_data.drop(columns=non_feature_cols)
    y_test = test_data[target]

    # Fixed seed so the fitted forest, and therefore the printed metrics,
    # are the same on every run
    regressor = RandomForestRegressor(random_state=42)
    regressor.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = regressor.predict(X_test)

    # Report the error metrics
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    mae = mean_absolute_error(y_test, y_pred)
    print("Mean Absolute Error:", mae)

    r2 = r2_score(y_test, y_pred)
    print("R-Squared Score:", r2)


Mean Squared Error: 2.005810658099113
Mean Absolute Error: 1.1652828903654378
R-Squared Score: 0.9980796675881181


In [None]:
# Predict "Cardiac Rhythm" for one athlete's 2023 rows with a Random Forest trained
# on every athlete's 2021-2022 rows, then report MSE, MAE and R-squared.

# Load the dataset and parse the day-month-year date strings
data = pd.read_csv('/content/modified_dataset.csv')
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

# Athlete whose 2023 rows are held out for evaluation; change to analyse another athlete
athlete_id = 21

# Column to predict, plus the bookkeeping columns that are not used as features
target = 'Cardiac Rhythm'
non_feature_cols = [target, 'Date', 'Athlete']

# Trials 02 - year-based split: training uses all athletes (2021 and 2022),
# testing uses only the selected athlete (2023).
# Trials 01 trained on this athlete's 2021 rows alone and tested on 2022.
row_year = data['Date'].dt.year
train_data = data[row_year.isin([2021, 2022])]
test_data = data[(data['Athlete'] == athlete_id) & (row_year == 2023)]

if train_data.empty:
    print("No data available for training for athlete", athlete_id)
elif test_data.empty:
    # scikit-learn rejects zero-sample inputs, so report the gap instead of crashing
    print("No data available for testing for athlete", athlete_id)
else:
    # Features are every remaining column; the label is the target column
    X_train = train_data.drop(columns=non_feature_cols)
    y_train = train_data[target]
    X_test = test_data.drop(columns=non_feature_cols)
    y_test = test_data[target]

    # Fixed seed so the fitted forest, and therefore the printed metrics,
    # are the same on every run
    regressor = RandomForestRegressor(random_state=42)
    regressor.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = regressor.predict(X_test)

    # Report the error metrics
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    mae = mean_absolute_error(y_test, y_pred)
    print("Mean Absolute Error:", mae)

    r2 = r2_score(y_test, y_pred)
    print("R-Squared Score:", r2)


Mean Squared Error: 0.07271151744186104
Mean Absolute Error: 0.1936860465116289
R-Squared Score: 0.9607627490294188


In [None]:
# Predict "Cognitive" for one athlete's 2023 rows with a Random Forest trained on
# every athlete's 2021-2022 rows, then report MSE, MAE and R-squared.

# Load the dataset and parse the day-month-year date strings
data = pd.read_csv('/content/modified_dataset.csv')
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

# Athlete whose 2023 rows are held out for evaluation; change to analyse another athlete
athlete_id = 21

# Column to predict, plus the bookkeeping columns that are not used as features
target = 'Cognitive'
non_feature_cols = [target, 'Date', 'Athlete']

# Trials 02 - year-based split: training uses all athletes (2021 and 2022),
# testing uses only the selected athlete (2023).
# Trials 01 trained on this athlete's 2021 rows alone and tested on 2022.
row_year = data['Date'].dt.year
train_data = data[row_year.isin([2021, 2022])]
test_data = data[(data['Athlete'] == athlete_id) & (row_year == 2023)]

if train_data.empty:
    print("No data available for training for athlete", athlete_id)
elif test_data.empty:
    # scikit-learn rejects zero-sample inputs, so report the gap instead of crashing
    print("No data available for testing for athlete", athlete_id)
else:
    # Features are every remaining column; the label is the target column
    X_train = train_data.drop(columns=non_feature_cols)
    y_train = train_data[target]
    X_test = test_data.drop(columns=non_feature_cols)
    y_test = test_data[target]

    # Fixed seed so the fitted forest, and therefore the printed metrics,
    # are the same on every run
    regressor = RandomForestRegressor(random_state=42)
    regressor.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = regressor.predict(X_test)

    # Report the error metrics
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    mae = mean_absolute_error(y_test, y_pred)
    print("Mean Absolute Error:", mae)

    r2 = r2_score(y_test, y_pred)
    print("R-Squared Score:", r2)


Mean Squared Error: 0.29777422606787424
Mean Absolute Error: 0.4181280730897135
R-Squared Score: 0.9995293255425411
