In [52]:
import pandas as pd
import numpy as np

# Load the datasets
rpe = pd.read_csv("rpe.csv")
wellness = pd.read_csv("wellness.csv")

# Ensure date formats match
rpe["Date"] = pd.to_datetime(rpe["Date"])
wellness["Date"] = pd.to_datetime(wellness["Date"])

# Merge on Date and PlayerID
merged = pd.merge(rpe, wellness, on=["Date", "PlayerID"], how="inner")

In [53]:
# Remove unnecessary columns
columns_to_remove = [
    "SessionLoad", "DailyLoad", "AcuteLoad", "ChronicLoad", "AcuteChronicRatio", "BedTime", "WakeTime", 
    "MonitoringScore", "NutritionAdjustment"
]
merged = merged.drop(columns=columns_to_remove)

In [54]:
game_count = merged[merged["SessionType"] == "Game"].shape[0]
print(f"Number of rows with SessionType 'Game': {game_count}")

usg_count = merged[merged["USGMeasurement"] == "Yes"].shape[0]
print(f"Number of rows with USG: {usg_count}")

usg_count = merged[(merged["USGMeasurement"] == "Yes") & (merged["SessionType"] == "Game")].shape[0]
print(f"Number of rows with USG: {usg_count}")

Number of rows with SessionType 'Game': 449
Number of rows with USG: 1062
Number of rows with USG: 375


In [55]:
# Process rows
# - Filter out rows where SessionType is "Mobility/Recovery" or USGMeasurement is "No"
merged = merged[
    ~(
        (merged["SessionType"] != "Game") |
        (merged["USGMeasurement"] == "No")
    )
]

In [56]:
# Numerically encode the categorical variables

# Convert Date to datetime
merged["Date"] = pd.to_datetime(merged["Date"])

# Encode binary categorical variables: Yes/No → 1/0
binary_map = {"Yes": 1, "No": 0}
merged["Training"] = merged["Training"].map(binary_map)
merged["Menstruation"] = merged["Menstruation"].map(binary_map)
merged["USGMeasurement"] = merged["USGMeasurement"].map(binary_map)

# Encode Nutrition (ordinal category)
nutrition_order = ["Poor", "Okay", "Good", "Excellent"]
merged["Nutrition"] = pd.Categorical(merged["Nutrition"], categories=nutrition_order, ordered=True)
merged["Nutrition"] = merged["Nutrition"].cat.codes
merged["Nutrition"].replace(-1, np.nan, inplace=True)

# Encode Pain (binary categorical variable: Yes/No → 1/0)
binary_map = {"Yes": 1, "No": 0}
merged["Pain"] = merged["Pain"].map(binary_map)

# Encode Illness (ordinal categorical variable)
illness_order = ["No", "Slightly Off", "Yes"]  # Add more categories if applicable
merged["Illness"] = pd.Categorical(merged["Illness"], categories=illness_order, ordered=True)
merged["Illness"] = merged["Illness"].cat.codes
merged["Illness"].replace(-1, np.nan, inplace=True)  # Replace -1 with NaN for missing values

# Encode BestOutOfMyself (ordinal category)
best_out_map = {
    "Not at all": 0,
    "Somewhat": 1,
    "Moderately": 2,
    "Very much": 3,
    "Absolutely": 4
}
merged["BestOutOfMyself"] = merged["BestOutOfMyself"].map(best_out_map)
merged["TrainingReadiness"] = merged["TrainingReadiness"].str.rstrip('%').astype(float) / 100

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged["Nutrition"].replace(-1, np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged["Illness"].replace(-1, np.nan, inplace=True)  # Replace -1 with NaN for missing values


In [57]:
# Save to a new CSV
merged.to_csv("merged_rpe_wellness.csv", index=False)