## Preparation

In [175]:
# Configure root ..
import os
import sys

# Move the working directory up one level, but only once per kernel session.
# Without the guard, every re-run of this cell would climb one directory
# higher and break the relative imports/paths used by the cells below.
if not globals().get("_ROOT_CONFIGURED", False):
    os.chdir("..")
    _ROOT_CONFIGURED = True

In [176]:
# Libraries & Packages
from src import config
import gspread
import pandas as pd
import numpy as np

# Do not truncate the column list when a DataFrame is displayed
pd.options.display.max_columns = None

## Import & Adjustments

In [177]:
# Import spreadsheets through gspread ~> TP log & Daily log
def _rows_to_frame(rows, source_name):
    """Build a DataFrame from raw worksheet rows.

    Parameters
    ----------
    rows : list of lists, as returned by ``get_all_values()``; the first
        row is used as the header, the remaining rows as records.
    source_name : name of the spreadsheet, used only in the error message.

    Raises
    ------
    ValueError
        If ``rows`` is empty, i.e. there is not even a header row.
    """
    if not rows:
        raise ValueError("Worksheet of '{}' returned no rows (header missing)".format(source_name))
    header, *records = rows
    return pd.DataFrame(records, columns=header)


googleDrive_client = gspread.authorize(config.DRIVE_CREDENTIALS)

# Daily Log (first configured file, first worksheet)
daily_log_file = googleDrive_client.open(config.DRIVE_DAILY_LOG_FILENAMES[0])
daily_log_sheet = daily_log_file.get_worksheet(0)
daily_log_data = daily_log_sheet.get_all_values()
daily_log_df = _rows_to_frame(daily_log_data, config.DRIVE_DAILY_LOG_FILENAMES[0])

# TP Log (first configured file, first worksheet)
tp_log_file = googleDrive_client.open(config.DRIVE_TP_LOG_FILENAMES[0])
tp_log_sheet = tp_log_file.get_worksheet(0)
tp_log_data = tp_log_sheet.get_all_values()
tp_log_df = _rows_to_frame(tp_log_data, config.DRIVE_TP_LOG_FILENAMES[0])

In [178]:
# Small adjustments

# Convert to numeric (all are object ...)
def safe_convert(x):
    """Convert a single cell value to a number when possible.

    Returns NaN for an empty string, the result of ``pd.to_numeric(x)`` when
    that call succeeds, and ``str(x)`` when it raises ``ValueError`` or
    ``TypeError``.
    """
    is_blank = x == ""
    if is_blank:
        # Empty cells are treated as missing values
        return np.nan

    try:
        converted = pd.to_numeric(x)
    except (ValueError, TypeError):
        # Not numeric: keep the value as text
        converted = str(x)
    return converted
    
# Apply safe_convert cell by cell to every column of both logs.
for log_name, log_df in [("daily_log_df", daily_log_df), ("tp_log_df", tp_log_df)]:
    for col in log_df.columns:
        try:
            log_df[col] = log_df[col].apply(safe_convert)
        except ValueError as err:
            # Best effort: the column stays unconverted, but the skip is
            # reported instead of being swallowed silently.
            print("Skipped numeric conversion of column {!r} in {}: {}".format(col, log_name, err))

# Date: assemble one calendar date per row from the Year / Month / Day columns
_date_parts = ["Year", "Month", "Day"]
for _log in (daily_log_df, tp_log_df):
    _log["Date"] = pd.to_datetime(_log[_date_parts]).dt.date

# Sort by date. A stable algorithm (mergesort) is requested so that rows
# sharing the same date keep their original relative order; the default
# quicksort gives no such guarantee.
daily_log_df = daily_log_df.sort_values(by="Date", kind="mergesort").reset_index(drop=True)
tp_log_df = tp_log_df.sort_values(by="Date", kind="mergesort").reset_index(drop=True)

# Some conversions on "Activity type":
#   1. a value that is exactly "Cycling" becomes "Road Biking"
#   2. any remaining "Cycling" substring inside a label becomes "Biking"
tp_log_df["Activity type"] = (
    tp_log_df["Activity type"]
    .replace({"Cycling": "Road Biking"})
    .str.replace("Cycling", "Biking")
)

In [179]:
# Data check

def print_log_check(title, log_df):
    """Print a short sanity report for one log.

    The report contains the min/max of the "Date" column, the number of rows
    flagged by ``duplicated(keep=False)``, and every calendar day between the
    first and last date that does not occur in the "Date" column.

    Parameters
    ----------
    title : heading printed above the report.
    log_df : DataFrame with a "Date" column of ``datetime.date`` values.
    """
    first_day = log_df["Date"].min()
    last_day = log_df["Date"].max()

    # Set lookup keeps the gap search linear instead of scanning the whole
    # column once per calendar day.
    present_days = set(log_df["Date"])
    missing_days = [
        day
        for day in pd.date_range(start=first_day, end=last_day).date
        if day not in present_days
    ]

    print(title)
    print("-------------------------------------")
    print("Date range: {} to {}".format(first_day, last_day))
    print("Duplicated rows = {}".format(log_df[log_df.duplicated(keep=False)].shape[0]))
    print("Missing dates = {}".format(missing_days))


print_log_check("Daily Log:", daily_log_df)
print_log_check("\nTraining Log:", tp_log_df)

Daily Log:
-------------------------------------
Date range: 2024-09-13 to 2025-08-14
Duplicated rows = 0
Missing dates = []

Training Log:
-------------------------------------
Date range: 2024-09-13 to 2025-08-14
Duplicated rows = 0
Missing dates = []


## Work on data

In [189]:
# Work!

# Normalized HRV values (-inf, inf) ~> Upper bound = 1, Lower bound = 0 ~> Lower = better
# i.e. the position of HRV inside its baseline band: 0 at the lower baseline,
# 1 at the upper baseline, outside [0, 1] when HRV lies outside the band.
hrv_floor = daily_log_df["HRV baseline lower"]
hrv_band = daily_log_df["HRV baseline upper"] - hrv_floor
daily_log_df["HRV normalized"] = (daily_log_df["HRV"] - hrv_floor) / hrv_band

## Quick Analysis

In [180]:
# Quick analysis ~ Training log

print("Different activities and their counts:")
print("-------------------------------------")

# Per activity type: number of recorded durations and their summed hours,
# ordered from the largest total duration to the smallest.
duration_by_activity = tp_log_df.groupby("Activity type")["Duration [h]"]
activities_count_time = duration_by_activity.agg(count="count", total_duration="sum")
activities_count_time = activities_count_time.reset_index()
activities_count_time = activities_count_time.sort_values(by="total_duration", ascending=False)

for activity, n_activities, hours in zip(
    activities_count_time["Activity type"],
    activities_count_time["count"],
    activities_count_time["total_duration"],
):
    print("{} ~> {:.2f} hours, ({} act.)".format(activity, hours, n_activities))

Different activities and their counts:
-------------------------------------
Trail Running ~> 268.09 hours, (144 act.)
Road Biking ~> 84.39 hours, (35 act.)
Running ~> 78.25 hours, (76 act.)
Indoor Biking ~> 71.04 hours, (51 act.)
Hiking ~> 15.35 hours, (6 act.)
Mountain Biking ~> 12.67 hours, (7 act.)
Lap Swimming ~> 0.21 hours, (1 act.)


In [190]:
# Quick analysis ~ Daily log
print("Current performance values:")
print("-------------------------------------")

# (output template, column) pairs; the value reported is the column's last row
latest_metrics = [
    ("VO2max ~> {:.2f} ml/kg/min", "vo2Max"),
    ("Hill score ~> {}", "Hill score"),
    ("Endurance score ~> {}", "Endurance score"),
    ("Normalized HRV ~> {:.2f}", "HRV normalized"),
]
for template, column in latest_metrics:
    print(template.format(daily_log_df[column].iloc[-1]))

Current performance values:
-------------------------------------
VO2max ~> 60.70 ml/kg/min
Hill score ~> 93
Endurance score ~> 8631
Normalized HRV ~> 0.19
