# Importing Packages

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from google.colab import drive

In [2]:
import seaborn as sns

# Mounting Drive

In [3]:
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


# Functions

In [58]:
def convert_duration_to_minutes(duration_str):
    total_minutes = 0
    parts = duration_str.split()
    for part in parts:
        if 'h' in part:
            total_minutes += int(part.strip('h')) * 60
        elif 'm' in part:
            total_minutes += int(part.strip('m'))
    return total_minutes

def convert_timedelta_to_readable(td):
    # Extract hours and minutes from the timedelta
    hours, remainder = divmod(td.seconds, 3600)
    minutes = remainder // 60

    # Create a human-readable string
    if hours > 0:
        return f"{hours}h {minutes}m"
    else:
        return f"{minutes}m"

def convert_date_formats(input_date):
    parsed_date = datetime.strptime(input_date, "%d %b %Y, %H:%M")
    formatted_date = parsed_date.strftime("%Y-%m-%d %H:%M:00")
    return formatted_date

def format_date(input_date):
    # Convert the input date string to a datetime object
    date_obj = datetime.strptime(input_date, "%Y-%m-%d %H:%M:%S")

    # Extract day and month information
    day = date_obj.day
    month = date_obj.strftime("%B")

    # Add 'st', 'nd', 'rd', or 'th' to the day based on its value
    if 10 <= day % 100 <= 20:
        day_suffix = "th"
    else:
        day_suffix = {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th")

    # Format the date string
    formatted_date = f"{day}{day_suffix} of {month}"

    return formatted_date

def get_first_index(df, col, val):
    return df[df[col]==val].index[0]

def convert_numeric_to_day(num):
    days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    if 0 <= num <= 6:
        return days[num]
    else:
        return "Invalid Day"

def convert_numeric_to_month(num):
    days = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "Novemer","December"]
    if 0 <= num <= 6:
        return days[num]
    else:
        return "Invalid Month"


def get_pct_change_periods(data_struc, p=3):
    return np.round((pd.DataFrame(data_struc).pct_change(periods=p).mean() * 100)[0],2)

In [59]:
def give_top_by_metric(df, ex_totals_df, metric, n, p=3):
    if metric != "Top Progress":
        top_n_metric = ex_totals_df.nlargest(n, metric).reset_index(drop=True)
    else:
        ex_progress = {}
        for exercise in ex_totals_df["Exercise"]:
            count, weight_progression, volume_progression, rep_progression = get_total_from_session(df, exercise, "all")
            weight_change = get_pct_change_periods(weight_progression)
            vol_change = get_pct_change_periods(volume_progression)
            weight_vol_sum = weight_change + vol_change
            if not(np.isinf(weight_change) or np.isnan(weight_change)) and (count>=10) and not(np.isinf(vol_change) or np.isnan(vol_change)):
                ex_progress[exercise] = weight_vol_sum

        top_5 = sorted(ex_progress, key=ex_progress.get, reverse=True)[:5]

    out_string = ""

    first_str = " for "
    match metric:
        case "Total Sets":
            second_str = " sets over "
        case "Total Weight":
            first = " with "
            second_str = " metric tonnes over "
        case "Total Reps":
            second_str = " reps over "

    for i in range(n):

        if metric == "Top Progress":
            exercise_name = top_5[i]
        else:
            exercise_name = top_n_metric["Exercise"].loc[i]

        match metric:
            case "Total Sets":
                value = str(top_n_metric["Total Sets"].loc[i])
            case "Total Weight":
                value = str(np.round((top_n_metric["Total Weight"].loc[i]/1000),1))
            case "Total Reps":
                value = str(top_n_metric["Total Reps"].loc[i])


        if metric == "Top Progress":
            # Progression metric
            count, weight_progression, volume_progression, rep_progression = get_total_from_session(df, exercise_name, "all")
            weight_change = get_pct_change_periods(weight_progression)
            vol_change = get_pct_change_periods(volume_progression)

            out_string += "    - " + exercise_name + ", where every "+str(p)+" sessions you increased weight by " + str(weight_change) + "%, and volume by " + str(vol_change) +"%!\n"


        elif metric == "Total Occurrences":

            # Total Occurrences tracks progression by session too
            count, weight_progression, volume_progression, rep_progression = get_total_from_session(df, exercise_name, "all")

            weight_change = get_pct_change_periods(weight_progression)
            vol_change = get_pct_change_periods(volume_progression)
            rep_change = get_pct_change_periods(rep_progression)
            if np.isinf(weight_change):
                changes_str = "Every "+str(p)+" sessions you increased reps by an average of " + str(rep_change) + "%!\n"
            else:
                changes_str = "Every "+str(p)+" sessions you increased max weight moved and total volume by an average of " + str(weight_change) + "% and "+ str(vol_change) + "%!\n"

            out_string += "    - " + exercise_name + first_str + str(count) + " sessions. \n"#+ changes_str

        else:
            count = get_total_from_session(df, exercise_name, "count")
            out_string += "    - " + exercise_name + first_str + value + second_str + str(count) + " sessions.\n"

    return out_string

In [60]:
def get_total_from_session(df, exercise, r_type="all"):
    exercise_df = df[df['Exercise Name']==exercise]
    prog_dates = exercise_df["Date"].unique()

    count = len(prog_dates)

    if r_type=="count":
        return count

    else:
        weight_progression = []
        volume_progression = []
        rep_progression = []

        for date_i in prog_dates:
            time_i = exercise_df[exercise_df["Date"]==date_i]
            weight_progression.append(time_i["Weight"].max())
            volume_progression.append((time_i["Weight"] * time_i["Reps"]).sum())
            rep_progression.append(time_i["Reps"].sum())

        return count, weight_progression, volume_progression, rep_progression

In [88]:
def strong_wrapped(year, n=False, p=3, app="Strong"):

    if (app == "Hevy"):
        file_path = location+"/workout_data.csv"
        df = pd.read_csv(file_path)
        # Managing column differences between app data outputs
        df.columns = ["Workout Name", "Date", "End Time", "Workout Notes", "Exercise Name", "superset_id", "Notes", "Set Order", "set_type", "Weight", "Reps", "Distance", "Seconds", "RPE"]
        df["Date"] = df["Date"].apply(convert_date_formats)
        df["End Time"] = df["End Time"].apply(convert_date_formats)
        df["Duration"] = (pd.to_datetime(df["End Time"]) - pd.to_datetime(df["Date"]))
        df["Duration"] = df["Duration"].apply(convert_timedelta_to_readable)
    else:
        file_path = location+"/strong.csv"
        df = pd.read_csv(file_path)

    # CONVERT DATE TO DATETIME OBJECT
    df["datetime"] = pd.to_datetime(df["Date"])

    # FILTER BY YEAR
    df = df[df["datetime"].dt.year==year]

    # TOTAL NUM SESSIONS
    num_sessions = len(df["Date"].unique())
    num_reps = df["Reps"].sum()
    vatican_equivalents = num_reps // 825

    # Only proceed if enough data
    if num_sessions <= 5:
        return "Please specify a year with more than 5 sessions!"

    # Create list of beginning indexes for each workout
    index_list = []
    for unique_date in df["Date"].unique():
        index_list.append(get_first_index(df, "Date", unique_date))
    # Convert all durations into minutes
    if app == "Hevy":
        duration_df = df.loc[index_list]["Duration"].astype(str).apply(convert_duration_to_minutes)
    else:
        duration_df = df.loc[index_list]["Duration"].apply(convert_duration_to_minutes)

    # Get unique months that have been recorded
    date_months = df.loc[index_list]["datetime"].dt.month
    # Get unique weekdays that have been recorded
    date_days = df.loc[index_list]["datetime"].dt.weekday

    value_counts_ = date_months.value_counts()
    # Monthly activity graph?
    avg_monthly = 0
    top_month_str = ""
    top_month_value = 0
    for i in range(1,len(date_months.unique())):
        # Loop through unique values
        if i in date_months.unique():
            matched_activites = df[df["datetime"].dt.month == i]
            this_value = value_counts_[i]
            #print("Found this many activities in month", i)
            #print(this_value)
            avg_monthly += this_value
            if this_value > top_month_value:
                top_month_value = this_value
                top_month_str = convert_numeric_to_month(i)

    avg_monthly = avg_monthly/len(date_months.unique())

    # TOTAL DURATION DATA
    total_duration =  duration_df.sum()
    sandman_equivalents = total_duration / 5.51666666666
    lotr_equivalents = total_duration / 1171
    num_blinks = total_duration * 15

    # TOTAL WEIGHT DATA
    total_weight_moved = (df["Weight"] * df["Reps"]).sum()
    ronnie_weight_equivalents = np.round((total_weight_moved / 133),1)
    belgian_blue_equivalents = np.round((total_weight_moved / 1200),1)

    # MOST FREQUENT DAY / HOUR
    hour_mode = df["datetime"].dt.hour.mode()[0]
    favourite_day = convert_numeric_to_day(df["datetime"].dt.weekday.mode()[0])

    # EXERCISE FREQ DATA
    num_exercises = len(df["Exercise Name"].unique())
    val_counts = df["Exercise Name"].value_counts()
    avg_ex_sets = val_counts.mean()

    # EXERCISE TOTALS BY SETS, WEIGHTS, REPS
    exercise_totals = []
    for exercise in df["Exercise Name"].unique():
        total_sets = len(df[df["Exercise Name"]==exercise])
        total_weight = (df[df["Exercise Name"]==exercise]["Weight"] * df[df["Exercise Name"]==exercise]["Reps"]).sum()
        total_reps = df[df["Exercise Name"]==exercise]["Reps"].sum()
        total_occurrences = get_total_from_session(df, exercise, r_type="count")
        exercise_totals.append([exercise, total_sets, total_weight, total_reps, total_occurrences])

    # PUT INTO DF
    ex_totals_df = pd.DataFrame(exercise_totals, columns=["Exercise", "Total Sets", "Total Weight", "Total Reps", "Total Occurrences"])

    # TOP 3 or N exercises by metric, whichever is smaller
    if n == False:
        n = 3
        if len(val_counts)<3:
            n = len(val_counts)

    # Get metrics such as most common by total sets, total weight, total reps
    top_set_out = give_top_by_metric(df, ex_totals_df, "Total Sets", n, p)
    top_weight_out = give_top_by_metric(df, ex_totals_df, "Total Weight", n, p)
    top_reps_out = give_top_by_metric(df, ex_totals_df, "Total Reps", n, p)
    top_freq_out = give_top_by_metric(df, ex_totals_df, "Total Occurrences", n, p)
    top_progress_out = give_top_by_metric(df, ex_totals_df, "Top Progress", n, p)

    avg_ex_freq = ex_totals_df["Total Occurrences"].mean()

    # BARBELL PR
    barbell_pr = ""
    barbell_df = df[df['Exercise Name'].str.contains('Barbell', regex=False)]
    if len(barbell_df) > 0:
        max_idx = barbell_df["Weight"].idxmax()
        pr_df = barbell_df.loc[max_idx]
        pr_name = pr_df["Exercise Name"].replace('Barbell', '')
        pr_reps = pr_df["Reps"]
        if pr_reps > 1:
            pr_reps = str(pr_df["Reps"]) + " reps."
        else:
            pr_reps = str(pr_df["Reps"]) + " rep."
        barbell_pr += "on the " + format_date(pr_df["Date"]) + " with " + str(np.round(pr_df["Weight"],2)) +"kg in the " + pr_name + " for " + pr_reps

    # DUMBBELL PR
    dumbbell_pr = ""
    dumbbell_df = df[df['Exercise Name'].str.contains('Dumbbell', regex=False)]
    if len(dumbbell_df) > 0:
        max_idx = dumbbell_df["Weight"].idxmax()
        pr_df = dumbbell_df.loc[max_idx]
        pr_name = pr_df["Exercise Name"].replace('Dumbbell', '')
        pr_reps = pr_df["Reps"]
        if pr_reps > 1:
            pr_reps = str(pr_df["Reps"]) + " reps."
        else:
            pr_reps = str(pr_df["Reps"]) + " rep."
        dumbbell_pr += "on the " + format_date(pr_df["Date"]) + " with " + str(np.round(pr_df["Weight"],2)) +"kg in the " + pr_name + " for " + pr_reps



    out_str = "Welcome to your "+ str(year)+ " Gym Wrapped!\n"
    out_str += "\nThis year you recorded " + str(num_sessions) + " sessions!"
    out_str += "\nOn average, this was " + str(np.round(avg_monthly,1)) + " sessions per month, but in " + str(top_month_str) + " you managed " + str(top_month_value) +"."

    out_str += "\nIn 2023 you recorded a total of " + str(num_reps) + " reps, which is the same as each citizen of the vatican city performing "+ str(vatican_equivalents) + " reps.\n"
    out_str += "\nMostly you worked out at " + str(hour_mode) +":00, and your favourite day for a session was " + favourite_day
    out_str += "\nThese sessions add up to a total of " + str(total_duration) + " minutes, which is " +  str(np.round((total_duration/1440),1)) + " days,"
    out_str += " or " + str(np.round(sandman_equivalents,1)) + " loops of 'Enter Sandman' or "+str(np.round(lotr_equivalents,1)) +" entire LOTR marathons."
    out_str += "\n\nYou've probably blinked around " + str(np.round(num_blinks,1))+ " times during your sessions.\n"
    out_str += "\nIn total, you've moved " + str(np.round((total_weight_moved/1000),1)) + " metric tonnes, which is equivalent to " + str(ronnie_weight_equivalents) + " contest weight Ronnie Coleman's"
    out_str += " or " + str(belgian_blue_equivalents) + " adult male Belgian Blues's."
    out_str += "\n\nThis year, you've tried " + str(num_exercises) + " different exercises -"
    if num_exercises > 50:
        out_str += " how adventurous."
    else:
        out_str += " maybe it's time to explore a bit?"
    # Old way of phrasing this, not sure it makes as much sense
    #out_str += "\nYou've done each exercise " +str(np.round(avg_ex_freq,1)) + " times on average, over " + str(np.round(avg_ex_sets,1)) + " sets."
    out_str += "\nYou've done each exercise " +str(np.round(avg_ex_freq,1)) + " times on average."
    out_str += "\n\nYour top " + str(n) + " exercises this year by total number of sets have been: \n" + top_set_out
    out_str += "\nYour top " + str(n) + " exercises this year by total weight have been: \n" + top_weight_out
    out_str += "\nYour top " + str(n) + " exercises this year by total number of reps have been: \n" + top_reps_out
    out_str += "\nYour top " + str(n) + " exercises favourite exercises this year have been: \n" + top_freq_out
    out_str += "\nYour top " + str(n) + " exercises this year by progress (with a minimum of 10 occurrences) have been: \n" + top_progress_out
    out_str += "\nYour heaviest barbell PR was " + barbell_pr
    out_str += "\nYour heaviest dumbbell PR was " + dumbbell_pr
    return out_str

# Importing File

In [89]:
location = '/content/drive/My Drive/Gym'
file_path = location+"/strong.csv"
file_path = location+"/workout_data.csv"
gym_data = pd.read_csv(file_path)

# Use

In [90]:
out_str = strong_wrapped(2023, n=3, p=2, app="Hevy")

In [91]:
print(out_str)

Welcome to your 2023 Gym Wrapped!

This year you recorded 117 sessions!
On average, this was 10.8 sessions per month, but in May you managed 20.
In 2023 you recorded a total of 27072.0 reps, which is the same as each citizen of the vatican city performing 32.0 reps.

Mostly you worked out at 14:00, and your favourite day for a session was Saturday
These sessions add up to a total of 10258 minutes, which is 7.1 days, or 1859.5 loops of 'Enter Sandman' or 8.8 entire LOTR marathons.

You've probably blinked around 153870 times during your sessions.

In total, you've moved 950.8 metric tonnes, which is equivalent to 7149.2 contest weight Ronnie Coleman's or 792.4 adult male Belgian Blues's.

This year, you've tried 102 different exercises - how adventurous.
You've done each exercise 7.8 times on average.

Your top 3 exercises this year by total number of sets have been: 
    - Bench Press (Barbell) for 250 sets over 42 sessions.
    - Lat Pulldown (Machine) for 181 sets over 33 sessions.
 