# Classifying different workouts

In [None]:
from collections import Counter
import gzip
import json
import logging
import os
from datetime import date, timedelta

import altair as alt
import pandas as pd
from dotenv import load_dotenv
from qself.oura import OuraAPIClient

In [None]:
# Show INFO-level log records in the notebook and pull settings from a local
# .env file into the process environment (the API token is read from there).
logging.basicConfig(level=logging.INFO)
load_dotenv()

## Dataset prep

In [None]:
# The personal access token must be present in the environment; a missing
# variable raises KeyError here rather than failing later on the first request.
client = OuraAPIClient(os.environ["OURA_PERSONAL_ACCESS_TOKEN"])

In [None]:
# Query window: the `days` days leading up to (and including) today.
days = 365
end = date.today()
start = end - timedelta(days=days)

In [None]:
# Fetch workouts for the window. The end bound is pushed one day past today
# (presumably because the API treats the end date as exclusive; TODO confirm).
workouts = client("workout", start.isoformat(), (end + timedelta(days=1)).isoformat())
# Preview the first record; raises IndexError if no workouts were returned.
workouts["data"][0]

In [None]:
# Cache the raw response as gzip-compressed JSON; the modelling section below
# reloads this file instead of calling the API again.
with gzip.open("../data/workouts.json.gz", "wt") as f:
    json.dump(workouts, f)

In [None]:
# Fetch daily activity summaries for the same window (same end-bound handling
# as the workout request above in this notebook's date logic).
da = client("daily_activity", start.isoformat(), (end + timedelta(days=1)).isoformat())

In [None]:
# Last two records of the response. The names assume records are ordered by
# day ascending and that the final one is today's; TODO confirm against the API.
da_today = da["data"][-1]
da_yesterday = da["data"][-2]

In [None]:
# Take 120 consecutive MET samples (positions 840..959) from yesterday's record.
# NOTE(review): if samples are one per minute starting at midnight this is
# 14:00-16:00; confirm the sampling interval and start offset.
act_df = pd.DataFrame(da_yesterday["met"]["items"][840:960], columns=["activity"])

In [None]:
# Raw MET samples as a line chart; reset_index() exposes the row position as
# an "index" column to use on the x axis.
alt.Chart(act_df.reset_index()).mark_line().encode(
    x="index:Q",
    y="activity:Q",
)

In [None]:
# Same series smoothed with a centred rolling mean: each point averages itself
# with up to 7 samples on either side (frame=[-7, 7]).
alt.Chart(act_df.reset_index()).mark_line().transform_window(
    rolling_mean="mean(activity)", frame=[-7, 7]
).encode(x="index:Q", y="rolling_mean:Q")

#### TODO

- Verify whether this MET series matches the activity graph that is visible in the app.

## Modelling

In [None]:
import torch
import torch.nn as nn
from torch.utils import data
import tqdm

In [None]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [None]:
# Activity name -> integer class id; only these activities are kept for modelling.
lbl2idx = {
    name: class_id
    for class_id, name in enumerate(("cycling", "running", "walking"))
}

In [None]:
# Reload the cached API response written earlier in the notebook.
with gzip.open("../data/workouts.json.gz", "rt") as f:
    workouts = json.load(f)
workout_df = pd.DataFrame(workouts["data"])

# Feature 1: workout length in minutes (end timestamp minus start timestamp).
workout_df["duration_min"] = [
    d.total_seconds() / 60
    for d in pd.to_datetime(workout_df["end_datetime"])
    - pd.to_datetime(workout_df["start_datetime"])
]
# Feature 2: start time expressed as minutes since midnight, using the hour and
# minute of the parsed start timestamp as given (no timezone conversion).
workout_df["start_min"] = [
    s.hour * 60.0 + s.minute for s in pd.to_datetime(workout_df["start_datetime"])
]

# Keep only the activities we have a class id for. The explicit copy makes the
# filtered frame independent, so adding the "label" column below is not a
# chained assignment on a slice of the original frame.
workout_df = workout_df.loc[workout_df.activity.isin(lbl2idx.keys()), :].copy()
workout_df["label"] = workout_df["activity"].map(lbl2idx)
workout_df = workout_df[["duration_min", "start_min", "label"]]

In [None]:
# Sanity check: inspect the column dtypes of the prepared feature frame.
workout_df.dtypes

In [None]:
# Number of usable workouts (rows) and columns after filtering.
workout_df.shape

In [None]:
# Split by the original row label of each workout:
#   train: label < 500, validation: 500 <= label < 750, test: label >= 750.
# Half-open boolean masks are used instead of `.loc[a:b]` slices because label
# slices include BOTH endpoints, which would place rows 500 and 750 in two
# splits at once (leaking evaluation rows into training/validation).
# reset_index(drop=True) returns a fresh frame, so nothing is modified in place
# on a slice of workout_df.
train_df = workout_df.loc[workout_df.index < 500, :].reset_index(drop=True)

val_df = workout_df.loc[
    (workout_df.index >= 500) & (workout_df.index < 750), :
].reset_index(drop=True)

test_df = workout_df.loc[workout_df.index >= 750, :].reset_index(drop=True)

In [None]:
# Class balance of the training split (count of rows per class id).
print(train_df.label.value_counts())

In [None]:
# Class balance of the validation split.
print(val_df.label.value_counts())

In [None]:
# Class balance of the test split.
print(test_df.label.value_counts())

In [None]:
class WorkoutDataset(data.Dataset):
    """Map-style dataset over a workout feature frame.

    The frame must provide the columns ``duration_min``, ``start_min`` and
    ``label``. Features and labels are converted to tensors once at
    construction time, so item access is a plain tensor lookup instead of a
    pandas row lookup per sample.

    Attributes:
        data: The frame passed to the constructor (kept for inspection).
        features: ``float32`` tensor with one ``[duration_min, start_min]``
            row per sample.
        labels: ``int64`` tensor with one class id per sample.

    Note:
        The tensors are a snapshot; changes made to ``data`` after
        construction are not reflected in the returned items.
    """

    def __init__(self, data):
        # `data` here is the DataFrame argument; it shadows the torch
        # `data` module only inside this method.
        self.data = data
        self.features = torch.tensor(
            data[["duration_min", "start_min"]].to_numpy(dtype="float32")
        )
        self.labels = torch.tensor(
            data["label"].to_numpy(dtype="int64"), dtype=torch.int64
        )

    def __len__(self):
        """Return the number of samples (rows of the source frame)."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(features, label)`` for the sample at position ``index``.

        ``features`` is a ``float32`` tensor of shape ``(2,)`` and ``label``
        is a zero-dimensional ``int64`` tensor, which is the target format
        ``nn.CrossEntropyLoss`` expects for class indices after batching.
        """
        return self.features[index], self.labels[index]

In [None]:
# Wrap each split in a Dataset so it can be fed to a DataLoader.
train_dataset = WorkoutDataset(train_df)
val_dataset = WorkoutDataset(val_df)
test_dataset = WorkoutDataset(test_df)

In [None]:
# Sanity check on a single sample: shapes of the feature vector and the label.
d, l = train_dataset[0]
d.shape, l.shape

In [None]:
# Only the training loader shuffles; validation and test keep dataset order so
# their predictions line up with the rows of val_df / test_df.
train_data_loader = data.DataLoader(train_dataset, batch_size=8, shuffle=True)
val_data_loader = data.DataLoader(val_dataset, batch_size=16, shuffle=False)
test_data_loader = data.DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
# Pull one batch from the training loader to eyeball shapes and values.
data_inputs, data_labels = next(iter(train_data_loader))
print("Data inputs", data_inputs.shape, "\n", data_inputs)
print("Data labels", data_labels.shape, "\n", data_labels)

In [None]:
class SimpleClassifier(nn.Module):
    """Small feed-forward network: linear -> tanh -> linear.

    The output is the raw result of the second linear layer (one score per
    class); no softmax is applied inside the module.
    """

    def __init__(self, num_inputs, num_hidden, num_outputs):
        """Create the layers.

        Args:
            num_inputs: Size of each input feature vector.
            num_hidden: Width of the hidden layer.
            num_outputs: Number of scores produced per sample.
        """
        super().__init__()
        # Attribute names are also the state_dict key prefixes, so they stay
        # as linear1 / act_fn / linear2.
        self.linear1 = nn.Linear(in_features=num_inputs, out_features=num_hidden)
        self.act_fn = nn.Tanh()
        self.linear2 = nn.Linear(in_features=num_hidden, out_features=num_outputs)

    def forward(self, x):
        """Return the class scores for the input batch ``x``."""
        hidden = self.act_fn(self.linear1(x))
        return self.linear2(hidden)

In [None]:
# 2 inputs (duration_min, start_min) -> 64 hidden units -> 3 class scores
# (one per entry in lbl2idx).
model = SimpleClassifier(num_inputs=2, num_hidden=64, num_outputs=3)
print(model)

In [None]:
# Move the model's parameters to the selected device before building the optimizer.
model.to(device)

In [None]:
# Multi-class setup: CrossEntropyLoss takes the raw class scores from the
# model together with integer class ids. The commented-out lines are
# alternatives that were tried/considered and are kept for reference.
# loss_module = nn.BCEWithLogitsLoss()
loss_module = nn.CrossEntropyLoss()
# loss_module = nn.NLLLoss()

# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

In [None]:
def train_model(model, optimizer, data_loader, loss_module, num_epochs=100):
    """Train ``model`` in place for ``num_epochs`` passes over ``data_loader``.

    Args:
        model: The ``nn.Module`` to optimise; it is switched to training mode.
        optimizer: Optimizer already bound to ``model``'s parameters.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_module: Callable ``loss_module(preds, labels)`` returning a
            scalar tensor that supports ``backward()``.
        num_epochs: Number of full passes over ``data_loader``.

    Returns:
        None. The model and optimizer state are updated as a side effect.
    """
    model.train()

    # Send batches to wherever the model lives instead of relying on a
    # notebook-global `device`; a parameter-less module falls back to the CPU.
    first_param = next(model.parameters(), None)
    model_device = (
        first_param.device if first_param is not None else torch.device("cpu")
    )

    # Training loop (tqdm shows one progress tick per epoch)
    for _ in tqdm.tqdm(range(num_epochs)):
        for data_inputs, data_labels in data_loader:

            # 1: move to device
            data_inputs = data_inputs.to(model_device)
            data_labels = data_labels.to(model_device)

            # 2: forward pass
            preds = model(data_inputs)

            # 3: compute loss
            loss = loss_module(preds, data_labels)

            # 4: compute gradients (clear the previous step's gradients first,
            # since backward() accumulates into .grad)
            optimizer.zero_grad()
            loss.backward()

            # 5: update parameters
            optimizer.step()

            # Not done here: accuracy tracking, logging and checkpointing.

In [None]:
# Train with the default number of epochs on the training loader.
train_model(model, optimizer, train_data_loader, loss_module)

In [None]:
# Persist only the learned parameters (the state dict), not the module object.
state_dict = model.state_dict()
torch.save(state_dict, "model.pt")
# To restore, rebuild the model with the SAME layer sizes used for training
# (otherwise load_state_dict fails on a shape mismatch), then load the weights:
# state_dict = torch.load("model.pt")
# new_model = SimpleClassifier(num_inputs=2, num_hidden=64, num_outputs=3)
# new_model.load_state_dict(state_dict)

In [None]:
def eval_model(model, data_loader):
    """Evaluate ``model`` on ``data_loader`` and print its accuracy.

    Args:
        model: The ``nn.Module`` to evaluate; it is switched to eval mode.
        data_loader: Iterable yielding ``(inputs, labels)`` batches, where
            ``labels`` holds integer class ids.

    Returns:
        A flat list with the predicted class id (argmax over dim 1 of the
        model output) for every sample, in the order the loader yielded them.
    """
    model.eval()

    # Send batches to wherever the model lives instead of relying on a
    # notebook-global `device`; a parameter-less module falls back to the CPU.
    first_param = next(model.parameters(), None)
    model_device = (
        first_param.device if first_param is not None else torch.device("cpu")
    )

    num_correct, num_seen = 0, 0
    all_preds = []

    with torch.no_grad():  # Deactivate gradients for the following code
        for data_inputs, data_labels in data_loader:
            data_inputs = data_inputs.to(model_device)
            data_labels = data_labels.to(model_device)

            preds = torch.argmax(model(data_inputs), dim=1)

            all_preds.extend(preds.tolist())
            # .item() keeps the running count a plain Python int rather than
            # a tensor living on the model's device.
            num_correct += (preds == data_labels).sum().item()
            num_seen += data_labels.shape[0]

    # An empty loader has no defined accuracy; avoid dividing by zero.
    if num_seen == 0:
        print("Accuracy of the model: n/a (no samples)")
        return all_preds

    acc = num_correct / num_seen
    print(f"Accuracy of the model: {100.0*acc:4.2f}%")
    return all_preds

In [None]:
# Accuracy on the training split. The training loader shuffles, so the order of
# train_preds does not follow train_df; the Counter (predictions per class id)
# is unaffected by that.
train_preds = eval_model(model, train_data_loader)
Counter(train_preds)

In [None]:
# Accuracy on the validation split, plus how often each class id was predicted.
val_preds = eval_model(model, val_data_loader)
Counter(val_preds)

In [None]:
# Accuracy on the held-out test split, plus how often each class id was predicted.
test_preds = eval_model(model, test_data_loader)
Counter(test_preds)