# Classifying different workouts

In [None]:
import gzip
import json
import logging
import os
from datetime import date, timedelta

import altair as alt
import pandas as pd
from dotenv import load_dotenv
from qself.oura import OuraAPIClient

In [None]:
# Configure the root logger to emit INFO-level records for the rest of the notebook.
logging.basicConfig(level=logging.INFO)
# Load variables from a local .env file into the process environment.
load_dotenv()

## Dataset prep

In [None]:
# Build the API client from the personal access token in the environment;
# a missing OURA_PERSONAL_ACCESS_TOKEN raises KeyError here.
client = OuraAPIClient(os.environ["OURA_PERSONAL_ACCESS_TOKEN"])

In [None]:
# Query window: `days` days ending today.
end = date.today()
days = 365
start = end - timedelta(days=days)

In [None]:
# Fetch workouts for the window. The end date is pushed one day past today,
# presumably so that today's records are included — TODO confirm the API's
# end-date semantics.
range_start = start.isoformat()
range_end = (end + timedelta(days=1)).isoformat()
workouts = client("workout", range_start, range_end)
# Peek at the first record to inspect the payload's fields.
workouts["data"][0]

In [None]:
# Cache the raw response on disk as gzip-compressed JSON text so later cells
# can reload it from the file.
with gzip.open("../data/workouts.json.gz", "wt") as out_file:
    out_file.write(json.dumps(workouts))

In [None]:
# Fetch daily activity for the same window; the end date is one day past
# today, presumably so today's record is included — TODO confirm.
activity_from = start.isoformat()
activity_until = (end + timedelta(days=1)).isoformat()
da = client("daily_activity", activity_from, activity_until)

In [None]:
# Pick the last two entries of the response.
# NOTE(review): assumes "data" is in chronological order — confirm.
daily_records = da["data"]
da_today = daily_records[-1]
da_yesterday = daily_records[-2]

In [None]:
# Take a 120-sample slice of yesterday's MET series as a one-column frame.
# NOTE(review): presumably one sample per minute, which would make 840:960
# the 14:00-16:00 window — confirm against the payload's interval.
met_samples = da_yesterday["met"]["items"]
act_df = pd.DataFrame(met_samples[840:960], columns=["activity"])

In [None]:
# Line chart of the raw activity samples against their row position.
activity_points = act_df.reset_index()
alt.Chart(activity_points).mark_line().encode(x="index:Q", y="activity:Q")

In [None]:
# Same series smoothed with a window mean over the 7 rows before and the
# 7 rows after each point.
smoothed_chart = (
    alt.Chart(act_df.reset_index())
    .mark_line()
    .transform_window(rolling_mean="mean(activity)", frame=[-7, 7])
)
smoothed_chart.encode(x="index:Q", y="rolling_mean:Q")

#### TODO

- Check whether this activity curve matches what the app shows for the same time window.

## Modelling

In [None]:
import torch
import torch.nn as nn
from torch.utils import data
import tqdm

In [None]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [None]:
# Activity name -> class index; only these three activities are modelled.
lbl2idx = {name: idx for idx, name in enumerate(("cycling", "running", "walking"))}

In [None]:
# Reload the cached workouts so the modelling cells can run from the file
# on disk.
with gzip.open("../data/workouts.json.gz", "rt") as f:
    workouts = json.load(f)
workout_df = pd.DataFrame(workouts["data"])

# Parse each timestamp column once. Values are consumed element-wise below so
# the code does not depend on the dtype pandas infers for the parsed columns.
start_times = pd.to_datetime(workout_df["start_datetime"])
end_times = pd.to_datetime(workout_df["end_datetime"])

# Features: workout length in minutes and start time as minutes since midnight.
workout_df["duration_min"] = [
    elapsed.total_seconds() / 60 for elapsed in end_times - start_times
]
workout_df["start_min"] = [s.hour * 60.0 + s.minute for s in start_times]

# Keep only the modelled activities. `.copy()` makes the filtered frame
# independent of the original, so adding the "label" column below does not
# trigger pandas' SettingWithCopyWarning. The original row index is kept.
workout_df = workout_df.loc[workout_df.activity.isin(lbl2idx.keys()), :].copy()
workout_df["label"] = [lbl2idx[name] for name in workout_df["activity"]]
workout_df = workout_df[["duration_min", "start_min", "label"]]

In [None]:
# Display the dtype of each remaining column.
workout_df.dtypes

In [None]:
# Display (rows, columns) of the filtered frame.
workout_df.shape

In [None]:
# Split by original row label. `workout_df` still carries the index it had
# before the activity filter, so the thresholds refer to index labels, not to
# row counts. Label-based `.loc[a:b]` slices include both end points, which
# would place rows 500 and 750 in two splits each; the half-open masks below
# keep the three splits disjoint. `reset_index` is applied non-inplace so no
# slice of `workout_df` is mutated.
row_ids = workout_df.index
train_df = workout_df.loc[row_ids < 500, :].reset_index(drop=True)
val_df = workout_df.loc[(row_ids >= 500) & (row_ids < 750), :].reset_index(drop=True)
test_df = workout_df.loc[row_ids >= 750, :].reset_index(drop=True)

In [None]:
# Class balance of the training split (row count per label index).
print(train_df.label.value_counts())

In [None]:
# Class balance of the validation split (row count per label index).
print(val_df.label.value_counts())

In [None]:
# Class balance of the test split (row count per label index).
print(test_df.label.value_counts())

In [None]:
class WorkoutDataset(data.Dataset):
    """Map-style dataset that serves rows of a workout DataFrame as tensors.

    Each item is a ``(features, target)`` pair: ``features`` holds the
    ``duration_min`` and ``start_min`` values as float32, and ``target`` is
    the row's ``label`` one-hot encoded over three classes (int64).
    """

    def __init__(self, data):
        # Only a reference to the frame is stored; rows are converted per item.
        self.data = data

    def __len__(self):
        """Return the number of rows in the wrapped frame."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(features, one-hot target)`` tensors for row ``index``."""
        record = self.data.iloc[index, :]

        # Per-row conversion is slow but acceptable for a dataset this small.
        feature_values = record[["duration_min", "start_min"]].values
        features = torch.from_numpy(feature_values).to(torch.float32)

        class_index = torch.tensor(record["label"], dtype=torch.int64)
        target = nn.functional.one_hot(class_index, num_classes=3)
        return features, target

In [None]:
# Wrap each split in a dataset, in train / validation / test order.
train_dataset, val_dataset, test_dataset = (
    WorkoutDataset(split_df) for split_df in (train_df, val_df, test_df)
)

In [None]:
# Smoke test: fetch the first training sample as (features, one-hot label).
d, l = train_dataset[0]

In [None]:
# Display the shapes of the sample's feature and label tensors.
d.shape, l.shape

In [None]:
# Training batches are small and reshuffled each epoch; the evaluation
# loaders use larger batches and keep the rows in order.
train_data_loader = data.DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
val_data_loader, test_data_loader = (
    data.DataLoader(dataset=eval_split, batch_size=16, shuffle=False)
    for eval_split in (val_dataset, test_dataset)
)

In [None]:
class SimpleClassifier(nn.Module):
    """Feed-forward classifier with a single Tanh hidden layer.

    Args:
        num_inputs: Size of each input feature vector.
        num_hidden: Width of the hidden layer.
        num_outputs: Number of output scores produced per sample.
    """

    def __init__(self, num_inputs, num_hidden, num_outputs):
        super().__init__()
        # input -> hidden projection, non-linearity, hidden -> output projection
        self.linear1 = nn.Linear(num_inputs, num_hidden)
        self.act_fn = nn.Tanh()
        self.linear2 = nn.Linear(num_hidden, num_outputs)

    def forward(self, x):
        """Return the unnormalised output scores for ``x`` (no softmax applied)."""
        hidden = self.act_fn(self.linear1(x))
        return self.linear2(hidden)

In [None]:
# Two inputs (duration_min, start_min) and three outputs, one per activity
# label; the hidden width of 8 is a free choice.
model = SimpleClassifier(num_inputs=2, num_hidden=8, num_outputs=3)
# Print the module structure as a quick sanity check.
print(model)

In [None]:
# Move the model's parameters to the selected device.
model.to(device)

In [None]:
# The classifier's forward pass ends in a plain linear layer, so it emits raw
# unnormalised scores. CrossEntropyLoss applies log-softmax internally, whereas
# NLLLoss expects log-probabilities as input and would optimise the wrong
# quantity on raw scores.
loss_module = nn.CrossEntropyLoss()
# Plain SGD over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

In [None]:
def train_model(model, optimizer, data_loader, loss_module, num_epochs=100):
    """Train ``model`` in place for ``num_epochs`` passes over ``data_loader``.

    Args:
        model: Module to optimise. Batches are moved to the notebook-level
            ``device``, so the model is expected to live there already.
        optimizer: Optimizer constructed over ``model``'s parameters.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_module: Callable ``loss_module(preds, labels)`` returning a
            scalar loss tensor.
        num_epochs: Number of full passes over ``data_loader``.

    Returns:
        None. ``model`` and ``optimizer`` are updated in place.

    Integer labels that arrive one-hot encoded (same shape as the
    predictions) are collapsed to class indices when ``loss_module`` is
    ``nn.NLLLoss`` or ``nn.CrossEntropyLoss``: both losses reject integer
    targets with a class dimension. Floating-point targets are passed through
    untouched, so class-probability targets keep working.
    """
    model.train()

    # Decided once: only these losses take class-index targets.
    wants_class_indices = isinstance(loss_module, (nn.NLLLoss, nn.CrossEntropyLoss))

    for _ in tqdm.tqdm(range(num_epochs)):
        for data_inputs, data_labels in data_loader:
            data_inputs = data_inputs.to(device)
            data_labels = data_labels.to(device)

            preds = model(data_inputs)

            if (
                wants_class_indices
                and not data_labels.is_floating_point()
                and data_labels.shape == preds.shape
            ):
                # The class dimension is 1 for batched input, 0 for unbatched.
                class_dim = 1 if preds.dim() > 1 else 0
                data_labels = data_labels.argmax(dim=class_dim)

            loss = loss_module(preds, data_labels)

            # Clear gradients left over from the previous step before backprop.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [None]:
# Run training on the training loader with the default number of epochs.
train_model(model, optimizer, train_data_loader, loss_module)