In [None]:
import sys
sys.path.append('../')
import build_dataset

# Helper objects from the project-local `build_dataset` module:
# `lfx` is used below to derive load metrics, `pfx` to derive performance metrics.
lfx = build_dataset.load_functions()
pfx = build_dataset.performance_functions()

# Pre-processing object; later cells read the activities from `pp.activity_data`.
pp = build_dataset.dataset_preprocess(
    local_activity_store='gc_activitydata_local.csv',
    local_activity_model_params='modeled_ef.csv',
)

# Display the load-metric registry exposed by `lfx`.
lfx.metric_function_map

In [None]:
# Display the metric registry exposed by the performance helper
# (presumably the metric names accepted by `derive_performance` — confirm).
pfx.metric_function_map

In [None]:
# Derive the three time-in-zone load metrics. Each call receives the frame
# returned by the previous one, so the final `loaded_df` carries every derived
# column whether `derive_load` mutates its input or returns a new frame.
# (Previously every call restarted from `pp.activity_data` and overwrote
# `loaded_df`, which only works if `derive_load` mutates in place.)
loaded_df = pp.activity_data
for load_metric in ('TIZ1_3', 'TIZ2_3', 'TIZ3_3'):
    loaded_df = lfx.derive_load(frame=loaded_df, load_metric=load_metric)

# Keep rows from the first one where the running TIZ1_3 total becomes positive.
loaded_df = loaded_df[loaded_df['TIZ1_3'].cumsum() > 0].copy()

In [None]:
# Derive the 'VO2' performance metric for `loaded_df`; the return value is only
# displayed, not assigned.
# NOTE(review): later cells read loaded_df['VO2'], so this call presumably adds
# that column in place — confirm against `derive_performance`.
pfx.derive_performance(loaded_df, performance_metric='VO2')

## Data Processing

In [None]:
# NOTE(review): `data_original`, `athlete_statics` and `pd` are not defined in
# any cell visible above, and `np` is only imported further down — confirm this
# cell runs on a fresh kernel.

# Bike activities only.
data = data_original[data_original['Sport'] == 'Bike'].copy()

# Modelled power at threshold heart rate, evaluated from the per-activity
# coefficients `a`, `b` and `c`.
data['mod_pow_at_threshold'] = data['a'] + data['b']*athlete_statics['threshold_hr'] +  data['c']*(60*60)*20

# One row per date: sum each time-in-zone column (L1..L7) and keep the day's
# maximum modelled threshold power.
daily_aggregations = {f'L{zone}_Time_in_Zone': 'sum' for zone in range(1, 8)}
daily_aggregations['mod_pow_at_threshold'] = 'max'
data = data.groupby('date').agg(daily_aggregations).reset_index()

# Index by calendar day and insert the days without any activity.
data['date'] = pd.to_datetime(data['date'])
data = data.sort_values(by=['date'])
data.index = pd.DatetimeIndex(data['date'])
missing_dates = pd.date_range(start=data.index.min(), end=data.index.max())
data = data.reindex(missing_dates, fill_value=0)

# `fill_value=0` also wrote 0 into the 'date' column of the inserted days;
# restore real timestamps from the index.
data['date'] = data.index

# A zero threshold power means "no estimate for that day": carry the last known
# value forward instead. `.ffill()` replaces the deprecated
# `fillna(method='ffill')`.
data['mod_pow_at_threshold'] = data['mod_pow_at_threshold'].replace(0, np.nan).ffill()

# Days before the first available estimate are still NaN; drop them.
data = data.dropna()

## Model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt

## CNN

In [None]:
class TimeSeriesCNN(nn.Module):
    """Two-stage 1-D CNN followed by a two-layer fully connected head.

    Each stage is Conv1d(kernel_size=3) -> ReLU -> MaxPool1d(kernel_size=2).
    The size of the flattened feature vector is derived from ``seq_len``; with
    the defaults it is 64*8 = 512, the value that was previously hard-coded.

    Args:
        in_channels: Number of input channels (features per time step).
        seq_len: Number of time steps per sample. Lengths 38-41 all reduce to
            8 steps after both stages, so they share the default layout.
        out_features: Size of the model output per sample.

    Raises:
        ValueError: If ``seq_len`` is too short to survive both conv/pool stages.
    """

    def __init__(self, in_channels=8, seq_len=38, out_features=2):
        super(TimeSeriesCNN, self).__init__()

        # Length remaining after conv(k=3) -> pool(2) -> conv(k=3) -> pool(2).
        pooled_len = ((seq_len - 2) // 2 - 2) // 2
        if pooled_len < 1:
            raise ValueError(
                f"seq_len={seq_len} is too short for two conv/pool stages"
            )
        self.flat_features = 64 * pooled_len

        # Layers are created in the same order as before so that seeded
        # initialisation is unchanged for the default arguments.
        self.conv1 = nn.Conv1d(in_channels=in_channels, out_channels=32, kernel_size=3)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3)
        self.fc1 = nn.Linear(in_features=self.flat_features, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=out_features)

    def forward(self, x):
        """Run the network.

        Args:
            x: Tensor of shape (batch, in_channels, length), or a single
                unbatched sample of shape (in_channels, length).

        Returns:
            Tensor of shape (batch, out_features); an unbatched input yields a
            batch of one.

        Raises:
            ValueError: If the input length does not reduce to the flattened
                size the fully connected head was built for.
        """
        if x.dim() == 2:
            # Treat a single (channels, length) sample as a batch of one.
            x = x.unsqueeze(0)

        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))

        # Flatten per sample. The former `view(-1, 64*8)` could silently regroup
        # values from different samples whenever the per-sample size was not 512
        # but the batch total happened to be divisible by it.
        x = x.flatten(start_dim=1)
        if x.size(1) != self.flat_features:
            raise ValueError(
                f"expected {self.flat_features} flattened features per sample, "
                f"got {x.size(1)}; check the input sequence length"
            )

        x = nn.functional.relu(self.fc1(x))
        return self.fc2(x)

In [None]:
# Daily totals per sport (Bike / Run): sum each time-in-zone load and keep the
# day's maximum VO2.
sl_df = (
    loaded_df[loaded_df['Sport'].isin(['Bike', 'Run'])]
    [['date', 'Sport', 'TIZ1_3', 'TIZ2_3', 'TIZ3_3', 'VO2']]
    .groupby(['date', 'Sport'])
    .agg({'TIZ1_3': 'sum', 'TIZ2_3': 'sum', 'TIZ3_3': 'sum', 'VO2': 'max'})
)

# Wide layout: one row per date, columns keyed by (metric, sport).
ul_df = sl_df.unstack(level=1)

# A sport with no activity on a date contributes zero load; a missing VO2 is
# carried forward for at most 5 rows.
# The previous `ul_df.fillna({...}).fillna(..., inplace=True)` applied the
# in-place fill to a temporary frame, so `ul_df` itself was never filled.
ul_df = (
    ul_df[['TIZ1_3', 'TIZ2_3', 'TIZ3_3']].fillna(0)
    .join(ul_df[['VO2']].ffill(limit=5))
)

In [None]:
# Dates on which both sports have a VO2 value. Taken as an explicit copy so the
# extra column below is not written onto a slice of `ul_df`.
comp_vo2 = ul_df[ul_df['VO2'].isnull().sum(axis=1) < 1]['VO2'].copy()

# Average Bike/Run VO2 ratio on those dates, used to convert between sports.
bike_to_run = (comp_vo2['Bike'] / comp_vo2['Run']).mean()
comp_vo2['Run from bike'] = comp_vo2['Bike'] / bike_to_run

# Sanity check: measured Run VO2 against the value converted from Bike.
ax = comp_vo2.plot(kind='scatter', x='Run', y='Run from bike', xlim=(50, 75), ylim=(50, 75), figsize=(4, 4))

# Dates with at most one of the two VO2 values missing (not used in the cells
# visible here).
sing_vo2 = ul_df[ul_df['VO2'].isnull().sum(axis=1) < 2]['VO2']

# Fill a missing sport's VO2 from the other sport via the average ratio.
# Assigned back explicitly: `ul_df[col].fillna(..., inplace=True)` acts on a
# selected column and is not guaranteed to update `ul_df`.
ul_df[('VO2', 'Run')] = ul_df[('VO2', 'Run')].fillna(ul_df[('VO2', 'Bike')] / bike_to_run)
ul_df[('VO2', 'Bike')] = ul_df[('VO2', 'Bike')].fillna(ul_df[('VO2', 'Run')] * bike_to_run)

In [None]:
# Feature frame: the three load metrics followed by the VO2 columns.
fl_df = ul_df[['TIZ1_3', 'TIZ2_3', 'TIZ3_3']].join(ul_df[['VO2']])

# `shift(-1)` places the NEXT row's VO2 on the current row (the last row
# becomes NaN).
# NOTE(review): the `_l1` suffix reads like "lag 1", but this is a lead —
# confirm that is the intended feature.
fl_df[[('VO2_l1', 'Bike'), ('VO2_l1', 'Run')]] = fl_df['VO2'].shift(-1)

In [None]:
# Model inputs and targets as plain arrays. The final row is dropped from both
# because its shifted `VO2_l1` value is NaN.
input_data = fl_df[['TIZ1_3', 'TIZ2_3', 'TIZ3_3', 'VO2_l1']].iloc[:-1].to_numpy()
target_data = fl_df['VO2'].iloc[:-1].to_numpy()

In [None]:
# Chronological split without shuffling. Despite its name, `test_percent` is
# the fraction of rows that goes to the TRAINING set; the rest is the test set.
test_percent = 0.8
test_split = int(len(input_data) * test_percent)

train_input_data = input_data[:test_split]
train_target_data = target_data[:test_split]
test_input_data = input_data[test_split:]
test_target_data = target_data[test_split:]

In [None]:
# Shape of the training inputs: (rows, feature columns).
train_input_data.shape

In [None]:
# Shape of the first 32 training rows after transposing, i.e.
# (feature columns, 32) when at least 32 rows are available.
train_input_data[:32].T.shape

In [None]:
# Number of time steps per input window. 38 steps shrink to 8 after the model's
# two conv(kernel 3) + max-pool(2) stages, which is what its 64*8 flatten expects.
WINDOW = 38


def make_windows(features, targets, window=WINDOW):
    """Cut a (time, channels) array into overlapping windows for a Conv1d model.

    Args:
        features: Array of shape (time, channels).
        targets: Array with one row (or value) per time step.
        window: Number of consecutive time steps per sample.

    Returns:
        Tuple ``(x, y)`` of float32 tensors: ``x`` has shape
        (samples, channels, window) and ``y`` holds, for each window, the target
        of the window's last time step. Windows containing NaN/inf in either
        inputs or target are dropped.

    Raises:
        ValueError: If there are fewer than ``window`` time steps, or no window
            is free of missing values.
    """
    features = np.asarray(features, dtype=float)
    targets = np.asarray(targets, dtype=float)
    if len(features) < window:
        raise ValueError(
            f"need at least {window} time steps, got {len(features)}"
        )

    # Result shape: (samples, channels, window) — the layout Conv1d expects.
    x = np.lib.stride_tricks.sliding_window_view(features, window, axis=0)
    y = targets[window - 1:]

    # Missing values would turn the loss into NaN, so drop affected samples.
    keep = np.isfinite(x).all(axis=(1, 2)) & np.isfinite(y.reshape(len(y), -1)).all(axis=1)
    if not keep.any():
        raise ValueError("every window contains missing values")

    return (
        torch.tensor(x[keep], dtype=torch.float32),
        torch.tensor(y[keep], dtype=torch.float32),
    )


# The previous version fed a single transposed (8, 32) slice to the model,
# compared its output with the targets of the whole training set, and evaluated
# on `test_data` / `test_labels`, which are only defined in a later cell.
# NOTE(review): pairing each window with the target of its last step is an
# assumption — confirm the intended prediction horizon.
train_inputs, train_targets = make_windows(train_input_data, train_target_data)
test_inputs, test_targets = make_windows(test_input_data, test_target_data)

model = TimeSeriesCNN()

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model (full-batch gradient steps)
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(train_inputs)
    loss = criterion(outputs, train_targets)
    loss.backward()
    optimizer.step()

    # Evaluate the model on the held-out split
    if epoch % 10 == 0:
        model.eval()
        with torch.no_grad():
            test_loss = criterion(model(test_inputs), test_targets)
            print(f"Epoch {epoch}: Test loss = {test_loss.item()}")

In [None]:
def generator(data, min_index=0, max_index=None, batch_size=16, n_steps=150, step_length=1000):
    """Yield random ``(samples, targets)`` batches forever.

    For each batch, ``batch_size`` end rows are drawn uniformly from
    ``[min_index + n_steps * step_length, max_index)``. Each sample is built by
    ``create_X`` from column 0 of ``data``; its target is column 1 of the row
    just before the drawn end row.

    NOTE(review): ``create_X`` and ``n_features`` are looked up as globals and
    are not defined in the cells visible here — confirm they exist before use.

    Args:
        data: 2-D array with the signal in column 0 and the target in column 1.
        min_index: Lowest row that a sample may start from.
        max_index: Exclusive upper bound for end rows; ``len(data) - 1`` if None.
        batch_size: Number of samples per batch.
        n_steps: Number of steps per sample (second axis of ``samples``).
        step_length: Number of rows covered by each step.

    Yields:
        Tuple ``(samples, targets)`` with shapes
        ``(batch_size, n_steps, n_features)`` and ``(batch_size,)``.
    """
    upper_bound = len(data) - 1 if max_index is None else max_index
    lower_bound = min_index + n_steps * step_length

    while True:
        # Random end positions for this batch.
        end_rows = np.random.randint(lower_bound, upper_bound, size=batch_size)

        # Fresh buffers every batch so previously yielded arrays stay untouched.
        samples = np.zeros((batch_size, n_steps, n_features))
        targets = np.zeros(batch_size)

        for slot, end_row in enumerate(end_rows):
            samples[slot] = create_X(data[:, 0], last_index=end_row, n_steps=n_steps, step_length=step_length)
            targets[slot] = data[end_row - 1, 1]

        yield samples, targets

## RNN

In [None]:
class RNN(nn.Module):
    """Single-layer vanilla RNN whose last time step feeds a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the recurrent hidden state.
        output_size: Size of the output per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: inputs are laid out as (batch, time, features).
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a (batch, time, input_size) tensor to (batch, output_size)."""
        # Explicit all-zero initial hidden state on the same device as the input.
        initial_hidden = torch.zeros(1, x.size(0), self.hidden_size, device=x.device)
        sequence_out, _ = self.rnn(x, initial_hidden)
        # Only the hidden state of the final time step goes into the head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)


In [None]:
# Synthetic data used only to smoke-test the RNN: 1000 samples of 50 time steps
# with 10 features each, and one regression target per sample.
# Seeds are fixed so the printed losses are reproducible across runs.
# The earlier duplicated generation lines are removed; one of them overwrote
# `input_data` (the real model inputs built above) with random values.
# NOTE(review): `data`, `model`, `criterion` and `optimizer` reuse names from
# earlier cells and replace those objects.
np.random.seed(0)
torch.manual_seed(0)
data = np.random.rand(1000, 50, 10)
labels = np.random.rand(1000, 1)

# Split the data into training and test sets
train_data, train_labels = data[:800], labels[:800]
test_data, test_labels = data[800:], labels[800:]

# Convert once up front; the arrays do not change between epochs.
train_inputs = torch.from_numpy(train_data).float()
train_targets = torch.from_numpy(train_labels).float()
test_inputs = torch.from_numpy(test_data).float()
test_targets = torch.from_numpy(test_labels).float()

# Initialize the model
input_size = 10
hidden_size = 64
output_size = 1
model = RNN(input_size, hidden_size, output_size)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model (full-batch gradient steps)
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(train_inputs)
    loss = criterion(outputs, train_targets)
    loss.backward()
    optimizer.step()

    # Evaluate the model on the test set
    if epoch % 10 == 0:
        model.eval()
        with torch.no_grad():
            test_loss = criterion(model(test_inputs), test_targets)
            print(f"Epoch {epoch}: Test loss = {test_loss.item()}")