In [1]:
from datetime import date, datetime as dt, timedelta as td
import pytz
import time
import pandas as pd
import numpy as np
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional, SimpleRNN
import warnings
warnings.filterwarnings("ignore")

from keras.layers import Flatten
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import ConvLSTM2D
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from keras.utils import to_categorical


  import pandas.util.testing as tm


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Preprocessing functions

In [6]:
PATH = 'drive/MyDrive/Colab Notebooks/StepCount.csv'
early_stopping_cb = EarlyStopping(patience=5, min_delta=0.000003, restore_best_weights=True)
data = pd.read_csv(PATH)

In [45]:
def convert_steps(steps, grouping=['date']):
  convert_tz = lambda x: x.to_pydatetime().replace(tzinfo=pytz.utc).astimezone(pytz.timezone('Europe/Vilnius'))
  get_year = lambda x: convert_tz(x).year
  get_month = lambda x: '{}-{:02}'.format(convert_tz(x).year, convert_tz(x).month) #inefficient
  get_date = lambda x: '{}-{:02}-{:02}'.format(convert_tz(x).year, convert_tz(x).month, convert_tz(x).day) #inefficient
  get_day = lambda x: convert_tz(x).day
  get_hour = lambda x: convert_tz(x).hour
  get_minute = lambda x: convert_tz(x).minute
  get_day_of_week = lambda x: convert_tz(x).weekday()


  steps['startDate'] = pd.to_datetime(steps['startDate'])
  steps['year'] = steps['startDate'].map(get_year)
  steps['month'] = steps['startDate'].map(get_month)
  steps['date'] = steps['startDate'].map(get_date)
  steps['day'] = steps['startDate'].map(get_day)
  steps['hour'] = steps['startDate'].map(get_hour)
  steps['dow'] = steps['startDate'].map(get_day_of_week)

  corona_start = steps[steps.date == "2020-03-10"].index[0]
  steps = steps.iloc[:corona_start, :]

  return steps.groupby(grouping)['value'].sum().reset_index(name='Steps')


def prepare_sequence(sequence, n_previous, n_future):
    """
    Input: 
        - sequence: numpy array, pandas DataFrame or list; input sequence
        - n_previous: int; number of past data points (N)
        - n_future: int; number of data points to be predicted (K)
    Returns:
        - X: (seq. length x n_previous) numpy array with the previous observations
        - y: (seq. length x n_fututre) numpy array with the target observations
    """
    
    X, y = [], []
    idx, i = 0, 0
    while idx+n_future <= len(sequence)-1:
        idx = i + n_previous
        X.append(sequence[i:idx])
        y.append(sequence[idx:idx+n_future])
        i += 1
        
    return np.array(X), np.array(y)


def season(month):
    if month in [12, 1, 2]:
        return 0
    elif month in [3, 4, 5]:
        return 1
    elif month in [6, 7, 8]:
        return 2
    elif month in [9, 10, 11]:
        return 3


def augment(steps):
    augmented = steps.groupby(['date'])['value'].sum().reset_index(name='Steps')
    dow = steps.groupby(['date']).mean()['dow']
    augmented['dow'] = (dow.apply(int)).to_list()
    month = augmented.date.apply(lambda a: int(a.split('-')[1]))
    augmented['season'] = month.apply(season)
    
    return augmented

# Load and prepare the data

In [46]:
steps_by_date = convert_steps(data)
steps_by_date_hour = convert_steps(data, ['date', 'hour'])

# Remove an invalid day
#steps_by_date = steps_by_date.iloc#[:-1]

# Replace the unrealistic values with the median
steps_by_date.Steps[steps_by_date.Steps < 300] = steps_by_date.Steps.median()

# Augent the data
augmented_steps_by_date = augment(data)

In [47]:
results = pd.DataFrame(columns=["Model", "N_K", "MAE", "RMSE", "Error_Steps"])

# Pipeline

## Baseline

In [48]:
def average(history, n, k):
  if k == 1:
    return np.mean(history[-n:])
  else:
    predictions = []
    while k != 0:
      prediction = np.mean(history[-n:])
      predictions.append(prediction)
      np.append(history, prediction)
      k -= 1
    return predictions

In [49]:
configurations = [(steps_by_date, 7, 1), (steps_by_date, 14, 1), (steps_by_date_hour, 24, 1),
                  (steps_by_date_hour, 24, 6), (steps_by_date_hour, 24, 12), (steps_by_date_hour, 48, 24)]
row = 0


for c in configurations:

  # Split to training and test
  coef = 0.1
  split_size = int(len(c[0].Steps) * (1 - coef))
  train_data, test_data = c[0].Steps[:split_size], c[0].Steps[split_size:]

  # Normalize the data
  scaler = MinMaxScaler() 
  norm_train = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
  norm_test = scaler.transform(test_data.to_numpy().reshape(-1, 1))

  # Prepare
  X_train, y_train = prepare_sequence(norm_train.tolist()+norm_test.tolist()[:c[2]], c[1], c[2])
  X_test, y_test = prepare_sequence(norm_test, c[1], c[2])

  history = X_train.flatten()

  # Predict
  baseline_predictions = list()
  for t in range(len(y_test)):
      baseline_predictions.append(average(history, c[1], c[2]))
      np.append(history, y_test.squeeze()[t])
 
  rmse = sqrt(mean_squared_error(y_test.squeeze(), baseline_predictions))
  mae = mean_absolute_error(y_test.squeeze(), baseline_predictions)
  error_steps = scaler.inverse_transform(np.array(mae).reshape(1, -1))

  results.loc[row] = ["Baseline", "{}_{}".format(c[1], c[2]), mae, rmse, error_steps[0][0]]
  row += 1

In [50]:
results

Unnamed: 0,Model,N_K,MAE,RMSE,Error_Steps
0,Baseline,7_1,0.109887,0.133907,3545.951198
1,Baseline,14_1,0.116042,0.140325,3727.770872
2,Baseline,24_1,0.073331,0.094649,620.280473
3,Baseline,24_6,0.073325,0.094685,620.230618
4,Baseline,24_12,0.073338,0.094732,620.342998
5,Baseline,48_24,0.072413,0.095002,612.531287


## LSTM

In [51]:
def vanilla_LSTM(output_size, n_timestamps, n_features, next_predicted=1):
  model = Sequential()
  model.add(LSTM(output_size, input_shape=(n_timestamps, n_features), activation='tanh'))
  model.add(Dense(next_predicted))
  model.compile(optimizer='adam', loss='mse', metrics=['mean_absolute_error'])
  return model

In [52]:
for c in configurations:
  # Split to training and test
  coef = 0.1
  split_size = int(len(c[0].Steps) * (1 - coef))
  train_data, test_data = c[0].Steps[:split_size], c[0].Steps[split_size:]

  # Normalize the data
  scaler = MinMaxScaler() 
  norm_train = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
  norm_test = scaler.transform(test_data.to_numpy().reshape(-1, 1))

  # Prepare
  X_train, y_train = prepare_sequence(norm_train.tolist()+norm_test.tolist()[:c[2]], c[1], c[2])
  X_test, y_test = prepare_sequence(norm_test, c[1], c[2])

  X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
  X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)


  # Build model
  single_model = vanilla_LSTM(1, X_train.shape[1], X_train.shape[2], c[2])
  # Fit the model
  history = single_model.fit(X_train, y_train, epochs=300, validation_split=0.1, shuffle=False, batch_size=6, callbacks=[early_stopping_cb])

  # Predict
  single_predictions = single_model.predict(X_test)

  rmse = sqrt(mean_squared_error(y_test.squeeze(), single_predictions))
  mae = mean_absolute_error(y_test.squeeze(), single_predictions)
  error_steps = scaler.inverse_transform(np.array(mae).reshape(1, -1))

  results.loc[row] = ["Vanilla LSTM", "{}_{}".format(c[1], c[2]), mae, rmse, error_steps[0][0]]

  row += 1

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300

In [53]:
results

Unnamed: 0,Model,N_K,MAE,RMSE,Error_Steps
0,Baseline,7_1,0.109887,0.133907,3545.951198
1,Baseline,14_1,0.116042,0.140325,3727.770872
2,Baseline,24_1,0.073331,0.094649,620.280473
3,Baseline,24_6,0.073325,0.094685,620.230618
4,Baseline,24_12,0.073338,0.094732,620.342998
5,Baseline,48_24,0.072413,0.095002,612.531287
6,Vanilla LSTM,7_1,0.084535,0.112442,2797.085491
7,Vanilla LSTM,14_1,0.085292,0.11362,2819.435088
8,Vanilla LSTM,24_1,0.072876,0.09613,616.441556
9,Vanilla LSTM,24_6,0.072069,0.095411,609.624564


## BLSTM

In [54]:
def BLSTM(output_size, n_timestamps, n_features, next_predicted=1):
  model = Sequential()
  model.add(Bidirectional(LSTM(output_size, activation='tanh'), input_shape=(n_timestamps, n_features)))
  model.add(Dense(next_predicted))
  model.compile(optimizer='adam', loss='mse')
  return model

In [55]:
for c in configurations:
  # Split to training and test
  coef = 0.1
  split_size = int(len(c[0].Steps) * (1 - coef))
  train_data, test_data = c[0].Steps[:split_size], c[0].Steps[split_size:]

  # Normalize the data
  scaler = MinMaxScaler() 
  norm_train = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
  norm_test = scaler.transform(test_data.to_numpy().reshape(-1, 1))

  # Prepare
  X_train, y_train = prepare_sequence(norm_train.tolist()+norm_test.tolist()[:c[2]], c[1], c[2])
  X_test, y_test = prepare_sequence(norm_test, c[1], c[2])

  X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
  X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

  # Build model
  single_model = BLSTM(1, X_train.shape[1], X_train.shape[2], c[2])
  # Fit the model
  history = single_model.fit(X_train, y_train, epochs=300, validation_split=0.1, shuffle=False, batch_size=6, callbacks=[early_stopping_cb])

  # Predict
  single_predictions = single_model.predict(X_test)

  rmse = sqrt(mean_squared_error(y_test.squeeze(), single_predictions))
  mae = mean_absolute_error(y_test.squeeze(), single_predictions)
  error_steps = scaler.inverse_transform(np.array(mae).reshape(1, -1))

  results.loc[row] = ["BLSTM", "{}_{}".format(c[1], c[2]), mae, rmse, error_steps[0][0]]

  row += 1

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300


In [56]:
results

Unnamed: 0,Model,N_K,MAE,RMSE,Error_Steps
0,Baseline,7_1,0.109887,0.133907,3545.951198
1,Baseline,14_1,0.116042,0.140325,3727.770872
2,Baseline,24_1,0.073331,0.094649,620.280473
3,Baseline,24_6,0.073325,0.094685,620.230618
4,Baseline,24_12,0.073338,0.094732,620.342998
5,Baseline,48_24,0.072413,0.095002,612.531287
6,Vanilla LSTM,7_1,0.084535,0.112442,2797.085491
7,Vanilla LSTM,14_1,0.085292,0.11362,2819.435088
8,Vanilla LSTM,24_1,0.072876,0.09613,616.441556
9,Vanilla LSTM,24_6,0.072069,0.095411,609.624564


## Simple RNN

In [57]:
def simple_RNN(output_size, n_timestamps, n_features, next_predicted=1):
  model = Sequential()
  model.add(SimpleRNN(output_size, activation='relu', input_shape=(n_timestamps, n_features)))
  model.add(Dense(next_predicted))
  model.compile(optimizer='adam', loss='mse')

  return model

In [58]:
for c in configurations:
  # Split to training and test
  coef = 0.1
  split_size = int(len(c[0].Steps) * (1 - coef))
  train_data, test_data = c[0].Steps[:split_size], c[0].Steps[split_size:]

  # Normalize the data
  scaler = MinMaxScaler() 
  norm_train = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
  norm_test = scaler.transform(test_data.to_numpy().reshape(-1, 1))

  # Prepare
  X_train, y_train = prepare_sequence(norm_train.tolist()+norm_test.tolist()[:c[2]], c[1], c[2])
  X_test, y_test = prepare_sequence(norm_test, c[1], c[2])

  X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
  X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

  # Build model
  single_model = simple_RNN(1, X_train.shape[1], X_train.shape[2], c[2])
  # Fit the model
  history = single_model.fit(X_train, y_train, epochs=300, validation_split=0.1, shuffle=False, batch_size=6, callbacks=[early_stopping_cb])

  # Predict
  single_predictions = single_model.predict(X_test)

  rmse = sqrt(mean_squared_error(y_test.squeeze(), single_predictions))
  mae = mean_absolute_error(y_test.squeeze(), single_predictions)
  error_steps = scaler.inverse_transform(np.array(mae).reshape(1, -1))

  results.loc[row] = ["SimpleRNN", "{}_{}".format(c[1], c[2]), mae, rmse, error_steps[0][0]]

  row += 1

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300


In [59]:
results

Unnamed: 0,Model,N_K,MAE,RMSE,Error_Steps
0,Baseline,7_1,0.109887,0.133907,3545.951198
1,Baseline,14_1,0.116042,0.140325,3727.770872
2,Baseline,24_1,0.073331,0.094649,620.280473
3,Baseline,24_6,0.073325,0.094685,620.230618
4,Baseline,24_12,0.073338,0.094732,620.342998
5,Baseline,48_24,0.072413,0.095002,612.531287
6,Vanilla LSTM,7_1,0.084535,0.112442,2797.085491
7,Vanilla LSTM,14_1,0.085292,0.11362,2819.435088
8,Vanilla LSTM,24_1,0.072876,0.09613,616.441556
9,Vanilla LSTM,24_6,0.072069,0.095411,609.624564


## ConvLSTM

In [60]:
def Conv_LSTM(n_seq, n_steps, n_features, next_predicted=1):
  model = Sequential()
  model.add(ConvLSTM2D(filters=64, kernel_size=(1,3), activation='tanh', input_shape=(n_seq, 1, n_steps, n_features)))
  model.add(Flatten())
  model.add(Dense(next_predicted))
  model.compile(optimizer='adam', loss='mse')
  return model

In [61]:
for c in configurations:
  # Split to training and test
  coef = 0.1
  split_size = int(len(c[0].Steps) * (1 - coef))
  train_data, test_data = c[0].Steps[:split_size], c[0].Steps[split_size:]

  # Normalize the data
  scaler = MinMaxScaler() 
  norm_train = scaler.fit_transform(train_data.to_numpy().reshape(-1, 1))
  norm_test = scaler.transform(test_data.to_numpy().reshape(-1, 1))

  # Prepare
  if c[1]%2 != 0:
    X_train, y_train = prepare_sequence(norm_train.tolist()+norm_test.tolist()[:c[2]], c[1]+1, c[2])
    X_test, y_test = prepare_sequence(norm_test, c[1]+1, c[2])
  else:
    X_train, y_train = prepare_sequence(norm_train.tolist()+norm_test.tolist()[:c[2]], c[1], c[2])
    X_test, y_test = prepare_sequence(norm_test, c[1], c[2])

  X_train = X_train.reshape(X_train.shape[0], 2, 1, X_train.shape[1]//2, 1)
  X_test = X_test.reshape(X_test.shape[0], 2, 1, X_test.shape[1]//2, 1)


  # Build model
  single_model = Conv_LSTM(2, X_train.shape[3], X_train.shape[4], c[2])
  # Fit the model
  history = single_model.fit(X_train, y_train, epochs=300, validation_split=0.1, shuffle=False, batch_size=6, callbacks=[early_stopping_cb])

  # Predict
  single_predictions = single_model.predict(X_test)

  rmse = sqrt(mean_squared_error(y_test.squeeze(), single_predictions))
  mae = mean_absolute_error(y_test.squeeze(), single_predictions)
  error_steps = scaler.inverse_transform(np.array(mae).reshape(1, -1))

  results.loc[row] = ["ConvLSTM", "{}_{}".format(c[1], c[2]), mae, rmse, error_steps[0][0]]

  row += 1

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300

In [62]:
results

Unnamed: 0,Model,N_K,MAE,RMSE,Error_Steps
0,Baseline,7_1,0.109887,0.133907,3545.951198
1,Baseline,14_1,0.116042,0.140325,3727.770872
2,Baseline,24_1,0.073331,0.094649,620.280473
3,Baseline,24_6,0.073325,0.094685,620.230618
4,Baseline,24_12,0.073338,0.094732,620.342998
5,Baseline,48_24,0.072413,0.095002,612.531287
6,Vanilla LSTM,7_1,0.084535,0.112442,2797.085491
7,Vanilla LSTM,14_1,0.085292,0.11362,2819.435088
8,Vanilla LSTM,24_1,0.072876,0.09613,616.441556
9,Vanilla LSTM,24_6,0.072069,0.095411,609.624564


# Results

In [63]:
results[results.N_K == "7_1"].sort_values("MAE")

Unnamed: 0,Model,N_K,MAE,RMSE,Error_Steps
6,Vanilla LSTM,7_1,0.084535,0.112442,2797.085491
24,ConvLSTM,7_1,0.085792,0.115768,2834.205266
12,BLSTM,7_1,0.08594,0.112182,2838.575706
18,SimpleRNN,7_1,0.086865,0.112112,2865.919571
0,Baseline,7_1,0.109887,0.133907,3545.951198


In [64]:
results[results.N_K == "14_1"].sort_values("MAE")

Unnamed: 0,Model,N_K,MAE,RMSE,Error_Steps
13,BLSTM,14_1,0.085001,0.113605,2810.856383
7,Vanilla LSTM,14_1,0.085292,0.11362,2819.435088
25,ConvLSTM,14_1,0.087133,0.117528,2873.817309
19,SimpleRNN,14_1,0.097181,0.122449,3170.624488
1,Baseline,14_1,0.116042,0.140325,3727.770872


In [65]:
results[results.N_K == "24_1"].sort_values("MAE")

Unnamed: 0,Model,N_K,MAE,RMSE,Error_Steps
20,SimpleRNN,24_1,0.071976,0.09542,608.836776
8,Vanilla LSTM,24_1,0.072876,0.09613,616.441556
14,BLSTM,24_1,0.072937,0.096107,616.95664
2,Baseline,24_1,0.073331,0.094649,620.280473
26,ConvLSTM,24_1,0.073407,0.096179,620.919354


In [66]:
results[results.N_K == "24_6"].sort_values("MAE")

Unnamed: 0,Model,N_K,MAE,RMSE,Error_Steps
21,SimpleRNN,24_6,0.071964,0.095474,608.736448
9,Vanilla LSTM,24_6,0.072069,0.095411,609.624564
15,BLSTM,24_6,0.072127,0.095207,610.115911
27,ConvLSTM,24_6,0.072231,0.095261,610.993724
3,Baseline,24_6,0.073325,0.094685,620.230618


In [67]:
results[results.N_K == "24_12"].sort_values("MAE")

Unnamed: 0,Model,N_K,MAE,RMSE,Error_Steps
10,Vanilla LSTM,24_12,0.072,0.095288,609.040175
28,ConvLSTM,24_12,0.072004,0.09565,609.070599
22,SimpleRNN,24_12,0.072017,0.095469,609.18552
16,BLSTM,24_12,0.072052,0.095196,609.483145
4,Baseline,24_12,0.073338,0.094732,620.342998


In [68]:
results[results.N_K == "48_24"].sort_values("MAE")

Unnamed: 0,Model,N_K,MAE,RMSE,Error_Steps
29,ConvLSTM,48_24,0.071797,0.095584,607.32211
17,BLSTM,48_24,0.071808,0.095233,607.421864
23,SimpleRNN,48_24,0.07184,0.095356,607.688238
11,Vanilla LSTM,48_24,0.071857,0.095206,607.835647
5,Baseline,48_24,0.072413,0.095002,612.531287


In [84]:
%cd drive/MyDrive/Colab\ Notebooks 

/content/drive/MyDrive/Colab Notebooks


In [85]:
%ls

 ARIMA.ipynb                         dnn.ipynb          StepCountB.csv
 [0m[01;34mCL[0m/                                 Experiment.ipynb   StepCount.csv
'Copy of Copy of Experiment.ipynb'  'explore t.ipynb'   Test_Notebook.ipynb
'Copy of Experiment.ipynb'           [01;34mLSTM[0m/              Thesis.ipynb


In [86]:
GIT_TOKEN = "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"
GIT_USERNAME = "MaxBelitsky" 
GIT_REPOSITORY = "step-counts-prediction" 

#GIT_PATH = "https://{GIT_TOKEN}@github.com/{GIT_USERNAME}/{GIT_REPOSITORY}.git" this return 400 Bad Request for me
GIT_PATH = "https://" + GIT_TOKEN + "@github.com/" + GIT_USERNAME + "/" + GIT_REPOSITORY + ".git"
print("GIT_PATH: ", GIT_PATH)

GIT_PATH:  https://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@github.com/MaxBelitsky/step-counts-prediction.git


In [89]:
%ls

 ARIMA.ipynb                         Experiment.ipynb   [0m[01;34mstep-counts-prediction[0m/
 [01;34mCL[0m/                                'explore t.ipynb'   Test_Notebook.ipynb
'Copy of Copy of Experiment.ipynb'   [01;34mLSTM[0m/              Thesis.ipynb
'Copy of Experiment.ipynb'           StepCountB.csv
 dnn.ipynb                           StepCount.csv


In [88]:
! git clone "{GIT_PATH}"

Cloning into 'step-counts-prediction'...
remote: Enumerating objects: 4, done.[K
remote: Counting objects: 100% (4/4), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 4 (delta 0), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (4/4), done.


In [90]:
%cd step-counts-prediction/

/content/drive/My Drive/Colab Notebooks/step-counts-prediction


In [92]:
%ls

'Copy of Copy of Experiment.ipynb'   README.md   StepCountB.csv   StepCount.csv


In [93]:
! git branch

* [32mmain[m
