## OpenAI Hackathon: Health Data

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pathlib import Path
import re

# Resolve the history CSV relative to the directory the notebook runs from.
ROOT_DIR = Path.cwd()
GYRO_CSV = ROOT_DIR.joinpath('Anand-history.csv')

# Load the full CSV into a DataFrame; the cells below read their columns from it.
gyro = pd.read_csv(GYRO_CSV)

## Correlation

In [None]:
# Correlation matrix using Seaborn.
# Keep only numeric/boolean columns before correlating: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# instead. NOTE(review): assumes the CSV may carry such columns (e.g. a date).
corrmat = gyro.select_dtypes(include=['number', 'bool']).corr()
f, ax = plt.subplots(figsize=(12, 9))
# Draw on the axes created above rather than relying on the implicit current axes.
# The trailing semicolon suppresses the Axes repr in the notebook output.
sns.heatmap(corrmat, vmax=.8, square=True, ax=ax);

## Possible Next Steps

Try to predict future health information from the recorded history, using the time-sequence prediction example in [1] as a starting point.

Sources:
 - [1] https://github.com/floydhub/time-sequence-prediction

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import numpy as np

# Training hyper-parameters
NUM_EPOCH = 8  # number of iterations of the training loop
LR = 1e-2      # learning rate handed to the optimizer

# Feature columns fed to the model, and the column(s) it should predict
IN_COL = ['steps', 'commits']
OUT_COL = ['weight']

# True when a CUDA device is available; model and tensors are moved to it if so
CUDA = torch.cuda.is_available()

# Model
class Sequence(nn.Module):
    """Two stacked LSTM cells that map a sequence of scalars to a sequence of scalars.

    The first cell expands each 1-feature input to ``hidden_size`` features and
    the second cell reduces that back to a single value, which is the
    prediction emitted for that step.

    Args:
        hidden_size: Width of the first cell's hidden/cell state. Defaults to
            51, the value that used to be hard-coded.
    """

    def __init__(self, hidden_size=51):
        super(Sequence, self).__init__()
        self.hidden_size = hidden_size
        self.lstm1 = nn.LSTMCell(1, hidden_size)
        self.lstm2 = nn.LSTMCell(hidden_size, 1)
        # CUDA is the file-level flag; put the cells on the GPU when it is set
        # so they sit on the same device as the tensors the script moves there.
        if CUDA:
            self.lstm1, self.lstm2 = self.lstm1.cuda(), self.lstm2.cuda()

    def forward(self, input, future=0):
        """Run the network over ``input`` and optionally keep predicting.

        Args:
            input: 2-D tensor. Dimension 0 is the batch; every column along
                dimension 1 is fed to the network as one step.
            future: Number of additional steps to generate after the last
                column, feeding each prediction back in as the next input.

        Returns:
            Tensor of shape ``(input.size(0), input.size(1) + future)``: one
            prediction per consumed column followed by the ``future``
            extrapolated ones.
        """
        batch_size = input.size(0)
        # new_zeros() makes the recurrent state inherit dtype and device from
        # the input, so no global CUDA check or hard-coded double is needed.
        h_t = input.new_zeros(batch_size, self.hidden_size)
        c_t = input.new_zeros(batch_size, self.hidden_size)
        h_t2 = input.new_zeros(batch_size, 1)
        c_t2 = input.new_zeros(batch_size, 1)

        outputs = []

        # Iterate over columns: each chunk has shape (batch, 1)
        for input_t in input.chunk(input.size(1), dim=1):
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            outputs.append(h_t2)

        # Continue from the last prediction for `future` more steps
        for _ in range(future):
            h_t, c_t = self.lstm1(h_t2, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            outputs.append(h_t2)

        # Stack the per-step (batch, 1) predictions and drop the trailing unit axis
        return torch.stack(outputs, 1).squeeze(2)


# Set the random seeds to 0 so weight initialisation and training are reproducible
np.random.seed(0)
torch.manual_seed(0)

# Input and target data come from the CSV.
# DataFrame.as_matrix() was removed in pandas 1.0, so use .values instead.
# Cast to float64: the model below is converted with .double(), and integer
# CSV columns would otherwise produce integer tensors it cannot consume.
input_data = gyro[IN_COL].values.astype(np.float64)
target_data = gyro[OUT_COL].values.astype(np.float64)
predict_from = 200  # Use rows before this index to train, predict rows from it onwards

# Train data (rows 0 up to, but not including, predict_from).
# The slices used to be swapped with the test slices, contradicting the
# comments; they now match the documented split.
# NOTE(review): assumes the CSV rows are in chronological order - confirm.
# NOTE(review): `input` shadows the builtin, but the training cell reads this name.
input = torch.from_numpy(input_data[:predict_from])
target = torch.from_numpy(target_data[:predict_from])
if CUDA:
    input, target = input.cuda(), target.cuda()

# Test data (rows from predict_from onwards)
test_input = torch.from_numpy(input_data[predict_from:])
test_target = torch.from_numpy(target_data[predict_from:])
if CUDA:
    test_input, test_target = test_input.cuda(), test_target.cuda()

# Build the model in double precision to match the float64 data
seq = Sequence()
seq.double()
criterion = nn.MSELoss()
if CUDA:
    criterion.cuda()

# Use LBFGS as optimizer since the whole training set is loaded at once
optimizer = optim.LBFGS(seq.parameters(), lr=LR)

In [None]:
# Actual Training
future = 1000  # number of extra steps to extrapolate past the test input


def closure():
    """Recompute the training loss and its gradients; LBFGS calls this repeatedly."""
    optimizer.zero_grad()
    out = seq(input)
    # NOTE(review): `out` has one column per input column while `target` has
    # one column per OUT_COL, so MSELoss broadcasts here - confirm intended.
    loss = criterion(out, target)
    # .item() works on the 0-dim loss tensor; indexing its numpy view with [0] does not
    print('loss:', loss.item())
    loss.backward()
    return loss.float()


for i in range(NUM_EPOCH):
    print('STEP: ', i)
    optimizer.step(closure)
    # begin to predict; no gradients are needed for evaluation, so skip
    # building the autograd graph across all the extrapolated steps
    with torch.no_grad():
        pred = seq(test_input, future=future)
        loss = criterion(pred[:, :-future], test_target)
    print('test loss:', loss.item())
    y = pred.detach().cpu().numpy()
#     # draw the result
#     plt.figure(figsize=(30,10))
#     plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize=30)
#     plt.xlabel('x', fontsize=20)
#     plt.ylabel('y', fontsize=20)
#     plt.xticks(fontsize=20)
#     plt.yticks(fontsize=20)
#     def draw(yi, color):
#         plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth = 2.0)
#         plt.plot(np.arange(input.size(1), input.size(1) + future), yi[input.size(1):], color + ':', linewidth = 2.0)
#     draw(y[0], 'r')
#     draw(y[1], 'g')
#     draw(y[2], 'b')
#     plt.close()

# Checkpoint: persist the trained weights to saved_model.pt in the working directory
torch.save(seq.state_dict(), 'saved_model.pt')