# CSE 535 Project- CGM Prediction | RNN Algorithm

## Variable Exploration

In [757]:
#Importing Libraries
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM
import pandas as pd
import timeit
import numpy as np
np.random.seed(7)

from matplotlib import pyplot as plt

In [758]:
#Importing data
df = pd.read_csv('CSE535ProjectData.csv')
df2 = pd.read_csv('NewPatientCSE535ProjectData.csv')
print(df.info())
print(df2.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16384 entries, 0 to 16383
Data columns (total 3 columns):
Bolus    12960 non-null float64
CGM      15803 non-null float64
Meal     16384 non-null float64
dtypes: float64(3)
memory usage: 384.1 KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16384 entries, 0 to 16383
Data columns (total 3 columns):
Bolus    16384 non-null float64
CGM      15651 non-null float64
Meal     16384 non-null float64
dtypes: float64(3)
memory usage: 384.1 KB
None


In [759]:
#Printing Dataset to check for null values
print(df.isnull().sum())
print(df2.isnull().sum())

Bolus    3424
CGM       581
Meal        0
dtype: int64
Bolus      0
CGM      733
Meal       0
dtype: int64


In [760]:
#Getting rid of null values
df = df.dropna(axis = 0)
df2 = df2.dropna(axis = 0)

In [761]:
#Making datasets same length
df2 = df2.iloc[:len(df), :]
df2

Unnamed: 0,Bolus,CGM,Meal
0,0.00,136.0,0.0
1,0.00,135.0,0.0
2,0.00,135.0,0.0
3,0.00,135.0,0.0
4,0.00,136.0,0.0
...,...,...,...
12966,0.00,226.0,0.0
12967,0.00,228.0,0.0
12968,0.08,231.0,0.0
12969,0.00,232.0,0.0


## Data Preperation (Task 1)

In [762]:
#Converting series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True): #24 timesteps is every 2 hours
    n_vars = 1 if type(data) is list else data.shape[1]
    df = DataFrame(data)
    cols, names = list(), list()
     
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
        
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
            
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [763]:
#Feature Scaling (normalizing data)
values = df.iloc[:,1:3].values 
values2 = df2.iloc[:,1:3].values
values = values.astype('float')
values2 = values2.astype('float')

scaler = MinMaxScaler(feature_range = (0,0.99)) 
scaled = scaler.fit_transform(values)
scaled2 = scaler.fit_transform(values2)
scaled

array([[0.264  , 0.     ],
       [0.26125, 0.     ],
       [0.2585 , 0.     ],
       ...,
       [0.27225, 0.     ],
       [0.2695 , 0.     ],
       [0.2695 , 0.99   ]])

In [764]:
#Retrieving data from previous 24 timesteps (2 hours)
reframed = series_to_supervised(scaled, 24, 1) 
reframed2 = series_to_supervised(scaled2, 24,1)

#Dropping columns we don't want to predict
reframed.drop(reframed.columns[[48]], axis = 1, inplace = True) 
reframed2.drop(reframed2.columns[[48]], axis = 1, inplace = True) 
print(reframed.head())

    var1(t-24)  var2(t-24)  var1(t-23)  var2(t-23)  var1(t-22)  var2(t-22)  \
24     0.26400         0.0     0.26125         0.0     0.25850         0.0   
25     0.26125         0.0     0.25850         0.0     0.25575         0.0   
26     0.25850         0.0     0.25575         0.0     0.24750         0.0   
27     0.25575         0.0     0.24750         0.0     0.24475         0.0   
28     0.24750         0.0     0.24475         0.0     0.23925         0.0   

    var1(t-21)  var2(t-21)  var1(t-20)  var2(t-20)  ...  var2(t-5)  var1(t-4)  \
24     0.25575         0.0     0.24750         0.0  ...        0.0    0.24750   
25     0.24750         0.0     0.24475         0.0  ...        0.0    0.24475   
26     0.24475         0.0     0.23925         0.0  ...        0.0    0.24200   
27     0.23925         0.0     0.23650         0.0  ...        0.0    0.23925   
28     0.23650         0.0     0.23925         0.0  ...        0.0    0.23100   

    var2(t-4)  var1(t-3)  var2(t-3)  var1(t-

In [765]:
#Splitting data into train and test sets
reframedValues = reframed.values
reframedValues2 = reframed2.values

n_train_days = int(len(df) * .85) #85% data is train, 15% test

train = reframedValues[:n_train_days, :]
test = reframedValues[n_train_days+1:12482, :]

test2 = reframedValues2[:, :]

In [766]:
#Assigning inputs and output datasets
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]

test_X2, test_y2 = test2[:, :-1], test2[:, -1]

In [767]:
#Reshaping input to be 3 dimensions for RNN(samples, timesteps, features)
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))

test_X2 = test_X2.reshape((test_X2.shape[0], 1, test_X2.shape[1]))

print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

(10609, 1, 48) (10609,) (1848, 1, 48) (1848,)


## RNN Algorithm Creation (Task 2)

In [768]:
#Algorithm Instantiation
model = Sequential()

#Algorithm Development
model.add(LSTM(100, input_shape=(train_X.shape[1], train_X.shape[2]))) #Recurrent Layer
model.add(Dropout(0.4)) #Dropout Layer
model.add(Dense(15, activation = 'tanh')) #Fully Connected Layer
moddel.add(Dense(1, activation = 'sigmoid')) #Output Layer
model.compile(loss='sparse_categorical_crossentropy', optimizer= 'adam', metrics=['accuracy']) #Compiling the model

In [769]:
#Algorithm Implementation
def algorithm(): 
    history = model.fit(train_X, train_y, epochs = 3, batch_size=50, validation_data=(test_X, test_y), verbose=2, shuffle=False)
    #Best is epochs = 3, batch_size = 50

## Evaluating Model Performance (Task 2 )

In [770]:
#Training Accuracy
m = algorithm()
result1 = model.evaluate(train_X, train_y)
percentage = "{:.1%}".format(result1[1])
print("Our training accuracy is " + str(percentage))

Train on 10609 samples, validate on 1848 samples
Epoch 1/3
 - 11s - loss: 0.0000e+00 - acc: 0.8453 - val_loss: 0.0000e+00 - val_acc: 0.9594
Epoch 2/3
 - 2s - loss: 0.0000e+00 - acc: 0.8491 - val_loss: 0.0000e+00 - val_acc: 0.9594
Epoch 3/3
 - 2s - loss: 0.0000e+00 - acc: 0.8447 - val_loss: 0.0000e+00 - val_acc: 0.9594
Our training accuracy is 95.4%


In [771]:
#Testing Accuracy
result2 = model.evaluate(test_X, test_y)
percentage = "{:.1%}".format(result2[1])
print("Our testing accuracy is " + str(percentage))

Our testing accuracy is 95.9%


## Applying algorithm to new patient (Task 3): 

In [772]:
history = model.fit(train_X, train_y, epochs = 3, batch_size=50, validation_data=(test_X2, test_y2), verbose=2, shuffle=False) 

Train on 10609 samples, validate on 12458 samples
Epoch 1/3
 - 3s - loss: 0.0000e+00 - acc: 0.8395 - val_loss: 0.0000e+00 - val_acc: 0.9672
Epoch 2/3
 - 2s - loss: 0.0000e+00 - acc: 0.8486 - val_loss: 0.0000e+00 - val_acc: 0.9672
Epoch 3/3
 - 2s - loss: 0.0000e+00 - acc: 0.8448 - val_loss: 0.0000e+00 - val_acc: 0.9672


In [773]:
#Training Accuracy New Patient
result3 = model.evaluate(test_X2, test_y2)
percentage2 = "{:.1%}".format(result3[1])
print("Our testing accuracy for a new patient is " + str(percentage2))

Our testing accuracy for a new patient is 96.7%


## Execution Time Analysis (Task 4)

In [774]:
print("RNN Algorithm Execution Time: " + str(timeit.timeit(algorithm, number = 1)))

Train on 10609 samples, validate on 1848 samples
Epoch 1/3
 - 2s - loss: 0.0000e+00 - acc: 0.8488 - val_loss: 0.0000e+00 - val_acc: 0.9594
Epoch 2/3
 - 2s - loss: 0.0000e+00 - acc: 0.8484 - val_loss: 0.0000e+00 - val_acc: 0.9594
Epoch 3/3
 - 2s - loss: 0.0000e+00 - acc: 0.8486 - val_loss: 0.0000e+00 - val_acc: 0.9594
RNN Algorithm Execution Time: 5.672522912995191
