## Load Data - Subaru + VW

In [2]:
#DATASET PREPERATION
%matplotlib inline
import torch
import random
import torch.nn as nn
import pandas as pd
import numpy as np
from torchvision import transforms
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch import autograd
from torch.autograd import Variable
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
import scipy.io as sio

data = sio.loadmat('../accelane_mat_v3/SBSB_6Khz_preprocessed_AccZ.mat')
start_no = 0
range_len = 800000
test_set = np.zeros((1,range_len))
test_datapoints = [4,7,14,17,25,21,36,33]
num_datapoints = 20
labels = []
for i in range (num_datapoints):     
    if i < 10:
        labels.append(0)
    else:
        labels.append(1)
    varName = "accZ_L{}T{}_p".format(2 if i < 10 else 3, (i%10)+1)
    curr = np.asarray(data[varName][start_no : start_no+range_len]).reshape(1,range_len)
    if i in test_datapoints:
        test_set = np.append(test_set, curr, axis = 0)
data = sio.loadmat('../accelane_mat_v3/VWVW_preprocessed_AccZ.mat')
for i in range (num_datapoints):
    if i < 10:
        labels.append(0)
    else:
        labels.append(1)
    varName = "accZ_L{}T{}_p".format(2 if i < 10 else 3, (i%10)+1)
    curr = np.asarray(data[varName][start_no : start_no+range_len]).reshape(1,range_len)
    if i in test_datapoints:
        test_set = np.append(test_set, curr, axis = 0)

test_set = np.delete(test_set, (0), axis=0)
training_size = (num_datapoints*2) - len(test_datapoints)
test_labels = []
for i in np.arange(len(labels)):
    if i in test_datapoints:
        test_labels.append(labels[i])
test_labels = np.asarray(test_labels).reshape(len(test_datapoints),1)
test_set = np.append(test_set, test_labels, axis = 1)
test_targets_numpy = test_set[:,-1]
test_features_numpy = test_set[:,:-1]
print("Test Features numpy {}".format(test_features_numpy.shape))
print("Test Targets numpy {}".format(test_targets_numpy.shape))
print("Data points chosen for test: {}".format(test_datapoints))

Test Features numpy (8, 800000)
Test Targets numpy (8,)
Data points chosen for test: [4, 7, 14, 17, 25, 21, 36, 33]


## Load Training and Validation Data

In [2]:
import tables
train_features_numpy_filename = "data/TrainFeatures800K.h5"
# train_features_second_numpy_filename = "data/TrainFeatures800K2.h5"
train_targets_numpy_filename = "data/TrainLabels800K.h5"

try:
    train_features_file = tables.open_file(train_features_numpy_filename, mode='r')
#     train_features_second_file = tables.open_file(train_features_second_numpy_filename, mode='r')
    train_targets_file = tables.open_file(train_targets_numpy_filename, mode='r')
    train_features_file.root.data.truncate(296952)

    print("Num Samples In Storage: Features {}, Labels {}".format(train_features_file.root.data.shape, train_targets_file.root.data.shape))
    np.random.seed(10)
    index_list = np.random.randint(296952,size=500)
    train_len = int(len(index_list) * 0.8)
    train_features_numpy_tr = (train_features_file.root.data[i] for i in index_list[:train_len])
    train_targets_numpy_tr = (train_targets_file.root.data[i] for i in index_list[:train_len])
    train_features_numpy_val = (train_features_file.root.data[i] for i in index_list[train_len:])
    train_targets_numpy_val = (train_targets_file.root.data[i] for i in index_list[train_len:])
finally:
    pass
#     train_features_file.close()
#     train_targets_file.close()
#     train_features_second_file.close()


Num Samples In Storage: Features (296952, 800000), Labels (371712, 1)


1) for a given epoch
    take a drive
    split them into 20k lengths based on stride
    batch these values together
    train the lstm on this batch
    reset state

## Configuration

In [17]:
num_layers = 2
output_dim = 2
hidden_dim = 2#200
num_classes = 2
input_dim = 100000
seq_length = 8
num_epochs = 1
embedding_dim = 991#491
lr = 0.01
stride = input_dim
batch_size = int((range_len-(seq_length*input_dim))/stride + 1)
print("batch_size for model: ", batch_size)

batch_size for model:  1


## Batching Function

In [25]:
class DoBatches:
    def __init__(self, length, stride, signal):
        self.length = length
        self.stride = stride
        self.signal = signal
        self.curr_index = 0
    def getNextBatch(self):
        if (self.curr_index >= len(self.signal)):
            return
        upper_lim = min(self.curr_index + self.length, len(self.signal))
        to_ret =  self.signal[self.curr_index:upper_lim, 0]
        self.curr_index += self.stride
        done = False
        if (self.length > len(to_ret)):
            to_ret = np.pad(to_ret, (0,self.length - len(to_ret)), 'constant', constant_values=(0))
            done = True
        return to_ret.reshape(self.length, 1), done

## Model Design

In [36]:
class LSTMNet(nn.Module):

    def __init__(self, input_dim, embedding_dim, seq_length, hidden_dim, batch_size, output_dim, num_layers = 1):
        super(LSTMNet, self).__init__()
        self.input_dim = input_dim 
        self.embedding_dim = embedding_dim 
        self.seq_length = seq_length 
        self.hidden_dim = hidden_dim 
        self.batch_size = batch_size 
        self.num_layers = num_layers 
        self.output_dim = output_dim 
        
        self.embedding = nn.Sequential( # 10,000
            
            nn.AvgPool1d(1000, stride = 100)

        )

        self.lstm = nn.GRU(input_size = self.embedding_dim, hidden_size = self.hidden_dim, num_layers = self.num_layers, batch_first = True)
        
        #self.fc = nn.Linear(self.hidden_dim, self.output_dim)

    def init_hidden(self):
        return Variable(torch.zeros(self.num_layers, self.batch_size, self.hidden_dim), requires_grad = False)
    
    def forward(self, x):
        
        x = self.embedding(x)
        
        lstm_out, self.hidden = self.lstm(x)
        
       # out = self.fc(self.hidden[-1].view(-1,self.hidden_dim))
        
        return lstm_out#self.hidden[-1]


In [37]:
model = LSTMNet(input_dim = input_dim, 
                embedding_dim = embedding_dim,
                seq_length = seq_length,
                hidden_dim = hidden_dim,
                batch_size = batch_size, 
                output_dim = output_dim, 
                num_layers = num_layers)

# model  = model.cuda()

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=lr)

## Model Training

In [38]:
import tables
num_epochs = 10
loss_list = []
iteration_list = []
accuracy_list = []

train_features_numpy_filename = "data/TrainFeatures800K.h5"
train_targets_numpy_filename = "data/TrainLabels800K.h5"
np.random.seed(10)
train_features_file = tables.open_file(train_features_numpy_filename, mode='r')
train_targets_file = tables.open_file(train_targets_numpy_filename, mode='r')
index_list = np.arange(5192)
np.random.shuffle(index_list)
train_sample_size = 1000
index_list = index_list[:train_sample_size]
train_len = int(.9*train_sample_size)
# max_fea = 0
# min_fea = 0

# for i in range(0, train_sample_size, 100):
#     data_to_train = train_features_file.root.data[i:i+100] #100X800000
#     max_fea = max(max_fea, np.max(data_to_train))
#     min_fea = min(min_fea, np.min(data_to_train))
# scale = (1 - (-1))/(max_fea - min_fea) 

for epoch in range(num_epochs):  
    train_features_numpy_tr = (train_features_file.root.data[i] for i in index_list[:train_len])
    train_targets_numpy_tr = (train_targets_file.root.data[i] for i in index_list[:train_len])
    #training through dataset
    drive_num = 0
    for (drive, label) in zip(train_features_numpy_tr, train_targets_numpy_tr):   
        model.hidden = model.init_hidden()
        #Rescale data so that max is <= 1
        #drive = drive*scale + (-1) - min_fea*scale 
        batching_obj = DoBatches(length=input_dim*seq_length, stride=stride,signal = drive.reshape(800000,1))
        
        this_drive_batches = np.zeros((input_dim*seq_length,1))
        this_batch, is_done = batching_obj.getNextBatch()
        num_batches = 0
        while(this_batch is not None and this_batch.shape[0] > 0 and not is_done):
            num_batches += 1
            this_drive_batches = np.append(this_drive_batches, this_batch, axis = 1)
            this_batch, is_done = batching_obj.getNextBatch()
        this_drive_batches = np.delete(this_drive_batches, (0), axis=1) 
        
        features_curr_drive_batch_tensor = torch.from_numpy(np.array(this_drive_batches)).type(torch.FloatTensor)
        
        train = Variable(features_curr_drive_batch_tensor.view((batch_size,seq_length,input_dim)))
        
#        labels = np.repeat(int(label),num_batches)
        labels = np.full((num_batches*seq_length,),int(label))
        targets_curr_drive_batch_tensor = torch.from_numpy(labels).type(torch.LongTensor)
        targets = Variable(targets_curr_drive_batch_tensor)
        optimizer.zero_grad()
        outputs = model(train)  
        outputs = outputs.reshape(-1,2)
        loss = criterion(outputs, targets)
        #s = torch.sum(model.lstm.weight_hh_l0.chunk(3,0)[1])
        loss.backward()
        optimizer.step()   
        drive_num += 1

        if (drive_num%10 == 0):
            #validation
            correct = 0
            total = 0
            train_features_numpy_val = (train_features_file.root.data[i] for i in index_list[train_len:])
            train_targets_numpy_val = (train_targets_file.root.data[i] for i in index_list[train_len:])
            for test_drive, test_label in zip(train_features_numpy_val, train_targets_numpy_val):        
                #test_drive = test_drive*scale + (-1) - min_fea *scale 
                total += 1
                batching_obj = DoBatches(length=input_dim*seq_length, stride=stride,signal = test_drive.reshape(800000,1))

                this_drive_batches = np.zeros((input_dim*seq_length,1))

                this_batch, is_done = batching_obj.getNextBatch()
                num_batches = 0
                while(this_batch is not None and this_batch.shape[0] > 0 and not is_done):
                    num_batches += 1
                    this_drive_batches = np.append(this_drive_batches, this_batch, axis = 1)
                    this_batch, is_done = batching_obj.getNextBatch()


                this_drive_batches = np.delete(this_drive_batches, (0), axis=1) 

                features_curr_drive_batch_tensor = torch.from_numpy(np.array(this_drive_batches)).type(torch.FloatTensor)

                test = Variable(features_curr_drive_batch_tensor.view((batch_size,seq_length,input_dim)))

                test_labels = np.repeat(int(test_label),num_batches)
                
                #test_labels = np.full((num_batches*seq_length,),int(label))
        
                outputs = model(test)
                
                #outputs = outputs.reshape(-1, num_classes)
                outputs = outputs[:, -1]
                predicted = torch.max(outputs.data, 1)[1].data.numpy()
                correct += (predicted == test_labels).sum()
            accuracy = 100*correct/float(total*batch_size)
            print("Epoch: {} Drives Done: {} Accuracy: {}".format(epoch, drive_num, accuracy))
    
    #loss_list.append(loss.data)
    #accuracy_list.append(accuracy)

    #print('Epoch: {} Loss: {} Accuracy: {} %'.format(epoch+1, loss.data.item(), accuracy))

# plt.subplot(2,1,1)
# plt.title('Final Loss curve')
# plt.plot(loss_list)
# plt.subplot(2,1,2)
# plt.title('Final Validation Curve')
# plt.plot(accuracy_list)
train_features_file.close()
train_targets_file.close()

Epoch: 0 Drives Done: 10 Accuracy: 53.0
Epoch: 0 Drives Done: 20 Accuracy: 53.0
Epoch: 0 Drives Done: 30 Accuracy: 62.0
Epoch: 0 Drives Done: 40 Accuracy: 73.0
Epoch: 0 Drives Done: 50 Accuracy: 81.0
Epoch: 0 Drives Done: 60 Accuracy: 73.0
Epoch: 0 Drives Done: 70 Accuracy: 77.0
Epoch: 0 Drives Done: 80 Accuracy: 85.0
Epoch: 0 Drives Done: 90 Accuracy: 85.0
Epoch: 0 Drives Done: 100 Accuracy: 88.0
Epoch: 0 Drives Done: 110 Accuracy: 86.0
Epoch: 0 Drives Done: 120 Accuracy: 87.0
Epoch: 0 Drives Done: 130 Accuracy: 79.0
Epoch: 0 Drives Done: 140 Accuracy: 80.0
Epoch: 0 Drives Done: 150 Accuracy: 83.0
Epoch: 0 Drives Done: 160 Accuracy: 82.0
Epoch: 0 Drives Done: 170 Accuracy: 84.0
Epoch: 0 Drives Done: 180 Accuracy: 89.0
Epoch: 0 Drives Done: 190 Accuracy: 90.0
Epoch: 0 Drives Done: 200 Accuracy: 90.0
Epoch: 0 Drives Done: 210 Accuracy: 89.0
Epoch: 0 Drives Done: 220 Accuracy: 89.0
Epoch: 0 Drives Done: 230 Accuracy: 91.0
Epoch: 0 Drives Done: 240 Accuracy: 89.0
Epoch: 0 Drives Done: 250

Epoch: 2 Drives Done: 220 Accuracy: 99.0
Epoch: 2 Drives Done: 230 Accuracy: 99.0
Epoch: 2 Drives Done: 240 Accuracy: 99.0
Epoch: 2 Drives Done: 250 Accuracy: 99.0
Epoch: 2 Drives Done: 260 Accuracy: 99.0
Epoch: 2 Drives Done: 270 Accuracy: 99.0
Epoch: 2 Drives Done: 280 Accuracy: 99.0
Epoch: 2 Drives Done: 290 Accuracy: 99.0
Epoch: 2 Drives Done: 300 Accuracy: 99.0
Epoch: 2 Drives Done: 310 Accuracy: 99.0
Epoch: 2 Drives Done: 320 Accuracy: 99.0
Epoch: 2 Drives Done: 330 Accuracy: 99.0
Epoch: 2 Drives Done: 340 Accuracy: 99.0
Epoch: 2 Drives Done: 350 Accuracy: 99.0
Epoch: 2 Drives Done: 360 Accuracy: 99.0
Epoch: 2 Drives Done: 370 Accuracy: 99.0
Epoch: 2 Drives Done: 380 Accuracy: 99.0
Epoch: 2 Drives Done: 390 Accuracy: 99.0
Epoch: 2 Drives Done: 400 Accuracy: 99.0
Epoch: 2 Drives Done: 410 Accuracy: 99.0
Epoch: 2 Drives Done: 420 Accuracy: 99.0
Epoch: 2 Drives Done: 430 Accuracy: 99.0
Epoch: 2 Drives Done: 440 Accuracy: 99.0
Epoch: 2 Drives Done: 450 Accuracy: 99.0
Epoch: 2 Drives 

KeyboardInterrupt: 

## Model Testing

In [39]:
test_features = torch.from_numpy(test_features_numpy).type(torch.FloatTensor)
test_targets = torch.from_numpy(test_targets_numpy).type(torch.FloatTensor)
test_totalDataset = torch.utils.data.TensorDataset(test_features,test_targets)
batch_size_test = test_features_numpy.shape[0]
test_loader = torch.utils.data.DataLoader(test_totalDataset, batch_size = batch_size_test, shuffle = True)


print('Testing dataset dimensions')
print('Shape of Features Dataset')
print(test_features.size())
print('Shape of Targets Dataset')
print(test_targets.size())
print("Batch Size: {}".format(batch_size_test))

Testing dataset dimensions
Shape of Features Dataset
torch.Size([8, 800000])
Shape of Targets Dataset
torch.Size([8])
Batch Size: 8


In [56]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

accuracy_list = []
f1_score_list = []
precision_list = []
recall_list = []
y_pred =[]
y_true = []
correct = 0
total = 0
with torch.no_grad():
    for test_drive, test_label in zip(test_features_numpy, test_targets_numpy):            
        total += 1
        batching_obj = DoBatches(length=input_dim*seq_length, stride=stride,signal = test_drive.reshape(800000,1))

        this_drive_batches = np.zeros((input_dim*seq_length,1))

        this_batch, is_done = batching_obj.getNextBatch()
        num_batches = 0
        while(this_batch is not None and this_batch.shape[0] > 0 and not is_done):
            num_batches += 1
            this_drive_batches = np.append(this_drive_batches, this_batch, axis = 1)
            this_batch, is_done = batching_obj.getNextBatch()


        this_drive_batches = np.delete(this_drive_batches, (0), axis=1) 

        features_curr_drive_batch_tensor = torch.from_numpy(np.array(this_drive_batches)).type(torch.FloatTensor)

        test = Variable(features_curr_drive_batch_tensor.view((batch_size,seq_length,input_dim)))

        test_labels = np.repeat(int(test_label),num_batches)

        #test_labels = np.full((num_batches*seq_length,),int(label))

        outputs = model(test)

        #outputs = outputs.reshape(-1, num_classes)
        outputs = outputs[:, -1]
        predicted = torch.max(outputs.data, 1)[1].data.numpy()
        correct += (predicted == test_labels).sum()
    accuracy = 100*correct/float(total*batch_size)
    print(accuracy)
#     for i, data in enumerate(test_loader):
#             samples, labels = data
#             samples = Variable(samples.view(batch_size_test,1,-1))
            
#             model.eval()
            
#             outputs = model(samples)
#             outputs = outputs.view(batch_size_test, n_classes)
            
#             predictions = torch.argmax(outputs, 1)
#             targets = labels
            
#             y_pred.extend(predictions)
#             y_true.extend(targets)


# conf_matrix = confusion_matrix(y_true, y_pred)
# print(conf_matrix)
# print("Accuracy Score: {}".format(accuracy_score(y_true, y_pred) * 100))
# print("F1 Score: {}". format(f1_score(y_true, y_pred) * 100))
# print("Precision Score: {}".format(precision_score(y_true, y_pred) * 100))
# print("Recall Score: {}".format(recall_score(y_true, y_pred)* 100))

62.5


In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(2, 5, sharex=True, sharey=True)

for i in range(10):
        ax[i%2][i%5].plot(np.arange(50000),vw_features_numpy[i,:])
plt.title("Volkswagen Lane 2")


In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(2, 5, sharex=True, sharey=True)

for i in range(10):
        ax[i%2][i%5].plot(np.arange(50000),features_numpy[i,:])
plt.title("Subaru Lane 2")