In [4]:
#code for AnDi 

In [5]:
import torch 
from torch import nn
import numpy as np
import os
import datetime # for timing


In [6]:
#for simplicity I will include all classes and functions used in this document

In [7]:
#creates an early stopping object
#see: https://github.com/Bjarten/early-stopping-pytorch
#used for training the model
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience.

    Call the instance once per epoch with the epoch's validation loss and the
    model. Whenever the loss improves, the model's ``state_dict`` is written
    to ``path``; after ``patience`` consecutive calls without improvement the
    ``early_stop`` flag is set to True (the caller is responsible for
    checking it and breaking out of the training loop).
    """
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print            
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0          # consecutive calls without improvement
        self.best_score = None    # best (negated) validation loss seen so far
        self.early_stop = False
        # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and checkpoint the model on improvement.

        Args:
            val_loss (float): validation loss of the current epoch.
            model (torch.nn.Module): model whose ``state_dict`` is saved on improvement.
        """
        # Work with the negated loss so that "larger is better".
        score = -val_loss

        if self.best_score is None:
            # First call: always counts as the best result so far.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            # Not better than the best score by at least `delta`.
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [8]:
#code modified from:
#https://github.com/Bjarten/early-stopping-pytorch/blob/master/MNIST_Early_Stopping_example.ipynb
#modified to add drop_last=True so that the data size doesn't
#have to be exactly divisible by the batch size

def create_loaders(train_data, test_data, batch_size, valid_size=0.2):
    """Build train / test / validation DataLoaders.

    A random ``valid_size`` fraction of ``train_data`` is held out for
    validation; the rest is used for training. Every loader uses
    ``drop_last=True``, so a trailing batch smaller than ``batch_size`` is
    discarded.

    Args:
        train_data: indexable dataset used for both training and validation.
        test_data: indexable dataset used for testing (iterated in order).
        batch_size (int): number of samples per batch.
        valid_size (float): fraction of ``train_data`` reserved for
            validation, in ``[0, 1)``. Default: 0.2

    Returns:
        tuple: ``(train_loader, test_loader, valid_loader)`` -- note that the
        test loader comes second, before the validation loader.

    Raises:
        ValueError: if ``valid_size`` is outside ``[0, 1)``.
    """
    if not 0.0 <= valid_size < 1.0:
        raise ValueError(f'valid_size must be in [0, 1), got {valid_size}')

    # shuffle the training indices and carve off the validation share
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # samplers draw (in random order) only from their own index subset
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_idx)
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(valid_idx)

    # training and validation loaders share the same underlying dataset
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=0,
                                               drop_last=True)

    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=0,
                                               drop_last=True)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=batch_size,
                                              num_workers=0,
                                              drop_last=True)

    return train_loader, test_loader, valid_loader

In [9]:
class ConejeroConvNet(nn.Module):
    """Conv1d -> bidirectional LSTM -> fully connected classifier.

    Expects input of shape ``(N, 1, input_size)``. Two valid (unpadded)
    convolutions followed by max pooling produce ``(N, 64, pooled_len)``.
    The LSTM is ``batch_first`` with ``input_size=pooled_len``, so it treats
    the 64 convolution channels as the sequence steps and emits
    ``(N, 64, 2*hidden_size)``. That is flattened and mapped to
    ``output_size`` logits.

    Args:
        input_size (int): length of each input sequence (trajectory).
        batch_size (int): nominal batch size. Only used by ``init_hidden``;
            the network itself accepts any batch size.
        hidden_size (int): number of hidden units per LSTM direction.
        device: stored on the instance for callers; the module does not move
            itself (use ``model.to(device)``). Default: 'cuda'
        output_size (int): number of output classes. Default: 5
    """
    def __init__(self, input_size, batch_size, hidden_size, device = 'cuda', output_size = 5):
        super().__init__()#can call like this as of python3

        #initialize some parameters
        self.input_size = input_size #length of seq or trajectory. also known as feature
        self.hidden_size = hidden_size #same as num neuron
        self.batch_size = batch_size 
        self.device = device
        self.output_size = output_size
        
        #more parameters for tuning
        self.conv_kernel_size = 3
        self.drop_prob = .2
        self.mp_kernel_size = 2
        self.num_lstm_layers = 3
        self.conv_out_channels = 64
        
        #do the Layers----------------

        #convolutional block
        #Dropout: randomly zeroes activations during training to reduce
        #over fitting; kept outputs are scaled by 1/(1-p)
        #MaxPooling: reduces the spatial size of the convolved features
        self.ConvBlock = nn.Sequential(
            nn.Conv1d(in_channels = 1, out_channels = 20, kernel_size = self.conv_kernel_size),
            nn.ReLU(),
            nn.Conv1d(in_channels = 20 , out_channels = self.conv_out_channels,
                      kernel_size = self.conv_kernel_size),
            nn.ReLU(),
            nn.Dropout(p=self.drop_prob),
            nn.MaxPool1d(kernel_size=self.mp_kernel_size)
        )

        #each unpadded convolution shortens the sequence by (kernel_size - 1);
        #pooling then divides the length by the pooling kernel size
        conv_out_len = self.input_size - 2*(self.conv_kernel_size - 1)
        pooled_len = conv_out_len // self.mp_kernel_size
        if pooled_len < 1:
            raise ValueError(f'input_size={input_size} is too short for the convolution block')
        
        #add lstm layer
        self.bi_lstm = nn.LSTM(input_size=pooled_len, 
                               hidden_size= self.hidden_size, bidirectional = True, 
                               num_layers = self.num_lstm_layers,
                               batch_first = True)
        
        #LinearLayers
        #flattened LSTM output per sample = (sequence steps = conv channels)
        #* (2 directions * hidden_size). The previous expression used
        #batch_size here, which only gave the right number for batch_size == 32.
        lstm_flat_dim = self.conv_out_channels * 2 * self.hidden_size
        self.linearOuts = nn.Sequential(
            nn.Linear(lstm_flat_dim, 1000),
            nn.ReLU(),
            nn.Linear(1000, 50),
            nn.ReLU(),
            nn.Linear(50, self.output_size)
        )
        
    def init_hidden(self, batch_size = None):
        """Return a zero tensor shaped like the LSTM's initial hidden/cell state.

        Shape is ``(2*num_lstm_layers, batch_size, hidden_size)``; the factor
        2 accounts for the two directions. ``batch_size`` defaults to
        ``self.batch_size``. ``forward`` does not call this.
        """
        if batch_size is None:
            batch_size = self.batch_size
        return (torch.zeros(2*self.num_lstm_layers, batch_size, self.hidden_size))
               
    def forward(self, X):
        """Compute class logits of shape ``(N, output_size)`` for input ``(N, 1, input_size)``."""
        #convolutional
        out = self.ConvBlock(X)
        
        #No initial state is passed: nn.LSTM then starts from zeros on the
        #input's device. The earlier code allocated zero states via a global
        #`device` but never handed them to the LSTM.
        #The final states are kept on the instance, as before.
        out, (self.hidden, self.cellState) = self.bi_lstm(out)

        #reshape so I can feed to linear layer
        out = out.contiguous().view(out.size(0),-1)

        #linear layers with ReLU activations in between
        out = self.linearOuts(out)
        return out

In [10]:
# Import the data used for training.
# The task csv is the competition file (i.e. task2, dimension 1) with its
# first column (the dimension) removed, already padded to a max length of 1000.
# unsqueeze(1) adds the channel axis; the author reports the resulting shape as
# torch.Size([189810, 1, 1000]), 189810 being the number of trajectories
# (the "300K" in the file name is misleading: RAM ran out while writing it).
task = torch.from_numpy(
    np.genfromtxt('300KT2D1task2_2.csv', delimiter=',')
).unsqueeze(1)

# Reference labels, likewise with the first column (dimension) removed.
ref = torch.from_numpy(np.genfromtxt('300KT2D1ref2_2.csv', delimiter=','))

# To train on more data, import further files and add their pairs to `data`.
# The author found ~300k trajectories to be optimal; beyond that training
# did not improve.

# One (trajectory, label) pair per sample; this list is what the DataLoaders index into.
data = [(task[i], ref[i]) for i in range(len(ref))]

# First 150000 pairs for training (create_loaders holds part of them out for
# validation), everything after that for testing.
BATCH_SIZE = 32
train = data[:150000]
test = data[150000:]

train_loader, test_loader, valid_loader = create_loaders(train, test, BATCH_SIZE)



In [11]:
# Pick the compute device: 'cuda' when a GPU is visible to torch, otherwise 'cpu'.
# GPU check adapted from:
# https://blog.floydhub.com/a-beginners-guide-on-recurrent-neural-networks-with-pytorch/
is_cuda = torch.cuda.is_available()  # True only if a GPU is available

# `device` is reused by the later cells (model placement, batches, loss).
device = torch.device("cuda" if is_cuda else "cpu")
print("GPU is available" if is_cuda else "GPU not available, CPU used")

GPU is available


In [12]:
# Hyperparameters (BATCH_SIZE comes from the data-loading cell above).
HIDDEN_SIZE = 32
LEARN_RATE = 1e-3
# NOTE(review): the training call below passes n_epochs=200 directly, so
# NUM_EPOCH is not referenced by the visible cells -- confirm which is intended.
NUM_EPOCH = 100

# Build the network for 1000-step trajectories and 5 output classes,
# then move its parameters to the selected device.
model = ConejeroConvNet(
    input_size=1000,
    batch_size=BATCH_SIZE,
    hidden_size=HIDDEN_SIZE,
    device=device,
    output_size=5,
)

model.to(device)

ConejeroConvNet(
  (ConvBlock): Sequential(
    (0): Conv1d(1, 20, kernel_size=(3,), stride=(1,))
    (1): ReLU()
    (2): Conv1d(20, 64, kernel_size=(3,), stride=(1,))
    (3): ReLU()
    (4): Dropout(p=0.2, inplace=False)
    (5): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (bi_lstm): LSTM(498, 32, num_layers=3, batch_first=True, bidirectional=True)
  (linearOuts): Sequential(
    (0): Linear(in_features=4096, out_features=1000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1000, out_features=50, bias=True)
    (3): ReLU()
    (4): Linear(in_features=50, out_features=5, bias=True)
  )
)

In [13]:
#set criterion and optimizer
#both are module-level names that train_model and the test cell read directly
criterion = nn.CrossEntropyLoss().to(device)
#referenced as torch.optim because `optim` itself is never imported in this notebook
optimizer = torch.optim.Adam(model.parameters(), lr=LEARN_RATE)


In [14]:
#function used to train the model
#function modified from: 
#see: https://github.com/Bjarten/early-stopping-pytorch
def train_model(model, train_loader, test_loader, valid_loader, batch_size, patience, n_epochs, device):
    """Train ``model`` with early stopping on the validation loss.

    Relies on the module-level ``criterion`` and ``optimizer`` defined in an
    earlier cell; they are not passed in.

    Args:
        model: network to train (already moved to ``device``).
        train_loader: yields ``(inputs, targets)`` training batches.
        test_loader: unused here; kept so existing calls keep working.
        valid_loader: yields ``(inputs, targets)`` validation batches.
        batch_size: unused here; kept so existing calls keep working.
        patience (int): epochs without validation improvement before stopping.
        n_epochs (int): maximum number of epochs.
        device: device each batch is moved to.

    Returns:
        tuple: ``(model, avg_train_losses, avg_valid_losses)`` where the model
        has the best checkpoint loaded and the lists hold one average loss per
        completed epoch.
    """
    print('DEVICE: ', device)
    #modified from: https://github.com/Bjarten/early-stopping-pytorch/blob/master/MNIST_Early_Stopping_example.ipynb
    # average training / validation loss of each epoch
    avg_train_losses = []
    avg_valid_losses = []

    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=patience, verbose=True)

    # width used to right-align the epoch counter in the progress line
    epoch_len = len(str(n_epochs))

    for epoch in range(1, n_epochs + 1):
        # per-batch losses of the current epoch only
        train_losses = []
        valid_losses = []

        ###################
        # train the model #
        ###################
        model.train() # prep model for training
        for inputs, targets in train_loader:
            inputs = inputs.to(device).float()#must be float
            targets = targets.to(device).long()#must cast to long

            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(inputs)
            # calculate the loss
            loss = criterion(output, targets)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # record training loss
            train_losses.append(loss.item())

        ######################    
        # validate the model #
        ######################
        model.eval() # prep model for evaluation
        # no gradients are needed for validation; skipping graph construction
        # saves memory and time without changing the loss values
        with torch.no_grad():
            for inputs, targets in valid_loader:
                output = model(inputs.to(device).float())
                loss = criterion(output, targets.to(device).long())
                valid_losses.append(loss.item())

        # calculate average loss over the epoch
        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)

        print_msg = (f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}] ' +
                     f'train_loss: {train_loss:.5f} ' +
                     f'valid_loss: {valid_loss:.5f}')

        print(print_msg)

        # early_stopping needs the validation loss to check if it has decreased, 
        # and if it has, it will make a checkpoint of the current model
        early_stopping(valid_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    # load the last checkpoint with the best model, from wherever
    # early_stopping actually wrote it
    model.load_state_dict(torch.load(early_stopping.path))

    return  model, avg_train_losses, avg_valid_losses

In [15]:
# Run the training: at most 200 epochs, stopping early after 20 epochs
# without an improvement of the validation loss.
# Returns the model with its best checkpoint loaded, plus the per-epoch
# average training and validation losses.
mod, trainLoss, validLoss = train_model(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    valid_loader=valid_loader,
    batch_size=BATCH_SIZE,
    patience=20,
    n_epochs=200,
    device=device,
)

DEVICE:  cuda
[  1/200] train_loss: 1.14736 valid_loss: 0.92465
Validation loss decreased (inf --> 0.924648).  Saving model ...
[  2/200] train_loss: 0.78689 valid_loss: 0.68925
Validation loss decreased (0.924648 --> 0.689245).  Saving model ...
[  3/200] train_loss: 0.64999 valid_loss: 0.59310
Validation loss decreased (0.689245 --> 0.593104).  Saving model ...
[  4/200] train_loss: 0.58235 valid_loss: 0.55701
Validation loss decreased (0.593104 --> 0.557013).  Saving model ...
[  5/200] train_loss: 0.54885 valid_loss: 0.55926
EarlyStopping counter: 1 out of 20
[  6/200] train_loss: 0.51817 valid_loss: 0.54657
Validation loss decreased (0.557013 --> 0.546573).  Saving model ...
[  7/200] train_loss: 0.49788 valid_loss: 0.52402
Validation loss decreased (0.546573 --> 0.524023).  Saving model ...
[  8/200] train_loss: 0.47800 valid_loss: 0.51224
Validation loss decreased (0.524023 --> 0.512244).  Saving model ...
[  9/200] train_loss: 0.46276 valid_loss: 0.54483
EarlyStopping counter: 

KeyboardInterrupt: 

In [None]:
#test the model
#see: https://github.com/Bjarten/early-stopping-pytorch
# accumulators for the test loss and the per-class accuracy
test_loss = 0.0
num_evaluated = 0  # samples actually passed through the model
num_classes = 5
class_correct = [0.] * num_classes
class_total = [0.] * num_classes
classes = [i + 1 for i in range(num_classes)]  # 1-based class names for printing
model.eval() # prep model for evaluation

# Loop variables are named inputs/targets so the global dataset list `data`
# from the loading cell is not overwritten.
with torch.no_grad():  # evaluation only: no autograd graph needed
    for inputs, targets in test_loader:
        inputs = inputs.to(device).float()
        targets = targets.to(device).long()

        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(inputs)
        # calculate the loss (mean over the batch)
        loss = criterion(output, targets)
        # weight by the batch size so the final division gives a per-sample mean
        batch_len = inputs.size(0)
        test_loss += loss.item() * batch_len
        num_evaluated += batch_len
        # predicted class = index of the largest output score
        _, pred = torch.max(output, 1)
        # compare predictions to true label
        correct = pred.eq(targets.view_as(pred))
        # tally per-class hits and totals
        for label, hit in zip(targets.tolist(), correct.tolist()):
            class_correct[label] += hit
            class_total[label] += 1

# calculate and print avg test loss
# Divide by the samples actually evaluated: the loader drops the last
# incomplete batch, so len(test_loader.dataset) would overcount.
test_loss = test_loss / num_evaluated if num_evaluated else float('nan')
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(num_classes):
    print('classTotal: ', class_total[i])
    if class_total[i] > 0:
        print('Test Accuracy of Class %5s: %2d%% (%2d/%2d)' % (
            str(i+1), 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        # reached when no evaluated sample carried this class label
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

if np.sum(class_total) > 0:
    print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * np.sum(class_correct) / np.sum(class_total),
        np.sum(class_correct), np.sum(class_total)))
else:
    print('\nTest Accuracy (Overall): N/A (no samples evaluated)')




In [None]:
# Audible signal that the preceding cells have finished:
# plays a sine tone through the external `play` command.
duration = 2   # seconds
freq = 440  # Hz

beep_command = 'play -nq -t alsa synth {} sine {}'.format(duration, freq)
os.system(beep_command)