In [1]:
import torch
import numpy as np
from torch.nn import CTCLoss, Linear, Module, LSTM, Conv1d, ReLU, Dropout#, LogSoftmax
from torch.nn.functional import log_softmax, softmax
from torch.utils.data import Dataset, DataLoader
import torchvision
from torch import optim
from ctcdecode import CTCBeamDecoder
from time import time
from Levenshtein import distance
import warnings
warnings.simplefilter(action='ignore', category=DeprecationWarning)

N_PHONEMES = 41
PHONEME_LIST = [
    " ",
    "SIL",
    "SPN",
    "AA",
    "AE",
    "AH",
    "AO",
    "AW",
    "AY",
    "B",
    "CH",
    "D",
    "DH",
    "EH",
    "ER",
    "EY",
    "F",
    "G",
    "H",
    "IH",
    "IY",
    "JH",
    "K",
    "L",
    "M",
    "N",
    "NG",
    "OW",
    "OY",
    "P",
    "R",
    "S",
    "SH",
    "T",
    "TH",
    "UH",
    "UW",
    "V",
    "W",
    "Y",
    "Z",
    "ZH"
]
# PHONEME_LIST.append('')

PHONEME_MAP = [
    " ",
    ".", #SIL
    "!", #SPN
    "a", #AA
    "A", #AE
    "h", #AH
    "o", #AO
    "w", #AW
    "y", #AY
    "b", #B
    "c", #CH
    "d", #D
    "D", #DH
    "e", #EH
    "r", #ER
    "E", #EY
    "f", #F
    "g", #G
    "H", #H
    "i", #IH 
    "I", #IY
    "j", #JH
    "k", #K
    "l", #L
    "m", #M
    "n", #N
    "N", #NG
    "O", #OW
    "Y", #OY
    "p", #P 
    "R", #R
    "s", #S
    "S", #SH
    "t", #T
    "T", #TH
    "u", #UH
    "U", #UW
    "v", #V
    "W", #W
    "?", #Y
    "z", #Z
    "Z" #ZH
]

In [2]:
class MyDataset(Dataset):
    def __init__(self, X_path, Y_path=None):
        self.X = np.load(X_path, allow_pickle=True)
        if Y_path:
            self.Y = np.load(Y_path, allow_pickle=True)
        else:
            self.Y=None
        self.length = self.X.shape[0]

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        X = self.X[index]
        if self.Y!=None:
            Y = self.Y[index] # to make sure 0 is reserved for blank
            return torch.DoubleTensor(X), torch.DoubleTensor(Y)+1, torch.LongTensor([X.shape[0]]), torch.LongTensor([Y.shape[0]]) # may have to do .float()/.long()
        else:
            return torch.DoubleTensor(X), torch.LongTensor([X.shape[0]])
class TestCollateFunction(object):
    def __init__(self):
        return
    def __call__(self, batch):
        X = []
        X_len = []
        for tup in batch:
            X.append(tup[0])
            X_len.append(tup[1])
        # X = torch.stack(X, 0) # stack?                 NEED TO SEPARATE, NO CAT
        # Y = torch.stack(Y,0)
        X = pad_sequence(X)
        X = X.permute(1,2,0)
        X_len = torch.cat(X_len,0).long()
        return X, X_len
    
class CollateFunction(object):
    def __init__(self):
        return
    def __call__(self, batch):
        X = []
        Y = []
        X_len = []
        Y_len = []
        for tup in batch:
            X.append(tup[0])
            Y.append(tup[1])
            X_len.append(tup[2])
            Y_len.append(tup[3])
        # X = torch.stack(X, 0) # stack?                 NEED TO SEPARATE, NO CAT
        # Y = torch.stack(Y,0)
        X = pad_sequence(X)
        X = X.permute(1,2,0)
        Y = pad_sequence(Y).permute(1,0) # Batch X 
        X_len = torch.cat(X_len,0)#.long()
        Y_len = torch.cat(Y_len,0)#.long()
        return X, Y, X_len, Y_len

In [3]:
dev_path = 'dev.npy'
dev_labels_path = 'dev_labels.npy'
val_dataset = MyDataset(dev_path, dev_labels_path)

train_path = 'train.npy'
train_labels_path = 'train_labels.npy'
train_dataset = MyDataset(train_path, train_labels_path)

cuda = True
device = torch.device("cuda" if cuda else "cpu") 
numworkers = 4 if cuda else 0
batchsize = 64 if cuda else 64

collatefn = CollateFunction()
val_loader_args = dict(shuffle=False, batch_size=batchsize, num_workers=numworkers, pin_memory=True, collate_fn=collatefn) if cuda else dict(shuffle=False, batch_size=batchsize, collate_fn=collatefn)
val_loader = DataLoader(val_dataset, **val_loader_args)

train_loader_args = dict(shuffle=True, batch_size=batchsize, num_workers=numworkers, pin_memory=True, collate_fn=collatefn) if cuda else dict(shuffle=True, batch_size=batchsize, collate_fn=collatefn)
train_loader = DataLoader(train_dataset, **train_loader_args)

print('Data Loaded')

Data Loaded


In [4]:
VAL_ROWS = []
for i in range(len(val_dataset)):
#     print(val_dataset[i][1],val_dataset[i][3].item())
    row=''
    for j in range(val_dataset[i][3].item()):
        row += PHONEME_MAP[val_dataset[i][1][j].long().item()]
    VAL_ROWS.append(row)
VAL_ROWS[0], val_dataset[0][1]

('.mistrkWiltrizDIhpashlhvDhmidhlklAshz.hndWIrglAdtUWelkhmHizgasphl.',
 tensor([ 1., 24., 19., 31., 33., 14., 22., 38., 19., 23., 33., 14., 19., 40.,
         12., 20.,  5., 29.,  3., 31.,  5., 23.,  5., 37., 12.,  5., 24., 19.,
         11.,  5., 23., 22., 23.,  4., 31.,  5., 40.,  1.,  5., 25., 11., 38.,
         20., 14., 17., 23.,  4., 11., 33., 36., 38., 13., 23., 22.,  5., 24.,
         18., 19., 40., 17.,  3., 31., 29.,  5., 23.,  1.], dtype=torch.float64))

In [8]:
# ('.mistrkWiltrizDIhpashlhvDhmidhlklAshz.hndWIrglAdtUWelkhmHizgasphl.',
#  tensor([ 0., 24.,  5., 29., 18., 39., 23., 18., 30., 32., 13.,  1., 23.,  3.,
#          24., 13., 22., 12., 30., 18., 24., 32., 13.,  4., 30., 32., 18., 25.,
#          11.,  4., 24., 17., 18., 39., 23.,  3., 32., 13.,  0.],
#         dtype=torch.float64))
test_path = 'test.npy'
test_dataset = MyDataset(test_path)
testcollatefn = TestCollateFunction()
test_loader_args = dict(shuffle=False, batch_size=batchsize, num_workers=numworkers, pin_memory=True, collate_fn=testcollatefn) if cuda else dict(shuffle=False, batch_size=batchsize, collate_fn=testcollatefn)
test_loader = DataLoader(test_dataset, **test_loader_args)

In [5]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence, pad_sequence
# Model_5, Model_2 without 4th LSTM
# class Model(Module):
#     def __init__(self, out_vocab, embed_size, hidden_size, in_channel=13):
#         super(Model, self).__init__()
#         self.conv1d = Conv1d(in_channel, embed_size, kernel_size=3, stride=1, padding=1, bias=False)
#         self.lstm1 = LSTM(embed_size, hidden_size, bidirectional=True)
#         self.lstm2 = LSTM(hidden_size*2, hidden_size, bidirectional=True)
#         self.lstm3 = LSTM(hidden_size*2, hidden_size, bidirectional=True)
# #         self.lstm4 = LSTM(hidden_size*2, hidden_size, bidirectional=True)
#         self.predict = False
        
# #         self.linear1 = Linear(hidden_size*2, hidden_size*2)
# #         self.relu1 = ReLU()
# #         self.linear2 = Linear(hidden_size*2, out_vocab+1)
        
#         self.linear3 = Linear(hidden_size*2, out_vocab+1)
        
#     def forward(self, X, X_lens):
#         X = self.conv1d(X) # CNN requires (batch_size, embedding_size, timesteps)

#         X = X.permute(0,2,1)
#         packed_X = pack_padded_sequence(X, X_lens, batch_first=True, enforce_sorted=False)
        
#         packed_X = self.lstm1(packed_X)[0] # LSTM requires (timesteps, batch_size, embedding_size)
#         packed_X = self.lstm2(packed_X)[0]
#         packed_X = self.lstm3(packed_X)[0]
# #         packed_X = self.lstm4(packed_X)[0]
        
#         X, out_lens = pad_packed_sequence(packed_X)
        
# #         X = self.linear1(X.permute(1,0,2))
# #         X = self.relu1(X)
# #         out = self.linear2(X).softmax(2) # B x timesteps x 42 log_softmax(2)
        
#         X = self.linear3(X.permute(1,0,2))
        
# #         if self.predict:
# #             print("SOFTMAX")
# #         out = X.softmax(2)
# #         else:
# #             print("LOG")
#         out = X.log_softmax(2)
        
#         return out, out_lens

# Model_6
class Model(Module):
    def __init__(self, out_vocab, embed_size, hidden_size, in_channel=13):
        super(Model, self).__init__()
        self.conv1d1 = Conv1d(in_channel, embed_size, kernel_size=5, stride=1, padding=2, bias=False)
        self.lstm1 = LSTM(embed_size, hidden_size, bidirectional=True)
        self.conv1d2 = Conv1d(hidden_size*2, hidden_size*2, kernel_size=3, stride=1, padding=1, bias=False)
        self.lstm2 = LSTM(hidden_size*2, hidden_size, bidirectional=True)
        self.lstm3 = LSTM(hidden_size*2, hidden_size, bidirectional=True)
        self.predict = False
        
#         self.linear1 = Linear(hidden_size*2, hidden_size*2)
#         self.relu1 = ReLU()
#         self.linear2 = Linear(hidden_size*2, out_vocab+1)
        
        self.linear3 = Linear(hidden_size*2, out_vocab+1)
        
    def forward(self, X, X_lens):
        X = self.conv1d1(X) # CNN requires (batch_size, embedding_size, timesteps)
        X = X.permute(0,2,1)
        packed_X = pack_padded_sequence(X, X_lens, batch_first=True, enforce_sorted=False)
        
        packed_X = self.lstm1(packed_X)[0] # LSTM requires (timesteps, batch_size, embedding_size)
        
        X, X_lens = pad_packed_sequence(packed_X)
        
        X = X.permute(1,2,0)
        X = self.conv1d2(X)
        X = X.permute(0,2,1)
        packed_X = pack_padded_sequence(X, X_lens, batch_first=True, enforce_sorted=False)

        packed_X = self.lstm2(packed_X)[0]
        packed_X = self.lstm3(packed_X)[0]
        
        X, out_lens = pad_packed_sequence(packed_X)
        X = self.linear3(X.permute(1,0,2))
        out = X.log_softmax(2)
        return out, out_lens

In [5]:
# block = BasicBlock(41, embed_size=256, hidden_size=512)
# a = torch.rand((32,13,21)) # B,Ch,T
# b = torch.ones((32))*21
# out, out_len = block.forward(a,b)
# out.shape, out_len.shape

model = Model(41, embed_size=300, hidden_size=256)
a = torch.rand((32,13,1650)) # B,Ch,T
b = torch.ones((32))*1625
out, out_len = model.forward(a,b)
print(model.predict, out, out_len.shape)
model.predict=True
out, out_lens = model.forward(a,b)
print(model.predict,out)

False tensor([[[-3.7687, -3.7972, -3.7302,  ..., -3.7418, -3.7511, -3.7313],
         [-3.7724, -3.8035, -3.7303,  ..., -3.7439, -3.7476, -3.7318],
         [-3.7751, -3.8070, -3.7299,  ..., -3.7458, -3.7451, -3.7322],
         ...,
         [-3.7793, -3.8041, -3.7310,  ..., -3.7514, -3.7432, -3.7348],
         [-3.7769, -3.8003, -3.7312,  ..., -3.7515, -3.7448, -3.7357],
         [-3.7723, -3.7965, -3.7312,  ..., -3.7495, -3.7470, -3.7362]],

        [[-3.7688, -3.7965, -3.7302,  ..., -3.7405, -3.7515, -3.7319],
         [-3.7730, -3.8025, -3.7307,  ..., -3.7427, -3.7487, -3.7323],
         [-3.7765, -3.8059, -3.7307,  ..., -3.7451, -3.7465, -3.7323],
         ...,
         [-3.7796, -3.8041, -3.7296,  ..., -3.7515, -3.7417, -3.7359],
         [-3.7773, -3.8004, -3.7299,  ..., -3.7512, -3.7434, -3.7368],
         [-3.7727, -3.7966, -3.7302,  ..., -3.7489, -3.7459, -3.7372]],

        [[-3.7673, -3.7964, -3.7312,  ..., -3.7412, -3.7512, -3.7315],
         [-3.7710, -3.8025, -3.7316,  .

In [6]:
RUN_NUMBER = 6  # <============================= CHANGE THIS EVERY TIME ======================<<<<<<<<<
# model = Model(41, embed_size=256, hidden_size=256).double().to(device)
model_path = 'hw3p2_models/model_6_15/model.pt'
model = torch.load(model_path).to(device)
NUM_EPOCHS = 50
learning_rate = (1e-3)/4 #
mile_stones = [2+2,10,16,22] # [6,12,18,24,30]  # 2 model training: [5,15,20,25,30,35,40,45] 
gamma = 0.5
# optimizer = optim.SGD(model.parameters(), momentum=0.9, weight_decay=5e-5, lr=learning_rate)
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-6)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=mile_stones, gamma=gamma)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=2, verbose=True)
criterion = CTCLoss()
print(torch.cuda.memory_summary(device=None, abbreviated=False))

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |   39306 KB |   51626 KB |   72170 KB |   32864 KB |
|       from large pool |   39008 KB |   51328 KB |   71872 KB |   32864 KB |
|       from small pool |     298 KB |     298 KB |     298 KB |       0 KB |
|---------------------------------------------------------------------------|
| Active memory         |   39306 KB |   51626 KB |   72170 KB |   32864 KB |
|       from large pool |   39008 KB |   51328 KB |   71872 KB |   32864 KB |
|       from small pool |     298 KB |     298 KB |     298 KB |       0 KB |
|---------------------------------------------------------------

In [7]:
import csv
import os

def save_state(distance, running_best, model_number, model, train_loader_args, device, NUM_EPOCHS, learning_rate, optimizer, criterion):
    path = './hw3p2_models/model_' + str(RUN_NUMBER) + '_'+str(model_number)
    if not os.path.isdir(path):
        os.makedirs(path)
    torch.save(model, path+'/model.pt')
    # write parameter tracking file
    parameter_file = open(path+'/hyperparameters.txt', 'w')
    parameter_file.write('\nLevenshtein Distance:\n' + str(distance))
    parameter_file.write('\nRunning Best Levenshtein Distance:\n' + str(running_best[0]) + "  " + str(running_best[1]))
    parameter_file.write('\nModel:\n' + str(model))
    parameter_file.write('\ntrain_loader_args:\n' + str(train_loader_args))
    parameter_file.write('\nDevice:\n' + str(device))
    parameter_file.write('\nNUM_EPOCHS:\n' + str(NUM_EPOCHS))
    parameter_file.write('\nLearning Rate:\n' + str(learning_rate))
    parameter_file.write('\nOptimizer:\n' + str(optimizer))
    parameter_file.write('\nCriterion:\n' + str(criterion))
    parameter_file.close()
    
def log(file,string):
    file.write(string)
    return
# CTCDecode: https://github.com/parlance/ctcdecode
# for decoder must make sure the labels list is the same length as out_vocab (or in my case out_vocab+1)
# when discussing must make sure people understand that the ' ' character is the blank we are trying to account for
def get_decoder(labels, beam=10):
    return CTCBeamDecoder(labels, beam_width=beam, log_probs_input=True)

def predict(model, loader, labels, decoder=None, test=False):
    log_file = open("predict_decoder_logs.txt", "w")
    
    if not decoder:
        decoder = get_decoder(labels, beam=150)
    model.eval()
    model.predict = True
    sequences = []
    sequence_lens = []
    if test:
        for i, (x, x_len) in enumerate(loader):
#             x_len.to(device)

            output, out_lens = model(x.to(device), x_len)
#             output = output.permute(1,0,2)

            log_string = 'output:' + str(output.shape) + str(output) + '\n' + 'out_lens: ' + str(out_lens.shape) + str(out_lens)
            log(log_file, log_string)

            # probabilities, out_lens are passed to ctcdecoder instead of loss
            beam_results, beam_scores, timesteps, out_lens = decoder.decode(output, out_lens)
    #         print(beam_results.shape, out_lens.shape)

            if i%10==0:
                print("Predict", i*batchsize, end='\r')

            sequences.append(beam_results.detach())
            sequence_lens.append(out_lens.detach())
        write_submission(sequences, sequence_lens)
    else:
        for i, (x, y, x_len, y_len) in enumerate(loader):
            x.to(device)
#             y.to(device)
#             x_len.to(device)
#             y_len.to(device)

            output, out_lens = model(x.to(device), x_len)
#             output = output.permute(1,0,2)

            log_string = 'output:' + str(output.shape) + str(output) + '\n' + 'out_lens: ' + str(out_lens.shape) + str(out_lens)
            log(log_file, log_string)

            # probabilities, out_lens are passed to ctcdecoder instead of loss
            beam_results, beam_scores, timesteps, out_lens = decoder.decode(output, out_lens)
    #         print(beam_results.shape, out_lens.shape)

#             if i%10==0:
            print("Predict", i*batchsize, end='\r')

            sequences.append(beam_results.detach())
            sequence_lens.append(out_lens.detach())
        
    model.train()
    model.predict = False
    log_file.close()
    return sequences, sequence_lens

def write_submission(sequences, sequence_lens, sub_name='./submission9.csv'):
    submission = csv.writer(open(sub_name, "w"))
    submission.writerow(['id','label'])
    Id=0
    for batch in range(len(sequences)):
        for i in range(len(sequences[batch])):
            encoded_row = sequences[batch][i,0,:sequence_lens[batch][i,0]]
            
            row = ''
            for phoneme in encoded_row:
                row += PHONEME_MAP[phoneme.item()]
            
            submission.writerow([Id,row])
            Id+=1
            if Id%10 == 0:
                print("Saved {} predictions".format(Id), end='\r')
    print("Submission File COMPLETE")
    
    return Id

def levenshtein(a, b):
    if not a: return len(b)
    if not b: return len(a)
    return min(levenshtein(a[1:], b[1:])+(a[0] != b[0]),
               levenshtein(a[1:], b)+1,
               levenshtein(a, b[1:])+1)

def _levenshtein(token1, token2):
    distances = np.zeros((len(token1) + 1, len(token2) + 1))

    for t1 in range(len(token1) + 1):
        distances[t1][0] = t1

    for t2 in range(len(token2) + 1):
        distances[0][t2] = t2
        
    a = 0
    b = 0
    c = 0
    
    for t1 in range(1, len(token1) + 1):
        for t2 in range(1, len(token2) + 1):
            if (token1[t1-1] == token2[t2-1]):
                distances[t1][t2] = distances[t1 - 1][t2 - 1]
            else:
                a = distances[t1][t2 - 1]
                b = distances[t1 - 1][t2]
                c = distances[t1 - 1][t2 - 1]
                
                if (a <= b and a <= c):
                    distances[t1][t2] = a + 1
                elif (b <= a and b <= c):
                    distances[t1][t2] = b + 1
                else:
                    distances[t1][t2] = c + 1

    return distances[len(token1)][len(token2)]

# https://blog.paperspace.com/implementing-levenshtein-distance-word-autocomplete-autocorrect/
def levenshtein_distance(model, loader):
    model.eval()
    model.predict = True
    decoder = get_decoder(PHONEME_LIST)
    log_file = open("levenshtein_dist_decoder_logs.txt", "w")
    sequences = []
    sequence_lens = []
    with torch.no_grad():
        for i, (x, y, x_len, y_len) in enumerate(loader):
            x = x.to(device)
    #         y.to(device)
    #         x_len.to(device)
    #         y_len.to(device)

            output, out_lens = model(x, x_len)

            output = output.to(torch.device("cpu"))
            del x

            log_string = 'output:' + str(output.shape) + str(output) + '\n' + 'out_lens: ' + str(out_lens.shape) + str(out_lens)
            log(log_file, log_string)

            # probabilities, out_lens are passed to ctcdecoder instead of loss
            beam_results, beam_scores, timesteps, out_lens = decoder.decode(output.detach(), out_lens.detach())
    #         print(beam_results.shape, out_lens.shape)
    #         print(torch.cuda.memory_summary(device=None, abbreviated=False))

            if i%10==0:
                print("Levenshtein Predict", i*batchsize, end='\r')

            sequences.append(beam_results.detach())
            sequence_lens.append(out_lens.detach())

    torch.cuda.empty_cache()
#     del x
    
    total_dist = 0
    rows = []
    Id=0
    for batch in range(len(sequences)):
        for i in range(len(sequences[batch])):
            encoded_row = sequences[batch][i,0,:sequence_lens[batch][i,0]] #beam_results[0][0][:out_len[0][0]] TODO CHECK THIS
            
            row = ''
            for phoneme in encoded_row:
                row += PHONEME_MAP[phoneme.item()]
            rows.append(row)
            Id+=1
            if Id%10 == 0:
                print("Levenshtein Distance {}".format(Id), end='\r')
    for row, val_row in zip(rows, VAL_ROWS):
        total_dist += _levenshtein(row, val_row)
    model.train()
    model.predict = False
    return total_dist/Id
# decoded sequence
# levenshtein distance
# print(PHONEME_LIST, len(PHONEME_LIST))
# predict(model, test_loader, PHONEME_LIST, test=True)
# predict(model, test_loader, PHONEME_LIST
# torch.cuda.empty_cache()
# del x
# del y
# del output
# del loss
# levenshtein_distance(model, val_loader)

In [8]:
model_number = 16 #         <<<<<<<<<<<<<<<<<< CHANGE THIS TO 0 AFTER THIS MODEL
prev_dist = 1000
running_best = ['',10000]
dist = 1000
torch.cuda.empty_cache()
for epoch in range(NUM_EPOCHS):
    ti = time()
    cuda_mem_log = open("cuda_mem_log.txt", "w")
    model.train()
    model.predict = False
    for i, (x, y, x_len, y_len) in enumerate(train_loader):
        _ti =time()
        
        optimizer.zero_grad()

        x = x.to(device)
        y = y.to(device)
#         x_len = x_len.to(device)
#         y_len = y_len.to(device)

#         to device all enumerated tupple
#         y = y.reshape(-1).to(device) # need to turn to row

        output, out_lens = model(x, x_len)
#         print(output.shape, y.shape, x_len.shape, y_len.shape)
        loss = criterion(output.permute(1,0,2), y, out_lens, y_len) # .unsqueeze(1) torch.log()

        loss.backward()
        optimizer.step()

#         # progress
#         if i%10==0:
        _tf = time()
        print('Epoch:', epoch, '| Iteration:', i, '| Projected Time Left', ((21952//batchsize)-(i+1))*(_tf-_ti), '| Projected Time Total', ((21952//batchsize))*(_tf-_ti), end='\r')

    
#     # Deallocate memory in GPU
        torch.cuda.empty_cache()

    # validation
    
    
    log(cuda_mem_log,torch.cuda.memory_summary(device=None, abbreviated=False))
    cuda_mem_log.close()
    
    torch.cuda.empty_cache()
    del x
    del y
    del output
    
    
    dist = levenshtein_distance(model, val_loader)
    print("Epoch", epoch, "Dist:", dist, "Loss:", loss.item(), "-----------------------------------------------")
    
    del loss
    
    if prev_dist == 10000:
        print("\tImprovement:", prev_dist-dist)
    else:
        print("\tImprovement:", prev_dist-dist, "| Percent Improvement:", 100*(prev_dist-dist)/prev_dist, '%')
    # tracking running best AUC
    if running_best[1]>dist:
        running_best[0]='Model_' + str(RUN_NUMBER) + '_' + str(model_number)
        running_best[1]=dist
        
    save_state(dist, running_best, model_number, model, train_loader_args, device, NUM_EPOCHS, learning_rate, optimizer, criterion)
    model_number+=1

    scheduler.step()
    tf=time()
    print("Time for epoch:", tf-ti)
    print('   Running Best:', *running_best,'\n')

    prev_dist = dist
    

Epoch 0 Dist: 12.5368782161235 Loss: 0.41302626146464316 -----------------------------------------------0787
	Improvement: 987.4631217838765 | Percent Improvement: 98.74631217838765 %
Time for epoch: 5710.455364704132
   Running Best: Model_6_16 12.5368782161235 

Epoch 1 Dist: 12.457118353344768 Loss: 0.36322463435215746 -----------------------------------------------11
	Improvement: 0.07975986277873126 | Percent Improvement: 0.6362019428102385 %
Time for epoch: 5693.146910667419
   Running Best: Model_6_17 12.457118353344768 

Epoch 2 Dist: 12.433104631217839 Loss: 0.36163950561700187 -----------------------------------------------52
	Improvement: 0.024013722126928982 | Percent Improvement: 0.19277108433734386 %
Time for epoch: 5694.483701705933
   Running Best: Model_6_18 12.433104631217839 

Epoch: 3 | Iteration: 5 | Projected Time Left 5491.771364927292 | Projected Time Total 5589.547709703445

KeyboardInterrupt: 

In [15]:
# import sys
# for p in model.parameters():
#     print(sys.getsizeof(p))
# print(output.shape, y.shape, x_len.unsqueeze(1), y_len.shape)
# print("Epoch", epoch, "Dist:", dist)
# torch.cuda.device_of(next(model.parameters()))
print(torch.cuda.memory_summary(device=None, abbreviated=False))
# cuda_mem_log = open("cuda_mem_log.txt", "w")
# log(cuda_mem_log,torch.cuda.memory_summary(device=None, abbreviated=False))
# cuda_mem_log.close()

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 46        |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    7428 MB |   11052 MB |  183250 GB |  183242 GB |
|       from large pool |    7426 MB |   11050 MB |  183158 GB |  183151 GB |
|       from small pool |       2 MB |      23 MB |      91 GB |      91 GB |
|---------------------------------------------------------------------------|
| Active memory         |    7428 MB |   11052 MB |  183250 GB |  183242 GB |
|       from large pool |    7426 MB |   11050 MB |  183158 GB |  183151 GB |
|       from small pool |       2 MB |      23 MB |      91 GB |      91 GB |
|---------------------------------------------------------------

In [11]:
save_state(10000, running_best, -1, model, train_loader_args, device, NUM_EPOCHS, learning_rate, optimizer, criterion)

In [12]:
# For AWS:  Need to `pip install python-Levenshtein` while in activated pytorch environment
# https://github.com/ztane/python-Levenshtein/
# from Levenshtein import distance
# distance('abcd','aqcde')
len(VAL_ROWS)

2332

## Prediction

In [33]:
predict(model, test_loader, PHONEME_LIST, test=True)
print("WOOO!")

Submission File COMPLETE
WOOO!


## Models

In [None]:
# Model_5 
class Model(Module):
    def __init__(self, out_vocab, embed_size, hidden_size, in_channel=13):
        super(Model, self).__init__()
        self.conv1d = Conv1d(in_channel, embed_size, kernel_size=3, stride=1, padding=1, bias=False)
        self.lstm1 = LSTM(embed_size, hidden_size, bidirectional=True)
        self.lstm2 = LSTM(hidden_size*2, hidden_size, bidirectional=True)
        self.lstm3 = LSTM(hidden_size*2, hidden_size, bidirectional=True)
        self.lstm4 = LSTM(hidden_size*2, hidden_size, bidirectional=True)
        self.predict = False
        
#         self.linear1 = Linear(hidden_size*2, hidden_size*2)
#         self.relu1 = ReLU()
#         self.linear2 = Linear(hidden_size*2, out_vocab+1)
        
        self.linear3 = Linear(hidden_size*2, out_vocab+1)
        
    def forward(self, X, X_lens):
        X = self.conv1d(X) # CNN requires (batch_size, embedding_size, timesteps)

        X = X.permute(0,2,1)
        packed_X = pack_padded_sequence(X, X_lens, batch_first=True, enforce_sorted=False)
        
        packed_X = self.lstm1(packed_X)[0] # LSTM requires (timesteps, batch_size, embedding_size)
        packed_X = self.lstm2(packed_X)[0]
        packed_X = self.lstm3(packed_X)[0]
        packed_X = self.lstm4(packed_X)[0]
        
        X, out_lens = pad_packed_sequence(packed_X)
        
#         X = self.linear1(X.permute(1,0,2))
#         X = self.relu1(X)
#         out = self.linear2(X).softmax(2) # B x timesteps x 42 log_softmax(2)
        
        X = self.linear3(X.permute(1,0,2))
        
#         if self.predict:
#             print("SOFTMAX")
#         out = X.softmax(2)
#         else:
#             print("LOG")
        out = X.log_softmax(2)
        
        return out, out_lens

In [None]:
# pad/pack sequence
class BasicBlock(Module):
    def __init__(self, out_vocab, embed_size, hidden_size, in_channel=13):
        super(BasicBlock, self).__init__()
        self.conv1d = Conv1d(in_channel, embed_size, kernel_size=3, stride=1, padding=1, bias=False) # do I need this?
        self.lstm = LSTM(embed_size, hidden_size, bidirectional=True)
        
    def forward(self, X, lengths):
        X = self.conv1d(X) # CNN requires (batch_size, embedding_size, timesteps)
        
        X = X.permute(0,2,1)
        
        packed_X = pack_padded_sequence(X, lengths, batch_first=True, enforce_sorted=False)
        
        packed_out = self.lstm(packed_X)[0] # LSTM requires (timesteps, batch_size, embedding_size)
        
        out, out_lens = pad_packed_sequence(packed_out)
        
        return out, out_lens

class Model(Module):
    def __init__(self, out_vocab, embed_size, hidden_size):
        super(Model, self).__init__()
        self.block1 = BasicBlock(out_vocab, embed_size, hidden_size)
        self.block2 = BasicBlock(out_vocab, embed_size, hidden_size, in_channel=hidden_size*2)
        self.block3 = BasicBlock(out_vocab, embed_size, hidden_size, in_channel=hidden_size*2)
        
        self.linear1 = Linear(hidden_size*2, hidden_size*2)
        self.relu1 = ReLU()
        self.dropout = Dropout(0.1)
        self.linear2 = Linear(hidden_size*2, out_vocab+1)
        
    def forward(self, X, lengths):
        X, X_lens = self.block1(X, lengths)
#         print(X.shape)
        X, X_lens = self.block2(X.permute(1,2,0), X_lens)
#         print(X.shape)
        X, out_lens = self.block3(X.permute(1,2,0), X_lens)
#         print(X.shape)
        X = self.linear1(X.permute(1,0,2)).log_softmax(2)
        X = self.relu1(X)
        X = self.dropout(X)
#         print(X.shape)
        X = self.linear2(X).log_softmax(2)
        return X, out_lens

In [None]:
class Model(Module):
    def __init__(self, out_vocab, embed_size, hidden_size):
        super(Model, self).__init__()
        self.conv1d = Conv1d(13, embed_size, kernel_size=3, stride=1, padding=1, bias=False) # do I need this?
        self.lstm = LSTM(embed_size, hidden_size, bidirectional=False)
        self.linear = Linear(hidden_size, out_vocab+1) # hidden_size*2? if bidirectional??

    def forward(self, X, lengths):
        # May want this in collate
        # X = pad_sequence(X)
        # print(X.permute(1,2,0).shape) # [32, 13, 1720]
        # X = X.permute(1,2,0)

        # print("in:", X.shape) # CNN requires (batch_size, embedding_size, timesteps)  TODO: double check
        X = self.conv1d(X)
        # print("after Conv1d:", X.shape) # lstm requires (timesteps, batch_size, embedding_size)  TODO: double check 
        X = X.permute(0,2,1)
        packed_X = pack_padded_sequence(X, lengths, batch_first=True, enforce_sorted=False)
        # print("packed:", packed_X.data.shape)
        packed_out = self.lstm(packed_X)[0] # why [0]? check output
        out, out_lens = pad_packed_sequence(packed_out)
        out = self.linear(out).log_softmax(2)
        # print("Eureka!")
        return out, out_lens