In [1]:
#9


import os
from glob import glob

import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm


def parse_free_digits(directory):
    # Parse relevant dataset info
    files = glob(os.path.join(directory, "*.wav"))
    
    fnames = [f.split("\\")[1].split(".")[0].split("_") for f in files]
    
    
    ids = [f[2] for f in fnames]
    y = [int(f[0]) for f in fnames]
    speakers = [f[1] for f in fnames]
    _, Fs = librosa.core.load(files[0], sr=None)

    def read_wav(f):
        wav, _ = librosa.core.load(f, sr=None)

        return wav

    # Read all wavs
    wavs = [read_wav(f) for f in files]

    # Print dataset info
    print("Total wavs: {}. Fs = {} Hz".format(len(wavs), Fs))
    #print(speakers)
    return wavs, Fs, ids, y, speakers


def extract_features(wavs, n_mfcc=6, Fs=8000):
    # Extract MFCCs for all wavs
    #print(len(wavs))
    window = 30 * Fs // 1000
    step = window // 2
    frames = [
        librosa.feature.mfcc(
            wav, Fs, n_fft=window, hop_length=window - step, n_mfcc=n_mfcc
        ).T

        for wav in tqdm(wavs, desc="Extracting mfcc features...")
    ]

    print("Feature extraction completed with {} mfccs per frame".format(n_mfcc))
     
    return frames


def split_free_digits(frames, ids, speakers, labels):
    print("Splitting in train test split using the default dataset split")
    # Split to train-test
    X_train, y_train, spk_train = [], [], []
    X_test, y_test, spk_test = [], [], []
    test_indices = ["0", "1", "2", "3", "4"]
    
    for idx, frame, label, spk in zip(ids, frames, labels, speakers):
        if str(idx) in test_indices:
            X_test.append(frame)
            y_test.append(label)
            spk_test.append(spk)
        else:
            X_train.append(frame)
            y_train.append(label)
            spk_train.append(spk)

    return X_train, X_test, y_train, y_test, spk_train, spk_test


def make_scale_fn(X_train):
    # Standardize on train data
    scaler = StandardScaler()
    scaler.fit(np.concatenate(X_train))
    print("Normalization will be performed using mean: {}".format(scaler.mean_))
    print("Normalization will be performed using std: {}".format(scaler.scale_))
    def scale(X):
        scaled = []

        for frames in X:
            scaled.append(scaler.transform(frames))
        return scaled
    return scale


def parser(directory, n_mfcc=6):
    wavs, Fs, ids, y, speakers = parse_free_digits(directory)
    frames = extract_features(wavs, n_mfcc=n_mfcc, Fs=Fs)
    make_scale_fn(frames)
#     print(len(frames))
#     print(len(ids))
#     print(len(y))
#     print(len(speakers))
    X_train, X_test, y_train, y_test, spk_train, spk_test = split_free_digits(
        frames, ids, speakers, y
    )

    return X_train, X_test, y_train, y_test, spk_train, spk_test



X_train, X_test, y_train, y_test, spk_train, spk_test = parser("recordings")

from sklearn.model_selection import train_test_split

X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.20) 
#stratify to make sure we have the same analogy
print(len(X_tr[0][1]))
print(len(X_val))

#10
#necessary for model training/testing cause shapes don't fit otherwise
digits_tr = [0,0,0,0,0,0,0,0,0,0]

for i in range(len(X_tr)):
    if (type(digits_tr[y_tr[i]]) == int): 
        digits_tr[y_tr[i]] = X_tr[i]  #all instances of the same digit
    else:
        digits_tr[y_tr[i]] = np.concatenate((digits_tr[y_tr[i]], X_tr[i]), axis=0)

        
digits_val=[0,0,0,0,0,0,0,0,0,0]

for i in range(len(X_val)):
    if (type(digits_val[y_val[i]]) == int): 
        digits_val[y_val[i]] = X_val[i]
    else:
        digits_val[y_val[i]] = np.concatenate((digits_val[y_val[i]], X_val[i]), axis=0)
        
        
digits_test=[0,0,0,0,0,0,0,0,0,0]

for i in range(len(X_test)):
    if (type(digits_test[y_test[i]]) == int): 
        digits_test[y_test[i]] = X_test[i]
    else:
        digits_test[y_test[i]] = np.concatenate((digits_test[y_test[i]], X_test[i]), axis=0)
        
        
        
#its dimension would be num_sequences x seq_length x feature_dimension  ?      
digits_train_3 = [[], [], [], [], 
                   [], [], [], [], 
                   [], []]

for i in range(len(X_tr)):
    digits_train_3[y_tr[i]].append(np.array(X_tr[i]))
print(len(digits_train_3))
print(len(X_train))



digits_val_3 = [[], [], [], [], 
                [], [], [], [], 
                [], []]

for i in range(len(X_val)):
    digits_val_3[y_val[i]].append(np.array(X_val[i]))
    
    
digits_test_3 = [[], [], [], [], 
                [], [], [], [], 
                [], []]

for i in range(len(X_test)):
    digits_test_3[y_test[i]].append(np.array(X_test[i]))

Total wavs: 3000. Fs = 8000 Hz


Extracting mfcc features...: 100%|████████████████████████████████████████████████| 3000/3000 [00:08<00:00, 336.66it/s]


Feature extraction completed with 6 mfccs per frame
Normalization will be performed using mean: [-517.71365072   62.17300245   19.0018117     9.6444396   -19.211481
  -10.88157066]
Normalization will be performed using std: [152.11675796  52.36822505  36.82452714  29.71210521  24.82190463
  23.43786656]
Splitting in train test split using the default dataset split
6
540
10
2700


In [2]:
#9


import os
from glob import glob

import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm


def parse_free_digits(directory):
    # Parse relevant dataset info
    files = glob(os.path.join(directory, "*.wav"))
    
    fnames = [f.split("\\")[1].split(".")[0].split("_") for f in files]
    
    
    ids = [f[2] for f in fnames]
    y = [int(f[0]) for f in fnames]
    speakers = [f[1] for f in fnames]
    _, Fs = librosa.core.load(files[0], sr=None)

    def read_wav(f):
        wav, _ = librosa.core.load(f, sr=None)

        return wav

    # Read all wavs
    wavs = [read_wav(f) for f in files]

    # Print dataset info
    print("Total wavs: {}. Fs = {} Hz".format(len(wavs), Fs))
    #print(speakers)
    return wavs, Fs, ids, y, speakers


def extract_features(wavs, n_mfcc=6, Fs=8000):
    # Extract MFCCs for all wavs
    #print(len(wavs))
    window = 30 * Fs // 1000
    step = window // 2
    frames = [
        librosa.feature.mfcc(
            wav, Fs, n_fft=window, hop_length=window - step, n_mfcc=n_mfcc
        ).T

        for wav in tqdm(wavs, desc="Extracting mfcc features...")
    ]

    print("Feature extraction completed with {} mfccs per frame".format(n_mfcc))
     
    return frames


def split_free_digits(frames, ids, speakers, labels):
    print("Splitting in train test split using the default dataset split")
    # Split to train-test
    X_train, y_train, spk_train = [], [], []
    X_test, y_test, spk_test = [], [], []
    test_indices = ["0", "1", "2", "3", "4"]
    
    for idx, frame, label, spk in zip(ids, frames, labels, speakers):
        if str(idx) in test_indices:
            X_test.append(frame)
            y_test.append(label)
            spk_test.append(spk)
        else:
            X_train.append(frame)
            y_train.append(label)
            spk_train.append(spk)

    return X_train, X_test, y_train, y_test, spk_train, spk_test


def make_scale_fn(X_train):
    # Standardize on train data
    scaler = StandardScaler()
    scaler.fit(np.concatenate(X_train))
    print("Normalization will be performed using mean: {}".format(scaler.mean_))
    print("Normalization will be performed using std: {}".format(scaler.scale_))
    def scale(X):
        scaled = []

        for frames in X:
            scaled.append(scaler.transform(frames))
        return scaled
    return scale


def parser(directory, n_mfcc=6):
    wavs, Fs, ids, y, speakers = parse_free_digits(directory)
    frames = extract_features(wavs, n_mfcc=n_mfcc, Fs=Fs)
    make_scale_fn(frames)
#     print(len(frames))
#     print(len(ids))
#     print(len(y))
#     print(len(speakers))
    X_train, X_test, y_train, y_test, spk_train, spk_test = split_free_digits(
        frames, ids, speakers, y
    )

    return X_train, X_test, y_train, y_test, spk_train, spk_test



X_train, X_test, y_train, y_test, spk_train, spk_test = parser("recordings")

from sklearn.model_selection import train_test_split

X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.20) 
#stratify to make sure we have the same analogy
print(len(X_tr[0][1]))
print(len(X_val))

#10
#necessary for model training/testing cause shapes don't fit otherwise
digits_tr = [0,0,0,0,0,0,0,0,0,0]

for i in range(len(X_tr)):
    if (type(digits_tr[y_tr[i]]) == int): 
        digits_tr[y_tr[i]] = X_tr[i]  #all instances of the same digit
    else:
        digits_tr[y_tr[i]] = np.concatenate((digits_tr[y_tr[i]], X_tr[i]), axis=0)

        
digits_val=[0,0,0,0,0,0,0,0,0,0]

for i in range(len(X_val)):
    if (type(digits_val[y_val[i]]) == int): 
        digits_val[y_val[i]] = X_val[i]
    else:
        digits_val[y_val[i]] = np.concatenate((digits_val[y_val[i]], X_val[i]), axis=0)
        
        
digits_test=[0,0,0,0,0,0,0,0,0,0]

for i in range(len(X_test)):
    if (type(digits_test[y_test[i]]) == int): 
        digits_test[y_test[i]] = X_test[i]
    else:
        digits_test[y_test[i]] = np.concatenate((digits_test[y_test[i]], X_test[i]), axis=0)
        
        
        
#its dimension would be num_sequences x seq_length x feature_dimension  ?      
digits_train_3 = [[], [], [], [], 
                   [], [], [], [], 
                   [], []]

for i in range(len(X_tr)):
    digits_train_3[y_tr[i]].append(np.array(X_tr[i]))
print(len(digits_train_3))
print(len(X_train))



digits_val_3 = [[], [], [], [], 
                [], [], [], [], 
                [], []]

for i in range(len(X_val)):
    digits_val_3[y_val[i]].append(np.array(X_val[i]))
    
    
digits_test_3 = [[], [], [], [], 
                [], [], [], [], 
                [], []]

for i in range(len(X_test)):
    digits_test_3[y_test[i]].append(np.array(X_test[i]))

Total wavs: 3000. Fs = 8000 Hz


Extracting mfcc features...: 100%|████████████████████████████████████████████████| 3000/3000 [00:08<00:00, 336.06it/s]


Feature extraction completed with 6 mfccs per frame
Normalization will be performed using mean: [-517.71365072   62.17300245   19.0018117     9.6444396   -19.211481
  -10.88157066]
Normalization will be performed using std: [152.11675796  52.36822505  36.82452714  29.71210521  24.82190463
  23.43786656]
Splitting in train test split using the default dataset split
6
540
10
2700


In [3]:
#14
import os
import numpy as np
import torch
from torch.utils.data import Dataset
import torch.nn as nn


class FrameLevelDataset(Dataset):
    nn.Dropout(0.25)
    def __init__(self, feats, labels):
        """
            feats: Python list of numpy arrays that contain the sequence features.
                   Each element of this list is a numpy array of shape seq_length x feature_dimension
            labels: Python list that contains the label for each sequence (each label must be an integer)
        """
        max_seq_len = 0
        #print(type(feats))
        #print(len(feats))
        lengths=np.zeros(len(feats))
        #finding the max seq len
        for i in range(len(feats)):
            if (len(feats[i])>max_seq_len):
                max_seq_len = feats[i].shape[0]
            lengths[i] = feats[i].shape[0]
        print(lengths)
        
        self.lengths = lengths
        self.maxseqlen = max_seq_len
        print(max_seq_len)
        print("len feats is" + str(len(feats)))
        self.feats = self.zero_pad_and_stack(feats)
        if isinstance(labels, (list, tuple)):
            self.labels = np.array(labels).astype('int64')

    def zero_pad_and_stack(self, x):
        """
            This function performs zero padding on a list of features and forms them into a numpy 3D array
            returns
                padded: a 3D numpy array of shape num_sequences x max_sequence_length x feature_dimension
        """
        #print(x[0])
        padded = np.zeros((len(x),self.maxseqlen,6))
        for i in range(len(x)):
            for j in range(self.maxseqlen):
                if(j>=x[i].shape[0]):
                    for k in range(6):
                        padded[i][j][k] = 0
                else:
                    #print(x[i][j])
                    padded[i][j] = x[i][j]
        
        # --------------- Insert your code here ---------------- #
        #print((padded))
        return padded

    def __getitem__(self, item):
        return self.feats[item], self.labels[item], self.lengths[item]

    def __len__(self):
        return len(self.feats)

In [4]:
from torch.autograd import Variable 
class BasicLSTM(nn.Module):
    def __init__(self, input_dim, hidden_size, output_dim, num_layers, bidirectional=False):
        super(BasicLSTM, self).__init__()
        self.bidirectional = bidirectional
        self.feature_size = hidden_size* 2 if self.bidirectional else hidden_size

        # --------------- Insert your code here ---------------- #
        
        self.num_layers = num_layers #number of layers
        self.input_size = input_size #input size
        self.hidden_size = hidden_size #hidden state
        
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True) #lstm
        self.fc =  nn.Linear(hidden_size, output_dim) #fully connected final layers
    

        self.relu = nn.ReLU()

    def forward(self, x, lengths):
        """ 
            x : 3D numpy array of dimension N x L x D
                N: batch index
                L: sequence index
                D: feature index
            lengths: N x 1
         """
        
        # --------------- Insert your code here ---------------- #
        
        # You must have all of the outputs of the LSTM, but you need only the last one (that does not exceed the sequence length)
        # To get it use the last_timestep method
        # Then pass it through the remaining network
        x = x.float()
        bi = 2 if self.bidirectional else 1
        h_0 = torch.zeros(self.num_layers*bi, x.size(0), self.hidden_size) #hidden state
        c_0 = torch.zeros(self.num_layers*bi, x.size(0), self.hidden_size) #internal state
        # Propagate input through LSTM
        output, _= self.lstm(x, (h_0, c_0)) #lstm with input, hidden, and internal state
       # hn = hn.view(-1, self.hidden_size) #reshaping the data for Dense layer next
        #output = self.relu(hn)
        output = self.fc(output) #final output
        #print(type(output))
        last_outputs = self.last_timestep(output,lengths,bidirectional=self.bidirectional)
        #print(last_outputs)
        return last_outputs

    def last_timestep(self, outputs, lengths, bidirectional=False):
        """
            Returns the last output of the LSTM taking into account the zero padding
        """
        if bidirectional:
            forward, backward = self.split_directions(outputs)
            last_forward = self.last_by_index(forward, lengths)
            last_backward = backward[:, 0, :]
            # Concatenate and return - maybe add more functionalities like average
            return torch.cat((last_forward, last_backward), dim=-1)

        else:
            return self.last_by_index(outputs, lengths)

    @staticmethod
    def split_directions(outputs):
        direction_size = int(outputs.size(-1) / 2)
        forward = outputs[:, :, :direction_size]
        backward = outputs[:, :, direction_size:]
        return forward, backward

    @staticmethod
    def last_by_index(outputs, lengths):
        # Index of the last output for each sequence.
        idx = (lengths - 1).view(-1, 1).expand(outputs.size(0),
                                               outputs.size(2)).unsqueeze(1)
        idx = idx.type(torch.int64)
        #print(type(idx))
        return outputs.gather(1, idx).squeeze()

In [5]:

num_epochs = 60 #1000 epochs
learning_rate = 0.001 #0.001 lr
input_size = 6 #number of features
hidden_size = 150 #number of features in hidden state
num_layers = 2 #number of stacked lstm layers
bidirectional = False
num_classes = 10 #number of output classes 
batch_size = 20


In [6]:
train_set = FrameLevelDataset(X_tr,y_tr)
test_set = FrameLevelDataset(X_test,y_test)
val_set = FrameLevelDataset(X_val,y_val )
model = BasicLSTM(input_size, hidden_size, num_classes, num_layers, bidirectional=bidirectional) #our lstm class 
model = model.float()
train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
print(train_dataloader)
test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)
val_dataloader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)


[36. 39. 36. ... 26. 27. 37.]
147
len feats is2160
[20. 40. 45. 42. 37. 43. 36. 36. 40. 37. 43. 46. 49. 38. 34. 30. 32. 24.
 37. 33. 27. 24. 23. 23. 28. 26. 23. 24. 24. 22. 38. 34. 39. 36. 36. 35.
 36. 32. 34. 36. 26. 27. 28. 54. 26. 25. 20. 18. 20. 20. 16. 16. 13. 17.
 15. 28. 15. 23. 21. 21. 23. 38. 27. 27. 26. 34. 37. 30. 34. 32. 25. 28.
 29. 29. 29. 24. 20. 25. 16. 23. 17. 16. 36. 14. 18. 19. 21. 16. 19. 20.
 34. 34. 33. 36. 30. 33. 32. 34. 35. 30. 42. 41. 39. 31. 36. 23. 22. 18.
 16. 24. 17. 19. 19. 16. 15. 27. 21. 17. 27. 27. 30. 36. 33. 32. 29. 31.
 28. 28. 28. 29. 29. 28. 42. 30. 37. 21. 24. 21. 22. 22. 19. 17. 16. 17.
 20. 28. 27. 23. 19. 24. 38. 39. 33. 34. 32. 29. 28. 31. 27. 36. 41. 77.
 39. 36. 34. 23. 26. 21. 25. 24. 21. 20. 18. 19. 19. 21. 28. 26. 28. 34.
 35. 32. 38. 40. 37. 56. 43. 43. 58. 42. 33. 42. 33. 59. 33. 15. 16. 18.
 27. 32. 33. 33. 34. 33. 32. 23. 11. 16. 10. 13. 43. 40. 44. 39. 42. 29.
 32. 26. 29. 28. 45. 31. 32. 38. 39. 25. 31. 30. 25. 30. 29. 25. 17. 20.


In [7]:
criterion = nn.CrossEntropyLoss()   # mean-squared error for regression
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

# to track the training loss as the model trains
train_losses = []
    # to track the validation loss as the model trains
valid_losses = []
    # to track the average training loss per epoch as the model trains
avg_train_losses = []
    # to track the average validation loss per epoch as the model trains
avg_valid_losses = [] 
    
    # initialize the early_stopping object
#early_stopping = EarlyStopping(patience=patience, verbose=True)
    
for epoch in range(num_epochs):

        ###################
        # train the model #
        ###################
    model.train() # prep model for training
    for batch_idx, (feats,labels,lengths) in enumerate(train_dataloader):
        feats = feats.float()
        labels = torch.tensor(labels, dtype=torch.long)
        outputs = model(feats,lengths) #forward pass
        #caluclate the gradient, manually setting to 0
        optimizer.zero_grad()
                        #print(net_out.size())
        loss = criterion(outputs,labels)
        #print(loss.size())
        loss.backward()
        optimizer.step()
            # record training loss
        train_losses.append(loss.item())

        ######################    
        # validate the model #
        ######################
        model.eval() # prep model for evaluation
    for batch_idx, (feats,labels,lengths) in enumerate(val_dataloader):
        feats = feats.float()
        labels = torch.tensor(labels, dtype=torch.long)
        outputs = model(feats,lengths) #forward pass
        #caluclate the gradient, manually setting to 0
       
                        #print(net_out.size())
        loss = criterion(outputs,labels)
        #print(loss.size())
        
        valid_losses.append(loss.item())

        # print training/validation statistics 
        # calculate average loss over an epoch
    train_loss = np.average(train_losses)
    valid_loss = np.average(valid_losses)
    avg_train_losses.append(train_loss)
    avg_valid_losses.append(valid_loss)
        
    epoch_len = len(str(num_epochs))
        
    print_msg = (f'[{epoch:>{epoch_len}}/{num_epochs:>{epoch_len}}] ' +
                     f'train_loss: {train_loss:.5f} ' +
                     f'valid_loss: {valid_loss:.5f}')
        
    print(print_msg)
        
        # clear lists to track next epoch
    train_losses = []
    valid_losses = []



[ 0/60] train_loss: 1.48617 valid_loss: 0.76354
[ 1/60] train_loss: 0.61032 valid_loss: 0.39333
[ 2/60] train_loss: 0.38444 valid_loss: 0.39203
[ 3/60] train_loss: 0.26996 valid_loss: 0.39014
[ 4/60] train_loss: 0.26279 valid_loss: 0.23964
[ 5/60] train_loss: 0.16786 valid_loss: 0.26269
[ 6/60] train_loss: 0.14451 valid_loss: 0.15258
[ 7/60] train_loss: 0.08040 valid_loss: 0.14860
[ 8/60] train_loss: 0.13722 valid_loss: 0.29279
[ 9/60] train_loss: 0.16148 valid_loss: 0.19264
[10/60] train_loss: 0.12483 valid_loss: 0.12157
[11/60] train_loss: 0.11638 valid_loss: 0.20381
[12/60] train_loss: 0.11001 valid_loss: 0.14205
[13/60] train_loss: 0.08599 valid_loss: 0.19162
[14/60] train_loss: 0.12832 valid_loss: 0.10436
[15/60] train_loss: 0.09410 valid_loss: 0.12878
[16/60] train_loss: 0.07434 valid_loss: 0.09909
[17/60] train_loss: 0.13212 valid_loss: 0.10536
[18/60] train_loss: 0.07456 valid_loss: 0.12127
[19/60] train_loss: 0.07010 valid_loss: 0.11112
[20/60] train_loss: 0.08459 valid_loss: 

In [8]:
val_set = FrameLevelDataset(X_val,y_val )
val_dataloader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=True)

test_loss = 0
correct = 0
y_pred=np.array([])
y_true=np.array([])
for batch_idx, (feats,labels,lengths) in enumerate(val_dataloader):
    feats = feats.float()
    labels = torch.tensor(labels, dtype=torch.long)
    outputs = model(feats,lengths) #forward pass
        #caluclate the gradient, manually setting to 0    
    test_loss += criterion(outputs, labels).item()
    pred = outputs.data.max(1)[1]  # get the index of the max log-probability
    y_true=np.concatenate((y_true,labels.numpy()),axis=0)
    y_pred=np.concatenate((y_pred,pred.numpy()),axis=0)
    correct += pred.eq(labels).sum()


    test_loss /= len(val_dataloader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(val_dataloader.dataset),
            100. * correct / len(val_dataloader.dataset)))

[ 25.  29.  31.  27.  19.  34.  28.  26.  43.  22.  17.  31.  25.  28.
  41.  20.  28.  30.  28.  28.  24.  35.  14.  28.  40.  58.  17.  22.
  38.  28.  30.  35.  34.  37.  42.  34.  18.  41.  24.  17.  36.  24.
  58.  30.  24.  26.  21.  36. 153.  21.  29.  34.  34.  18.  41.  24.
  24.  64.  21.  29.  29.  33.  35.  25.  22.  18.  31.  24.  27.  18.
  20.  29.  28.  32.  32.  23.  30.  39.  16.  27.  41.  29.  24.  20.
  21.  19.  38.  29.  23.  18.  20.  40.  31.  38.  48.  27.  40.  50.
  38.  26.  29.  39.  87.  19.  16.  27.  25.  16.  29.  21.  37.  30.
  25.  30.  26.  28.  35.  21.  26.  33.  23.  34.  30.  25.  25.  42.
  18.  25.  17.  27.  28.  42.  21.  31.  22.  22.  27.  26.  23.  28.
  25.  28.  34.  35.  43.  35.  23.  25.  25.  28.  44.  23.  44.  34.
  21.  33.  27.  39.  40.  31.  26.  42.  41.  27.  63.  18.  29.  32.
  29.  21.  32.  32.  42.  24.  30.  41.  44.  31.  40.  25.  27.  47.
  28.  36.  31.  34.  27.  19.  28.  33.  37.  24.  18.  19.  27.  37.
  27. 

  # Remove the CWD from sys.path while we load stuff.



Test set: Average loss: 0.0005, Accuracy: 19/540 (4%)


Test set: Average loss: 0.0004, Accuracy: 37/540 (7%)


Test set: Average loss: 0.0000, Accuracy: 57/540 (11%)


Test set: Average loss: 0.0000, Accuracy: 77/540 (14%)


Test set: Average loss: 0.0000, Accuracy: 97/540 (18%)


Test set: Average loss: 0.0008, Accuracy: 114/540 (21%)


Test set: Average loss: 0.0000, Accuracy: 134/540 (25%)


Test set: Average loss: 0.0000, Accuracy: 154/540 (29%)


Test set: Average loss: 0.0001, Accuracy: 173/540 (32%)


Test set: Average loss: 0.0000, Accuracy: 193/540 (36%)


Test set: Average loss: 0.0000, Accuracy: 213/540 (39%)


Test set: Average loss: 0.0000, Accuracy: 233/540 (43%)


Test set: Average loss: 0.0000, Accuracy: 253/540 (47%)


Test set: Average loss: 0.0000, Accuracy: 273/540 (51%)


Test set: Average loss: 0.0000, Accuracy: 293/540 (54%)


Test set: Average loss: 0.0001, Accuracy: 312/540 (58%)


Test set: Average loss: 0.0000, Accuracy: 332/540 (61%)


Test set: Average lo

In [10]:
test_loss = 0
correct = 0
y_pred=np.array([])
y_true=np.array([])
for batch_idx, (feats,labels,lengths) in enumerate(test_dataloader):
    feats = feats.float()
    labels = torch.tensor(labels, dtype=torch.long)
    outputs = model(feats,lengths) #forward pass
        #caluclate the gradient, manually setting to 0    
    test_loss += criterion(outputs, labels).item()
    pred = outputs.data.max(1)[1]  # get the index of the max log-probability
    y_true=np.concatenate((y_true,labels.numpy()),axis=0)
    y_pred=np.concatenate((y_pred,pred.numpy()),axis=0)
    correct += pred.eq(labels).sum()


    test_loss /= len(test_dataloader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_dataloader.dataset),
            100. * correct / len(test_dataloader.dataset)))

  import sys



Test set: Average loss: 0.0000, Accuracy: 20/300 (7%)


Test set: Average loss: 0.0009, Accuracy: 39/300 (13%)


Test set: Average loss: 0.0000, Accuracy: 59/300 (20%)


Test set: Average loss: 0.0000, Accuracy: 79/300 (26%)


Test set: Average loss: 0.0000, Accuracy: 99/300 (33%)


Test set: Average loss: 0.0000, Accuracy: 119/300 (40%)


Test set: Average loss: 0.0008, Accuracy: 138/300 (46%)


Test set: Average loss: 0.0000, Accuracy: 158/300 (53%)


Test set: Average loss: 0.0000, Accuracy: 178/300 (59%)


Test set: Average loss: 0.0013, Accuracy: 197/300 (66%)


Test set: Average loss: 0.0000, Accuracy: 217/300 (72%)


Test set: Average loss: 0.0000, Accuracy: 237/300 (79%)


Test set: Average loss: 0.0014, Accuracy: 255/300 (85%)


Test set: Average loss: 0.0000, Accuracy: 275/300 (92%)


Test set: Average loss: 0.0006, Accuracy: 293/300 (98%)

