In [None]:
# Mount Google Drive inside the Colab runtime so the lab files stored there can be read.
from google.colab import drive
drive.mount('/content/drive')
# Folder containing the lab files. The trailing '/' matters: a later cell builds the dataset
# path by plain string concatenation (dir + 'lab2_dataset.npz').
# NOTE(review): the name `dir` shadows Python's built-in dir() for the rest of the session;
# consider renaming it (together with that later use) to something like DATA_DIR.
dir = '/content/drive/MyDrive/Courses/2021fall/Spoken Language Technologies/Lab2/'

In [35]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Load the dataset
# The archive is read once; the four feature/label arrays are converted to tensors in the
# same order as they are listed, while the phone label table is kept as loaded.
data = np.load(dir + 'lab2_dataset.npz')
train_feats, test_feats, train_labels, test_labels = (
    torch.tensor(data[key])
    for key in ('train_feats', 'test_feats', 'train_labels', 'test_labels')
)
phone_labels = data['phone_labels']

# Set up the dataloaders
# Both splits use the same mini-batch size; only the training split is reshuffled.
BATCH_SIZE = 8

train_dataset = torch.utils.data.TensorDataset(train_feats, train_labels)
test_dataset = torch.utils.data.TensorDataset(test_feats, test_labels)

train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
test_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

# Define the model architecture
class MyModel(nn.Module):
    """Stack of 1-D convolutions with two learned mixing steps and a linear classifier.

    ``forward`` takes a 3-D tensor laid out as (batch, frames, 40) and returns
    (batch, 48) scores. Every convolution is unpadded with kernel size 3, so each
    one shortens the frame axis by two. The first mixing step always produces
    9 positions (the width of ``attn1``); after that the lengths run
    9 -> 7 -> 7 -> 5 -> 3 -> 1, which is what lets ``linear2`` consume exactly
    512 flattened values.

    ``attn3`` and ``attn4`` are registered (and therefore appear in
    ``state_dict``) but are not used by ``forward``.
    """

    def __init__(self):
        super().__init__()
        # Registration order is kept as-is: it determines state_dict key order and the
        # order in which the random initialisers consume the RNG.
        self.linear2 = nn.Linear(in_features=512, out_features=48)
        self.conv1 = nn.Conv1d(in_channels=40, out_channels=512, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=512, out_channels=512, kernel_size=3)
        self.conv3 = nn.Conv1d(in_channels=512, out_channels=512, kernel_size=3)
        self.conv4 = nn.Conv1d(in_channels=512, out_channels=512, kernel_size=3)
        self.conv5 = nn.Conv1d(in_channels=512, out_channels=512, kernel_size=3)
        self.attn1 = nn.Linear(in_features=512, out_features=9)
        self.attn2 = nn.Linear(in_features=512, out_features=7)
        self.attn3 = nn.Linear(in_features=512, out_features=5)  # not used in forward
        self.attn4 = nn.Linear(in_features=512, out_features=3)  # not used in forward
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Map a (batch, frames, 40) tensor to (batch, 48) class scores."""
        num_examples = x.shape[0]
        feats = x.permute(0, 2, 1)  # channels-first for Conv1d: (batch, 40, frames)

        # Mixing step 1 (no dropout on this stage).
        hidden = F.relu(self.conv1(feats))                        # (batch, 512, L)
        scores = self.attn1(hidden.transpose(1, 2))               # (batch, L, 9)
        # NOTE(review): the softmax normalises over the 9 output positions, while bmm sums
        # over the L input positions -- confirm this is the intended normalisation axis.
        mix = F.softmax(scores, dim=-1)
        feats = torch.bmm(hidden, mix)                            # (batch, 512, 9)

        # Mixing step 2.
        hidden = self.dropout(F.relu(self.conv2(feats)))          # (batch, 512, 7)
        mix = F.softmax(self.attn2(hidden.transpose(1, 2)), dim=-1)  # (batch, 7, 7)
        feats = torch.bmm(hidden, mix)                            # (batch, 512, 7)

        # Three plain conv -> ReLU -> dropout stages: 7 -> 5 -> 3 -> 1 positions.
        for conv in (self.conv3, self.conv4, self.conv5):
            feats = self.dropout(F.relu(conv(feats)))

        flat = feats.reshape(num_examples, -1)                    # (batch, 512)
        return self.linear2(flat)

# Instantiate the model, loss function, and optimizer
# Seed PyTorch's global RNG before the model is built so that weight initialisation (and
# the random draws that follow when the notebook is run top to bottom) are repeatable.
# Some GPU kernels may still be non-deterministic.
SEED = 0
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MyModel()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4)
# The training cell calls scheduler.step() once per epoch, so the learning rate is
# multiplied by 0.1 after epoch 15 and again after epoch 30.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[15, 30], gamma=0.1)

def train_network(model, train_loader, criterion, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: ``nn.Module`` to optimise; it is switched to training mode.
        train_loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        optimizer: optimiser built over ``model``'s parameters.
        epoch: epoch number, used only in the progress message.

    Returns:
        None. Progress is printed every 1000 batches (including batch 0).
    """
    # Make the mode explicit: layers such as Dropout behave differently in eval mode, and
    # an earlier evaluation pass may have left the model there.
    model.train()

    # Send each batch to wherever the model's parameters live instead of relying on a
    # notebook-level global. A parameter-free model leaves the batches where they are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    for i, (inputs, labels) in enumerate(train_loader):
        if target_device is not None:
            inputs = inputs.to(target_device)
            labels = labels.to(target_device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        if i % 1000 == 0:
            print('Epoch %d [%d / %d]: loss: %.4f' % (epoch, i, len(train_loader), loss.item()))

def test_network(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Test accuracy: %d %%' % (100 * correct / total))



In [None]:
# Each epoch: one pass over the training set, one evaluation on the test set, then one
# step of the learning-rate schedule. Epochs are numbered from 1 in the progress output.
# The model, optimizer and scheduler are created in an earlier cell, so re-running only
# this cell continues from their current state rather than starting over.
NUM_EPOCHS = 40
for epoch in range(1, NUM_EPOCHS + 1):
    train_network(model, train_loader, criterion, optimizer, epoch)
    test_network(model, test_loader)
    scheduler.step()

Epoch 1 [0 / 5592]: loss: 5.5907
Epoch 1 [1000 / 5592]: loss: 3.4199
Epoch 1 [2000 / 5592]: loss: 2.4304
Epoch 1 [3000 / 5592]: loss: 2.7802
Epoch 1 [4000 / 5592]: loss: 4.1211
Epoch 1 [5000 / 5592]: loss: 3.0983
Test accuracy: 20 %
Epoch 2 [0 / 5592]: loss: 2.9204
Epoch 2 [1000 / 5592]: loss: 1.9015
Epoch 2 [2000 / 5592]: loss: 2.6517
Epoch 2 [3000 / 5592]: loss: 2.1312
Epoch 2 [4000 / 5592]: loss: 2.3306
Epoch 2 [5000 / 5592]: loss: 2.7180
Test accuracy: 31 %
Epoch 3 [0 / 5592]: loss: 1.7507
Epoch 3 [1000 / 5592]: loss: 2.2915
Epoch 3 [2000 / 5592]: loss: 2.3356
Epoch 3 [3000 / 5592]: loss: 2.9825
Epoch 3 [4000 / 5592]: loss: 1.9525
Epoch 3 [5000 / 5592]: loss: 2.2181
Test accuracy: 35 %
Epoch 4 [0 / 5592]: loss: 1.9252
Epoch 4 [1000 / 5592]: loss: 1.9226
Epoch 4 [2000 / 5592]: loss: 2.2926
Epoch 4 [3000 / 5592]: loss: 2.1448
Epoch 4 [4000 / 5592]: loss: 1.5731
Epoch 4 [5000 / 5592]: loss: 2.0863
Test accuracy: 39 %
Epoch 5 [0 / 5592]: loss: 1.7238
Epoch 5 [1000 / 5592]: loss: 1.4859

Epoch 21 [0 / 5592]: loss: 1.3522
Epoch 21 [1000 / 5592]: loss: 1.7280
Epoch 21 [2000 / 5592]: loss: 1.7000
Epoch 21 [3000 / 5592]: loss: 1.4558
Epoch 21 [4000 / 5592]: loss: 1.0594
Epoch 21 [5000 / 5592]: loss: 0.8152
Test accuracy: 56 %
Epoch 22 [0 / 5592]: loss: 2.6837
Epoch 22 [1000 / 5592]: loss: 1.6620
Epoch 22 [2000 / 5592]: loss: 1.2738
Epoch 22 [3000 / 5592]: loss: 0.8235
Epoch 22 [4000 / 5592]: loss: 0.8223
Epoch 22 [5000 / 5592]: loss: 0.4243
Test accuracy: 56 %
Epoch 23 [0 / 5592]: loss: 1.5869
Epoch 23 [1000 / 5592]: loss: 1.3389
Epoch 23 [2000 / 5592]: loss: 1.3605
Epoch 23 [3000 / 5592]: loss: 0.6173
Epoch 23 [4000 / 5592]: loss: 1.2221
Epoch 23 [5000 / 5592]: loss: 0.9207
Test accuracy: 56 %
Epoch 24 [0 / 5592]: loss: 0.6542
Epoch 24 [1000 / 5592]: loss: 1.3720
Epoch 24 [2000 / 5592]: loss: 1.2660
Epoch 24 [3000 / 5592]: loss: 0.7208
Epoch 24 [4000 / 5592]: loss: 1.6496
Epoch 24 [5000 / 5592]: loss: 1.4874
Test accuracy: 56 %
Epoch 25 [0 / 5592]: loss: 0.3880
Epoch 25 [