Note:
The following settings can be tuned to improve performance:
(1) Batch size
(2) Model architecture
(3) Number of epochs
(4) Learning rate

**Loading Data**

train_11.npy → training data (# of training frames, 11 x feature dim)
train_label_11.npy → framewise phoneme label (0-38)
test_11.npy → testing data (# of testing frames, 11 x feature dim)

In [19]:
import numpy as np

import os

print('Loading data ...')

# Directory that holds the TIMIT .npy files. Set the TIMIT_DATA_ROOT
# environment variable to run the notebook on another machine; when it is not
# set, the original hard-coded location is used.
data_root = os.environ.get(
    'TIMIT_DATA_ROOT',
    "C:\\Users\\Michael\\Desktop\\PYTHON\\Machine Learning\\HW2\\timit_11\\",
)

# os.path.join works whether or not data_root ends with a path separator.
train = np.load(os.path.join(data_root, 'train_11.npy'))              # training features
train_label = np.load(os.path.join(data_root, 'train_label_11.npy'))  # frame-wise phoneme labels
test = np.load(os.path.join(data_root, 'test_11.npy'))                # testing features

print('Size of training data: {}'.format(train.shape))
print('Size of testing data: {}'.format(test.shape))

Loading data ...
Size of training data: (1229932, 429)
Size of testing data: (451552, 429)


**Create DataSet**

In [20]:
import torch
from torch.utils.data import Dataset

class TIMITDataset(Dataset):
    """In-memory dataset of feature rows with optional integer labels.

    Args:
        X: NumPy array of features; kept as a float tensor in ``self.data``.
        y: Optional NumPy array of labels. When given, it is cast to integers
            and kept as a ``LongTensor`` in ``self.label``. When omitted,
            ``self.label`` is ``None`` and indexing yields features only.
    """

    def __init__(self, X, y=None):
        self.data = torch.from_numpy(X).float()
        self.label = torch.LongTensor(y.astype(int)) if y is not None else None

    def __getitem__(self, idx):
        """Return ``(features, label)`` when labels exist, otherwise ``features``."""
        features = self.data[idx]
        if self.label is None:
            return features
        return features, self.label[idx]

    def __len__(self):
        """Return the number of samples (rows of the feature tensor)."""
        return len(self.data)


Split the data into training and validation sets

In [21]:
# Fraction of the loaded training data that is held out for validation.
VAL_RATIO = 0.2

# Index of the first validation row: rows before it are used for training,
# rows from it onward for validation (contiguous split, no shuffling).
percent = int(train.shape[0] * (1 - VAL_RATIO))

train_x, val_x = train[:percent], train[percent:]
train_y, val_y = train_label[:percent], train_label[percent:]

print('Size of training set: {}'.format(train_x.shape))
print('Size of validation set: {}'.format(val_x.shape))

Size of training set: (983945, 429)
Size of validation set: (245987, 429)


**Dataloader**

In [22]:
from torch.utils.data import DataLoader

# Number of frames per mini-batch, shared by every loader in this notebook.
BATCH_SIZE = 2048

train_set = TIMITDataset(train_x, train_y)
val_set = TIMITDataset(val_x, val_y)

# Only the training loader shuffles: it draws batches in a new random order
# each epoch. The validation loader is used purely for evaluation, so it keeps
# the stored order.
train_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=BATCH_SIZE, shuffle=False)

In [23]:
import gc

# Drop the notebook-level references to the raw arrays and their splits now
# that the datasets have been built from them. Names are popped instead of
# `del`-ed so that re-running this cell (when they are already gone) does not
# raise NameError.
# NOTE(review): if the feature arrays were already float32, the dataset tensors
# may still share their memory, so less may be freed than expected -- confirm.
for _name in ('train', 'train_label', 'train_x', 'train_y', 'val_x', 'val_y'):
    globals().pop(_name, None)
del _name  # do not leave the loop variable behind in the namespace

# Kept as the last expression so the cell still displays the collector's count.
gc.collect()

580

**Model**

In [24]:
import torch
import torch.nn as nn

class Classifier(nn.Module):
    """Fully connected frame classifier.

    The network is a stack of hidden groups, each made of
    ``Linear -> LeakyReLU -> BatchNorm1d -> Dropout``, followed by a final
    ``Linear`` layer that produces one logit per class.

    With the default arguments the module order inside ``self.net`` (and so the
    ``state_dict`` keys ``net.0`` ... ``net.24``) is the same as in the
    original hand-written layer list, so existing checkpoints still load.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dims: Output width of every hidden group, in order.
        output_dim: Number of output classes (logits).
        dropout: Dropout probability applied after every hidden group.
    """

    def __init__(self, input_dim=429,
                 hidden_dims=(2048, 2048, 2048, 1024, 512, 256),
                 output_dim=39, dropout=0.5):
        super().__init__()

        layers = []
        in_features = input_dim
        for out_features in hidden_dims:
            # One hidden group; the order of these four modules is part of the
            # checkpoint layout and must not change.
            layers += [
                nn.Linear(in_features, out_features),
                nn.LeakyReLU(),
                nn.BatchNorm1d(out_features),
                nn.Dropout(dropout),
            ]
            in_features = out_features

        # Output layer: raw logits, no activation.
        layers.append(nn.Linear(in_features, output_dim))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of feature vectors ``x``."""
        return self.net(x)

**Training**

In [25]:
#check device
def get_device():
    """Return ``'cuda'`` when a CUDA device is available, otherwise ``'cpu'``."""
    if torch.cuda.is_available():
        return 'cuda'
    return 'cpu'

Reproducibility

In [26]:
# fix random seed
def same_seeds(seed):
    """Seed NumPy and PyTorch (CPU and, if present, CUDA) with ``seed``.

    Also disables cuDNN benchmarking and requests deterministic cuDNN
    algorithms.
    """
    # cuDNN settings: no benchmark mode, deterministic algorithms only.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Random number generators.
    np.random.seed(seed)
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

In [27]:
# training hyper-parameters
num_epoch = 250        # number of passes over the training set
learning_rate = 1e-4   # Adam step size
l2 = 1e-4              # passed to Adam as weight_decay

# where the best checkpoint is written
model_path = './model.ckpt'

# seed every RNG before the model is constructed so weight initialisation repeats
same_seeds(0)

# pick the compute device
device = get_device()
print(f'DEVICE: {device}')

# model, loss function and optimizer
model = Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
    weight_decay=l2,
)

DEVICE: cuda


In [28]:
# start training
#
# Each epoch: one pass over train_loader with parameter updates, then (when a
# validation set exists) one pass over val_loader without gradients. Per-epoch
# metrics are appended to the history lists, and the model is checkpointed
# whenever the number of correct validation predictions improves.

train_losses = []   # mean training loss per epoch
valid_losses = []   # mean validation loss per epoch
trainacc = []       # training accuracy per epoch
valacc = []         # validation accuracy per epoch
best_acc = 0.0      # best number of correct validation predictions so far (a count, not a ratio)
for epoch in range(num_epoch):
    # running totals for this epoch: *_acc are counts of correct predictions,
    # *_loss are sums of per-batch mean losses
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # training
    model.train()  # training mode: dropout active, batch norm uses batch statistics
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()                    # clear gradients from the previous step
        outputs = model(inputs)                  # forward pass: class logits
        batch_loss = criterion(outputs, labels)
        batch_loss.backward()                    # back-propagate
        optimizer.step()                         # update parameters

        # Predicted class = index of the largest logit. `outputs` and `labels`
        # are both on `device`, so they are compared there directly; only the
        # resulting scalar is brought back to Python via .item().
        train_pred = outputs.argmax(dim=1)
        train_acc += (train_pred == labels).sum().item()
        train_loss += batch_loss.item()

    # validation
    if len(val_set) > 0:
        model.eval()  # evaluation mode: dropout off, batch norm uses running statistics
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                batch_loss = criterion(outputs, labels)

                val_pred = outputs.argmax(dim=1)
                val_acc += (val_pred == labels).sum().item()
                val_loss += batch_loss.item()

        print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f} | Val Acc: {:3.6f} loss: {:3.6f}'.format(
            epoch + 1, num_epoch, train_acc/len(train_set), train_loss/len(train_loader), val_acc/len(val_set), val_loss/len(val_loader)
        ))
        train_losses.append(train_loss/len(train_loader))
        valid_losses.append(val_loss/len(val_loader))
        trainacc.append(train_acc/len(train_set))
        valacc.append(val_acc/len(val_set))

        # if the model improves, save a checkpoint at this epoch
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), model_path)
            print('saving model with acc {:.3f}'.format(best_acc/len(val_set)))
    else:
        print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f}'.format(
            epoch + 1, num_epoch, train_acc/len(train_set), train_loss/len(train_loader)
        ))

# if not validating, save the last epoch
if len(val_set) == 0:
    torch.save(model.state_dict(), model_path)
    print('saving model at last epoch')


[001/250] Train Acc: 0.391858 Loss: 2.202476 | Val Acc: 0.553806 loss: 1.497888
saving model with acc 0.554
[002/250] Train Acc: 0.527697 Loss: 1.604127 | Val Acc: 0.620273 loss: 1.249955
saving model with acc 0.620
[003/250] Train Acc: 0.570423 Loss: 1.440136 | Val Acc: 0.651681 loss: 1.137280
saving model with acc 0.652
[004/250] Train Acc: 0.596523 Loss: 1.343338 | Val Acc: 0.667641 loss: 1.072795
saving model with acc 0.668
[005/250] Train Acc: 0.613453 Loss: 1.279361 | Val Acc: 0.680760 loss: 1.026321
saving model with acc 0.681
[006/250] Train Acc: 0.628411 Loss: 1.227591 | Val Acc: 0.690463 loss: 0.991680
saving model with acc 0.690
[007/250] Train Acc: 0.637939 Loss: 1.188015 | Val Acc: 0.697663 loss: 0.963799
saving model with acc 0.698
[008/250] Train Acc: 0.646942 Loss: 1.155512 | Val Acc: 0.703261 loss: 0.941507
saving model with acc 0.703
[009/250] Train Acc: 0.654709 Loss: 1.127497 | Val Acc: 0.708375 loss: 0.922601
saving model with acc 0.708
[010/250] Train Acc: 0.66136

In [29]:
import pandas as pd

# Collect the per-epoch training history, one column per metric, and write it
# to final.csv without the index column.
history_columns = {
    'Epoch': range(1, len(train_losses) + 1),
    'Train_Loss': train_losses,
    'Valid_Loss': valid_losses,
    'Train_acc': trainacc,
    'Val_acc': valacc,
}
data = pd.DataFrame(history_columns)

data.to_csv('final.csv', index=False)


In [30]:
# create testing dataset (no labels, so items are feature tensors only)
test_set = TIMITDataset(test, None)
# shuffle=False keeps predictions in the same order as the rows of `test`
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

# create model and load weights from checkpoint
model = Classifier().to(device)
# map_location remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU can also be restored on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))

<All keys matched successfully>

In [31]:
# Run the model over the test set and collect one predicted class index per
# frame, in loader order.
predict = []
model.eval()  # evaluation mode
with torch.no_grad():
    for batch in test_loader:
        outputs = model(batch.to(device))
        # index of the largest output along the class dimension
        test_pred = torch.max(outputs, 1)[1]
        predict.extend(test_pred.cpu().numpy())

In [32]:
# Write the predictions as CSV: a header row, then one "<row index>,<class>"
# line per prediction.
with open('prediction.csv', 'w') as f:
    f.write('Id,Class\n')
    f.writelines(
        '{},{}\n'.format(row_id, label) for row_id, label in enumerate(predict)
    )