**Loading Data**

train_11.npy → training data (# of training frames, 11 x feature dim)
train_label_11.npy → framewise phoneme label (0-38)
test_11.npy → testing data (# of testing frames, 11 x feature dim)

In [8]:
import numpy as np

print('Loading data ...')

# Folder that holds the TIMIT .npy arrays. The trailing separator is part of
# the string so file names can be appended directly.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook on another machine.
data_root="C:\\Users\\micha\\OneDrive\\桌面\\CODES\\Machine Learning\\HW2\\timit_11\\"

# Load, in order: training features, training labels, testing features.
train, train_label, test = (
    np.load(data_root + file_name)
    for file_name in ('train_11.npy', 'train_label_11.npy', 'test_11.npy')
)

# Report the array shapes as a quick sanity check on what was loaded.
train_shape, test_shape = train.shape, test.shape
print('Size of training data: {}'.format(train_shape))
print('Size of testing data: {}'.format(test_shape))

Loading data ...
Size of training data: (1229932, 429)
Size of testing data: (451552, 429)


**Create DataSet**

In [3]:
import torch
from torch.utils.data import Dataset

class TIMITDataset(Dataset):
    """In-memory dataset of feature frames with optional framewise labels.

    Args:
        X: NumPy array of features; stored as a float32 tensor in ``self.data``.
        y: Optional NumPy array of labels. When given, it is cast with
            ``astype(int)`` and stored as a ``LongTensor`` in ``self.label``.
            When omitted, ``self.label`` is ``None`` and items are features
            only.
    """

    def __init__(self, X, y=None):
        self.data = torch.from_numpy(X).float()
        self.label = None if y is None else torch.LongTensor(y.astype(int))

    def __getitem__(self, idx):
        """Return ``(features, label)`` when labels exist, else just ``features``."""
        if self.label is None:
            return self.data[idx]
        return self.data[idx], self.label[idx]

    def __len__(self):
        """Return the number of rows in the feature tensor."""
        return len(self.data)


Split Train and Validation data

In [9]:
# Fraction of the training frames held out for validation.
VAL_RATIO = 0.2

# Index of the first validation frame. The split is positional (no shuffling):
# the leading (1 - VAL_RATIO) share of rows trains, the remainder validates.
percent = int(train.shape[0] * (1 - VAL_RATIO))

train_x, val_x = train[:percent], train[percent:]
train_y, val_y = train_label[:percent], train_label[percent:]

print('Size of training set: {}'.format(train_x.shape))
print('Size of validation set: {}'.format(val_x.shape))


Size of training set: (983945, 429)
Size of validation set: (245987, 429)


**Dataloader**

In [10]:
from torch.utils.data import DataLoader

# Number of frames per mini-batch for both loaders.
BATCH_SIZE = 64

# Wrap the NumPy splits as tensor-backed datasets.
train_set = TIMITDataset(train_x, train_y)
val_set = TIMITDataset(val_x, val_y)

# Only the training loader shuffles: the optimizer then sees differently
# composed mini-batches each epoch, whereas the validation metrics are sums
# over all batches and do not depend on the order in which they are visited.
train_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
import gc

# Drop the notebook-level names for the raw arrays and their splits now that
# train_set / val_set have been built from them, then run a collection pass.
# After this cell the split cell cannot be re-run without reloading the data.
del train, train_label
del train_x, train_y
del val_x, val_y
gc.collect()

519

**Model**

In [12]:
import torch.nn as nn

class Classifier(nn.Module):
    """Fully connected classifier: 429 -> 1024 -> 512 -> 128 -> 39.

    Each of the three hidden linear layers is followed by a sigmoid. The
    output layer has no activation, so ``forward`` returns raw scores of
    size 39 per input row.
    """

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict keys; keep them stable so
        # saved checkpoints stay loadable.
        self.layer1 = nn.Linear(429, 1024)
        self.layer2 = nn.Linear(1024, 512)
        self.layer3 = nn.Linear(512, 128)
        self.out = nn.Linear(128, 39)

        self.act_fn = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the hidden stack and return the output scores."""
        hidden = x
        for hidden_layer in (self.layer1, self.layer2, self.layer3):
            hidden = self.act_fn(hidden_layer(hidden))
        return self.out(hidden)

**Training**

In [13]:
#check device
def get_device():
  """Return 'cuda' if torch reports CUDA as available, otherwise 'cpu'."""
  if torch.cuda.is_available():
    return 'cuda'
  return 'cpu'

Reproducibility

In [14]:
# fix random seed
def same_seeds(seed):
    """Seed the NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to ``np.random``, to torch's default
            generator and, when CUDA is available, to the CUDA generators.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # Seed the current device and then every visible device.
        for seed_cuda in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            seed_cuda(seed)
    # Turn off cuDNN autotuning and ask for deterministic kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [16]:
import torch
# Diagnostic: print the CUDA version reported by this PyTorch build.
# NOTE(review): presumably None here means a build without CUDA support
# (consistent with the device resolving to 'cpu') — confirm.
print(torch.version.cuda)


None


In [17]:
# training parameters
num_epoch = 20               # number of passes over the training loader
learning_rate = 0.0001       # step size handed to Adam

# the path where the checkpoint is saved
model_path = './model.ckpt'

# Seed every RNG before the model is built so the initial weights are
# reproducible from run to run.
same_seeds(0)

# Pick the compute device once; the model and every batch are moved to it.
device = get_device()
print(f'DEVICE: {device}')

# Model, loss function and optimizer.
model = Classifier()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

DEVICE: cpu


In [48]:
# start training
#
# Per-epoch history. Each entry is normalised exactly like the printed log
# line: losses are averaged over the batches of the epoch and accuracies are
# the fraction of correctly classified frames. Raw sums would not be
# comparable between the training and validation loaders because they contain
# different numbers of batches.
train_losses = []
valid_losses = []
trainacc = []
valacc = []

# Highest number of correctly classified validation frames seen so far
# (a count, not a ratio); used to decide when to write a checkpoint.
best_acc = 0.0

# Validation is skipped entirely when the validation set is empty.
has_val = len(val_set) > 0

for epoch in range(num_epoch):
    train_acc = 0.0   # correctly classified training frames this epoch
    train_loss = 0.0  # sum of per-batch training losses this epoch
    val_acc = 0.0     # correctly classified validation frames this epoch
    val_loss = 0.0    # sum of per-batch validation losses this epoch

    # training
    model.train()  # set the model to training mode
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()                    # clear gradients from the previous step
        outputs = model(inputs)                  # forward pass
        batch_loss = criterion(outputs, labels)
        _, train_pred = torch.max(outputs, 1)    # index of the highest score per row
        batch_loss.backward()                    # backward pass
        optimizer.step()                         # parameter update

        # Predictions and labels already live on the same device, so they can
        # be compared directly; .item() turns the count into a Python number.
        train_acc += (train_pred == labels).sum().item()
        train_loss += batch_loss.item()

    epoch_train_acc = train_acc / len(train_set)
    epoch_train_loss = train_loss / len(train_loader)
    train_losses.append(epoch_train_loss)
    trainacc.append(epoch_train_acc)

    # validation
    if has_val:
        model.eval()  # set the model to evaluation mode
        with torch.no_grad():  # no gradients are needed for evaluation
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                batch_loss = criterion(outputs, labels)
                _, val_pred = torch.max(outputs, 1)

                val_acc += (val_pred == labels).sum().item()
                val_loss += batch_loss.item()

        epoch_val_acc = val_acc / len(val_set)
        epoch_val_loss = val_loss / len(val_loader)
        valid_losses.append(epoch_val_loss)
        valacc.append(epoch_val_acc)

        print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f} | Val Acc: {:3.6f} loss: {:3.6f}'.format(
            epoch + 1, num_epoch, epoch_train_acc, epoch_train_loss, epoch_val_acc, epoch_val_loss
        ))

        # if the model improves, save a checkpoint at this epoch
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), model_path)
            print('saving model with acc {:.3f}'.format(best_acc/len(val_set)))
    else:
        # Keep the four history lists the same length so they can be put
        # side by side in one table; NaN marks "no validation this epoch".
        valid_losses.append(float('nan'))
        valacc.append(float('nan'))

        print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f}'.format(
            epoch + 1, num_epoch, epoch_train_acc, epoch_train_loss
        ))

# if not validating, save the last epoch
if not has_val:
    torch.save(model.state_dict(), model_path)
    print('saving model at last epoch')


[001/020] Train Acc: 0.467302 Loss: 1.811661 | Val Acc: 0.567428 loss: 1.433065
saving model with acc 0.567
[002/020] Train Acc: 0.594383 Loss: 1.330665 | Val Acc: 0.628639 loss: 1.211098
saving model with acc 0.629
[003/020] Train Acc: 0.644506 Loss: 1.154064 | Val Acc: 0.660421 loss: 1.101215
saving model with acc 0.660
[004/020] Train Acc: 0.672217 Loss: 1.052246 | Val Acc: 0.676300 loss: 1.038718
saving model with acc 0.676
[005/020] Train Acc: 0.691347 Loss: 0.983103 | Val Acc: 0.685154 loss: 1.001852
saving model with acc 0.685


KeyboardInterrupt: 

In [None]:
# pandas is not imported by any earlier cell shown in the notebook, so it is
# imported here, next to its only use.
import pandas as pd

# Gather the per-epoch history lists filled by the training loop into one
# table, one row per completed epoch (epochs are numbered from 1).
data = pd.DataFrame({
    'Epoch': range(1, len(train_losses) + 1),
    'Train_Loss': train_losses,
    'Valid_Loss': valid_losses,
    "Train_acc": trainacc,
    'Val_acc': valacc,
})

# Write the table without the DataFrame index column.
data.to_csv('testing.csv', index=False)