In [4]:

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# For data preprocess
import numpy as np
import csv
import os

# Utility
import gc

# One seed drives every random number generator used in this notebook.
my_seed = 0

# Seed NumPy, then PyTorch on the CPU and, when CUDA is present, on every GPU.
np.random.seed(my_seed)
torch.manual_seed(my_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(my_seed)

# Request deterministic cuDNN behaviour and switch its benchmark mode off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

print("Loading data ...")

# Feature matrices and the training labels are read from .npy files in the
# current working directory.
train_data = np.load("train_11.npy")
train_label = np.load("train_label_11.npy")
test_data = np.load("test_11.npy")

print("Size of training data: {}".format(train_data.shape))
print("Size of testing data: {}".format(test_data.shape))

# Count how many training samples belong to each class.
print("Total number {:d}".format(train_label.shape[0]))

# Labels are compared against str(i), i.e. each label is the class index as text.
train_cnt = np.zeros((39), dtype=int)
for i in range(39):
    train_cnt[i] = np.sum(train_label == str(i))

# Named `total_count` rather than `sum` so the builtin `sum` is not shadowed
# for the rest of the notebook.
total_count = np.sum(train_cnt)
print("\n   class   count    rate")
for i in range(39):
    print("{:8d}{:8d}  {:.4f}".format(i, train_cnt[i], train_cnt[i] / total_count))


class TIMITDataset(Dataset):
    """Dataset over a NumPy feature array with optional labels.

    Args:
        x: NumPy array of features; stored as a float tensor.
        y: Optional NumPy array of labels. It is cast with
            ``astype(np.int_)`` (so numeric strings are accepted) and stored
            as a ``LongTensor``. When omitted, the dataset is unlabeled.

    Indexing returns ``(features, label)`` when labels were given and just
    ``features`` otherwise.
    """

    def __init__(self, x, y=None):
        self.x = torch.from_numpy(x).float()
        self.y = None if y is None else torch.LongTensor(y.astype(np.int_))

    def __getitem__(self, index):
        features = self.x[index]
        if self.y is None:
            # Unlabeled data: there is nothing to pair the features with.
            return features
        return features, self.y[index]

    def __len__(self):
        # One item per entry along the first axis of the feature tensor.
        return len(self.x)


# Hyperparameters
valid_rate = 0.01
num_epoch = 100
learning_rate = 0.0001
weight_decay_l1 = 0.0
weight_decay_l2 = 0.001
batch_size = 2048  # originally 64, which ran slower and could not represent the distribution
number = 10000  # every class is topped up to at least this many training samples

model_path = "./model.ckpt"

# Shuffle the row indices and randomly split them into training and validation
# parts. An index array is split (instead of a Python list of ints) so no
# million-element list has to be materialised.
train_indices, valid_indices = train_test_split(np.arange(train_data.shape[0]), test_size=valid_rate,
                                                random_state=1)
train_x = train_data[train_indices, :]
train_y = train_label[train_indices]
valid_x = train_data[valid_indices, :]
valid_y = train_label[valid_indices]

# Record, for every class, the row indices into train_x that carry that label,
# so samples of a given class can be drawn later. No variable named `id` is
# used, which keeps the builtin `id` intact.
train_class = [np.flatnonzero(train_y == str(class_id)) for class_id in range(39)]

# The full arrays are no longer needed once the split copies exist.
del train_data, train_label
gc.collect()


class Classifier(nn.Module):
    """Fully connected network mapping 429 input features to 39 class logits.

    Six hidden stages of widths 2048, 2048, 2048, 1024, 512 and 128 each apply
    Linear -> ReLU -> BatchNorm1d -> Dropout(0.5). A final linear layer
    (``out``) produces the 39 logits; no softmax is applied here.

    Sub-modules are registered as ``layer1..layer6``, ``bn1..bn6``, ``out``,
    ``drop`` and ``act_fn``, so checkpoints keyed by those names load as is.
    """

    # Output width of each hidden stage, in order.
    _HIDDEN_WIDTHS = (2048, 2048, 2048, 1024, 512, 128)
    _IN_FEATURES = 429
    _NUM_CLASSES = 39

    def __init__(self):
        super().__init__()

        widths = (self._IN_FEATURES,) + self._HIDDEN_WIDTHS

        # Linear layers first (layer1..layer6), created in stage order.
        for stage, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, "layer{}".format(stage), nn.Linear(fan_in, fan_out))

        # Then one batch-norm per stage (bn1..bn6), sized to the stage output.
        for stage, width in enumerate(self._HIDDEN_WIDTHS, start=1):
            setattr(self, "bn{}".format(stage), nn.BatchNorm1d(width))

        self.out = nn.Linear(widths[-1], self._NUM_CLASSES)

        # A single dropout module and a single activation are shared by all stages.
        self.drop = nn.Dropout(0.5)
        self.act_fn = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through the six hidden stages and return the logits."""
        for stage in range(1, len(self._HIDDEN_WIDTHS) + 1):
            linear = getattr(self, "layer{}".format(stage))
            norm = getattr(self, "bn{}".format(stage))
            # Activation comes before normalisation, dropout last.
            x = self.drop(norm(self.act_fn(linear(x))))

        return self.out(x)

# check device
def get_device():
    """Return "cuda" when PyTorch reports an available CUDA device, else "cpu"."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"


def cal_regularization(model, weight_decay_l1, weight_decay_l2):
    """Return the weighted L1 + L2 penalty over all parameters of ``model``.

    Args:
        model: Module whose ``parameters()`` are penalised (every parameter is
            included).
        weight_decay_l1: Coefficient applied to the sum of absolute values.
        weight_decay_l2: Coefficient applied to the sum of squares.

    Returns:
        ``weight_decay_l1 * sum(|p|) + weight_decay_l2 * sum(p ** 2)``. This is
        a tensor when the model has parameters and a plain number otherwise.
    """
    abs_total = 0
    square_total = 0
    for param in model.parameters():
        abs_total += param.abs().sum()
        square_total += param.pow(2).sum()
    return weight_decay_l1 * abs_total + weight_decay_l2 * square_total


def train_model(num_epoch, learning_rate, weight_decay_l1, weight_decay_l2,
                train_dataset, train_dataloader,
                valid_dataset, valid_dataloader):
    """Train a fresh ``Classifier`` and write its weights to ``model_path``.

    Relies on the module-level names ``device``, ``model_path``,
    ``Classifier`` and ``cal_regularization``.

    Args:
        num_epoch: Number of passes over ``train_dataloader``.
        learning_rate: Learning rate used for both optimizers.
        weight_decay_l1: L1 coefficient forwarded to ``cal_regularization``.
        weight_decay_l2: L2 coefficient forwarded to ``cal_regularization``.
        train_dataset: Training dataset. Kept for interface compatibility;
            accuracy is normalised by the samples the loader actually yields.
        train_dataloader: Iterable of ``(inputs, labels)`` training batches.
        valid_dataset: Validation dataset; when empty, validation is skipped.
        valid_dataloader: Iterable of ``(inputs, labels)`` validation batches.

    Checkpointing: with validation data, the weights are saved each time the
    number of correct validation predictions improves. Without validation
    data, the weights of the last epoch are saved.
    """
    model = Classifier().to(device)
    criterion = nn.CrossEntropyLoss()
    has_validation = len(valid_dataset) > 0

    best_acc = 0.0
    for epoch in range(num_epoch):
        # Start with Adam (fast convergence), then switch to SGD with momentum
        # from epoch 35 on (more stable, smaller deviation).
        if epoch == 0:
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        elif epoch == 35:
            optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

        train_acc = 0.0
        train_loss = 0.0
        train_seen = 0
        val_acc = 0.0
        val_loss = 0.0

        # training
        model.train()  # set the model to training mode
        for inputs, labels in train_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)

            batch_loss = criterion(outputs, labels)
            _, train_pred = torch.max(outputs, 1)  # index of the highest-scoring class
            # The penalty only takes part in the backward pass; the reported
            # loss below is the plain cross-entropy.
            (batch_loss + cal_regularization(model, weight_decay_l1, weight_decay_l2)).backward()

            optimizer.step()

            # Compare on the device the tensors already live on.
            train_acc += (train_pred == labels).sum().item()
            train_loss += batch_loss.item()
            # Count what was really visited: a loader may skip samples
            # (e.g. drop_last=True), so len(train_dataset) can overstate it.
            train_seen += labels.size(0)

        if has_validation:
            # validation
            model.eval()  # set the model to evaluation mode
            with torch.no_grad():
                for inputs, labels in valid_dataloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    batch_loss = criterion(outputs, labels)
                    _, val_pred = torch.max(outputs, 1)  # index of the highest-scoring class

                    val_acc += (val_pred == labels).sum().item()
                    val_loss += batch_loss.item()

            print("[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f} | Val Acc: {:3.6f} loss: {:3.6f}".format(
                epoch + 1, num_epoch, train_acc / train_seen, train_loss / len(train_dataloader),
                val_acc / len(valid_dataset), val_loss / len(valid_dataloader)
            ))

            # if the model improves, save a checkpoint at this epoch
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model.state_dict(), model_path)
                print("saving model with acc {:.3f}".format(best_acc / len(valid_dataset)))
        else:
            # This branch previously referenced the undefined name
            # `train_dataloaders` and raised NameError.
            print("[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f}".format(
                epoch + 1, num_epoch, train_acc / train_seen, train_loss / len(train_dataloader)
            ))

    # if not validating, save the last epoch
    if not has_validation:
        torch.save(model.state_dict(), model_path)
        print("saving model at last epoch")


# Pick the compute device used by the model and by every batch.
device = get_device()
print(f"DEVICE: {device}")

valid_dataset = TIMITDataset(valid_x, valid_y)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

# The dataset holds its own tensors, so the NumPy copies can be released.
del valid_x, valid_y
gc.collect()

# Oversample: every class with fewer than `number` training rows is topped up
# to `number` by drawing (with replacement) from its own rows.
print("Sample data:")
print("\n   class   count")
extra_rows = []
extra_labels = []
for class_id, class_rows in enumerate(train_class):
    shortfall = number - class_rows.shape[0]
    if shortfall > 0:
        print("{:8d}{:8d}".format(class_id, shortfall))
        extra_rows.append(np.random.choice(class_rows, size=shortfall))
        # Labels are written in the same textual form and dtype as train_y,
        # instead of appending an uninitialised integer array and patching it.
        extra_labels.append(np.full(shortfall, str(class_id), dtype=train_y.dtype))

# Append all drawn rows in one concatenation; stacking inside the loop would
# copy the whole feature matrix once per under-represented class.
if extra_rows:
    train_x = np.concatenate([train_x, train_x[np.concatenate(extra_rows)]], axis=0)
    train_y = np.concatenate([train_y] + extra_labels)

print("\n", train_x.shape, train_y.shape)
train_dataset = TIMITDataset(train_x, train_y)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

train_model(num_epoch, learning_rate, weight_decay_l1, weight_decay_l2, train_dataset, train_dataloader, valid_dataset,
            valid_dataloader)

del train_x, train_y, train_dataset, train_dataloader, valid_dataset, valid_dataloader
gc.collect()



Loading data ...
Size of training data: (1229932, 429)
Size of testing data: (451552, 429)
Total number 1229932

   class   count    rate
       0   62708  0.0510
       1   83746  0.0681
       2   35048  0.0285
       3   59031  0.0480
       4   38930  0.0317
       5   26380  0.0214
       6    4038  0.0033
       7   73827  0.0600
       8   28797  0.0234
       9   34289  0.0279
      10   11028  0.0090
      11   11711  0.0095
      12   26790  0.0218
      13   43410  0.0353
      14   39583  0.0322
      15   11342  0.0092
      16   20922  0.0170
      17   51533  0.0419
      18   24938  0.0203
      19   47059  0.0383
      20    8508  0.0069
      21    7083  0.0058
      22    7050  0.0057
      23   10663  0.0087
      24    3883  0.0032
      25    8219  0.0067
      26    7825  0.0064
      27    6059  0.0049
      28   11492  0.0093
      29   21012  0.0171
      30   25094  0.0204
      31   31618  0.0257
      32   12003  0.0098
      33   22907  0.0186
      34    

KeyboardInterrupt: 

In [7]:
# The test set has no labels, so every item is a feature tensor only.
test_dataset = TIMITDataset(test_data, None)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Restore the saved weights. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a GPU also loads on a CPU-only
# machine.
model = Classifier().to(device)
model.load_state_dict(torch.load(model_path, map_location=device))

predict = []
model.eval()  # evaluation mode for inference

with torch.no_grad():
    for inputs in test_dataloader:
        outputs = model(inputs.to(device))
        _, test_pred = torch.max(outputs, 1)  # index of the highest-scoring class
        predict.extend(test_pred.cpu().numpy())

# One "Id,Class" row per test sample, in dataset order.
with open("prediction.csv", 'w') as f:
    f.write("Id,Class\n")
    for i, y in enumerate(predict):
        f.write("{},{}\n".format(i, y))
print('done')

done
