In [1]:
from os.path import join as path_join
from time import time

from torchvision.transforms import Compose

from config import ROOT_DIR
from dataset import MfccDataset
from transforms import PaddingSame2d, ToTensor

seq_len = 224

trans = Compose([ToTensor(),
                 PaddingSame2d(seq_len=seq_len, value=0)])

start = time()

train_set = MfccDataset(
    root=path_join(ROOT_DIR, "相同文本300"), train=True, transform=trans)

val_set = MfccDataset(
    root=path_join(ROOT_DIR, "不同文本100"), transform=trans)

test_set = MfccDataset(
    root=path_join(ROOT_DIR, "相同文本300"), train=False, transform=trans)

print('total time:', time()-start)

total time: 356.72507309913635


In [11]:
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils.rnn as rnn_utils
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
%matplotlib inline


class Flatten(nn.Module):
    """Note that batch_size is the first dimension"""

    def forward(self, x):
        return x.view(x.size(0), -1)  # [batch, seq_len*input_size]


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=13, stride=13),
            #             nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            #             nn.Dropout(0.1),
            Flatten(),
            nn.Linear(136, 6),

            nn.Softmax()
        )

    def forward(self, x):
        x = x.unsqueeze(dim=1)
        x = self.cnn(x)
        return x


class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()


# def collate_fn(batch):
#     batch.sort(key=lambda x: len(x[0]), reverse=True)
#     data = list(zip(*batch))
#     mfccs, labels = data[0], data[1]
#     mfccs = rnn_utils.pad_sequence(mfccs, batch_first=True, padding_value=0)
#     return mfccs.unsqueeze(-1), labels


def main(model):
    # hyper params
    T = 20
    learning_rate = 5e-5
    batch_size = 32
    epochs = 100
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    train_loader = DataLoader(dataset=train_set+val_set, num_workers=4,
                              shuffle=True, batch_size=batch_size)
    val_loader = DataLoader(dataset=val_set, num_workers=4,
                            shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(dataset=test_set, num_workers=4,
                             shuffle=True, batch_size=batch_size)

    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    model.to(device)

    # training
    for epoch in range(epochs):  # loop over the dataset multiple times
        print(f"epoch {epoch}")
        running_loss = 0.0
        correct = 0
        total = 0

        for i, (x, y) in enumerate(train_loader, start=0):
            # get the inputs; data is a list of [inputs, labels]
            inputs = Variable(x).type(torch.cuda.FloatTensor)
            labels = Variable(y).type(torch.cuda.LongTensor)
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

            if i % T == 0:    # print every T mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / T))
                running_loss = 0.0

        with torch.no_grad():
            for x, y in test_loader:
                test_x = x.type(torch.cuda.FloatTensor)
                labels = y.type(torch.cuda.LongTensor)
                outputs = model(test_x)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        acc = correct / total
        print('Accuracy : %d %%' % (100 * acc))
#         plt.plot(epoch, acc)
        print()

    print('Finished Training')


if __name__ == "__main__":
    cnn = CNN()
    main(cnn)

cuda:0
epoch 0
[1,     1] loss: 0.095
[1,    21] loss: 1.852
[1,    41] loss: 1.859
[1,    61] loss: 1.876
[1,    81] loss: 1.854
[1,   101] loss: 1.836
[1,   121] loss: 1.863
[1,   141] loss: 1.849
[1,   161] loss: 1.865
[1,   181] loss: 1.856
Accuracy : 17 %

epoch 1
[2,     1] loss: 0.088
[2,    21] loss: 1.832
[2,    41] loss: 1.843
[2,    61] loss: 1.850
[2,    81] loss: 1.849
[2,   101] loss: 1.833
[2,   121] loss: 1.844
[2,   141] loss: 1.857
[2,   161] loss: 1.825
[2,   181] loss: 1.850
Accuracy : 19 %

epoch 2
[3,     1] loss: 0.091
[3,    21] loss: 1.834
[3,    41] loss: 1.820
[3,    61] loss: 1.851
[3,    81] loss: 1.851
[3,   101] loss: 1.825
[3,   121] loss: 1.840
[3,   141] loss: 1.808
[3,   161] loss: 1.842
[3,   181] loss: 1.834
Accuracy : 19 %

epoch 3
[4,     1] loss: 0.088
[4,    21] loss: 1.844
[4,    41] loss: 1.804
[4,    61] loss: 1.826
[4,    81] loss: 1.833
[4,   101] loss: 1.841
[4,   121] loss: 1.846
[4,   141] loss: 1.811
[4,   161] loss: 1.811
[4,   181] lo

[32,    81] loss: 1.604
[32,   101] loss: 1.595
[32,   121] loss: 1.600
[32,   141] loss: 1.613
[32,   161] loss: 1.634
[32,   181] loss: 1.605
Accuracy : 44 %

epoch 32
[33,     1] loss: 0.084
[33,    21] loss: 1.569
[33,    41] loss: 1.611
[33,    61] loss: 1.616
[33,    81] loss: 1.599
[33,   101] loss: 1.585
[33,   121] loss: 1.622
[33,   141] loss: 1.590
[33,   161] loss: 1.641
[33,   181] loss: 1.623
Accuracy : 44 %

epoch 33
[34,     1] loss: 0.079
[34,    21] loss: 1.587
[34,    41] loss: 1.589
[34,    61] loss: 1.610
[34,    81] loss: 1.611
[34,   101] loss: 1.613
[34,   121] loss: 1.599
[34,   141] loss: 1.584
[34,   161] loss: 1.581
[34,   181] loss: 1.608
Accuracy : 45 %

epoch 34
[35,     1] loss: 0.085
[35,    21] loss: 1.597
[35,    41] loss: 1.627
[35,    61] loss: 1.592
[35,    81] loss: 1.591
[35,   101] loss: 1.590
[35,   121] loss: 1.604
[35,   141] loss: 1.601
[35,   161] loss: 1.568
[35,   181] loss: 1.579
Accuracy : 47 %

epoch 35
[36,     1] loss: 0.076
[36,    

[63,    81] loss: 1.475
[63,   101] loss: 1.479
[63,   121] loss: 1.503
[63,   141] loss: 1.499
[63,   161] loss: 1.545
[63,   181] loss: 1.503
Accuracy : 53 %

epoch 63
[64,     1] loss: 0.073
[64,    21] loss: 1.502
[64,    41] loss: 1.504
[64,    61] loss: 1.500
[64,    81] loss: 1.489
[64,   101] loss: 1.533
[64,   121] loss: 1.483
[64,   141] loss: 1.513
[64,   161] loss: 1.504
[64,   181] loss: 1.521
Accuracy : 54 %

epoch 64
[65,     1] loss: 0.074
[65,    21] loss: 1.499
[65,    41] loss: 1.481
[65,    61] loss: 1.482
[65,    81] loss: 1.526
[65,   101] loss: 1.518
[65,   121] loss: 1.519
[65,   141] loss: 1.507
[65,   161] loss: 1.498
[65,   181] loss: 1.509
Accuracy : 54 %

epoch 65
[66,     1] loss: 0.070
[66,    21] loss: 1.499
[66,    41] loss: 1.529
[66,    61] loss: 1.511
[66,    81] loss: 1.510
[66,   101] loss: 1.504
[66,   121] loss: 1.499
[66,   141] loss: 1.470
[66,   161] loss: 1.501
[66,   181] loss: 1.499
Accuracy : 54 %

epoch 66
[67,     1] loss: 0.068
[67,    

[94,    81] loss: 1.481
[94,   101] loss: 1.458
[94,   121] loss: 1.452
[94,   141] loss: 1.453
[94,   161] loss: 1.482
[94,   181] loss: 1.463
Accuracy : 56 %

epoch 94
[95,     1] loss: 0.073
[95,    21] loss: 1.477
[95,    41] loss: 1.470
[95,    61] loss: 1.448
[95,    81] loss: 1.450
[95,   101] loss: 1.452
[95,   121] loss: 1.458
[95,   141] loss: 1.485
[95,   161] loss: 1.491
[95,   181] loss: 1.422
Accuracy : 56 %

epoch 95
[96,     1] loss: 0.077
[96,    21] loss: 1.484
[96,    41] loss: 1.444
[96,    61] loss: 1.458
[96,    81] loss: 1.451
[96,   101] loss: 1.441
[96,   121] loss: 1.461
[96,   141] loss: 1.440
[96,   161] loss: 1.475
[96,   181] loss: 1.491
Accuracy : 57 %

epoch 96
[97,     1] loss: 0.072
[97,    21] loss: 1.441
[97,    41] loss: 1.472
[97,    61] loss: 1.460
[97,    81] loss: 1.481
[97,   101] loss: 1.452
[97,   121] loss: 1.480
[97,   141] loss: 1.449
[97,   161] loss: 1.488
[97,   181] loss: 1.446
Accuracy : 58 %

epoch 97
[98,     1] loss: 0.076
[98,    

<Figure size 432x288 with 0 Axes>

In [36]:
import torch
import torch.nn as nn


class Flatten(nn.Module):
    """Note that batch_size is the first dimension"""

    def forward(self, x):
        return x.view(x.size(0), -1)  # [batch, seq_len*input_size]


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=13, stride=13),
            #             nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            #             nn.Dropout(0.1),
            Flatten(),
            nn.Linear(136, 6),

            nn.Softmax()
        )

    def forward(self, x):
        x = x.unsqueeze(dim=1)
        for layer in self.cnn:
            x = layer(x)
            print(x.shape)
        return x


t = torch.Tensor(2, 13, 224)

cnn = CNN()
cnn(t)

torch.Size([2, 8, 1, 17])
torch.Size([2, 8, 1, 17])
torch.Size([2, 136])
torch.Size([2, 6])
torch.Size([2, 6])


tensor([[1., 0., 0., 0., 0., 0.],
        [nan, nan, nan, nan, nan, nan]], grad_fn=<SoftmaxBackward>)

In [7]:
len(train_set + val_set), len(test_set)

(6002, 872)