In [1]:
import logging
from os.path import join as path_join
from time import time

from joblib import Parallel
from torchvision.transforms import Compose, Resize

from config import ROOT_DIR
from dataset import MfccDataset
from transforms import *

seq_len = 224

trans = Compose([ToTensor(),
                 PaddingSame2d(seq_len=seq_len, value=0)])

start = time()

train_set = MfccDataset(
    root=path_join(ROOT_DIR, "相同文本300"), train=True, transform=trans, n_jobs=-1)

val_set = MfccDataset(
    root=path_join(ROOT_DIR, "不同文本100"), transform=trans, n_jobs=-1)

test_set = MfccDataset(
    root=path_join(ROOT_DIR, "相同文本300"), train=False, transform=trans, n_jobs=-1)

print('total time:', time()-start)

print(train_set)
print(val_set)
print(test_set)

total time: 277.5007576942444
A mfcc dataset with length 3607
A mfcc dataset with length 2395
A mfcc dataset with length 872


**多线程读取大约能节省100s**

In [2]:
from collections import OrderedDict

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils.rnn as rnn_utils
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

%matplotlib inline

torch.manual_seed(17377191)


class Flatten(nn.Module):
    """Note that batch_size is the first dimension"""

    def forward(self, x):
        return x.view(x.size(0), -1)  # [batch, seq_len*input_size]


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=13, stride=13),
            #             nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            #             nn.Dropout(0.1),
            Flatten(),
            nn.Linear(136, 6),

            nn.Softmax()
        )

    def forward(self, x):
        x = x.unsqueeze(dim=1)
        x = self.cnn(x)
        return x

# the training loop


def main(model):
    # hyper params
    T = 20
    learning_rate = 5e-5
    batch_size = 32
    epochs = 100
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Now using {device}")

    train_loader = DataLoader(dataset=train_set+val_set, num_workers=4,
                              shuffle=True, batch_size=batch_size)
    val_loader = DataLoader(dataset=val_set, num_workers=4,
                            shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(dataset=test_set, num_workers=4,
                             shuffle=True, batch_size=batch_size)

    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    model.to(device)

    # training
    for epoch in range(epochs):  # loop over the dataset multiple times
        print(f"epoch {epoch}")
        running_loss = 0.0
        correct = 0
        total = 0

        for i, (x, y) in enumerate(train_loader, start=0):
            # get the inputs; data is a list of [inputs, labels]
            inputs = Variable(x).type(torch.cuda.FloatTensor)
            labels = Variable(y).type(torch.cuda.LongTensor)
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

            if i % T == 0:    # print every T mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / T))
                running_loss = 0.0

        with torch.no_grad():
            for x, y in test_loader:
                test_x = x.type(torch.cuda.FloatTensor)
                labels = y.type(torch.cuda.LongTensor)
                outputs = model(test_x)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        acc = correct / total
        print('Accuracy : %d %%' % (100 * acc))
#         plt.plot(epoch, acc)
        print()

    print('Finished Training')


if __name__ == "__main__":
    cnn = CNN()
    main(cnn)

Now using cuda:0
epoch 0
[1,     1] loss: 0.097
[1,    21] loss: 1.879
[1,    41] loss: 1.875
[1,    61] loss: 1.846
[1,    81] loss: 1.851
[1,   101] loss: 1.867
[1,   121] loss: 1.865
[1,   141] loss: 1.835
[1,   161] loss: 1.841
[1,   181] loss: 1.867
Accuracy : 18 %

epoch 1
[2,     1] loss: 0.096
[2,    21] loss: 1.853
[2,    41] loss: 1.831
[2,    61] loss: 1.842
[2,    81] loss: 1.867
[2,   101] loss: 1.827
[2,   121] loss: 1.830
[2,   141] loss: 1.825
[2,   161] loss: 1.834
[2,   181] loss: 1.829
Accuracy : 22 %

epoch 2
[3,     1] loss: 0.088
[3,    21] loss: 1.827
[3,    41] loss: 1.831
[3,    61] loss: 1.815
[3,    81] loss: 1.813
[3,   101] loss: 1.819
[3,   121] loss: 1.823
[3,   141] loss: 1.820
[3,   161] loss: 1.798
[3,   181] loss: 1.827
Accuracy : 24 %

epoch 3
[4,     1] loss: 0.090
[4,    21] loss: 1.819
[4,    41] loss: 1.787
[4,    61] loss: 1.823
[4,    81] loss: 1.796
[4,   101] loss: 1.788
[4,   121] loss: 1.819
[4,   141] loss: 1.775
[4,   161] loss: 1.794
[4,

[32,    81] loss: 1.633
[32,   101] loss: 1.629
[32,   121] loss: 1.662
[32,   141] loss: 1.637
[32,   161] loss: 1.677
[32,   181] loss: 1.661
Accuracy : 44 %

epoch 32
[33,     1] loss: 0.081
[33,    21] loss: 1.663
[33,    41] loss: 1.679
[33,    61] loss: 1.654
[33,    81] loss: 1.665
[33,   101] loss: 1.637
[33,   121] loss: 1.642
[33,   141] loss: 1.660
[33,   161] loss: 1.633
[33,   181] loss: 1.650
Accuracy : 44 %

epoch 33
[34,     1] loss: 0.082
[34,    21] loss: 1.643
[34,    41] loss: 1.648
[34,    61] loss: 1.650
[34,    81] loss: 1.662
[34,   101] loss: 1.643
[34,   121] loss: 1.645
[34,   141] loss: 1.696
[34,   161] loss: 1.648
[34,   181] loss: 1.627
Accuracy : 44 %

epoch 34
[35,     1] loss: 0.079
[35,    21] loss: 1.660
[35,    41] loss: 1.645
[35,    61] loss: 1.626
[35,    81] loss: 1.648
[35,   101] loss: 1.668
[35,   121] loss: 1.621
[35,   141] loss: 1.654
[35,   161] loss: 1.665
[35,   181] loss: 1.671
Accuracy : 45 %

epoch 35
[36,     1] loss: 0.092
[36,    

[63,    81] loss: 1.592
[63,   101] loss: 1.594
[63,   121] loss: 1.562
[63,   141] loss: 1.587
[63,   161] loss: 1.572
[63,   181] loss: 1.568
Accuracy : 49 %

epoch 63
[64,     1] loss: 0.079
[64,    21] loss: 1.570
[64,    41] loss: 1.559
[64,    61] loss: 1.574
[64,    81] loss: 1.563
[64,   101] loss: 1.579
[64,   121] loss: 1.578
[64,   141] loss: 1.564
[64,   161] loss: 1.562
[64,   181] loss: 1.592
Accuracy : 50 %

epoch 64
[65,     1] loss: 0.076
[65,    21] loss: 1.593
[65,    41] loss: 1.570
[65,    61] loss: 1.588
[65,    81] loss: 1.560
[65,   101] loss: 1.569
[65,   121] loss: 1.575
[65,   141] loss: 1.557
[65,   161] loss: 1.562
[65,   181] loss: 1.563
Accuracy : 49 %

epoch 65
[66,     1] loss: 0.076
[66,    21] loss: 1.549
[66,    41] loss: 1.558
[66,    61] loss: 1.572
[66,    81] loss: 1.566
[66,   101] loss: 1.564
[66,   121] loss: 1.561
[66,   141] loss: 1.581
[66,   161] loss: 1.588
[66,   181] loss: 1.548
Accuracy : 50 %

epoch 66
[67,     1] loss: 0.073
[67,    

[94,    81] loss: 1.563
[94,   101] loss: 1.514
[94,   121] loss: 1.508
[94,   141] loss: 1.535
[94,   161] loss: 1.494
[94,   181] loss: 1.484
Accuracy : 53 %

epoch 94
[95,     1] loss: 0.066
[95,    21] loss: 1.511
[95,    41] loss: 1.518
[95,    61] loss: 1.491
[95,    81] loss: 1.532
[95,   101] loss: 1.512
[95,   121] loss: 1.547
[95,   141] loss: 1.532
[95,   161] loss: 1.519
[95,   181] loss: 1.503
Accuracy : 53 %

epoch 95
[96,     1] loss: 0.075
[96,    21] loss: 1.519
[96,    41] loss: 1.524
[96,    61] loss: 1.505
[96,    81] loss: 1.529
[96,   101] loss: 1.482
[96,   121] loss: 1.530
[96,   141] loss: 1.512
[96,   161] loss: 1.529
[96,   181] loss: 1.538
Accuracy : 53 %

epoch 96
[97,     1] loss: 0.078
[97,    21] loss: 1.487
[97,    41] loss: 1.513
[97,    61] loss: 1.532
[97,    81] loss: 1.535
[97,   101] loss: 1.507
[97,   121] loss: 1.513
[97,   141] loss: 1.509
[97,   161] loss: 1.518
[97,   181] loss: 1.512
Accuracy : 53 %

epoch 97
[98,     1] loss: 0.069
[98,    

In [3]:
model_path = "./pickles/cnn.pkl"
torch.save(cnn.state_dict(), model_path)
print('Saved at', model_path)

Saved at ./pickles/cnn.pkl


In [7]:
cnn.state_dict().get('cnn.0.weight').shape

torch.Size([8, 1, 13, 13])

In [19]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=13, stride=13),
            #             nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            #             nn.Dropout(0.1),
            Flatten(),
            nn.Linear(136, 6),

            nn.Softmax()
        )

    def forward(self, x):
        x = x.unsqueeze(dim=1)
#         x = self.cnn(x)
        for layer in self.cnn:
            x = layer(x)
            print(x.shape)
        return x


test_cnn = CNN()
test_cnn(torch.Tensor(13, 224).unsqueeze(dim=0))

torch.Size([1, 8, 1, 17])
torch.Size([1, 8, 1, 17])
torch.Size([1, 136])
torch.Size([1, 6])
torch.Size([1, 6])


tensor([[nan, nan, nan, nan, nan, nan]], grad_fn=<SoftmaxBackward>)

In [14]:
train_loader = iter(DataLoader(dataset=train_set+val_set, num_workers=4,
                               shuffle=True, batch_size=1))
next(train_loader)[0].shape

torch.Size([1, 13, 224])

In [21]:
torch.Tensor(13, 224).unsqueeze(dim=0).shape

torch.Size([1, 13, 224])