In [17]:
# pip install pysoundfile
def load_data(data_dir='data/superb'):
    """Load every ``.wav`` file directly inside ``data_dir``.

    The label of each clip is the first character of its file name parsed
    as an integer (for example ``0_11.wav`` -> ``0``).

    Args:
        data_dir: Directory to scan (not recursive). Defaults to the
            ``data/superb`` folder used throughout this notebook.

    Returns:
        A tuple ``(xs, ys)``. ``xs`` is a list with one tensor per file: the
        first element returned by ``torchaudio.load`` with dims 0 and 1
        swapped (``[4000, 1]`` for this dataset, per the cell output).
        ``ys`` is the list of integer labels in the same order.

    Raises:
        ValueError: If a ``.wav`` file name does not start with a digit.
    """
    import os
    import torchaudio

    xs = []
    ys = []

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order reproducible across runs and machines.
    for filename in sorted(os.listdir(data_dir)):
        if filename.endswith('.wav'):
            # torchaudio.load returns (data, sample_rate); the rate is unused.
            x, _ = torchaudio.load(os.path.join(data_dir, filename))
            xs.append(x.transpose(0, 1))
            ys.append(int(filename[0]))

    return xs, ys
    
    
# Load the dataset exactly once; calling load_data() again re-reads every file
# from disk, so the loaded lists are inspected instead.
xs, ys = load_data()
len(xs), len(ys), xs[0].shape, ys[0]

(23000, 23000, torch.Size([4000, 1]), 0)

In [19]:
import  torch
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing each entry of ``xs`` with the entry of ``ys``
    at the same position."""

    def __init__(self, xs, ys):
        """Keep references to the two parallel sequences (no copy is made)."""
        self.xs, self.ys = xs, ys

    def __len__(self):
        """Number of samples, taken from the length of ``xs``."""
        sample_count = len(self.xs)
        return sample_count

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair stored at ``index``."""
        sample = self.xs[index]
        label = self.ys[index]
        return sample, label
    
# Wrap the loaded lists in a Dataset and check the shape of the first sample.
dataset = Dataset(xs, ys)
dataset[0][0].shape

torch.Size([4000, 1])

In [47]:
# Shuffled mini-batches of 32 samples; drop_last=True discards the final
# incomplete batch so every batch has exactly 32 items.
loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True,drop_last=True)

# Number of batches per epoch and the shape of one input batch.
len(loader), next(iter(loader))[0].shape

(718, torch.Size([32, 4000, 1]))

In [48]:
class Model(torch.nn.Module):
    """Four RNN stages, each followed by ReLU and average pooling over time,
    then a linear layer producing 10 logits.

    Expects input of shape (batch, 4000, 1). The final linear layer has 640
    input features (5 time steps * 128 channels), so the sequence length must
    reduce to exactly 5 after the four pooling steps.

    Shapes for a batch of 32, where each pooling step maps a length L to
    (L - 7) // 5 + 1:

        input                 32 x 4000 x 1
        rnn1 + relu + pool    32 x  799 x 16     (4000 - 7) // 5 + 1 = 799
        rnn2 + relu + pool    32 x  159 x 32     (799 - 7) // 5 + 1 = 159
        rnn3 + relu + pool    32 x   31 x 64     (159 - 7) // 5 + 1 = 31
        rnn4 + relu + pool    32 x    5 x 128    (31 - 7) // 5 + 1 = 5
        flatten               32 x  640
        fc                    32 x   10
    """

    def __init__(self):
        super().__init__()
        # Sub-modules are registered in the order rnn1, pool, rnn2, rnn3,
        # rnn4, fc, relu so that parameter ordering and naming are unchanged.
        self.rnn1 = torch.nn.RNN(input_size=1, hidden_size=16, batch_first=True)
        # One parameter-free pooling layer is shared by all four stages.
        self.pool = torch.nn.AvgPool1d(kernel_size=7, stride=5)
        self.rnn2 = torch.nn.RNN(input_size=16, hidden_size=32, batch_first=True)
        self.rnn3 = torch.nn.RNN(input_size=32, hidden_size=64, batch_first=True)
        self.rnn4 = torch.nn.RNN(input_size=64, hidden_size=128, batch_first=True)
        self.fc = torch.nn.Linear(in_features=640, out_features=10)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Map a (batch, 4000, 1) tensor to (batch, 10) class logits."""
        for recurrent in (self.rnn1, self.rnn2, self.rnn3, self.rnn4):
            # Keep the per-step outputs; the final hidden state is discarded.
            step_outputs, _ = recurrent(x)
            activated = self.relu(step_outputs)
            # AvgPool1d pools over the last dim, so move time there and back.
            x = self.pool(activated.transpose(1, 2)).transpose(1, 2)
        # (batch, 5, 128) -> (batch, 640) -> (batch, 10)
        return self.fc(x.flatten(start_dim=1))
    
# Instantiate the model and smoke-test it on a random batch with the expected
# input shape (batch=32, length=4000, channels=1).
model = Model()
model(torch.randn(32, 4000, 1)).shape

torch.Size([32, 10])

In [49]:
def train(epochs=10, lr=0.001, log_every=1000, save_path='model/6.model'):
    """Train the module-level ``model`` on ``loader`` and save it to disk.

    Uses Adam with cross-entropy loss. The whole module object (not just its
    state dict) is written with ``torch.save``.

    Args:
        epochs: Number of passes over ``loader``.
        lr: Learning rate for Adam.
        log_every: Print epoch, batch index, loss and batch accuracy every
            this many batches. The default of 1000 exceeds the 718 batches
            per epoch shown earlier in this notebook, so only batch 0 of
            each epoch is logged.
        save_path: Destination file for the trained model.
    """
    import os

    # Fall back to CPU so the notebook still runs on machines without a GPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()
    model.to(device)
    model.train()

    for epoch in range(epochs):
        for i, (x, y) in enumerate(loader):
            x = x.to(device)
            y = y.to(device)
            out = model(x)
            loss = criterion(out, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % log_every == 0:
                # Accuracy on the current mini-batch only.
                acc = (out.argmax(dim=1) == y).sum().item() / len(y)
                print(epoch, i, loss.item(), acc)

    # torch.save does not create missing parent directories.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    torch.save(model, save_path)
    
# Train the module-level model on `loader`; the trained model is saved to disk.
train()

0 0 2.298471689224243 0.125
1 0 1.5890699625015259 0.46875
2 0 1.1918052434921265 0.53125
3 0 0.8085414171218872 0.78125
4 0 0.9860033392906189 0.65625
5 0 0.7360809445381165 0.8125
6 0 0.6946264505386353 0.6875
7 0 0.910082221031189 0.5
8 0 0.47498610615730286 0.84375
9 0 0.494581401348114 0.75


In [50]:
@torch.no_grad()
def test(max_batches=100, model_path='model/6.model'):
    """Load the saved model and print its accuracy on batches from ``loader``.

    NOTE(review): ``loader`` is the same loader used for training, so the
    printed number is training accuracy, not held-out accuracy.

    Args:
        max_batches: Upper bound on the number of batches evaluated. Fewer
            are used if ``loader`` yields fewer.
        model_path: File previously written by ``torch.save(model, ...)``.
    """
    from itertools import islice

    # Fall back to CPU so evaluation also works on machines without a GPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # NOTE(review): this unpickles a full Python object, so only load files
    # you trust. PyTorch 2.6+ defaults torch.load to weights_only=True; on
    # those versions pass weights_only=False here to load a whole pickled
    # module.
    model = torch.load(model_path, map_location=device)
    model.to(device)
    model.eval()

    correct = 0
    total = 0
    # Iterate over a single pass of the loader. Calling next(iter(loader)) in
    # a loop would build a new iterator (and reshuffle) on every step and
    # could evaluate the same samples several times.
    for x, y in islice(loader, max_batches):
        x = x.to(device)
        y = y.to(device)

        out = model(x).argmax(dim=1)

        correct += (out == y).sum().item()
        total += len(y)

    if total == 0:
        # Avoid ZeroDivisionError when the loader yields nothing.
        print('no samples evaluated')
        return

    print(correct / total)


# Evaluate the saved model and print its accuracy.
test()

0.7409375
