In [45]:
import numpy as np
import librosa as lb
import pandas as pd
import random
import heapq
import time

from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn.model_selection import KFold,StratifiedKFold
import os
import gc
import warnings
warnings.filterwarnings('ignore')

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, SequentialSampler
from torch.utils.tensorboard import SummaryWriter
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
torch.__version__

'1.6.0'

In [3]:
def setup_seed(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy's global RNG and PyTorch (CPU and all
    CUDA devices), and configures cuDNN for repeatable kernel selection.

    Args:
        seed: integer seed applied to all generators.
    """
    # NOTE: assigning PYTHONHASHSEED at runtime does not change hashing in the
    # already-running interpreter; it is only inherited by child processes.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN autotuning can select different algorithms from run to run, so it
    # is switched off together with enabling deterministic kernels.
    torch.backends.cudnn.benchmark = False


GLOBAL_SEED = 42
setup_seed(GLOBAL_SEED)

In [4]:
# Input locations.
train_path = './data/train'
test_path = './data/test'
feat_path = './features'

# Output locations (predictions, checkpoints, TensorBoard event files).
res_path = './res'
model_save = './model_save'
tensorboard_path = './tb_run'

# exist_ok=True makes re-running this cell a no-op and removes the
# check-then-create race of a separate os.path.exists() test.
for out_dir in (model_save, res_path, tensorboard_path):
    os.makedirs(out_dir, exist_ok=True)

## 读取提取好的特征数据

In [5]:
# Open the two pre-extracted feature archives ('melspec_256' and
# 'basic_feature') stored under feat_path.
melspec = np.load(os.path.join(feat_path, 'melspec_256.npz'))
basic_feature = np.load(os.path.join(feat_path, 'basic_feature.npz'))

In [6]:
# Join both feature sets along the last (feature) axis.
X = np.concatenate([melspec['X'], basic_feature['X']], axis=-1)
y_train = melspec['y']
test_names = melspec['test_names']

# Training rows come first, followed by the test rows. Derive the boundary
# from the label array instead of repeating a magic row count.
n_train = len(y_train)
assert X.shape[0] == n_train + len(test_names), \
    'feature rows must equal number of training labels plus number of test files'
X_train = X[:n_train]
X_test = X[n_train:]

# NOTE(review): os.listdir returns entries in arbitrary order; confirm this
# ordering matches the class indices stored in melspec['y'].
labels = os.listdir(train_path)

In [7]:
# Drop the references to the loaded archives and trigger a garbage-collection
# pass; the arrays needed later were already copied out of them.
del melspec, basic_feature
gc.collect()

134

## 准备数据集

In [8]:
class CustomDataset(Dataset):
    """Map-style dataset that pairs each sample with the label at the same position.

    Attributes are stored as given; indexing is delegated to the underlying
    containers, so anything supporting ``len`` and ``[]`` works.
    """

    def __init__(self, X, y):
        # Keep references only; no copy or conversion is performed.
        self.X, self.y = X, y

    def __len__(self):
        """Number of samples, taken from the feature container."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

In [9]:
# Batch sizes for the three loader roles.
BATCH_SIZE_TRAIN = 256
BATCH_SIZE_VAL = 256
BATCH_SIZE_TEST = 256

# Stratified 5-fold split; seeded so fold membership is reproducible.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=GLOBAL_SEED)
data_folds = []     # one (train_loader, valid_loader, test_loader) tuple per fold
valid_indexs = []   # row indices of the validation part of each fold

# The test set has no labels; random placeholders only satisfy the (X, y)
# interface of CustomDataset and are never used as targets.
test_dataset = CustomDataset(X_test, np.random.rand(X_test.shape[0], 1))
test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE_TEST,
    sampler=SequentialSampler(test_dataset),
    shuffle=False,
    num_workers=0,
)

for idx, (train_index, valid_index) in enumerate(kf.split(X=X_train, y=y_train)):
    train_dataset = CustomDataset(X_train[train_index], y_train[train_index])
    val_dataset = CustomDataset(X_train[valid_index], y_train[valid_index])

    # Only the training loader shuffles; validation keeps row order so that
    # predictions can be written back through valid_index.
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE_TRAIN,
        shuffle=True,
        num_workers=0,
    )
    valid_dataloader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE_VAL,
        sampler=SequentialSampler(val_dataset),
        shuffle=False,
        num_workers=0,
    )

    valid_indexs.append(valid_index)
    # The same test loader object is shared by every fold.
    data_folds.append((train_dataloader, valid_dataloader, test_dataloader))

## 搭建模型

In [10]:
class BiLSTM(nn.Module):
    """Bidirectional LSTM classifier with max-over-time pooling and an MLP head.

    Args:
        feature_dim: size of the last axis of the input (LSTM ``input_size``).
        lstm_size: hidden size of the LSTM per direction.
        fc1: width of the first fully connected layer.
        fc2: width of the second fully connected layer.
        num_layers: number of stacked LSTM layers.
        rnn_dropout: dropout between stacked LSTM layers. It has no effect
            with a single layer, so it is forced to 0 in that case to avoid
            PyTorch's warning.
        embedding_dropout: dropout applied to the input before the LSTM.
        fc_dropout: dropout applied after the first fully connected layer.
        num_classes: number of output logits (defaults to the original 30).
    """

    def __init__(self, feature_dim, lstm_size, fc1, fc2, num_layers=1, rnn_dropout=0.2,
                 embedding_dropout=0.2, fc_dropout=0.2, num_classes=30):
        super().__init__()
        self.lstm = nn.LSTM(input_size=feature_dim,
                            hidden_size=lstm_size,
                            num_layers=num_layers,
                            bidirectional=True,
                            batch_first=True,
                            dropout=rnn_dropout if num_layers > 1 else 0.0)

        # The LSTM is bidirectional, so its output has 2 * lstm_size channels.
        self.fc1 = nn.Linear(2 * lstm_size, fc1)
        self.fc2 = nn.Linear(fc1, fc2)
        self.fc3 = nn.Linear(fc2, num_classes)

        # rnn_dropout is kept as an attribute for compatibility; forward()
        # does not apply it.
        self.rnn_dropout = nn.Dropout(rnn_dropout)
        self.embedding_dropout = nn.Dropout(embedding_dropout)
        self.fc_dropout = nn.Dropout(fc_dropout)

    def forward(self, X):
        """Compute class logits.

        Args:
            X: tensor of shape (batch, time, feature_dim).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised logits.
        """
        X = self.embedding_dropout(X)
        lstm_output, _ = self.lstm(X)
        # lstm_output shape: (batch, time, 2 * lstm_size).
        # Reduce over the time axis only. The previous bare .squeeze() also
        # removed the batch axis whenever a batch held a single sample.
        max_output = lstm_output.max(dim=1)[0]
        # max_output shape: (batch, 2 * lstm_size)
        fc_out = F.relu(self.fc1(max_output))
        fc_out = self.fc_dropout(fc_out)
        fc_out = F.relu(self.fc2(fc_out))
        pred = self.fc3(fc_out)
        return pred

In [11]:
def validate(model, val_dataloader, criterion, history, n_iters):
    """Evaluate ``model`` on the validation loader and keep the best checkpoints.

    Loss and accuracy are averaged per sample, so a smaller final batch does
    not get the same weight as a full one. Both values are logged through the
    module-level ``writer``, which must exist before this function is called.

    Args:
        model: network to evaluate; it is switched to eval mode here.
        val_dataloader: iterable yielding ``(X, y)`` batches.
        criterion: callable ``(logits, targets) -> scalar loss tensor``.
            The per-sample weighting assumes it returns a batch mean.
        history: dict whose ``'best_model'`` entry is a list of
            ``(accuracy, checkpoint_path)`` tuples used as a min-heap.
        n_iters: global step used for the TensorBoard scalars.

    Returns:
        ``(mean_loss, mean_accuracy)`` over the validation samples
        (both NaN when the loader yields no samples).
    """
    model.eval()
    # Run on whatever device the model lives on instead of assuming CUDA.
    device = next(model.parameters()).device
    loss_sum = 0.0
    n_correct = 0
    n_samples = 0
    with torch.no_grad():
        for X, y in val_dataloader:
            X, y = X.to(device), y.to(device).long()
            y_output = model(X)
            n_batch = y.size(0)
            loss_sum += criterion(y_output, y).item() * n_batch
            _, y_preds = torch.max(y_output, 1)
            n_correct += (y_preds == y).sum().item()
            n_samples += n_batch

    if n_samples > 0:
        mean_accs = n_correct / n_samples
        mean_costs = loss_sum / n_samples
    else:
        mean_accs = mean_costs = float('nan')

    writer.add_scalar('age/validate_accuracy', mean_accs, n_iters)
    writer.add_scalar('age/validate_loss', mean_costs, n_iters)

    best_models = history['best_model']
    # Ensure the heap invariant before reading the current minimum.
    heapq.heapify(best_models)
    if mean_accs > best_models[0][0]:
        # Reuse the file of the weakest retained checkpoint: the new entry
        # replaces it in the heap and overwrites it on disk.
        checkpoint_pth = best_models[0][1]
        heapq.heappushpop(best_models, (mean_accs, checkpoint_pth))
        torch.save(model.state_dict(), checkpoint_pth)
    return mean_costs, mean_accs


def train(model, train_dataloader, val_dataloader, criterion, optimizer, epoch, history, validate_points, scheduler, step=True):
    """Train ``model`` for one epoch, validating at selected batch indices.

    Per-batch loss, accuracy and the current learning rate are logged through
    the module-level ``writer``, which must exist before this is called.

    Args:
        model: network to train; it is switched to train mode here.
        train_dataloader: loader yielding ``(X, y)`` batches; must expose
            ``.dataset`` and ``len()``.
        val_dataloader: loader forwarded to ``validate``.
        criterion: callable ``(logits, targets) -> scalar loss tensor``.
        optimizer: optimizer updating ``model``'s parameters.
        epoch: 1-based epoch number, used for the global step and the
            progress-bar label.
        history: checkpoint bookkeeping dict forwarded to ``validate``.
        validate_points: batch indices after which validation is run.
        scheduler: learning-rate scheduler, or ``None`` for a fixed rate.
        step: when true, the scheduler is advanced after every batch.
    """
    model.train()
    # Run on whatever device the model lives on instead of assuming CUDA.
    device = next(model.parameters()).device
    costs = []
    accs = []
    val_loss, val_acc = 0, 0
    steps_per_epoch = len(train_dataloader)
    with tqdm(total=len(train_dataloader.dataset), desc='Epoch{}'.format(epoch)) as pbar:
        for idx, (X, y) in enumerate(train_dataloader):
            X, y = X.to(device), y.to(device).long()
            y_output = model(X)
            loss = criterion(y_output, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step and scheduler is not None:
                scheduler.step()

            costs.append(loss.item())
            batch_preds = y_output.detach().argmax(dim=1)
            accs.append((batch_preds == y).float().mean().item())
            pbar.update(y.size(0))

            n_iters = idx + steps_per_epoch * (epoch - 1)
            if idx in validate_points:
                val_loss, val_acc = validate(model, val_dataloader, criterion, history, n_iters)
                # validate() leaves the model in eval mode.
                model.train()

            writer.add_scalar('age/train_accuracy', accs[-1], n_iters)
            writer.add_scalar('age/train_loss', costs[-1], n_iters)
            # Read the rate the optimizer is actually using. This replaces the
            # deprecated scheduler.get_lr() and also works without a scheduler.
            writer.add_scalar('age/learning_rate', optimizer.param_groups[0]['lr'], n_iters)
            pbar.set_postfix_str('loss:{:.4f}, acc:{:.4f}, val-loss:{:.4f}, val-acc:{:.4f}'.format(np.mean(costs[-10:]), np.mean(accs[-10:]), val_loss, val_acc))

    # Release cached GPU blocks once per epoch; doing it after every batch
    # only slows training down.
    torch.cuda.empty_cache()

    
def test(oof_train_test, model, test_dataloader, val_dataloader, valid_index, weight=1):
    model.eval()
    y_preds = []
    y_preds_val = []
    with torch.no_grad():
        for idx, batch in enumerate(test_dataloader):
            X, _ = batch
            X= X.cuda()
            y_output = model(X)    
            y_preds.append(y_output.cpu())
            
        for idx, batch in enumerate(val_dataloader):
            X, y = batch
            X = X.cuda()
            y_output = model(X)
            y_preds_val.append(y_output.cpu())
    
    oof_train_test[valid_index] += F.softmax(torch.cat(y_preds_val)).numpy() * weight
    oof_train_test[57886:] += F.softmax(torch.cat(y_preds)).numpy() * weight

In [12]:
def criterion(y_output, y_true):
    """Mean cross-entropy between raw logits ``y_output`` and class indices ``y_true``."""
    return F.cross_entropy(y_output, y_true)

res_folds = []   # per-fold prediction matrices (validation rows + test rows)
acc_folds = []   # best validation accuracy reached in each fold
model_name = 'lstm_base'
best_checkpoint_num = 3
# Ensemble weights for the retained checkpoints, best first.
checkpoint_weights = [0.5, 0.3, 0.2]
epochs = 30

for idx, (train_dataloader, val_dataloader, test_dataloader) in enumerate(data_folds):
    oof_train_test = np.zeros((X_train.shape[0] + X_test.shape[0], 30))

    # Pre-fill the checkpoint heap with accuracy-0 placeholders so that the
    # first validations always claim one of the checkpoint files.
    history = {'best_model': []}
    for i in range(best_checkpoint_num):
        history['best_model'].append((0, os.path.join(model_save, '{}_checkpoint_fold_{}_{}.pth'.format(model_name, idx, i))))

    # Validate after the middle and after the last batch of every epoch.
    validate_points = list(np.linspace(0, len(train_dataloader)-1, 3).astype(int))[1:]

    # Take the input width from the data instead of hard-coding it.
    model = BiLSTM(feature_dim=X_train.shape[-1], lstm_size=512, fc1=512, fc2=256, num_layers=2, rnn_dropout=0.0, fc_dropout=0.2, embedding_dropout=0.0)
    model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), lr=1e-3)
    # One triangular cycle per epoch, stepped after every batch inside train().
    # (StepLR and OneCycleLR alternatives were left commented out in the
    # original cell.)
    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-5, max_lr=3e-3, step_size_up=int(len(train_dataloader)/2), cycle_momentum=False, mode='triangular')

    # One writer per fold: creating a new SummaryWriter every epoch leaked
    # open event files. `writer` stays a module-level name because train()
    # and validate() read it as a global.
    writer = SummaryWriter(log_dir=os.path.join(tensorboard_path, '{}_fold_{}'.format(model_name, idx)))
    try:
        for epoch in range(1, epochs+1):
            train(model, train_dataloader, val_dataloader, criterion, optimizer, epoch, history, validate_points, scheduler, step=True)
            gc.collect()
    finally:
        writer.close()

    # Blend the predictions of the retained checkpoints, best first.
    ranked_checkpoints = sorted(history['best_model'], reverse=True)
    for (acc, checkpoint_pth), weight in zip(ranked_checkpoints, checkpoint_weights):
        model.load_state_dict(torch.load(checkpoint_pth, map_location=torch.device('cpu')), strict=True)
        test(oof_train_test, model, test_dataloader, val_dataloader, valid_indexs[idx], weight=weight)

    acc_folds.append(ranked_checkpoints[0][0])
    res_folds.append(oof_train_test)
    np.save(os.path.join(res_path, "{}_fold_{}.npy".format(model_name, idx)), oof_train_test)

    # Free the model and cached GPU memory before the next fold starts.
    del model, history
    gc.collect()
    torch.cuda.empty_cache()

Epoch1: 100%|████████| 46305/46305 [00:17<00:00, 2700.29it/s, loss:1.2725, acc:0.5989, val-loss:1.2068, val-acc:0.6232]
Epoch2: 100%|████████| 46305/46305 [00:15<00:00, 2911.47it/s, loss:0.7248, acc:0.7743, val-loss:0.6943, val-acc:0.7818]
Epoch3: 100%|████████| 46305/46305 [00:16<00:00, 2850.33it/s, loss:0.5933, acc:0.8082, val-loss:0.5623, val-acc:0.8234]
Epoch4: 100%|████████| 46305/46305 [00:16<00:00, 2882.69it/s, loss:0.5001, acc:0.8439, val-loss:0.5121, val-acc:0.8385]
Epoch5: 100%|████████| 46305/46305 [00:15<00:00, 2926.97it/s, loss:0.4874, acc:0.8394, val-loss:0.4641, val-acc:0.8547]
Epoch6: 100%|████████| 46305/46305 [00:15<00:00, 2912.29it/s, loss:0.4357, acc:0.8620, val-loss:0.4372, val-acc:0.8630]
Epoch7: 100%|████████| 46305/46305 [00:15<00:00, 2918.11it/s, loss:0.4224, acc:0.8618, val-loss:0.4076, val-acc:0.8673]
Epoch8: 100%|████████| 46305/46305 [00:15<00:00, 2916.24it/s, loss:0.3952, acc:0.8777, val-loss:0.3901, val-acc:0.8795]
Epoch9: 100%|████████| 46305/46305 [00:1

Epoch9: 100%|████████| 46315/46315 [00:15<00:00, 2968.04it/s, loss:0.3773, acc:0.8813, val-loss:0.4198, val-acc:0.8674]
Epoch10: 100%|███████| 46315/46315 [00:15<00:00, 2964.07it/s, loss:0.4045, acc:0.8727, val-loss:0.4211, val-acc:0.8723]
Epoch11: 100%|███████| 46315/46315 [00:15<00:00, 2963.26it/s, loss:0.3817, acc:0.8771, val-loss:0.4060, val-acc:0.8772]
Epoch12: 100%|███████| 46315/46315 [00:15<00:00, 2966.17it/s, loss:0.3203, acc:0.8994, val-loss:0.3994, val-acc:0.8802]
Epoch13: 100%|███████| 46315/46315 [00:15<00:00, 2965.09it/s, loss:0.3583, acc:0.8840, val-loss:0.4057, val-acc:0.8764]
Epoch14: 100%|███████| 46315/46315 [00:15<00:00, 2966.37it/s, loss:0.3234, acc:0.8985, val-loss:0.3685, val-acc:0.8865]
Epoch15: 100%|███████| 46315/46315 [00:15<00:00, 2974.78it/s, loss:0.3663, acc:0.8828, val-loss:0.3874, val-acc:0.8854]
Epoch16: 100%|███████| 46315/46315 [00:15<00:00, 2971.14it/s, loss:0.3303, acc:0.8946, val-loss:0.3706, val-acc:0.8883]
Epoch17: 100%|███████| 46315/46315 [00:1

In [14]:
res_folds

[array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [6.22121911e-07, 2.32445025e-08, 2.27266271e-11, ...,
         7.62088483e-07, 1.06691051e-10, 4.07728932e-12],
        [1.55499918e-03, 1.36706105e-04, 4.98975908e-07, ...,
         3.14545230e-08, 4.30256684e-07, 2.36042192e-06],
        [5.60847566e-06, 6.33265705e-06, 3.21540896e-09, ...,
         8.49415604e-08, 5.57732705e-07, 1.20443138e-06]]),
 array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [8.77333246e-02, 

In [16]:
# Reload the per-fold prediction matrices that the training cell saved to res_path.
res = [
    np.load(os.path.join(res_path, "{}_fold_{}.npy".format(model_name, i)))
    for i in range(len(data_folds))
]

In [34]:
# Average the fold predictions. The test rows are the trailing
# len(test_names) rows, so locate them from that count instead of a
# hard-coded training-set size.
mean_probs = np.mean(res, axis=0)
test_start = mean_probs.shape[0] - len(test_names)

sub = pd.DataFrame()
sub['file_name'] = test_names
# Predicted class index per test file (argmax over the class axis).
sub['label'] = np.argmax(mean_probs[test_start:], axis=1)

In [42]:
# Translate class indices to class names. The dtype guard keeps the cell
# idempotent: re-running it after the column already holds names would
# otherwise map every value to NaN.
# NOTE(review): `labels` comes from os.listdir, whose order is arbitrary;
# confirm it matches the class encoding used when the labels were extracted.
if pd.api.types.is_integer_dtype(sub['label']):
    sub['label'] = sub['label'].map(dict(enumerate(labels)))

In [43]:
sub

Unnamed: 0,file_name,label
0,003gtit8kw.wav,one
1,006irl4pgx.wav,yes
2,007sh75o5w.wav,tree
3,009k6j5dbw.wav,tree
4,009lyahcx8.wav,marvin
...,...,...
6830,zyvkhzi7pt.wav,house
6831,zzbo90jvjj.wav,nine
6832,zzgk3zkfr8.wav,right
6833,zzqta071j9.wav,eight


In [46]:
# Timestamp the submission (local time) so successive exports do not overwrite each other.
now = time.strftime("%Y%m%d_%H%M%S")
fname = "submit_{}.csv".format(now)
sub.to_csv(fname, index=False)