In [1]:
import numpy as np
import librosa as lb
import pandas as pd
import random
import heapq
import time

from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn.model_selection import KFold,StratifiedKFold
import os
import gc
import warnings
warnings.filterwarnings('ignore')

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, SequentialSampler
from torch.utils.tensorboard import SummaryWriter
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
torch.__version__

'1.6.0'

In [3]:
def setup_seed(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy, and PyTorch (CPU and all CUDA
    devices), and configures cuDNN for repeatable kernel selection.

    Args:
        seed (int): Seed value applied to all generators.
    """
    # Only influences hashing in interpreters started after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # The CUDA seeding calls are silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner can choose different algorithms from run to run;
    # turning it off is required alongside `deterministic` for repeatability.
    torch.backends.cudnn.benchmark = False
    
# Single seed shared by all RNGs; also reused as the random_state of the
# StratifiedKFold split further down.
GLOBAL_SEED = 2020
setup_seed(GLOBAL_SEED)

In [4]:
# Input locations.
train_path = './data/train'
test_path = './data/test'
feat_path = './features'
# Output locations: fold predictions, model checkpoints, TensorBoard logs.
res_path = './res'
model_save = './model_save'
tensorboard_path = './tb_run'
# exist_ok=True replaces the check-then-create pattern, which can fail if the
# directory appears between the check and the makedirs call.
for out_dir in (model_save, res_path, tensorboard_path):
    os.makedirs(out_dir, exist_ok=True)

## 读取提取好的特征数据

In [10]:
# Open the pre-computed feature archives stored under feat_path.
# The cells below read the 'X', 'y' and 'test_names' entries from melspec
# and the 'X' entry from basic_feature.
melspec = np.load(os.path.join(feat_path, 'melspec_256.npz'))
basic_feature = np.load(os.path.join(feat_path, 'basic_feature.npz'))

In [11]:
# Join the two feature sets along the last axis.
X = np.concatenate([melspec['X'], basic_feature['X']], axis=-1)
y_train = melspec['y']
test_names = melspec['test_names']
# Training rows come first in X, followed by the test rows. Derive the split
# point from the number of labels instead of hard-coding the dataset size.
n_train = len(y_train)
assert X.shape[0] == n_train + len(test_names), \
    'feature rows do not match number of labels + number of test files'
X_train = X[:n_train]
X_test = X[n_train:]
# NOTE(review): os.listdir returns entries in arbitrary order. The index -> name
# mapping built from `labels` later must match the order used to encode y during
# feature extraction (not visible here) — confirm, and sort in both places if needed.
labels = os.listdir(train_path)

In [12]:
# Insert a singleton channel axis at position 1, as expected by the Conv2d
# layers (in_channels=1). Indexing from X (rather than reassigning X_train /
# X_test from themselves) keeps this cell idempotent: re-running it can no
# longer stack an extra axis each time. The split point is the number of
# training labels, i.e. the same rows the previous cell assigned to X_train.
X_train = X[:len(y_train), np.newaxis, :, :]
X_test = X[len(y_train):, np.newaxis, :, :]

In [13]:
# Everything needed from the archives has already been copied into X, y_train
# and test_names; drop the archive objects and run a collection pass.
# Note: re-running this cell raises NameError because the names are gone.
del melspec, basic_feature
gc.collect()

158

## 准备数据集

In [9]:
class CustomDataset(Dataset):
    """Map-style dataset that serves ``(X[idx], y[idx])`` pairs.

    Args:
        X: Indexable container of samples; its length defines the dataset size.
        y: Indexable container of targets, addressed with the same indices as ``X``.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        # Size is taken from the samples only; y is never measured.
        return len(self.X)

    def __getitem__(self, idx):
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

In [10]:
# Batch sizes for the train / validation / test loaders.
BATCH_SIZE_TRAIN = 128
BATCH_SIZE_VAL = 128
BATCH_SIZE_TEST = 128
# 5-fold stratified split, shuffled with the global seed.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=GLOBAL_SEED)
data_folds = []  # one (train_loader, valid_loader, test_loader) tuple per fold
valid_indexs = []    # validation row indices of each fold, in fold order


# The test set has no labels: random placeholders satisfy the (X, y) dataset
# interface and are discarded by test(), which only unpacks X.
test_dataset = CustomDataset(X_test, np.random.rand(X_test.shape[0], 1))
# Sequential order keeps predictions aligned with the rows of X_test.
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE_TEST, sampler=SequentialSampler(test_dataset), shuffle=False, num_workers=0)

for idx, (train_index, valid_index) in enumerate(kf.split(X=X_train, y=y_train)):
    # Remember which training rows this fold validates on; they are needed
    # later to write out-of-fold predictions back to the right positions.
    valid_indexs.append(valid_index)
    
    # Integer-array indexing copies the selected rows, so each fold holds its
    # own copy of its training and validation data.
    train_dataset = CustomDataset(X_train[train_index], y_train[train_index])
    val_dataset = CustomDataset(X_train[valid_index], y_train[valid_index])

    # Only the training loader shuffles; validation is read sequentially so its
    # predictions line up with valid_index.
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE_TRAIN, shuffle=True, num_workers=0)
    valid_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE_VAL, sampler=SequentialSampler(val_dataset), shuffle=False, num_workers=0)
    # The same test loader object is shared by every fold.
    data_folds.append((train_dataloader, valid_dataloader, test_dataloader))

## 搭建模型

In [11]:
class CNNBase(nn.Module):
    """Convolutional classifier: four Conv2d layers followed by a three-layer MLP head.

    Expects a single-channel 4-D input (Conv2d in_channels=1) and returns 30
    raw scores per sample from the final Linear layer; no softmax is applied.

    The layers live in one ``nn.Sequential`` called ``main``, so state_dict keys
    are positional (``main.0.weight`` ...). Reordering or inserting layers
    changes those keys and invalidates previously saved checkpoints.
    """
    def __init__(self):
        super().__init__()
        self.main = nn.Sequential(
            # Every conv uses kernel (3, 9) with padding (1, 1): the first spatial
            # dim is preserved, the second shrinks by 6 per convolution.
            nn.Conv2d(1, 64, kernel_size=(3, 9), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
            nn.Conv2d(64, 64, kernel_size=(3, 9), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
            # No pooling between the third and fourth convolutions.
            nn.Conv2d(64, 128, kernel_size=(3, 9), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=(3, 9), stride=(1, 1), padding=(1, 1)),
            # NOTE(review): MaxPool -> ReLU -> MaxPool breaks the Conv -> ReLU -> MaxPool
            # pattern used above. Max-pooling commutes with ReLU, so the output equals
            # ReLU -> MaxPool -> MaxPool; confirm that pooling twice in a row is intended.
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
            nn.Flatten(),
            # NOTE(review): 4096 must equal channels * H * W after the last pool; the
            # input size that produces it is not visible in this notebook — confirm.
            nn.Linear(4096, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2),
            nn.Linear(2048, 1024),
            nn.ReLU(inplace=True),
            # 30 output scores, matching the 30 columns of the prediction arrays.
            nn.Linear(1024, 30)
        )
        
    def forward(self, X):
        """Run X through the sequential stack and return the raw class scores."""
        return self.main(X)

In [12]:
def validate(model, val_dataloader, criterion, history, n_iters):
    """Evaluate the model on the validation loader and checkpoint it if it ranks among the best.

    Loss and accuracy are averaged over samples, not over batches, so a short
    final batch is not over-weighted. Both values are logged through the
    module-level ``writer`` (a SummaryWriter that must exist before the call).

    Args:
        model: Network to evaluate; it is left in eval mode on return.
        val_dataloader: Iterable of ``(X, y)`` batches.
        criterion: Callable ``(logits, targets) -> scalar loss``. The sample
            weighting assumes it returns the per-batch mean, as the
            ``criterion`` defined in this notebook does.
        history (dict): ``history['best_model']`` is a list of
            ``(accuracy, checkpoint_path)`` tuples treated as a min-heap; the
            worst entry is replaced when the new accuracy beats it.
        n_iters (int): Global step used as the x-axis for the logged scalars.

    Returns:
        tuple: ``(mean_loss, mean_accuracy)`` over the validation set
        (both NaN when the loader yields no batches).
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for X, y in val_dataloader:
            X, y = X.cuda(), y.cuda().long()
            y_output = model(X)
            batch_size = y.size(0)
            # Undo the per-batch mean so each sample contributes equally.
            loss_sum += criterion(y_output, y).item() * batch_size
            _, y_preds = torch.max(y_output, 1)
            n_correct += (y_preds == y).sum().item()
            n_seen += batch_size
    if n_seen:
        mean_costs = loss_sum / n_seen
        mean_accs = n_correct / n_seen
    else:
        # Mirror np.mean([]) from the previous implementation.
        mean_costs = mean_accs = float('nan')
    writer.add_scalar('age/validate_accuracy', mean_accs, n_iters)
    writer.add_scalar('age/validate_loss', mean_costs, n_iters)

    best_models = history['best_model']
    # Establish the heap invariant before reading the root, so element 0 is
    # guaranteed to be the current worst checkpoint.
    heapq.heapify(best_models)
    if best_models and mean_accs > best_models[0][0]:
        # Reuse the file slot of the checkpoint being evicted.
        checkpoint_pth = best_models[0][1]
        heapq.heappushpop(best_models, (mean_accs, checkpoint_pth))
        torch.save(model.state_dict(), checkpoint_pth)
    return mean_costs, mean_accs


def train(model, train_dataloader, val_dataloader, criterion, optimizer, epoch, history, validate_points, scheduler, step=True):
    """Train the model for one epoch, validating at the requested batch indices.

    Per-batch loss, accuracy and learning rate are logged through the
    module-level ``writer`` (a SummaryWriter that must exist before the call).

    Args:
        model: Network to optimise (expected to live on the CUDA device).
        train_dataloader: DataLoader of ``(X, y)`` training batches.
        val_dataloader: Loader passed to ``validate`` at each validation point.
        criterion: Callable ``(logits, targets) -> scalar loss``.
        optimizer: Optimizer updating ``model``'s parameters.
        epoch (int): 1-based epoch number; used for the progress-bar label and
            the global step.
        history (dict): Best-checkpoint bookkeeping forwarded to ``validate``.
        validate_points: Batch indices within the epoch after which to validate.
        scheduler: Learning-rate scheduler, or None to keep the rate fixed.
        step (bool): When True the scheduler advances after every batch.
    """
    model.train()
    costs = []
    accs = []
    val_loss, val_acc = 0, 0
    n_batches = len(train_dataloader)
    with tqdm(total=len(train_dataloader.dataset), desc='Epoch{}'.format(epoch)) as pbar:
        for idx, (X, y) in enumerate(train_dataloader):
            X, y = X.cuda(), y.cuda().long()
            y_output = model(X)
            loss = criterion(y_output, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step and scheduler is not None:
                scheduler.step()

            with torch.no_grad():
                costs.append(loss.item())
                _, y_preds = torch.max(y_output, 1)
                accs.append((y_preds == y).float().mean().item())
            pbar.update(y.size(0))

            # Global step: batches completed in earlier epochs plus the current index.
            n_iters = idx + n_batches * (epoch - 1)
            if idx in validate_points:
                val_loss, val_acc = validate(model, val_dataloader, criterion, history, n_iters)
                # validate() switches to eval mode; restore training mode.
                model.train()

            writer.add_scalar('age/train_accuracy', accs[-1], n_iters)
            writer.add_scalar('age/train_loss', costs[-1], n_iters)
            # Read the rate from the optimizer: it is what the next update will
            # use, works without a scheduler, and avoids scheduler.get_lr(),
            # which is deprecated outside of step().
            writer.add_scalar('age/learning_rate', optimizer.param_groups[0]['lr'], n_iters)
            pbar.set_postfix_str('loss:{:.4f}, acc:{:.4f}, val-loss:{:.4f}, val-acc:{:.4f}'.format(np.mean(costs[-10:]), np.mean(accs[-10:]), val_loss, val_acc))
    # Release cached GPU blocks once per epoch; doing it after every batch forces
    # the allocator to re-request memory on each step and slows training.
    torch.cuda.empty_cache()

    
def test(oof_train_test, model, test_dataloader, val_dataloader, valid_index, weight=1):
    model.eval()
    y_preds = []
    y_preds_val = []
    with torch.no_grad():
        for idx, batch in enumerate(test_dataloader):
            X, _ = batch
            X= X.cuda()
            y_output = model(X)    
            y_preds.append(y_output.cpu())
            
        for idx, batch in enumerate(val_dataloader):
            X, y = batch
            X = X.cuda()
            y_output = model(X)
            y_preds_val.append(y_output.cpu())
    
    oof_train_test[valid_index] += F.softmax(torch.cat(y_preds_val)).numpy() * weight
    oof_train_test[57886:] += F.softmax(torch.cat(y_preds)).numpy() * weight

In [13]:
def criterion(y_output, y_true):
    """Return the mean cross-entropy between raw class scores and integer class targets."""
    return F.cross_entropy(y_output, y_true)

res_folds = []  # per-fold prediction matrices (out-of-fold rows + test rows)
acc_folds = []  # best validation accuracy reached in each fold
model_name = 'cnn_base'
best_checkpoint_num = 3
for idx, (train_dataloader, val_dataloader, test_dataloader) in enumerate(data_folds):
    # Rows [0, n_train) hold out-of-fold predictions, the remaining rows hold test predictions.
    oof_train_test = np.zeros((X_train.shape[0] + X_test.shape[0], 30))
    # Pre-fill the best-checkpoint heap with zero-accuracy placeholders, one file slot each.
    history = {'best_model': []}
    for i in range(best_checkpoint_num):
        history['best_model'].append((0, os.path.join(model_save, '{}_checkpoint_fold_{}_{}.pth'.format(model_name, idx, i))))
    # Validate twice per epoch: after the middle batch and after the last batch.
    validate_points = list(np.linspace(0, len(train_dataloader)-1, 3).astype(int))[1:]

    model = CNNBase()
    model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), lr=1e-3)
    epochs = 15
    # Triangular cyclic schedule whose rising half spans half an epoch; it is
    # advanced once per batch inside train() (step=True).
    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-5, max_lr=2e-3, step_size_up=int(len(train_dataloader)/2), cycle_momentum=False, mode='triangular')

    # One writer per fold: creating a new SummaryWriter every epoch left the
    # earlier ones open and scattered the fold's log over many event files.
    # train() and validate() read this module-level name.
    writer = SummaryWriter(log_dir=os.path.join(tensorboard_path, '{}_fold_{}'.format(model_name, idx)))
    try:
        for epoch in range(1, epochs+1):
            train(model, train_dataloader, val_dataloader, criterion, optimizer, epoch, history, validate_points, scheduler, step=True)
            gc.collect()
    finally:
        # Flush pending events and release the file handle even if training fails.
        writer.close()

    # Blend the retained checkpoints, best first; the weights sum to 1.
    ranked_checkpoints = sorted(history['best_model'], reverse=True)
    for (acc, checkpoint_pth), weight in zip(ranked_checkpoints, [0.5, 0.3, 0.2]):
        model.load_state_dict(torch.load(checkpoint_pth, map_location=torch.device('cpu')), strict=True)
        test(oof_train_test, model, test_dataloader, val_dataloader, valid_indexs[idx], weight=weight)
    acc_folds.append(ranked_checkpoints[0][0])
    res_folds.append(oof_train_test)
    np.save(os.path.join(res_path, "{}_fold_{}.npy".format(model_name, idx)), oof_train_test)
    # Free the fold's model and GPU cache before the next fold starts.
    del model, history
    gc.collect()
    torch.cuda.empty_cache()

Epoch1: 100%|████████| 46296/46296 [00:43<00:00, 1070.90it/s, loss:0.9068, acc:0.7247, val-loss:0.8721, val-acc:0.7365]
Epoch2: 100%|████████| 46296/46296 [00:41<00:00, 1122.20it/s, loss:0.5202, acc:0.8571, val-loss:0.4802, val-acc:0.8543]
Epoch3: 100%|████████| 46296/46296 [00:41<00:00, 1108.57it/s, loss:0.4154, acc:0.8798, val-loss:0.3842, val-acc:0.8855]
Epoch4: 100%|████████| 46296/46296 [00:41<00:00, 1105.73it/s, loss:0.3278, acc:0.8871, val-loss:0.3514, val-acc:0.8925]
Epoch5: 100%|████████| 46296/46296 [00:42<00:00, 1079.02it/s, loss:0.3143, acc:0.9104, val-loss:0.3340, val-acc:0.9003]
Epoch6: 100%|████████| 46296/46296 [00:42<00:00, 1095.12it/s, loss:0.2498, acc:0.9232, val-loss:0.3432, val-acc:0.8991]
Epoch7: 100%|████████| 46296/46296 [00:41<00:00, 1124.13it/s, loss:0.2298, acc:0.9339, val-loss:0.3151, val-acc:0.9097]
Epoch8: 100%|████████| 46296/46296 [00:41<00:00, 1124.06it/s, loss:0.1781, acc:0.9470, val-loss:0.3232, val-acc:0.9078]
Epoch9: 100%|████████| 46296/46296 [00:4

In [14]:
# Inspect the per-fold prediction matrices collected by the training loop.
res_folds

[array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [3.89421654e-08, 2.17634833e-07, 1.34784787e-11, ...,
         3.35284531e-13, 7.51938564e-12, 6.43016290e-12],
        [5.20235454e-09, 9.30683011e-08, 1.91447927e-15, ...,
         3.71508911e-16, 1.25467160e-12, 2.81640965e-10],
        [4.44232854e-09, 1.46632332e-07, 1.86014994e-12, ...,
         9.43859178e-13, 1.48233233e-11, 2.05418063e-08]]),
 array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 

In [6]:
# Reload the saved per-fold prediction matrices of the 'cnn_base' model from
# res_path, starting a fresh list.
res = []
for i in range(5):
    res.append(np.load(os.path.join(res_path, "{}_fold_{}.npy".format('cnn_base', i))))

In [7]:
# Add the five 'lstm_base' fold matrices to the same list so both models are
# averaged together. Those files are not produced by the code visible here.
# NOTE: this cell appends to `res`; re-running it without re-running the cell
# that resets `res` adds the same arrays again and skews the average.
for i in range(5):
    res.append(np.load(os.path.join(res_path, "{}_fold_{}.npy".format('lstm_base', i))))

In [14]:
# Build the submission: average the class probabilities over every loaded
# fold/model matrix and pick the most probable class for each test file.
sub = pd.DataFrame()
sub['file_name'] = test_names
mean_proba = np.mean(res, axis=0)
# Test rows are the trailing len(test_names) rows of each matrix; deriving the
# offset from test_names removes the hard-coded training-set size.
sub['label'] = np.argmax(mean_proba[len(mean_proba) - len(test_names):], axis=1)

In [15]:
# Replace predicted class indices with label names (position in `labels` -> name).
# NOTE(review): `labels` comes from os.listdir, whose order is arbitrary; it must
# match the order used to encode the training targets — confirm.
# Re-running this cell maps the already-converted names to NaN.
sub['label'] = sub['label'].map(dict(enumerate(labels)))

In [16]:
# Preview the submission table (file name and predicted label per test file).
sub

Unnamed: 0,file_name,label
0,003gtit8kw.wav,one
1,006irl4pgx.wav,yes
2,007sh75o5w.wav,tree
3,009k6j5dbw.wav,three
4,009lyahcx8.wav,marvin
...,...,...
6830,zyvkhzi7pt.wav,house
6831,zzbo90jvjj.wav,nine
6832,zzgk3zkfr8.wav,right
6833,zzqta071j9.wav,three


In [17]:
# Name the submission after the current local time (second resolution) and
# write it to the working directory without the index column.
now = time.strftime("%Y%m%d_%H%M%S")
fname = "submit_" + now + ".csv"
sub.to_csv(fname, index=False)