In [3]:
import pandas as pd
import numpy as np


# Training labels, plus the sample submission, which is used below as the test file table.
train_df = pd.read_csv(filepath_or_buffer='./data/train.csv')
test_df = pd.read_csv(filepath_or_buffer='./data/sample_submission.csv')

In [4]:
# Preview the first rows of the training table (columns: fname, label, manually_verified).
train_df.head()

Unnamed: 0,fname,label,manually_verified
0,00044347.wav,Hi-hat,0
1,001ca53d.wav,Saxophone,1
2,002d256b.wav,Trumpet,0
3,0033e230.wav,Glockenspiel,1
4,00353774.wav,Cello,1


In [5]:
# Preview the sample submission. Every label in the captured output is the same
# string, so the column presumably holds placeholder values rather than real targets.
test_df.head()

Unnamed: 0,fname,label
0,00063640.wav,Laughter Hi-Hat Flute
1,0013a1db.wav,Laughter Hi-Hat Flute
2,002bb878.wav,Laughter Hi-Hat Flute
3,002d392d.wav,Laughter Hi-Hat Flute
4,00326aa9.wav,Laughter Hi-Hat Flute


In [6]:
# Number of training file names that also occur in the test table (0 => no overlap).
train_df['fname'].isin(test_df['fname']).sum()

0

In [7]:
import os

# Directory listings of the raw audio folders; later cells reuse both names.
train_f = os.listdir(path='./data/audio_train')
test_f = os.listdir(path='./data/audio_test')

# (rows in train.csv, rows whose file is present on disk)
(len(train_df), train_df['fname'].isin(train_f).sum())

(9473, 9473)

In [8]:
# Same existence check for the test table: (row count, rows whose file name appears
# in the test_f directory listing built in the previous cell).
len(test_df), test_df.fname.isin(test_f).sum()

(9400, 9400)

In [9]:
# Pair every training file with its size in bytes, then show the three smallest.
train_fs = [
    (name, os.path.getsize(f'./data/audio_train/{name}'))
    for name in train_f
]
sorted(train_fs, key=lambda entry: entry[1])[:3]

[('75923738.wav', 26504), ('6aaff359.wav', 28268), ('6a30339c.wav', 28268)]

In [10]:
# Same size scan for the test clips, showing the five smallest.
# A size of 44 bytes is the WAV header alone, i.e. an empty wav file.
test_fs = [
    (name, os.path.getsize(f'./data/audio_test/{name}'))
    for name in test_f
]
sorted(test_fs, key=lambda entry: entry[1])[:5]

[('b39975f5.wav', 44),
 ('6ea0099f.wav', 44),
 ('0b0427e2.wav', 44),
 ('ab02c784.wav', 26504),
 ('d593ed57.wav', 26504)]

In [11]:
# step 1.
# Run the project's trim_data.py script; -W ignore silences Python warnings.
# Per the captured log it creates ./data/audio_train_trim; what "trim" means
# exactly is defined by the script, not by this notebook.
!python -W ignore trim_data.py

Train ...
New dir, ./data/audio_train_trim
  0%|                                                  | 0/9473 [00:00<?, ?it/s]

In [None]:
# step 2.
# Run the project's extract_features.py script with Python warnings suppressed.
!python -W ignore extract_features.py

In [None]:
import os

# Report how many entries each log-mel output folder contains.
for tag, folder in (('train:', './data/audio_train_logmel/'),
                    ('test:', './data/audio_test_logmel/')):
    print(tag, len(os.listdir(folder)))

In [None]:
# check parameters
# Print the `logmel` attribute of the project's default configuration object.
from v2.config import DefaultConfig
print(DefaultConfig().logmel)

In [None]:
# example of padding
import os
import numpy as np

LOGMEL_DIR = 'data/audio_train_logmel'  # folder holding the saved .npy feature arrays
TARGET_FRAMES = 150                     # minimum size of the last axis after padding
N_EXAMPLES = 10                         # how many files to demonstrate on


def pad_last_axis(x, target=TARGET_FRAMES):
    """Zero-pad the end of axis 2 of a 3-D array so it is at least ``target`` long.

    Args:
        x: 3-D numpy array; only its third axis is padded.
        target: desired minimum length of the third axis.

    Returns:
        A new array. If the third axis is already ``target`` or longer, the pad
        width is clamped to zero and the shape is unchanged.
    """
    missing = max(0, target - x.shape[2])
    return np.pad(x, ((0, 0), (0, 0), (0, missing)), mode='constant')


# os.listdir returns entries in arbitrary order; sorting makes the sample reproducible.
files = sorted(os.listdir(LOGMEL_DIR))
for f in files[:N_EXAMPLES]:
    x = pad_last_axis(np.load(os.path.join(LOGMEL_DIR, f)))
    print(x.shape)

In [None]:
# testing dataloaders
# Executes dataloader.py as a standalone script; the bare filename is resolved
# against the notebook's working directory.
!python dataloader.py

In [None]:
# testing dataloaders
from v2.config import DefaultConfig
from v2.dataloader import stratified_loaders

# For every fold, pull a single batch from each split and print its size(s).
# 'train' and 'val' batches unpack into two tensors; 'test' batches are a single tensor.
for fold_no, loaders in enumerate(stratified_loaders(DefaultConfig()), start=1):
    print(f'fold {fold_no}')
    for split in ('train', 'val'):
        inputs, targets = next(iter(loaders[split]))
        print(inputs.size(), targets.size())
    test_inputs = next(iter(loaders['test']))
    print(test_inputs.size())

In [None]:
# check parameters
# Print the `dataloader` attribute of the project's default configuration object.
from v2.config import DefaultConfig
print(DefaultConfig().dataloader)

In [None]:
# check model in/out
# Executes model.py as a standalone script; whatever it prints is defined in that file.
!python model.py

In [None]:
# Show the config entries stored under the 'sgd' optimizer key and the
# 'cosine_annealing' scheduler key of the default configuration.
from v2.config import DefaultConfig
conf = DefaultConfig()
d = conf.to_dict()
optim_cfg = d['train']['optim']
for group, entry in (('optimizer', 'sgd'), ('scheduler', 'cosine_annealing')):
    print(optim_cfg[group][entry])

In [1]:
# Launch runner.py as a script with Python warnings suppressed (-W ignore).
# NOTE(review): the captured log below reports loss=191468.438 during epoch 1 while
# "native 16bit precision" is in use -- that magnitude looks suspicious; confirm it is expected.
!python -W ignore runner.py

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name  | Type           | Params
-----------------------------------------
0 | model | ResNetFinetune | 23 M  
Epoch 1:  80%|██▍| 60/75 [00:12<00:03,  4.73it/s, loss=191468.438, v_num=fold_1]
Validating: 0it [00:00, ?it/s][A
Epoch 1:  81%|██▍| 61/75 [00:12<00:02,  4.70it/s, loss=191468.438, v_num=fold_1][A
Epoch 1:  84%|██▌| 63/75 [00:13<00:02,  4.80it/s, loss=191468.438, v_num=fold_1][A
Epoch 1:  87%|██▌| 65/75 [00:13<00:02,  4.90it/s, loss=191468.438, v_num=fold_1][A
Epoch 1:  89%|██▋| 67/75 [00:13<00:01,  4.98it/s, loss=191468.438, v_num=fold_1][A
Epoch 1:  92%|██▊| 69/75 [00:13<00:01,  5.07it/s, loss=191468.438, v_num=fold_1][A
Epoch 1:  95%|██▊| 71/75 [00:13<00:00,  5.16it/s, loss=191468.438, v_num=fold_1][A
Epoch 1:  97%|██▉| 73/75 [00:13<00:00,  5.25it/s, loss=191468.438, v_num=fold_1][A
Epoch 1: 100%|███| 75/75 [00:14<00:0

In [None]:
# test model accuracy from checkpoints
from v2.utils import accuracy
from v2.runner import Model
from v2.config import DefaultConfig
from v2.dataloader import stratified_loaders

# Restore the fold_1 checkpoint, move it to the configured device and freeze it.
conf = DefaultConfig()
model = Model.load_from_checkpoint('tb_logs/resnext101_32x4d/fold_1/epoch=27-val_acc1=0.8070.ckpt')
model.to(conf.train.device)
model.freeze()

# Per fold: score one train batch and one val batch with the restored model,
# then print the size of one test batch.
for fold_no, loaders in enumerate(stratified_loaders(conf), start=1):
    print(f'fold {fold_no}')
    for split in ('train', 'val'):
        inputs, targets = next(iter(loaders[split]))
        outputs = model(inputs.to(conf.train.device))
        print(accuracy(outputs.cpu(), targets.cpu()))
        print(inputs.size(), targets.size())
    test_inputs = next(iter(loaders['test']))
    print(test_inputs.size())

In [None]:
# collect test-set predictions from a checkpoint
import numpy as np
from v2.utils import accuracy
from v2.runner import Model
from v2.config import DefaultConfig
from v2.dataloader import stratified_loaders

# Restore the fold_1 checkpoint, move it to the configured device and freeze it.
conf = DefaultConfig()
model = Model.load_from_checkpoint('tb_logs/resnext101_32x4d/fold_1/epoch=27-val_acc1=0.8070.ckpt')
model.to(conf.train.device)
model.freeze()

preds = []
for loaders in stratified_loaders(conf):
    # Only the first fold's loaders are consumed; the outer loop exits right after.
    for batch in loaders['test']:
        probs = model(batch.to(conf.train.device)).softmax(dim=1)
        preds.append(probs.cpu().numpy())
        print(probs.shape)
    break
# Stack the per-batch arrays along the first axis.
preds = np.concatenate(preds, axis=0)

In [1]:
# Run submit.py with warnings suppressed; the captured output reports the result file as sbm.csv.
!python -W ignore submit.py

Result saved as sbm.csv


In [1]:
# Keep a copy of the submission under a second name (sbm1.csv).
# Relies on the `cp` shell command being available.
!cp sbm.csv sbm1.csv

In [5]:
# check augmentation
# Feed a random (3, 64, 150) tensor through the train-time 'logmel' augmentation and
# inspect the size of the result.
# NOTE(review): the captured output is (3, 64, 300), i.e. the last axis comes back
# twice as long as the input -- confirm this is intended.
import torch
from v2.misc import augment
augment['train']['logmel'](torch.rand(3, 64, 150)).size()

torch.Size([3, 64, 300])