In [6]:
%pip install -r ../../requirements.txt

In [7]:
import torchaudio

In [8]:
import timm

In [111]:
import json
import sys
from types import SimpleNamespace
from glob import glob
import ntpath

import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn import metrics
import seaborn as sns
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

sys.path.append('../../src')

from custom.data import AudioDataset, DataModule
from custom.trainer import TrainModule
from custom.net import SimpleCNN
from custom.utils import batch_to_device, get_state_dict, get_min_max, min_max_norm, Mixup, Compose, OneOf, MaskFrequency, MaskTime
from custom.eval import inference_k_random, error_analysis


# determinism
# seed = 1337
# torch.manual_seed(seed)
# np.random.seed(seed)
# random.seed(seed)
# torch.cuda.manual_seed(seed)
# torch.cuda.manual_seed_all(seed)
%load_ext autoreload
%autoreload 2

full_path = '../../'

In [17]:
# Experiment configuration: one flat namespace that is handed to the dataset
# (AudioDataset(..., cfg=cfg)) and to the network (SimpleCNN(cfg)) in later cells.
cfg = SimpleNamespace()

# Crop length; presumably seconds, given the '<len>s_crop' directory name below — TODO confirm.
cfg.wav_crop_len = 6

# A '.' in the crop length is replaced by '-' when building the directory name.
cfg.data_path = f'{full_path}/data/production_data/{str(cfg.wav_crop_len).replace(".", "-")}s_crop/'
cfg.n_classes = 66
cfg.pretrained = True
cfg.backbone = 'tf_efficientnetv2_s.in21k'  # presumably a timm model name — TODO confirm
cfg.in_chans = 1
cfg.num_workers = 4
cfg.include_val = False
cfg.max_amp = False

# Training Hyperparameters
cfg.n_epochs = 5
cfg.lr = 0.000251
cfg.weight_decay = 1e-4
cfg.label_smoothing = 0.1
cfg.batch_size = 32
cfg.sample_rate = 44100

# Mel Spectrogram Hyperparameters
cfg.n_mels = 128
cfg.n_fft = 2048
cfg.fmin = 300
cfg.fmax = cfg.sample_rate / 2  # half the sample rate (a float: 22050.0)
cfg.window_size = cfg.n_fft
cfg.hop_length = int(cfg.n_fft / 2)  # hop is half of n_fft
cfg.power = 2
cfg.top_db = 80.0

# Norm
cfg.mel_normalized = False
cfg.minmax_norm = True

# Augmentation Parameters
cfg.impulse_prob = 0.2
cfg.noise_prob = 0.2

cfg.max_noise = 0.04
cfg.min_snr = 5
cfg.max_snr = 20

# cfg.min / cfg.max only exist when min-max normalisation is enabled; they are read
# later by min_max_norm(..., min_val=cfg.min, max_val=cfg.max).
# get_min_max receives the config built so far, so it must stay after the settings above.
if cfg.minmax_norm:
    cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)
# NOTE(review): the name says 'mel_normalized_4.5' while mel_normalized is False and
# wav_crop_len is 6 — confirm this is the intended experiment directory.
cfg.exp_name = f'mel_normalized_4.5'

In [11]:
# Load the complete metadata table (the 'validation' subset filter is currently
# disabled) and prefix every stored path with the project root.
val_df = pd.read_csv("../../data/metadata.csv")
val_df['path'] = val_df['path'].map(lambda rel_path: f'{full_path}/{rel_path}')
# Directory that the per-recording inference cell globs for '<name>*.wav' files.
test_data = '../../data/production_data/6s_crop/val/'

In [None]:
exp_path = f'effnet_baseline/15_crop-35e_noiseAug_weighted2/'
label_path = f'{full_path}/data/labels.json'
state_dict_path = f"{exp_path}/checkpoints/last.ckpt"


In [95]:

# Per-recording inference: for each selected metadata row, run the model on every
# crop of that recording found under `test_data` and keep one output array per recording.
with open(label_path, 'r') as infile:
    data = json.load(infile)  # not used below; `data` is rebound to the crop paths in the loop

torch.cuda.empty_cache()
device = "cuda" if torch.cuda.is_available() else 'cpu'
sd = get_state_dict(state_dict_path)
# Instantiate the network here so the cell does not depend on a `model` object
# left over from another cell.
model = SimpleCNN(cfg).eval().to(device)
model.load_state_dict(sd)

preds = []
for i in tqdm(range(1), leave=False):  # only the first metadata row is processed
    # File stem of the recording; [:-4] drops a 4-character extension such as '.wav'.
    name = ntpath.basename(val_df.iloc[i]['path'][:-4])
    # glob returns paths in arbitrary order; sort so data[0], data[1], ... are stable
    # for the inspection cells that index into `data`.
    data = sorted(glob(f'{test_data}/{name}*.wav'))
    if not data:
        raise FileNotFoundError(f'No crops matching {name}*.wav found in {test_data}')
    df = pd.DataFrame(data, columns=['path'])
    pred_ds = AudioDataset(df, mode='test', cfg=cfg)
    pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)

    with torch.no_grad():
        _preds = []
        for batch in tqdm(pred_dl):
            batch = batch_to_device(batch, device)
            with torch.cuda.amp.autocast():
                out = model(batch['wave'])
                _preds += [out.cpu().numpy()]
        # One row per crop of this recording.
        _preds = np.vstack(_preds)

        preds.append(_preds)
# With a single recording this is a (1, n_crops, n_outputs) array.
preds = np.array(preds)
torch.cuda.empty_cache()

In [123]:
data

In [122]:
from IPython.display import Audio # for listening to our insects
Audio(data[1])

In [110]:
from eda_utils import plot_waveform
plot_waveform(val_df['path'].values[0])
plot_waveform(data[0])
plot_waveform(data[1])
plot_waveform(data[2])

In [60]:
preds[0].argmax(axis=-1)

In [79]:
fpreds = [p.mean(axis=0).argmax() for p in preds]

In [81]:
report = (metrics.classification_report(pred_df['label'].values , fpreds , digits=3,  output_dict=True))
evaluation = pd.DataFrame(report).transpose()
evaluation

In [112]:
# Repeated inference on a single metadata row: iterate the same one-row loader
# 10 times and keep the stacked model outputs of every pass in `ppreds`.
# (Presumably the test-mode dataset draws a different crop per pass — TODO confirm.)
pred_ds = AudioDataset(val_df.iloc[[0]], mode='test', cfg=cfg)
pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)

with open(label_path, 'r') as infile:
    data = json.load(infile)  # not used in this cell

# Resolve the device locally so the cell does not rely on another cell having defined it.
device = "cuda" if torch.cuda.is_available() else 'cpu'
sd = get_state_dict(state_dict_path)
# Batches are moved to `device` below, so the model has to live on the same device.
model = SimpleCNN(cfg).eval().to(device)
model.load_state_dict(sd)

ppreds = []
with torch.no_grad():
    for _ in range(10):
        _preds = []
        for batch in tqdm(pred_dl):
            batch = batch_to_device(batch, device)
            with torch.cuda.amp.autocast():
                out = model(batch['wave'])
                _preds += [out.cpu().numpy()]
        # One (n_samples, n_outputs) array per pass.
        ppreds.append(np.vstack(_preds))

In [114]:
wave_transforms = Compose(
                [OneOf(
                    [MaskFrequency(p=1) ,
                     MaskTime(p=1) ,
                     ],
                    p=1),
                ]
            )

In [12]:
pred_ds = AudioDataset(val_df.iloc[[10, 100, 2, 5, 25]], mode='test', cfg=cfg)
pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=32, num_workers=4)
# pred_df = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, val_df, k=3)

In [13]:
def collate_fn(batch):
    """Stack a list of per-sample dicts into one batch dict.

    Args:
        batch: sequence of dicts, each holding tensors under 'wave' and 'labels'.

    Returns:
        dict with keys 'wave' and 'labels'; each value is the samples' tensors
        stacked along a new leading dimension.
    """
    collated = {}
    # Fields are processed in this order, one complete field at a time.
    for field in ('wave', 'labels'):
        collated[field] = torch.stack([sample[field] for sample in batch])
    return collated

In [14]:
b1 = collate_fn([pred_ds.__getitem__(i) for i in range(len(pred_ds))])

In [23]:
imgbatch = model.wav2img(b1['wave'][:, None, :])
imgbatch = min_max_norm(imgbatch, min_val=cfg.min, max_val=cfg.max) 

In [121]:
a = wave_transforms(imgbatch, 0)

In [39]:
masking = torchaudio.transforms.TimeMasking(time_mask_param=40, iid_masks=True)
fmasking = torchaudio.transforms.FrequencyMasking(freq_mask_param=40, iid_masks=True)

In [40]:
masked = masking(imgbatch)

In [41]:
fmasked = fmasking(imgbatch)

In [80]:
mlabels.sum(axis=1)

In [107]:
b1['labels'][0, 0] = 1
b1['labels'][1, 1] = 1
b1['labels'][2, 2] = 1
b1['labels'][3, 3] = 1
b1['labels'][4, 4] = 1

In [90]:
mixup = Mixup(1)

In [108]:
mbatch, mlabels = mixup(imgbatch, b1['labels'])

In [92]:
perm = torch.tensor([4, 0, 2, 3, 1])
coeffs = torch.tensor([0.3527, 0.7591, 0.6406, 0.1887, 0.2248])

In [100]:
 (1 - coeffs.view(-1, 1))

In [93]:
 Y = coeffs.view(-1, 1) * b1['labels'] + (1 - coeffs.view(-1, 1)) * b1['labels'][perm]

In [67]:
import librosa
fig, ax = plt.subplots()
img = librosa.display.specshow(imgbatch[0, 0].numpy(), ax=ax)
fig.colorbar(img, ax=ax)

In [122]:
fig, ax = plt.subplots()
img = librosa.display.specshow(a[3, 0].numpy(), ax=ax)
fig.colorbar(img, ax=ax)

In [42]:
fig, ax = plt.subplots()
img = librosa.display.specshow(masked[0, 0].numpy(), ax=ax)
fig.colorbar(img, ax=ax)

In [43]:
fig, ax = plt.subplots()
img = librosa.display.specshow(masked[2, 0].numpy(), ax=ax)
fig.colorbar(img, ax=ax)

In [47]:
# Experiment: does model.wav2img give the same output for one sample regardless of
# which other samples share its batch, and does adding a channel axis change that?
# Row 10 is always the first element; pred_ds1 / pred_ds2 append 32 / 10 rows drawn
# at random from positions 0..99 of val_df.
# NOTE(review): the comparison is only meaningful if the test-mode dataset returns the
# same waveform for row 10 each time it is indexed — TODO confirm.
pred_ds = AudioDataset(val_df.iloc[[10]], mode='test', cfg=cfg)
pred_ds1 = AudioDataset(val_df.iloc[[10, *np.random.randint(0, 100, size=32)]], mode='test', cfg=cfg)
pred_ds2 = AudioDataset(val_df.iloc[[10, *np.random.randint(0, 100, size=10)]], mode='test', cfg=cfg)

# Materialise each dataset as one batch dict (this rebinds b1 used by other cells).
b1 = collate_fn([pred_ds.__getitem__(i) for i in range(len(pred_ds))])
b2 = collate_fn([pred_ds1.__getitem__(i) for i in range(len(pred_ds1))])
b3 = collate_fn([pred_ds2.__getitem__(i) for i in range(len(pred_ds2))])

# Case 1: waveforms passed to wav2img exactly as stacked by collate_fn.
print('batch x time')
imgbatch = model.wav2img(b1['wave'])
imgbatch2 = model.wav2img(b2['wave'])
imgbatch3 = model.wav2img(b3['wave'])
# Summed absolute difference of the first sample's output between each pair of batches.
print(torch.sum(torch.abs(imgbatch[0]-imgbatch3[0])))
print(torch.sum(torch.abs(imgbatch[0]-imgbatch2[0])))
print(torch.sum(torch.abs(imgbatch2[0]-imgbatch3[0])))

print('   ')
print('batch x channels x time')
print('Sum(abs(batch1-batch2))')

# Case 2: same comparison with an extra axis inserted at position 1 ([:, None, :]).
# imgbatch / imgbatch2 / imgbatch3 are overwritten with these results.
imgbatch = model.wav2img(b1['wave'][:, None, :])
imgbatch2 = model.wav2img(b2['wave'][:, None, :])
imgbatch3 = model.wav2img(b3['wave'][:, None, :])
print(torch.sum(torch.abs(imgbatch[0]-imgbatch3[0])))
print(torch.sum(torch.abs(imgbatch[0]-imgbatch2[0])))
print(torch.sum(torch.abs(imgbatch2[0]-imgbatch3[0])))

In [39]:
# Scan every batch of pred_dl and track the smallest and largest value produced by
# model.wav2img. Infinite sentinels are used so the result is correct even when all
# values are negative or exceed any fixed starting guess.
total_max = float('-inf')
total_min = float('inf')
for batch in pred_dl:
    spec = model.wav2img(batch['wave'])
    batch_max = spec.max()
    batch_min = spec.min()
    if batch_max > total_max:
        total_max = batch_max
    # Compare against the running minimum (not the running maximum).
    if batch_min < total_min:
        total_min = batch_min

In [50]:
# Create the running-statistics accumulator.
# NOTE(review): StatsRecorder is defined in a later cell of this notebook; that cell
# must have been executed before this one.
global_stats  = StatsRecorder()

# Step through every batch of pred_dl (whatever dataset it currently wraps).
with torch.no_grad():
    for batch in pred_dl:
        # [:, None, ...] inserts a new axis at position 1; StatsRecorder's default
        # red_dims=(0,2,3) reduces over axes 0, 2 and 3, so a 4-D tensor is expected
        # here — assumes wav2img returns a 3-D tensor for this input, TODO confirm.
        spec = model.wav2img(batch['wave'])[:, None, ...]
        # update normalization statistics
        global_stats.update(spec)
    
# Read back the accumulated mean and standard deviation.
global_mean,global_std = global_stats.mean,global_stats.std

In [19]:
import librosa
import librosa.display

In [84]:
import torchaudio
torchaudio.transforms.MelSpectrogram

In [96]:
fig, ax = plt.subplots()
img = librosa.display.specshow(imgbatch[0, 0].numpy(), ax=ax)
fig.colorbar(img, ax=ax, format="%+2.f dB")

In [95]:
librosa.display.specshow(imgbatch2[0, 0].numpy())
plt.colorbar()

In [94]:
librosa.display.specshow(imgbatch22[0, 0].numpy())
plt.colorbar()

In [99]:
librosa.display.specshow(imgbatch22[0, 0].numpy() - imgbatch2[0, 0].numpy())
plt.colorbar()

In [98]:
imgbatch[0, 0].numpy() - imgbatch2[0, 0].numpy()

In [51]:
test =(spec - global_mean) / global_std

In [92]:
imgbatch = model.wav2img(batch['wave'][:, None, :])
imgbatch2 = model.wav2img(batchw['wave'][:, None, :])
imgbatch22 = model.wav2img(batchw2['wave'][:, None, :])

In [15]:
minn = imgbatch2[0].min() 
maxx = imgbatch2[0].max() 

normed = (imgbatch2[0] - minn) / (maxx - minn)

In [19]:
normed * 255.9

In [63]:
((imgbatch2[0]) / 80).min()

In [81]:
test = imgbatch - imgbatch2[0]

In [82]:
test

In [18]:
model = SimpleCNN(cfg).eval()

In [67]:
imgbbatch = model.wav2img(bbatch['wave'])

In [41]:
imgbbatch2 = model.wav2img(bbatch['wave'])
imgbatch2 = model.wav2img(batch['wave'])

In [73]:
test = (imgbbatch[0]+80) / 80

In [76]:
test.min()

In [63]:
imgbbatch[0]

In [62]:
imgbatch[0]

In [27]:
class StatsRecorder:
    """Accumulates mean and standard deviation across mini-batches of torch tensors.

    Statistics are reduced over `red_dims` and merged batch by batch with the
    pooled-variance update from
    http://notmatthancock.github.io/2017/03/23/simple-batch-stat-updates.html

    The merge weights each batch by its size along dimension 0, so it is exact when
    every sample contributes the same number of elements per kept dimension (e.g.
    equally sized images). The result is the *population* standard deviation
    (divisor N) of everything seen so far.

    Attributes (`mean`, `std` and `ndimensions` are only set after the first
    non-empty update):
        mean: running mean, shaped like the input with `red_dims` kept as size 1.
        std: running population standard deviation, same shape as `mean`.
        nobservations: number of samples (dimension 0) seen so far.
        ndimensions: size of dimension 1 of the first batch.
    """

    def __init__(self, red_dims=(0,2,3)):
        """
        Args:
            red_dims: tensor dimensions to reduce over when computing statistics.
        """
        self.red_dims = red_dims # which mini-batch dimensions to average over
        self.nobservations = 0   # running number of observations

    def update(self, data):
        """Fold one mini-batch into the running statistics.

        Args:
            data: torch.Tensor whose dimension 0 indexes samples and dimension 1
                indexes the channels/features whose count must stay constant.

        Raises:
            ValueError: if dimension 1 differs from the first batch seen.
        """
        n = data.shape[0]
        if n == 0:
            # An empty batch carries no information and would only inject NaNs.
            return

        batch_mean = data.mean(dim=self.red_dims, keepdim=True)
        # Population std (divisor N): the merge formula below is only exact for
        # population variances, not for the unbiased (N-1) estimator.
        batch_std = data.std(dim=self.red_dims, unbiased=False, keepdim=True)

        # initialize stats and dimensions on first batch
        if self.nobservations == 0:
            self.mean = batch_mean
            self.std = batch_std
            self.nobservations = n
            self.ndimensions = data.shape[1]
            return

        if data.shape[1] != self.ndimensions:
            raise ValueError('Data dims do not match previous observations.')

        m = self.nobservations * 1.0
        total = m + n

        # Weighted mean of the two groups.
        prev_mean = self.mean
        self.mean = m / total * prev_mean + n / total * batch_mean
        # Pooled variance = weighted within-group variances + between-group term.
        pooled_var = (m / total * self.std ** 2
                      + n / total * batch_std ** 2
                      + m * n / total ** 2 * (prev_mean - batch_mean) ** 2)
        self.std = torch.sqrt(pooled_var)

        # update total number of seen samples
        self.nobservations += n

In [41]:
bbatch

In [30]:
batchwave = batch['wave'][0]

In [77]:
import sklearn.metrics 
report = (metrics.classification_report(pred_df['label'].values , pred_df['predicted_class_id'].values , digits=3,  output_dict=True))
evaluation = pd.DataFrame(report).transpose()
evaluation

In [12]:
def inference_k_random(net, state_dict_path, test_dl, test_metadata_df, k=1):
    """Run `k` full inference passes over `test_dl` and return the raw outputs.

    Args:
        net: network instance; put in eval mode, moved to the selected device and
            loaded with the weights from `state_dict_path`.
        state_dict_path: checkpoint location handed to `get_state_dict`.
        test_dl: iterable of batches; each batch must provide a 'wave' entry.
        test_metadata_df: accepted for interface compatibility; not used here.
        k: number of passes over `test_dl`.

    Returns:
        list of length `k`; element i is a numpy array with the vertically stacked
        model outputs of pass i. Averaging / argmax is left to the caller.
    """
    torch.cuda.empty_cache()
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = 'cpu'
    weights = get_state_dict(state_dict_path)
    model = net.eval().to(device)
    model.load_state_dict(weights)

    all_passes = []
    with torch.no_grad():
        for _pass in range(k):
            pass_outputs = []
            for batch in tqdm(test_dl):
                batch = batch_to_device(batch, device)
                with torch.cuda.amp.autocast():
                    logits = model(batch['wave'])
                pass_outputs.append(logits.cpu().numpy())
            all_passes.append(np.vstack(pass_outputs))

    return all_passes


def inference_all(net, state_dict_path, test_dl, test_metadata_df, loss_fn, label_path, k=1):
    """Predict a class id for every row of `test_metadata_df`.

    The loader is iterated `k` times; the outputs of each pass are stacked into one
    (n_samples, n_outputs) array, the passes are averaged, and the argmax over the
    last axis is written to the 'predicted_class_id' column.

    Args:
        net: network instance; put in eval mode, moved to the selected device and
            loaded with the weights from `state_dict_path`.
        state_dict_path: checkpoint location handed to `get_state_dict`.
        test_dl: non-shuffled iterable of batches with a 'wave' entry, yielding one
            output row per row of `test_metadata_df`, in the same order.
        test_metadata_df: DataFrame that receives the predictions (modified in place).
        loss_fn: accepted for interface compatibility; not used.
        label_path: accepted for interface compatibility; not used.
        k: number of inference passes to average (must be >= 1).

    Returns:
        `test_metadata_df` with the added 'predicted_class_id' column.

    Raises:
        ValueError: if `k` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f'k must be >= 1, got {k}')

    torch.cuda.empty_cache()
    device = "cuda" if torch.cuda.is_available() else 'cpu'
    sd = get_state_dict(state_dict_path)
    model = net.eval().to(device)
    model.load_state_dict(sd)

    pass_preds = []
    with torch.no_grad():
        for _ in range(k):
            batch_outputs = []
            for batch in tqdm(test_dl):
                batch = batch_to_device(batch, device)
                with torch.cuda.amp.autocast():
                    out = model(batch['wave'])
                batch_outputs.append(out.cpu().numpy())
            # Stack batches into one array per pass; the last batch may be smaller,
            # so keeping a list of per-batch arrays could not be averaged.
            pass_preds.append(np.vstack(batch_outputs))

    # Average over passes -> (n_samples, n_outputs), then pick the top class per row.
    mean_preds = np.mean(pass_preds, axis=0)
    test_metadata_df['predicted_class_id'] = mean_preds.argmax(axis=-1)
    torch.cuda.empty_cache()
    return test_metadata_df


In [None]:
# Produce prediction files for the validation and test splits of one experiment.
# glob(f'{exp_path}/checkpoints/*')
exp_path = f'effnet_baseline/{cfg.exp_name}/'
label_path = f'{full_path}/data/labels.json'
state_dict_path = f"{exp_path}/checkpoints/last.ckpt"
#
k_predictions = 25

val_df = pd.read_csv(f"{full_path}/data/metadata.csv")
# .copy() gives an independent frame so the column assignment below does not write
# into a slice of the original table (avoids pandas' SettingWithCopyWarning).
val_df = val_df[val_df['subset']=='validation'].copy()
val_df['path'] = val_df['path'].apply(lambda x: f'{full_path}/{x}')

test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
test_df['path'] = test_df['file_name'].apply(lambda x: f'{full_path}/data/test/{x}')

for df, dset in [(val_df, 'val'), (test_df, 'test')]:
    print(f'Predict {dset}')
    pred_ds = AudioDataset(df, mode='test', cfg=cfg)
    pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
    # NOTE(review): `loss_fn` is not defined in any visible cell, and the
    # inference_k_random defined in this notebook takes
    # (net, state_dict_path, test_dl, test_metadata_df, k) and returns a list, which
    # does not match this call or the DataFrame usage below. The call presumably
    # targets the version imported from custom.eval — confirm which one is in scope.
    pred_df = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, df, 
                                 loss_fn, label_path, k=k_predictions)
    pred_df.to_csv(f'{exp_path}/{dset}_predictions.csv', index=False) 
    if dset == 'test':
        # Submission file only needs the file name and the predicted class.
        pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions.csv', index=False)
    elif dset == 'val':
        error_analysis(exp_path)