In [None]:
%load_ext autoreload
%autoreload 2

In [8]:
!ls

audio_training.ipynb	     evaluation.ipynb  models	    saved
audio_training_output.ipynb  logs	       __pycache__  utils.py


## Extract data from zip file




In [9]:
# import package to unzip .7zip file
!sudo apt-get install -y p7zip-full

Reading package lists... Done
Building dependency tree       
Reading state information... Done
p7zip-full is already the newest version (16.02+dfsg-3+deb9u1).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.


In [4]:
# locally unzip the audio files in train.7z
# !cd ../Data/ && p7zip -d train.7z

In [5]:
# !ls ../Data/train/audio

## Imports

In [10]:
%matplotlib inline

import IPython.display
import numpy as np
import matplotlib.pyplot as plt
import copy
from scipy.io import wavfile
from scipy.signal import butter, lfilter
import scipy.ndimage
from torch.utils.data import DataLoader

from utils import log_textfile

## Log file

In [7]:
MODELNAME = 'normalTrain1'
logfile = './logs/' + MODELNAME

## Data Loader

In [11]:
DATADIR = '../Data/' # unzipped train and test data
OUTDIR = './model-k' # just a random name
# Data Loading
import os
import re
from glob import glob

LABELS = 'yes no up down left right on off stop go silence unknown'.split()
id2name = {i: name for i, name in enumerate(LABELS)}
name2id = {name: i for i, name in id2name.items()}

def load_data(data_dir):
    """ Return 2 lists of tuples:
    [(class_id, user_id, path), ...] for train
    [(class_id, user_id, path), ...] for validation
    """
    # Just a simple regexp for paths with three groups:
    # prefix, label, user_id
    pattern = re.compile("(.+\/)?(\w+)\/([^_]+)_.+wav")
    all_files = glob(os.path.join(data_dir, 'train/audio/*/*wav'))

    with open(os.path.join(data_dir, 'train/validation_list.txt'), 'r') as fin:
        validation_files = fin.readlines()
    valset = set()
    for entry in validation_files:
        r = re.match(pattern, entry)
        if r:
            valset.add(r.group(3))

    possible = set(LABELS)
    train, val = [], []
    for entry in all_files:
        bl_true = True
        r = re.match(pattern, entry)
        if r:
            label, uid = r.group(2), r.group(3)
            if label == '_background_noise_':
                label = 'silence'
                bl_true = False
            if label not in possible:
                label = 'unknown'
                bl_true = False
            
            if bl_true:
                label_id = name2id[label]

                sample = (label_id, uid, entry)
                if uid in valset:
                    val.append(sample)
                else:
                    train.append(sample)

    print('There are {} train and {} val samples'.format(len(train), len(val)))
    return train, val

trainset, valset = load_data(DATADIR)

There are 21105 train and 2577 val samples


## Make Spectrogram

In [12]:
### Parameters ###
fft_size = 2048 # window size for the FFT
step_size = fft_size/16 # distance to slide along the window (in time)
spec_thresh = 4 # threshold for spectrograms (lower filters out more noise)
lowcut = 500 # Hz # Low cut for our butter bandpass filter
highcut = 15000 # Hz # High cut for our butter bandpass filter
# For mels
n_mel_freq_components = 64 # number of mel frequency channels
shorten_factor = 10 # how much should we compress the x-axis (time)
start_freq = 300 # Hz # What frequency to start sampling our melS from 
end_freq = 8000 # Hz # What frequency to stop sampling our melS from

In [13]:
def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y

def overlap(X, window_size, window_step):
    """
    Create an overlapped version of X
    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to window and overlap
    window_size : int
        Size of windows to take
    window_step : int
        Step size between windows
    Returns
    -------
    X_strided : shape=(n_windows, window_size)
        2D array of overlapped X
    """
    window_size, window_step = map(int, (window_size, window_step))
    if window_size % 2 != 0:
        raise ValueError("Window size must be even!")
    # Make sure there are an even number of windows before stridetricks
    append = np.zeros((window_size - len(X) % window_size))
    X = np.hstack((X, append))

    ws = window_size
    ss = window_step
    a = X

    valid = len(a) - ws
    nw = (valid) // ss
    out = np.ndarray((nw,ws),dtype = a.dtype)

    for i in range(nw):
        # "slide" the window along the samples
        start = i * ss
        stop = start + ws
        out[i] = a[start : stop]

    return out

def stft(X, fftsize=128, step=65, mean_normalize=True, real=False,
         compute_onesided=True):
    """
    Compute STFT for 1D real valued input X
    """
    X = np.copy(X)
    if real:
        local_fft = np.fft.rfft
        cut = -1
    else:
        local_fft = np.fft.fft
        cut = None
    if compute_onesided:
        cut = fftsize // 2
    if mean_normalize:
        X -= X.mean()

    X = overlap(X, fftsize, step)
    
    size = fftsize
    win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(size) / (size - 1))
    X = X * win[None]
    X = local_fft(X)[:, :cut]
    return X

def pretty_spectrogram(d,log = True, thresh= 5, fft_size = 512, step_size = 64):
    """
    creates a spectrogram
    log: take the log of the spectrgram
    thresh: threshold minimum power for log spectrogram
    """
    specgram = np.abs(stft(d, fftsize=fft_size, step=step_size, real=False,
        compute_onesided=True))
  
    if log == True:
        specgram /= specgram.max() # volume normalize to max 1
        specgram = np.log10(specgram) # take log
        specgram[specgram < -thresh] = -thresh # set anything less than the threshold as the threshold
    else:
        specgram[specgram < thresh] = thresh # set anything less than the threshold as the threshold
    
    return specgram

# Also mostly modified or taken from https://gist.github.com/kastnerkyle/179d6e9a88202ab0a2fe
def invert_pretty_spectrogram(X_s, log = True, fft_size = 512, step_size = 512/4, n_iter = 10):
    
    if log == True:
        X_s = np.power(10, X_s)

    X_s = np.concatenate([X_s, X_s[:, ::-1]], axis=1)
    X_t = iterate_invert_spectrogram(X_s, fft_size, step_size, n_iter=n_iter)
    return X_t

def iterate_invert_spectrogram(X_s, fftsize, step, n_iter=10, verbose=False):
    """
    Under MSR-LA License
    Based on MATLAB implementation from Spectrogram Inversion Toolbox
    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.
    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.
    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    reg = np.max(X_s) / 1E8
    X_best = copy.deepcopy(X_s)
    for i in range(n_iter):
        if verbose:
            print("Runnning iter %i" % i)
        if i == 0:
            X_t = invert_spectrogram(X_best, step, calculate_offset=True,
                                     set_zero_phase=True)
        else:
            # Calculate offset was False in the MATLAB version
            # but in mine it massively improves the result
            # Possible bug in my impl?
            X_t = invert_spectrogram(X_best, step, calculate_offset=True,
                                     set_zero_phase=False)
        est = stft(X_t, fftsize=fftsize, step=step, compute_onesided=False)
        phase = est / np.maximum(reg, np.abs(est))
        X_best = X_s * phase[:len(X_s)]
    X_t = invert_spectrogram(X_best, step, calculate_offset=True,
                             set_zero_phase=False)
    return np.real(X_t)

def invert_spectrogram(X_s, step, calculate_offset=True, set_zero_phase=True):
    """
    Under MSR-LA License
    Based on MATLAB implementation from Spectrogram Inversion Toolbox
    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.
    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.
    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    step = int(step)
    size = int(X_s.shape[1] // 2)
    wave = np.zeros((X_s.shape[0] * step + size))
    # Getting overflow warnings with 32 bit...
    wave = wave.astype('float64')
    total_windowing_sum = np.zeros((X_s.shape[0] * step + size))
    win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(size) / (size - 1))

    est_start = int(size // 2) - 1
    est_end = est_start + size
    for i in range(X_s.shape[0]):
        wave_start = int(step * i)
        wave_end = wave_start + size
        if set_zero_phase:
            spectral_slice = X_s[i].real + 0j
        else:
            # already complex
            spectral_slice = X_s[i]

        # Don't need fftshift due to different impl.
        wave_est = np.real(np.fft.ifft(spectral_slice))[::-1]
        if calculate_offset and i > 0:
            offset_size = size - step
            if offset_size <= 0:
                print("WARNING: Large step size >50\% detected! "
                      "This code works best with high overlap - try "
                      "with 75% or greater")
                offset_size = step
            offset = xcorr_offset(wave[wave_start:wave_start + offset_size],
                                  wave_est[est_start:est_start + offset_size])
        else:
            offset = 0
        wave[wave_start:wave_end] += win * wave_est[
            est_start - offset:est_end - offset]
        total_windowing_sum[wave_start:wave_end] += win
    wave = np.real(wave) / (total_windowing_sum + 1E-6)
    return wave

def xcorr_offset(x1, x2):
    """
    Under MSR-LA License
    Based on MATLAB implementation from Spectrogram Inversion Toolbox
    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.
    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.
    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    x1 = x1 - x1.mean()
    x2 = x2 - x2.mean()
    frame_size = len(x2)
    half = frame_size // 2
    corrs = np.convolve(x1.astype('float32'), x2[::-1].astype('float32'))
    corrs[:half] = -1E30
    corrs[-half:] = -1E30
    offset = corrs.argmax() - len(x1)
    return offset

## Define Custom Dataset

In [14]:
# from torchvision.transforms import Compose
import torchvision.transforms as transforms

class CustomDatasetSimple_new():
    """Simple dataset class for dataloader"""
    def __init__(self, X, y, mean, std):
        """Initialize the CustomDataset"""

        self.mean = mean
        self.std = std
        self.X = X
        self.y = y

    def __len__(self):
        """Return the total length of the dataset"""
        dataset_size = len(self.X)
        return dataset_size

    def __getitem__(self, idx):
        """Return the batch given the indices"""
        # print('debug1')
        rate, data = wavfile.read(self.X[idx])
        spec = pretty_spectrogram(data.astype('float64'), fft_size = fft_size, step_size = step_size, log = True, thresh = spec_thresh)
        height = spec.shape[0]
        if height!=112:
            spec = pad_dimesions(spec)
        # print('debug2')
        X = np.copy(spec)
        X = torch.tensor(X, dtype=torch.float)
        X.unsqueeze_(-1)
        X = X.transpose(2, 0)
        X = X.transpose(2, 1)
        # print('debug3')
        X_batch = (X-self.mean)/self.std
        y_batch = self.y[idx]
        return X_batch, y_batch

## Define Attack Functions

In [15]:
def fgsm(model, X, y, epsilon=0.3):
    """ Construct FGSM adversarial examples on the examples X"""
    delta = torch.zeros_like(X, requires_grad=True)
    delta = delta.cuda()
    loss = nn.CrossEntropyLoss()(model(X + delta), y)
    loss.backward()
    return epsilon * delta.grad.detach().sign()


def pgd_linf(model, X, y, epsilon=0.1, alpha=0.01, num_iter=20, randomize=False):
    """ Construct PGD adversarial examples on the examples X"""
    if randomize:
        delta = torch.rand_like(X, requires_grad=True)
        delta.data = delta.data * 2 * epsilon - epsilon
    else:
        delta = torch.zeros_like(X, requires_grad=True)
        
    for t in range(num_iter):
        loss = nn.CrossEntropyLoss()(model(X + delta), y)
        loss.backward()
        delta.data = (delta + alpha*delta.grad.detach().sign()).clamp(-epsilon,epsilon)
        delta.grad.zero_()
    return delta.detach()

# Train the model on a subset of data

In [16]:
from random import sample
from random import shuffle

# training set subsample
trainset_sub = sample(trainset, 2000)
len(trainset_sub)

# validation subample
shuffle(valset)
valset_sub = valset[:1000]

# subsample the training set from the validation for now!
testset_sub = valset[1000:2000]
print('length of trainset: ' + str(len(trainset_sub)) + ', length of validation set: ' +  str(len(valset_sub)), 
      ', length of test set: ' + str(len(testset_sub)))

length of trainset: 2000, length of validation set: 1000 , length of test set: 1000


In [30]:
def pad_dimesions(spec):
  '''
    Data comes in several dimensions. Pad with zeros to get dimensions (112,1)
  '''
  x_offset = 1  
  y_offset = 0
  result = np.zeros([112, 1024])
  result[x_offset:spec.shape[0] + x_offset, y_offset:spec.shape[1] + y_offset] = spec
  return result

Extract the audio files, read the wave files and separate the labels

In [17]:
trainset_dl = trainset
valset_dl = valset
trainset_dl = trainset
valset_dl = valset
#test_dl = testset_sub

# train
train_filepaths = [i[2] for i in trainset_dl]
train_labels = [i[0] for i in trainset_dl]
valid_filepaths = [i[2] for i in valset_dl]
val_labels = [i[0] for i in valset_dl]
#test_filepaths = [i[2] for i in test_dl]
#test_labels = [i[0] for i in valset_dl]

In [257]:
if False:
    trainset_dl = trainset
    valset_dl = valset
    test_dl = testset_sub

    # train
    train_filepaths = [i[2] for i in trainset_dl]
    train_audio_files = [wavfile.read(i)[1] for i in train_filepaths]
    train_labels = [i[0] for i in trainset_dl]

    train_spec = []
    for i in range(len(train_audio_files)):
      spec = pretty_spectrogram(train_audio_files[i].astype('float64'), fft_size = fft_size, step_size = step_size, log = True, thresh = spec_thresh)
      height = spec.shape[0]
      if height!=112:
        spec = pad_dimesions(spec)
      train_spec.append(spec)

    # validation
    valid_filepaths = [i[2] for i in valset_dl]
    valid_audio_files = [wavfile.read(i)[1] for i in valid_filepaths]
    val_labels = [i[0] for i in valset_dl]

    valid_spec = []
    for i in range(len(valid_audio_files)):
      spec = pretty_spectrogram(valid_audio_files[i].astype('float64'), fft_size = fft_size, step_size = step_size, log = True, thresh = spec_thresh)
      if height!=112:
        spec = pad_dimesions(spec)
      valid_spec.append(spec)

    # test
    test_filepaths = [i[2] for i in test_dl]
    test_audio_files = [wavfile.read(i)[1] for i in test_filepaths]
    test_labels = [i[0] for i in valset_dl]

    test_spec = []
    for i in range(len(test_audio_files)):
      spec = pretty_spectrogram(test_audio_files[i].astype('float64'), fft_size = fft_size, step_size = step_size, log = True, thresh = spec_thresh)
      if height!=112:
        spec = pad_dimesions(spec)
      test_spec.append(spec)

In [18]:
def get_mean_std(loader):
    output_mean = 0.
    output_std = 0.
    n = 0
    for X,y in loader:
        output_mean += np.mean(X.detach().cpu().numpy())
        output_std += np.std(X.detach().cpu().numpy())
        n += 1
        if n % 10 == 0:
            print(n)
    return output_mean/n, output_std/n

In [19]:
import torch
mean=-3.1259581955996425
std=0.8961027914827521
batch_size=32
num_workers=8
data_train_sub = CustomDatasetSimple_new(train_filepaths, train_labels, mean, std)
data_valid_sub = CustomDatasetSimple_new(valid_filepaths, val_labels, mean, std)

train_data_loader = torch.utils.data.DataLoader(
    dataset=data_train_sub, batch_size=batch_size, shuffle=True,
    num_workers=num_workers)

valid_data_loader = torch.utils.data.DataLoader(
    dataset=data_valid_sub, batch_size=batch_size, shuffle=True,
    num_workers=num_workers)

#del(train_spec)
#del(train_spec_np)
#del(valid_spec)
#del(test_spec)

In [20]:
#get_mean_std(train_data_loader)

In [21]:
"""
BaseTrainer provides base functionality for any trainer object.
It provides functionality to:
    - train for one epoch
    - validation 
    - testing
"""
import torch
from torchvision import models
import math
import matplotlib.pyplot as plt
import copy
import os
import datetime
import torch.nn as nn
import time
from torch import optim

class BaseTrainer:
    """Base Class for trainer/train.py."""

    def __init__(self, model, train_dl, valid_dl, test_dl, criterion, n_epochs, model_filename):
        """Initialize the BaseTrainer object."""
        self.model = model
        self.train_dl = train_dl
        self.valid_dl = valid_dl
        self.test_dl = test_dl
        self.criterion = criterion
        self.opt_lrs = []
        self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        # self.attack = attack
        self.model_filename = model_filename

    def run_epoch(self, epoch, loader, optimizer=None, attack=None):
      running_loss = 0.
      running_corrects = 0.
      running_model = 0.
      n = 0.
      counter_batch = 0
      for X, y_true in loader:
        # print('debug4')
        start = time.time()
        X = X.cuda()
        y_true = y_true.cuda()
        if optimizer!=None:
          optimizer.zero_grad()
        if attack!=None:
          delta = attack(model, X, y_true)
          delta = delta.cuda()
          y_pred = self.model(X + delta)
        else:
          # print('debug5')
          y_pred = self.model(X)
        loss = self.criterion(y_pred, y_true) # input criterion is negative
        if optimizer!=None:
          loss.backward()
          optimizer.step()
        indices = torch.argmax(y_pred, dim=1)
        running_loss += float(loss)
        running_corrects += np.sum(indices.detach().cpu().numpy() == y_true.detach().cpu().numpy())
        n += y_true.detach().cpu().numpy().shape[0]
        counter_batch += 1
        end = time.time()
        delta_model = end - start
        running_model += delta_model
        if (counter_batch%20)==0:
            print(counter_batch)
            log_textfile(LOGFILE_PATH, '[epoch: %d, batch:  %5d] loss: %.5f time model: %.5f acc: %.5f' % (epoch + 1, counter_batch + 1, running_loss/n, running_model / 20.0, running_corrects / 20))
        # if optimizer==None:
        #   print('loss: ' + str(running_loss/n) + ' accuracy: ' + str(running_corrects/n))
      return(running_loss/n, running_corrects/n)
    
    def fit_model_new(self, optimizer, n_epochs, model_filename, attack):
        """
        Perform one training epoch.

        Parameters:
            optimizer - optimizer to use while training
            scheduler - scheduler to use while training
        Returns: training loss after epoch
        """
        self.model.train()

        final_loss = None
        lowest_valid_loss = 99999999

        for epoch in range(n_epochs):
          self.model.train()
          train_loss, train_acc = self.run_epoch(epoch, self.train_dl, optimizer, attack=attack)
          self.model.eval()
          valid_loss, valid_acc = self.run_epoch(epoch, self.valid_dl, None, attack=attack)
          if valid_loss < lowest_valid_loss:
            lowest_valid_loss = valid_loss
            torch.save(self.model, 'saved/' + model_filename)
          log_textfile(LOGFILE_PATH, 'epoch:' + str(epoch + 1) + ' train loss: ' + str(train_loss) + ' train acc: ' + str(train_acc) + ' valid loss: ' + str(valid_loss) + ' valid acc: ' + str(valid_acc))
          print('epoch:' + str(epoch + 1) + ' train loss: ' + str(train_loss) + ' train acc: ' + str(train_acc) + ' valid loss: ' + str(valid_loss) + ' valid acc: ' + str(valid_acc))

            
        

In [265]:
MODELNAME = 'full_dataset_normal'
LOGFILE_PATH = 'logs/' + MODELNAME

from models.resnet import ResNet, resnet34

model = resnet34(pretrained=False, progress=False).cuda()

criterion = torch.nn.CrossEntropyLoss()

for param in model.parameters():
  param.requires_grad = True

In [266]:
trainer = BaseTrainer(model=model, train_dl=train_data_loader, valid_dl=valid_data_loader, test_dl=valid_data_loader, criterion=criterion, model_filename=MODELNAME, n_epochs=1)

In [270]:
MODELNAME = 'full_dataset_normal'
LOGFILE_PATH = 'logs/' + MODELNAME
global_eps = 0.3

def fgsm(model, X, y, epsilon=0.3):
    """ Construct FGSM adversarial examples on the examples X"""
    epsilon = global_eps
    delta = torch.zeros_like(X, requires_grad=True)
    delta = delta.cuda()
    loss = nn.CrossEntropyLoss()(model(X + delta), y)
    loss.backward()
    return epsilon * delta.grad.detach().sign()


def pgd_linf(model, X, y, epsilon=0.1, alpha=0.01, num_iter=20, randomize=False):
    """ Construct PGD adversarial examples on the examples X"""
    epsilon = global_eps
    if randomize:
        delta = torch.rand_like(X, requires_grad=True)
        delta.data = delta.data * 2 * epsilon - epsilon
    else:
        delta = torch.zeros_like(X, requires_grad=True)
        
    for t in range(num_iter):
        loss = nn.CrossEntropyLoss()(model(X + delta), y)
        loss.backward()
        delta.data = (delta + alpha*delta.grad.detach().sign()).clamp(-epsilon,epsilon)
        delta.grad.zero_()
    return delta.detach()

from models.resnet import ResNet, resnet34
model = resnet34(pretrained=False, progress=False).cuda()
criterion = torch.nn.CrossEntropyLoss()

for param in model.parameters():
  param.requires_grad = True

trainer = BaseTrainer(model=model, train_dl=train_data_loader, valid_dl=valid_data_loader, test_dl=valid_data_loader, criterion=criterion, model_filename=MODELNAME, n_epochs=1)
trainer.fit_model_new(optimizer=optim.Adam(model.parameters(), lr=.001), n_epochs=10, model_filename=MODELNAME, attack=None)

20
[epoch: 1, batch:     21] loss: 0.07373 time model: 0.66903 acc: 6.35000


KeyboardInterrupt: 

In [None]:
MODELNAME = 'full_dataset_fgsm03'
LOGFILE_PATH = 'logs/' + MODELNAME
global_eps = 0.3

def fgsm(model, X, y, epsilon=0.3):
    """ Construct FGSM adversarial examples on the examples X"""
    epsilon = global_eps
    delta = torch.zeros_like(X, requires_grad=True)
    delta = delta.cuda()
    loss = nn.CrossEntropyLoss()(model(X + delta), y)
    loss.backward()
    return epsilon * delta.grad.detach().sign()


def pgd_linf(model, X, y, epsilon=0.1, alpha=0.01, num_iter=20, randomize=False):
    """ Construct PGD adversarial examples on the examples X"""
    epsilon = global_eps
    if randomize:
        delta = torch.rand_like(X, requires_grad=True)
        delta.data = delta.data * 2 * epsilon - epsilon
    else:
        delta = torch.zeros_like(X, requires_grad=True)
        
    for t in range(num_iter):
        loss = nn.CrossEntropyLoss()(model(X + delta), y)
        loss.backward()
        delta.data = (delta + alpha*delta.grad.detach().sign()).clamp(-epsilon,epsilon)
        delta.grad.zero_()
    return delta.detach()

from models.resnet import ResNet, resnet34
model = resnet34(pretrained=False, progress=False).cuda()
criterion = torch.nn.CrossEntropyLoss()

for param in model.parameters():
  param.requires_grad = True

trainer = BaseTrainer(model=model, train_dl=train_data_loader, valid_dl=valid_data_loader, test_dl=valid_data_loader, criterion=criterion, model_filename=MODELNAME, n_epochs=1)
trainer.fit_model_new(optimizer=optim.Adam(model.parameters(), lr=.001), n_epochs=10, model_filename=MODELNAME, attack=fgsm)

In [None]:
MODELNAME = 'full_dataset_fgsm20'
LOGFILE_PATH = 'logs/' + MODELNAME
global_eps = 2.

def fgsm(model, X, y, epsilon=0.3):
    """ Construct FGSM adversarial examples on the examples X"""
    epsilon = global_eps
    delta = torch.zeros_like(X, requires_grad=True)
    delta = delta.cuda()
    loss = nn.CrossEntropyLoss()(model(X + delta), y)
    loss.backward()
    return epsilon * delta.grad.detach().sign()


def pgd_linf(model, X, y, epsilon=0.1, alpha=0.01, num_iter=20, randomize=False):
    """ Construct PGD adversarial examples on the examples X"""
    epsilon = global_eps
    if randomize:
        delta = torch.rand_like(X, requires_grad=True)
        delta.data = delta.data * 2 * epsilon - epsilon
    else:
        delta = torch.zeros_like(X, requires_grad=True)
        
    for t in range(num_iter):
        loss = nn.CrossEntropyLoss()(model(X + delta), y)
        loss.backward()
        delta.data = (delta + alpha*delta.grad.detach().sign()).clamp(-epsilon,epsilon)
        delta.grad.zero_()
    return delta.detach()

from models.resnet import ResNet, resnet34
model = resnet34(pretrained=False, progress=False).cuda()
criterion = torch.nn.CrossEntropyLoss()

for param in model.parameters():
  param.requires_grad = True

trainer = BaseTrainer(model=model, train_dl=train_data_loader, valid_dl=valid_data_loader, test_dl=valid_data_loader, criterion=criterion, model_filename=MODELNAME, n_epochs=1)
trainer.fit_model_new(optimizer=optim.Adam(model.parameters(), lr=.001), n_epochs=10, model_filename=MODELNAME, attack=fgsm)

In [None]:
MODELNAME = 'full_dataset_pdg03'
LOGFILE_PATH = 'logs/' + MODELNAME
global_eps = 0.3

def fgsm(model, X, y, epsilon=0.3):
    """ Construct FGSM adversarial examples on the examples X"""
    epsilon = global_eps
    delta = torch.zeros_like(X, requires_grad=True)
    delta = delta.cuda()
    loss = nn.CrossEntropyLoss()(model(X + delta), y)
    loss.backward()
    return epsilon * delta.grad.detach().sign()


def pgd_linf(model, X, y, epsilon=0.1, alpha=0.01, num_iter=7, randomize=False):
    """ Construct PGD adversarial examples on the examples X"""
    epsilon = global_eps
    if randomize:
        delta = torch.rand_like(X, requires_grad=True)
        delta.data = delta.data * 2 * epsilon - epsilon
    else:
        delta = torch.zeros_like(X, requires_grad=True)
        
    for t in range(num_iter):
        loss = nn.CrossEntropyLoss()(model(X + delta), y)
        loss.backward()
        delta.data = (delta + alpha*delta.grad.detach().sign()).clamp(-epsilon,epsilon)
        delta.grad.zero_()
    return delta.detach()

from models.resnet import ResNet, resnet34
model = resnet34(pretrained=False, progress=False).cuda()
criterion = torch.nn.CrossEntropyLoss()

for param in model.parameters():
  param.requires_grad = True

trainer = BaseTrainer(model=model, train_dl=train_data_loader, valid_dl=valid_data_loader, test_dl=valid_data_loader, criterion=criterion, model_filename=MODELNAME, n_epochs=1)
trainer.fit_model_new(optimizer=optim.Adam(model.parameters(), lr=.001), n_epochs=10, model_filename=MODELNAME, attack=pgd_linf)



In [None]:
MODELNAME = 'full_dataset_pdg20'
LOGFILE_PATH = 'logs/' + MODELNAME
global_eps = 2.0

def fgsm(model, X, y, epsilon=0.3):
    """ Construct FGSM adversarial examples on the examples X"""
    epsilon = global_eps
    delta = torch.zeros_like(X, requires_grad=True)
    delta = delta.cuda()
    loss = nn.CrossEntropyLoss()(model(X + delta), y)
    loss.backward()
    return epsilon * delta.grad.detach().sign()


def pgd_linf(model, X, y, epsilon=0.1, alpha=0.01, num_iter=7, randomize=False):
    """ Construct PGD adversarial examples on the examples X"""
    epsilon = global_eps
    if randomize:
        delta = torch.rand_like(X, requires_grad=True)
        delta.data = delta.data * 2 * epsilon - epsilon
    else:
        delta = torch.zeros_like(X, requires_grad=True)
        
    for t in range(num_iter):
        loss = nn.CrossEntropyLoss()(model(X + delta), y)
        loss.backward()
        delta.data = (delta + alpha*delta.grad.detach().sign()).clamp(-epsilon,epsilon)
        delta.grad.zero_()
    return delta.detach()

from models.resnet import ResNet, resnet34
model = resnet34(pretrained=False, progress=False).cuda()
criterion = torch.nn.CrossEntropyLoss()

for param in model.parameters():
  param.requires_grad = True

trainer = BaseTrainer(model=model, train_dl=train_data_loader, valid_dl=valid_data_loader, test_dl=valid_data_loader, criterion=criterion, model_filename=MODELNAME, n_epochs=1)
trainer.fit_model_new(optimizer=optim.Adam(model.parameters(), lr=.001), n_epochs=10, model_filename=MODELNAME, attack=pgd_linf)



### Evaluation

In [2]:
import torch
def load_model(name):
    model = torch.load(name)
    return model

In [3]:
normalTrain1 = load_model('saved/normalTrain1')
full_dataset_fgsm03 = load_model('saved/full_dataset_fgsm03')
full_dataset_fgsm20 = load_model('saved/full_dataset_fgsm20')
full_dataset_pdg03 = load_model('saved/full_dataset_pdg03')
full_dataset_normal = load_model('saved/full_dataset_normal')

In [33]:
criterion = torch.nn.CrossEntropyLoss()

LOGFILE_PATH = 'logs/eval/normalTrain1'
trainer_normalTrain1 = BaseTrainer(model=normalTrain1, 
                                   train_dl=train_data_loader, 
                                   valid_dl=valid_data_loader, 
                                   test_dl=valid_data_loader, 
                                   criterion=criterion, 
                                   model_filename='eval_normalTrain1', 
                                   n_epochs=1)

In [34]:
trainer_normalTrain1 = trainer_normalTrain1.run_epoch(1, valid_data_loader, 
                                                      optimizer=None, 
                                                      attack=None)

RuntimeError: CUDA out of memory. Tried to allocate 56.00 MiB (GPU 0; 11.17 GiB total capacity; 9.95 GiB already allocated; 10.94 MiB free; 205.02 MiB cached)

In [None]:
error

In [None]:
np.squeeze(np.moveaxis(dd[0], 0, 2)).shape

In [262]:
trainset_sub[0][2]

'../Data/train/audio/on/6a700f9d_nohash_1.wav'

In [263]:
rate, data = wavfile.read(trainset_sub[0][2])

In [116]:
output1 = np.squeeze(np.moveaxis(dd[10], 0, 2))

In [191]:
np.clip(10*np.random.normal(size=output1.shape), a_max=10, a_min=-10).shape

(112, 1024)

In [240]:
dddd = np.random.normal(size=(112,1024))*1

In [253]:
ddd = np.clip(dddd, a_max=3, a_min=-3)*std+mean

In [210]:
np.sum(np.abs(ddd-dddd))

0.0

In [254]:
output = (output1*std+mean)+ddd

In [255]:
recovered_audio_orig = invert_pretty_spectrogram(output, fft_size = fft_size,
                                            step_size = step_size, log = True, n_iter = 10)
IPython.display.Audio(data=recovered_audio_orig, rate=rate) # play the audio

In [0]:
# normal training, fgsm attack
trainer.run_epoch(valid_data_loader, optimizer=None, attack=fgsm)