# Important Packages, framework versions, and functions

In [None]:
!nvidia-smi

In [None]:
# Select TensorFlow 1.x in Colab (Colab supports two TensorFlow versions).
# Version 1.x is needed for the hparams object, which holds the constants used by the spectrogram and inv_spectrogram functions.
%tensorflow_version 1.x 

In [None]:
pip install hparams

In [None]:
import tensorflow as tf
from hparams import hparams

functions for loading and saving audios, spectrogram and inv_spectrogram functions, normalize and denormalize, griffin_lim for constructing phase, stft and istft 

In [None]:
def save_wav(wav, path):
    """Peak-normalise ``wav`` to 16-bit PCM and write its first 16000 samples to ``path``.

    Args:
        wav: 1-D array of audio samples (any real dtype).
        path: destination file name passed to ``wavfile.write``.

    Signals shorter than 16000 samples are not written at all (nothing is
    raised); this mirrors the original behaviour of the helper.
    """
    clip_len = 16000  # number of samples kept per clip

    peak = np.max(np.abs(wav))
    # An all-zero signal has no peak to normalise by; keep it as silence
    # instead of dividing by zero.
    scaled = wav / peak if peak > 0 else np.zeros_like(wav, dtype=float)
    # int16 tops out at 32767: scaling by 32768 would overflow on a positive peak.
    wav = np.int16(scaled * 32767)

    if len(wav) >= clip_len:
        wavfile.write(path, hparams.sample_rate, wav[:clip_len])

def spectrogram(y):
    """Return the normalised linear-frequency dB spectrogram of waveform ``y``.

    Pipeline: pre-emphasis -> STFT magnitude -> dB -> subtract
    ``hparams.ref_level_db`` -> ``_normalize``.
    """
    magnitude = np.abs(_stft(_preemphasis(y)))
    level_db = _amp_to_db(magnitude) - hparams.ref_level_db
    return _normalize(level_db)


def inv_spectrogram(spectrogram):
    """Reconstruct a waveform from a normalised linear spectrogram.

    Undoes ``_normalize`` and the reference-level shift, converts dB back to
    amplitude, then recovers phase with Griffin-Lim and removes pre-emphasis.
    """
    level_db = _denormalize(spectrogram) + hparams.ref_level_db
    magnitude = _db_to_amp(level_db)  # back to linear amplitude
    # the magnitude is raised to the power 1.5 before phase reconstruction
    waveform = _griffin_lim(magnitude ** 1.5)
    return _inv_preemphasis(waveform)


def melspectrogram(y):
    """Return the normalised mel-frequency dB spectrogram of waveform ``y``.

    Unlike ``spectrogram``, no reference level is subtracted before
    normalisation.
    """
    magnitude = np.abs(_stft(_preemphasis(y)))
    mel_db = _amp_to_db(_linear_to_mel(magnitude))
    return _normalize(mel_db)


def inv_melspectrogram(melspectrogram):
    """Reconstruct a waveform from a normalised mel spectrogram.

    Denormalises to dB, converts to amplitude, maps mel bands back to linear
    frequency bins, then recovers phase with Griffin-Lim and removes
    pre-emphasis.
    """
    mel_amplitude = _db_to_amp(_denormalize(melspectrogram))
    linear = _mel_to_linear(mel_amplitude)  # back to linear-frequency bins
    # the magnitude is raised to the power 1.5 before phase reconstruction
    waveform = _griffin_lim(linear ** 1.5)
    return _inv_preemphasis(waveform)


# Based on https://github.com/librosa/librosa/issues/434
def _griffin_lim(S):
    """Estimate a waveform whose STFT magnitude matches ``S`` (Griffin-Lim).

    Starts from uniformly random phase and then repeatedly replaces the phase
    with that of the STFT of the current estimate. In total
    ``hparams.griffin_lim_iters`` inverse STFTs are computed (at least one, so
    a result is always returned).

    Args:
        S: magnitude spectrogram, shaped like the output of ``_stft``.

    Returns:
        The reconstructed time-domain signal from the final ``_istft`` call.
    """
    angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
    # The builtin ``complex`` replaces the ``np.complex`` alias, which newer
    # NumPy releases no longer provide.
    S_complex = np.abs(S).astype(complex)

    y = _istft(S_complex * angles)  # first estimate, random phase
    for _ in range(hparams.griffin_lim_iters - 1):
        # keep the target magnitude, adopt the phase of the current estimate
        angles = np.exp(1j * np.angle(_stft(y)))
        y = _istft(S_complex * angles)
    return y


def _stft(y):
    """Short-time Fourier transform of ``y`` using the frame settings in ``hparams``.

    ``n_fft`` is derived from ``hparams.num_freq``; hop and window lengths are
    the millisecond settings converted to samples.
    """
    rate = hparams.sample_rate
    stft_args = dict(
        n_fft=(hparams.num_freq - 1) * 2,
        hop_length=int(hparams.frame_shift_ms / 1000. * rate),
        win_length=int(hparams.frame_length_ms / 1000. * rate),
    )
    return librosa.stft(y=y, **stft_args)


def _istft(y):
    """Inverse STFT of ``y`` with the same hop and window lengths as ``_stft``."""
    rate = hparams.sample_rate
    hop = int(hparams.frame_shift_ms / 1000. * rate)      # frame shift in samples
    window = int(hparams.frame_length_ms / 1000. * rate)  # frame length in samples
    return librosa.istft(y, hop_length=hop, win_length=window)


# Conversions:
# Lazily built caches: the mel filter bank and its pseudo-inverse.
_mel_basis = None
_inv_mel_basis = None

def _linear_to_mel(spectrogram):
    """Project a linear-frequency spectrogram onto the mel filter bank.

    The filter bank is built on first use and cached in ``_mel_basis``.
    """
    global _mel_basis
    _mel_basis = _build_mel_basis() if _mel_basis is None else _mel_basis
    return np.dot(_mel_basis, spectrogram)

def _mel_to_linear(mel_spectrogram):
    """Map a mel spectrogram back to linear-frequency bins.

    Uses the pseudo-inverse of the mel filter bank (built once and cached in
    ``_inv_mel_basis``); the result is floored at 1e-10.
    """
    global _inv_mel_basis
    if _inv_mel_basis is None:
        _inv_mel_basis = np.linalg.pinv(_build_mel_basis())
    linear = np.dot(_inv_mel_basis, mel_spectrogram)
    return np.maximum(1e-10, linear)

def _build_mel_basis():
    """Build the mel filter bank matching the STFT settings in ``hparams``.

    Returns:
        The matrix produced by ``librosa.filters.mel`` for
        ``hparams.num_mels`` bands and ``n_fft = (hparams.num_freq - 1) * 2``.
    """
    n_fft = (hparams.num_freq - 1) * 2
    # Keyword arguments work on both old and new librosa releases; newer
    # releases reject positional ``sr`` / ``n_fft``.
    return librosa.filters.mel(sr=hparams.sample_rate, n_fft=n_fft, n_mels=hparams.num_mels)

def _amp_to_db(x):
    return 20 * np.log10(np.maximum(1e-5, x))

def _db_to_amp(x):
    return np.power(10.0, x * 0.05)


def _preemphasis(x):
    """Apply the pre-emphasis FIR filter with coefficients [1, -hparams.preemphasis]."""
    numerator = [1, -hparams.preemphasis]
    denominator = [1]
    return signal.lfilter(numerator, denominator, x)


def _inv_preemphasis(x):
    """Undo ``_preemphasis`` by applying the inverse (recursive) filter."""
    numerator = [1]
    denominator = [1, -hparams.preemphasis]
    return signal.lfilter(numerator, denominator, x)

def _normalize(S):
    """Rescale dB values from [hparams.min_level_db, 0] to [0, 1], clipping outside values."""
    floor_db = hparams.min_level_db
    scaled = (S - floor_db) / -float(floor_db)
    return np.clip(scaled, 0, 1)

def _denormalize(S):
    """Inverse of ``_normalize``: clip to [0, 1] and map back to [hparams.min_level_db, 0] dB."""
    clipped = np.clip(S, 0, 1)
    return (clipped * -hparams.min_level_db) + hparams.min_level_db

Constants chosen so that the spectrogram and inv_spectrogram conversions keep most of the audio information.

In [None]:
# Audio / STFT settings read by the spectrogram helpers.
# NOTE: this assignment rebinds the name `hparams`, replacing the object
# imported earlier with `from hparams import hparams`.
hparams = tf.contrib.training.HParams(
    num_mels=128,  # number of mel bands requested in _build_mel_basis
    num_freq=1013,  # linear-frequency bins; n_fft = (num_freq - 1) * 2 = 2024
    sample_rate=16000,  # sample rate passed to wavfile.write and the mel filter bank
    frame_length_ms=16.0,  # STFT window length (256 samples at 16000 samples/s)
    frame_shift_ms=8.0,  # STFT hop length (128 samples at 16000 samples/s)
    preemphasis=0.97,  # coefficient used by _preemphasis / _inv_preemphasis
    min_level_db=-80,  # dB floor used by _normalize / _denormalize
    ref_level_db=20,  # reference level subtracted in spectrogram()

    griffin_lim_iters=60  # loop count used by _griffin_lim
)

# Google Drive Mount

In [None]:
from google.colab import drive
drive.mount('/content/drive')

to get .npz file needed for normalizing and denormalizing spectrogram and inv_spectrogram 

In [None]:
%cd /content/drive/My\ Drive/NLP_Nwishy/GAN_10_classes/melspectrogram_features

In [None]:
import os
import librosa
import torch
import IPython.display as ipd
import matplotlib.pyplot as plt
from librosa.display import specshow
from scipy.io import wavfile
import numpy as np

In [None]:
DATAPATH = "training_data.npz"

# Open the archive once and close it when done, instead of calling np.load
# three times and leaving each NpzFile handle open.
with np.load(DATAPATH) as training_data:
    CATEGORIES = training_data["category_names"]

    # statistics used by denormalize() to undo the mel-spectrogram normalization
    mel_means = training_data["mean"]
    mel_stds = training_data["std"]

In [None]:
import librosa.filters
from scipy import signal
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Reshape, Flatten,InputLayer
from keras.layers import Activation, UpSampling2D, Conv2D
from keras.layers.merge import _Merge
from keras.layers.convolutional import Convolution2D, Conv2DTranspose
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import Adam
from keras.losses import categorical_crossentropy
from keras.utils.np_utils import to_categorical
from keras import backend as K
from functools import partial

In [None]:
# Stop early with a readable message when Pillow is missing.
# NOTE(review): `Image` is not used in the cells visible here - confirm it is still needed.
try:
    from PIL import Image
except ImportError:
    print('This script depends on pillow! Please install it (e.g. with pip install pillow)')
    exit()

# Base width shared by make_generator and make_discriminator:
# their layer sizes are all written as multiples of D.
D = 64 # model size coef

generator model

In [None]:
def make_generator():
    """Build the generator: a 100-dimensional noise vector in, a 128x128x1 image in [-1, 1] out.

    A dense layer is reshaped to a 4x4 feature map, which is then upsampled
    five times by a factor of 2 (4 -> 128) with 5x5 convolutions in between.
    Layers are added in exactly the order the saved weights expect.
    """
    conv_args = dict(kernel_size=(5, 5), padding='same')

    model = Sequential()
    model.add(Dense(256 * D, input_dim=100))
    model.add(Reshape((4, 4, 16 * D)))
    model.add(Activation('relu'))

    # three identical stages: upsample -> conv -> relu (4x4 -> 32x32)
    for width in (8 * D, 4 * D, 2 * D):
        model.add(UpSampling2D(size=(2, 2)))
        model.add(Conv2D(width, **conv_args))
        model.add(Activation('relu'))

    # final stages: conv at 32x32, then two upsamplings up to 128x128
    model.add(Conv2D(D, **conv_args))
    model.add(UpSampling2D(size=(2, 2)))
    model.add(Activation('relu'))
    model.add(UpSampling2D(size=(2, 2)))
    model.add(Conv2D(1, **conv_args))
    model.add(Activation('tanh'))

    return model

discriminator model

In [None]:
def make_discriminator(nb_categories):
    """Build the discriminator for 128x128x1 inputs.

    Returns a model with two outputs:
      * ``real_fake``  - a single linear unit (no sigmoid, so it is a score and
        not a probability, as required by a Wasserstein loss);
      * ``categories`` - a softmax over ``nb_categories`` sound classes.
    """
    input_data = Input(shape=(128, 128, 1))

    # first stride-2 convolution uses the default kernel initializer
    x = Conv2D(D, (5, 5), strides=(2,2), padding='same')(input_data)
    x = LeakyReLU(alpha=0.2)(x)

    # four more stride-2 convolutions, doubling the width each time
    for multiplier in (2, 4, 8, 16):
        x = Conv2D(D * multiplier, (5, 5), strides=(2,2), kernel_initializer='he_normal', padding='same')(x)
        x = LeakyReLU(alpha=0.2)(x)

    features = Flatten()(x)
    real_fake = Dense(1, kernel_initializer='he_normal', name='real_fake')(features)
    categories = Dense(nb_categories, kernel_initializer='he_normal', name='categories',
                       activation='softmax')(features)

    return Model(input_data, [real_fake, categories])

to save audio generated

In [None]:
def save_audio(y, path):
    """Turn a generated (normalised) mel spectrogram into audio and save it as a wav file.

    Singleton dimensions are squeezed away, the mean/std normalisation is
    undone, the waveform is reconstructed and handed to ``save_wav``.
    """
    mel = denormalize(np.squeeze(y))
    waveform = inv_melspectrogram(mel)
    save_wav(waveform, path)

def denormalize(norm_s):
    """Undo the dataset normalisation: ``norm_s * 3 * std + mean``.

    Uses the module-level ``mel_means`` / ``mel_stds`` arrays; the first
    dimension of ``norm_s`` must match the length of ``mel_means``.
    """
    assert norm_s.shape[0] == mel_means.shape[0]
    spread = 3.0 * mel_stds
    return (norm_s * spread) + mel_means

initialize models

In [None]:
nb_categories = 10

In [None]:
generator = make_generator()
discriminator = make_discriminator(nb_categories)

to get saved models .h5 file

In [None]:
%cd /content/drive/My\ Drive/NLP_Nwishy/GAN_10_classes/GAN_Outputs/Generated_Images_and_Audios

In [None]:
generator.load_weights('generator_epoch_844_0.858.h5')
discriminator.load_weights('discriminator_epoch_844_-1.24.h5')

if you want to save in drive

In [None]:
%cd /content/drive/My\ Drive/NLP_Nwishy/GAN_10_classes/GAN_Outputs/generate_samples

In [None]:
import librosa
from librosa import display
import matplotlib.pyplot as plt
import IPython
from IPython.display import Audio
from IPython.display import clear_output

# Minimum real/fake score the discriminator must give a generated sound for it to be kept.
# NOTE(review): the `real_fake` output has no activation (see make_discriminator), so this
# is a threshold on an unbounded score rather than on a probability.
CONFIDENCE_THRESH = 0.8

Function to classify a generated 128x128x1 spectrogram image with the discriminator

In [None]:
def classify_generated(w, thresh=CONFIDENCE_THRESH):
    """Ask the discriminator whether a generated spectrogram is convincing, and of which class.

    Args:
        w: one generated spectrogram; singleton dimensions are squeezed away
           before it is reshaped to the (1, H, W, 1) batch the model expects.
        thresh: minimum real/fake score required to accept the sample.

    Returns:
        The index of the most probable category when the real/fake score is
        strictly greater than ``thresh``, otherwise -1.
        Callers must test for -1 before indexing with the result: used as an
        index, -1 silently selects the last category.
    """
    batch = np.squeeze(w)[np.newaxis, :, :, np.newaxis]
    # real_fake: score for the single sample; class_probs: softmax over categories
    real_fake, class_probs = discriminator.predict([batch])
    # Squeeze to a 0-d value first: float() on a (1, 1) array is deprecated in
    # recent NumPy releases.
    score = float(np.squeeze(real_fake))
    if score > thresh:
        return np.argmax(class_probs)
    return -1

function for checking generated samples category

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init
from torch import Tensor
import torch
import librosa
import math

In [None]:
# Settings for the PyTorch classifiers built below (separate from the GAN's nb_categories).
num_classes = 12  # number of output classes; the `classes` list below has 12 names
in_channels = 1  # channel count passed to each classifier's first convolution

In [None]:
use_gpu = torch.cuda.is_available()
print('use_gpu', use_gpu)
if use_gpu:
    torch.backends.cudnn.benchmark = True

In [None]:
classes=  ['down',
          'go',
          'left',
          'no',
          'off',
          'on',
          'right',   
          'silence',    
          'stop',
          'unknown',
          'up',   
          'yes']    

models

In [None]:
%cd /content/drive/My\ Drive/NLP_Nwishy/transfer_learning/densenet_bc_100_12_librosa_features

In [None]:
__all__ = [ 'DenseNet' ]

class Bottleneck(nn.Module):
    """DenseNet bottleneck layer.

    BN-ReLU-1x1 conv (to ``expansion * growthRate`` channels) followed by
    BN-ReLU-3x3 conv (to ``growthRate`` channels); the new features are
    concatenated to the input along the channel axis.
    """

    def __init__(self, inplanes, expansion=4, growthRate=12, dropRate=0):
        super().__init__()
        width = expansion * growthRate
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv2 = nn.Conv2d(width, growthRate, kernel_size=3,
                               padding=1, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.dropRate = dropRate

    def forward(self, x):
        new_features = self.conv1(self.relu(self.bn1(x)))
        new_features = self.conv2(self.relu(self.bn2(new_features)))
        if self.dropRate > 0:
            new_features = F.dropout(new_features, p=self.dropRate, training=self.training)
        # dense connectivity: keep the input and append the new feature maps
        return torch.cat((x, new_features), 1)


class BasicBlock(nn.Module):
    """DenseNet basic layer: BN-ReLU-3x3 conv producing ``growthRate`` new channels.

    The new features are concatenated to the input along the channel axis.
    ``expansion`` is accepted for signature parity with ``Bottleneck`` but does
    not influence the layers that are built.
    """

    def __init__(self, inplanes, expansion=1, growthRate=12, dropRate=0):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, growthRate, kernel_size=3,
                               padding=1, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.dropRate = dropRate

    def forward(self, x):
        new_features = self.conv1(self.relu(self.bn1(x)))
        if self.dropRate > 0:
            new_features = F.dropout(new_features, p=self.dropRate, training=self.training)
        # dense connectivity: keep the input and append the new feature maps
        return torch.cat((x, new_features), 1)


class Transition(nn.Module):
    """DenseNet transition: BN-ReLU-1x1 conv to ``outplanes`` channels, then 2x2 average pooling."""

    def __init__(self, inplanes, outplanes):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, outplanes, kernel_size=1,
                               bias=False)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        squeezed = self.conv1(self.relu(self.bn1(x)))
        # halve both spatial dimensions
        return F.avg_pool2d(squeezed, 2)


class DenseNet(nn.Module):
    """DenseNet classifier: three dense blocks separated by two transitions.

    Args:
        depth: total depth; must satisfy ``(depth - 4) % 3 == 0``.
        block: layer class used inside the dense blocks.
        dropRate: dropout probability forwarded to every layer.
        num_classes: size of the final linear layer.
        growthRate: channels added by every layer of a dense block.
        compressionRate: channel reduction factor applied by each transition.
        in_channels: channels of the input image.
    """

    def __init__(self, depth=22, block=Bottleneck,
        dropRate=0, num_classes=10, growthRate=12, compressionRate=2, in_channels=3):
        super().__init__()

        assert (depth - 4) % 3 == 0, 'depth should be 3n+4'
        # Integer division in both branches: a float layer count would make
        # range() in _make_denseblock raise a TypeError.
        # NOTE: `BasicBlock` is looked up when this constructor runs, so it
        # refers to whichever class currently owns that name in the notebook.
        n = (depth - 4) // 3 if block == BasicBlock else (depth - 4) // 6

        self.growthRate = growthRate
        self.dropRate = dropRate

        # self.inplanes tracks the running channel count; the _make_* helpers
        # read and update it as layers are appended.
        self.inplanes = growthRate * 2
        self.conv1 = nn.Conv2d(in_channels, self.inplanes, kernel_size=3, padding=1,
                               bias=False)
        self.dense1 = self._make_denseblock(block, n)
        self.trans1 = self._make_transition(compressionRate)
        self.dense2 = self._make_denseblock(block, n)
        self.trans2 = self._make_transition(compressionRate)
        self.dense3 = self._make_denseblock(block, n)
        self.bn = nn.BatchNorm2d(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(self.inplanes, num_classes)

        # Weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_denseblock(self, block, blocks):
        """Stack ``blocks`` layers of type ``block``; each adds ``growthRate`` channels."""
        layers = []
        for _ in range(blocks):
            # Currently we fix the expansion ratio as the default value
            layers.append(block(self.inplanes, growthRate=self.growthRate, dropRate=self.dropRate))
            self.inplanes += self.growthRate

        return nn.Sequential(*layers)

    def _make_transition(self, compressionRate):
        """Create a Transition that divides the channel count by ``compressionRate``."""
        inplanes = self.inplanes
        outplanes = int(math.floor(self.inplanes // compressionRate))
        self.inplanes = outplanes
        return Transition(inplanes, outplanes)

    def forward(self, x):
        x = self.conv1(x)

        x = self.trans1(self.dense1(x))
        x = self.trans2(self.dense2(x))
        x = self.dense3(x)
        x = self.bn(x)
        x = self.relu(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, features)
        x = self.fc(x)

        return x

In [None]:
MODEL_NAME1 = 'DenseNet_scipy_12_classes_semi_supervised'

modelA = DenseNet(depth=100, growthRate=12, compressionRate=2, num_classes=num_classes, in_channels=in_channels)
if use_gpu:
    modelA = torch.nn.DataParallel(modelA).cuda()

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only runtime.
checkpoint = torch.load(f'{MODEL_NAME1}_bestloss.pth', map_location=None if use_gpu else 'cpu')
state_dict = checkpoint['state_dict']
if not use_gpu:
    # Without DataParallel the model's parameter names carry no 'module.' prefix;
    # drop it from the checkpoint keys if it is present.
    state_dict = {(k[len('module.'):] if k.startswith('module.') else k): v
                  for k, v in state_dict.items()}
modelA.load_state_dict(state_dict)
modelA.float()
del checkpoint, state_dict

In [None]:
%cd /content/drive/My\ Drive/NLP_Nwishy/transfer_learning/wide_resnet_librosa_features

In [None]:
__all__ = [ 'WideResNet' ]

class BasicBlock(nn.Module):
    """Wide-ResNet pre-activation residual block (BN-ReLU-conv twice).

    When the channel count changes, a strided 1x1 convolution on the activated
    input is used as the shortcut; otherwise the raw input is added back.
    """

    def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        if self.equalInOut:
            self.convShortcut = None
        else:
            self.convShortcut = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                                          padding=0, bias=False)

    def forward(self, x):
        activated = self.relu1(self.bn1(x))
        branch = self.relu2(self.bn2(self.conv1(activated)))
        if self.droprate > 0:
            branch = F.dropout(branch, p=self.droprate, training=self.training)
        branch = self.conv2(branch)
        # identity shortcut uses the raw input, projection shortcut the activated one
        shortcut = x if self.equalInOut else self.convShortcut(activated)
        return torch.add(shortcut, branch)

class NetworkBlock(nn.Module):
    """A stage of ``nb_layers`` residual blocks applied in sequence.

    Only the first block sees ``in_planes`` input channels and the requested
    ``stride``; the remaining blocks map ``out_planes`` to ``out_planes`` with
    stride 1.
    """

    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
        super().__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
        stack = []
        for index in range(nb_layers):
            is_first = index == 0
            stack.append(block(in_planes if is_first else out_planes,
                               out_planes,
                               stride if is_first else 1,
                               dropRate))
        return nn.Sequential(*stack)

    def forward(self, x):
        return self.layer(x)

class WideResNet(nn.Module):
    """Wide ResNet: an initial 3x3 conv, three NetworkBlock stages, BN-ReLU, 8x8 average pool, linear classifier.

    Args:
        depth: total depth; must satisfy ``(depth - 4) % 6 == 0``.
        num_classes: size of the final linear layer.
        in_channels: channels of the input image.
        widen_factor: multiplier applied to the 16/32/64 base stage widths.
        dropRate: dropout probability forwarded to every residual block.
    """

    def __init__(self, depth, num_classes, in_channels=3, widen_factor=1, dropRate=0.0):
        super().__init__()
        widths = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
        assert((depth - 4) % 6 == 0)
        blocks_per_stage = (depth - 4) // 6
        block = BasicBlock

        # stem convolution applied before the first stage
        self.conv1 = nn.Conv2d(in_channels, widths[0], kernel_size=3, stride=1,
                               padding=1, bias=False)
        # three stages; the second and third use stride 2
        self.block1 = NetworkBlock(blocks_per_stage, widths[0], widths[1], block, 1, dropRate)
        self.block2 = NetworkBlock(blocks_per_stage, widths[1], widths[2], block, 2, dropRate)
        self.block3 = NetworkBlock(blocks_per_stage, widths[2], widths[3], block, 2, dropRate)
        # final normalisation and classifier
        self.bn1 = nn.BatchNorm2d(widths[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(widths[3], num_classes)
        self.nChannels = widths[3]

        # weight initialisation
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Linear):
                module.bias.data.zero_()

    def forward(self, x):
        features = self.conv1(x)
        for stage in (self.block1, self.block2, self.block3):
            features = stage(features)
        features = self.relu(self.bn1(features))
        pooled = F.avg_pool2d(features, 8)
        flat = pooled.view(-1, self.nChannels)
        return self.fc(flat)

In [None]:
MODEL_NAME2 = 'WideResNet_scipy_12_classes'

modelB = WideResNet(depth=52, widen_factor=10, dropRate=0, num_classes=num_classes, in_channels=in_channels)
if use_gpu:
    modelB = torch.nn.DataParallel(modelB).cuda()

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only runtime.
checkpoint = torch.load(f'{MODEL_NAME2}_bestloss.pth', map_location=None if use_gpu else 'cpu')
state_dict = checkpoint['state_dict']
if not use_gpu:
    # Without DataParallel the model's parameter names carry no 'module.' prefix;
    # drop it from the checkpoint keys if it is present.
    state_dict = {(k[len('module.'):] if k.startswith('module.') else k): v
                  for k, v in state_dict.items()}
modelB.load_state_dict(state_dict)
modelB.float()
del checkpoint, state_dict

In [None]:
%cd /content/drive/My\ Drive/NLP_Dataset/transfer_learning/playground

In [None]:
class ResNeXtBottleneck(nn.Module):
    """
    RexNeXt bottleneck type C (https://github.com/facebookresearch/ResNeXt/blob/master/models/resnext.lua)

    1x1 reduce -> grouped 3x3 conv -> 1x1 expand, each followed by batch norm,
    added to a shortcut and passed through ReLU.
    """

    def __init__(self, in_channels, out_channels, stride, cardinality, base_width, widen_factor):
        """ Constructor
        Args:
            in_channels: input channel dimensionality
            out_channels: output channel dimensionality
            stride: conv stride. Replaces pooling layer.
            cardinality: num of convolution groups.
            base_width: base number of channels in each group.
            widen_factor: factor to reduce the input dimensionality before convolution.
        """
        super().__init__()
        width_ratio = out_channels / (widen_factor * 64.)
        # total width of the grouped convolution (a local value, unrelated to
        # any module-level constant)
        group_width = cardinality * int(base_width * width_ratio)
        self.conv_reduce = nn.Conv2d(in_channels, group_width, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn_reduce = nn.BatchNorm2d(group_width)
        self.conv_conv = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1,
                                   groups=cardinality, bias=False)
        self.bn = nn.BatchNorm2d(group_width)
        self.conv_expand = nn.Conv2d(group_width, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn_expand = nn.BatchNorm2d(out_channels)

        # identity shortcut unless the channel count changes
        self.shortcut = nn.Sequential()
        if in_channels != out_channels:
            self.shortcut.add_module('shortcut_conv',
                                     nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0,
                                               bias=False))
            self.shortcut.add_module('shortcut_bn', nn.BatchNorm2d(out_channels))

    def forward(self, x):
        # Sub-modules are called directly rather than through .forward(), so
        # that any registered hooks run.
        bottleneck = self.conv_reduce(x)
        bottleneck = F.relu(self.bn_reduce(bottleneck), inplace=True)
        bottleneck = self.conv_conv(bottleneck)
        bottleneck = F.relu(self.bn(bottleneck), inplace=True)
        bottleneck = self.conv_expand(bottleneck)
        bottleneck = self.bn_expand(bottleneck)
        residual = self.shortcut(x)
        return F.relu(residual + bottleneck, inplace=True)


class CifarResNeXt(nn.Module):
    """
    ResNext optimized for the Cifar dataset, as specified in
    https://arxiv.org/pdf/1611.05431.pdf
    """

    def __init__(self, nlabels, cardinality=8, depth=29, base_width=64, widen_factor=4, in_channels=3):
        """ Constructor
        Args:
            nlabels: number of classes
            cardinality: number of convolution groups.
            depth: number of layers.
            base_width: base number of channels in each group.
            widen_factor: factor to adjust the channel dimensionality
            in_channels: number of channels of the input image
        """
        super().__init__()
        self.cardinality = cardinality
        self.depth = depth
        self.block_depth = (self.depth - 2) // 9
        self.base_width = base_width
        self.widen_factor = widen_factor
        self.nlabels = nlabels
        self.output_size = 64
        self.stages = [64, 64 * self.widen_factor, 128 * self.widen_factor, 256 * self.widen_factor]

        self.conv_1_3x3 = nn.Conv2d(in_channels, 64, 3, 1, 1, bias=False)
        self.bn_1 = nn.BatchNorm2d(64)
        self.stage_1 = self.block('stage_1', self.stages[0], self.stages[1], 1)
        self.stage_2 = self.block('stage_2', self.stages[1], self.stages[2], 2)
        self.stage_3 = self.block('stage_3', self.stages[2], self.stages[3], 2)
        self.classifier = nn.Linear(self.stages[3], nlabels)
        # kaiming_normal_ is the in-place initialiser; the un-suffixed name is deprecated
        init.kaiming_normal_(self.classifier.weight)

        # Re-initialise parameters by name: conv weights with Kaiming (fan_out),
        # batch-norm weights to 1, every bias to 0. The state_dict tensors share
        # storage with the parameters, so in-place writes take effect.
        state = self.state_dict()
        for key in state:
            kind = key.split('.')[-1]
            if kind == 'weight':
                if 'conv' in key:
                    init.kaiming_normal_(state[key], mode='fan_out')
                if 'bn' in key:
                    state[key][...] = 1
            elif kind == 'bias':
                state[key][...] = 0

    def block(self, name, in_channels, out_channels, pool_stride=2):
        """ Stack n bottleneck modules where n is inferred from the depth of the network.
        Args:
            name: string name of the current block.
            in_channels: number of input channels
            out_channels: number of output channels
            pool_stride: factor to reduce the spatial dimensionality in the first bottleneck of the block.
        Returns: a Module consisting of n sequential bottlenecks.
        """
        block = nn.Sequential()
        for bottleneck in range(self.block_depth):
            name_ = '%s_bottleneck_%d' % (name, bottleneck)
            if bottleneck == 0:
                # only the first bottleneck changes channel count and stride
                block.add_module(name_, ResNeXtBottleneck(in_channels, out_channels, pool_stride, self.cardinality,
                                                          self.base_width, self.widen_factor))
            else:
                block.add_module(name_,
                                 ResNeXtBottleneck(out_channels, out_channels, 1, self.cardinality, self.base_width,
                                                   self.widen_factor))
        return block

    def forward(self, x):
        # Sub-modules are called directly rather than through .forward(), so
        # that any registered hooks run.
        x = self.conv_1_3x3(x)
        x = F.relu(self.bn_1(x), inplace=True)
        x = self.stage_1(x)
        x = self.stage_2(x)
        x = self.stage_3(x)
        x = F.avg_pool2d(x, 8, 1)
        x = x.view(-1, self.stages[3])
        return self.classifier(x)

In [None]:
MODEL_NAME3 = 'Resnextscipyv2'

modelC = CifarResNeXt(nlabels=num_classes, in_channels=in_channels)
if use_gpu:
    modelC = torch.nn.DataParallel(modelC).cuda()

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only runtime.
checkpoint = torch.load(f'{MODEL_NAME3}_bestloss.pth', map_location=None if use_gpu else 'cpu')
state_dict = checkpoint['state_dict']
if not use_gpu:
    # Without DataParallel the model's parameter names carry no 'module.' prefix;
    # drop it from the checkpoint keys if it is present.
    state_dict = {(k[len('module.'):] if k.startswith('module.') else k): v
                  for k, v in state_dict.items()}
modelC.load_state_dict(state_dict)
modelC.float()
del checkpoint, state_dict

In [None]:
class Bottleneck(nn.Module):
    """Dual-path bottleneck: 1x1 conv, grouped 3x3 conv (32 groups), 1x1 conv.

    The first ``out_planes`` output channels are added to the shortcut
    (residual path); the remaining channels of both the shortcut and the new
    features are concatenated (dense path).
    """

    def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
        super().__init__()
        self.out_planes = out_planes
        self.dense_depth = dense_depth
        total_out = out_planes + dense_depth

        self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
        self.bn2 = nn.BatchNorm2d(in_planes)
        self.conv3 = nn.Conv2d(in_planes, total_out, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(total_out)

        # projection shortcut only on the first layer of a stage, identity otherwise
        if first_layer:
            self.shortcut = nn.Sequential(
                nn.Conv2d(last_planes, total_out, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(total_out)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        branch = F.relu(self.bn1(self.conv1(x)))
        branch = F.relu(self.bn2(self.conv2(branch)))
        branch = self.bn3(self.conv3(branch))
        skip = self.shortcut(x)
        split = self.out_planes
        residual_part = skip[:, :split, :, :] + branch[:, :split, :, :]
        merged = torch.cat([residual_part, skip[:, split:, :, :], branch[:, split:, :, :]], 1)
        return F.relu(merged)


class DPN(nn.Module):
    """Dual Path Network: a 3x3 stem, four stages of Bottleneck layers, 4x4 average pooling and a linear classifier.

    ``cfg`` is a dict with per-stage tuples under the keys ``in_planes``,
    ``out_planes``, ``num_blocks`` and ``dense_depth``.
    """

    def __init__(self, num_classes, in_channels, cfg):
        super().__init__()
        in_planes = cfg['in_planes']
        out_planes = cfg['out_planes']
        num_blocks = cfg['num_blocks']
        dense_depth = cfg['dense_depth']

        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # running channel count, updated by _make_layer as layers are added
        self.last_planes = 64
        self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
        self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
        self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
        self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
        self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], num_classes)

    def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
        """Build one stage; only its first Bottleneck uses ``stride`` and a projection shortcut."""
        layers = []
        for index in range(num_blocks):
            layer_stride = stride if index == 0 else 1
            layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, layer_stride, index==0))
            # every layer appends dense_depth channels to the dense path
            self.last_planes = out_planes + (index+2) * dense_depth
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)  # flatten to (batch, features)
        return self.linear(out)

In [None]:
def DPN92(num_classes, in_channels=1):
    """Build a DPN with the DPN-92 stage configuration."""
    stage_config = dict(
        in_planes=(96,192,384,768),
        out_planes=(256,512,1024,2048),
        num_blocks=(3,4,20,3),
        dense_depth=(16,32,24,128),
    )
    return DPN(num_classes, in_channels, stage_config)

In [None]:
MODEL_NAME4 = 'DPN92'

modelD = DPN92(num_classes=num_classes, in_channels=in_channels)
if use_gpu:
    modelD = torch.nn.DataParallel(modelD).cuda()

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only runtime.
checkpoint = torch.load(f'{MODEL_NAME4}_bestloss.pth', map_location=None if use_gpu else 'cpu')
state_dict = checkpoint['state_dict']
if not use_gpu:
    # Without DataParallel the model's parameter names carry no 'module.' prefix;
    # drop it from the checkpoint keys if it is present.
    state_dict = {(k[len('module.'):] if k.startswith('module.') else k): v
                  for k, v in state_dict.items()}
modelD.load_state_dict(state_dict)
modelD.float()
del checkpoint, state_dict

In [None]:
def ToTensor(data):
    """Return ``data`` as a 32-bit float torch tensor."""
    return torch.FloatTensor(data)

def ToSTFT(data,n_fft=2048, hop_length=512):
    """Return the short-time Fourier transform of an audio signal (thin wrapper around ``librosa.stft``)."""
    return librosa.stft(data, n_fft=n_fft, hop_length=hop_length)

def ToMFCC(data,n_mels=32,sample_rate=16000,n_fft=2048):
    """Compute the log-mel power spectrogram of an audio clip as a float tensor.

    Despite the name, no cepstral transform is applied: the result is the mel
    power spectrogram converted to dB relative to its maximum.

    Args:
        data: audio samples; they are divided by 32768, so 16-bit PCM values
              are presumably expected (confirm against callers).
        n_mels: number of mel bands (rows of the result).
        sample_rate: sample rate used to build the mel filter bank.
        n_fft: FFT size, used for both the STFT and the mel filter bank.

    Returns:
        A ``torch.FloatTensor`` with ``n_mels`` rows. With the defaults and a
        16000-sample clip this is a 32x32 matrix.
    """
    data = data.astype(np.float32, order='C') / 32768.0
    # Forward n_fft so the STFT and the mel filter bank always agree on the
    # number of frequency bins (previously the STFT always used its default).
    stft = ToSTFT(data, n_fft=n_fft)
    # keyword arguments: newer librosa releases reject positional sr / n_fft
    mel_basis = librosa.filters.mel(sr=sample_rate, n_fft=n_fft, n_mels=n_mels)
    mel_power = np.dot(mel_basis, np.abs(stft)**2.0)
    log_mel = librosa.power_to_db(mel_power, ref=np.max)
    return ToTensor(log_mel)

def FixAudioLength(data,time = 1,sample_rate = 16000):
    """Force ``data`` to exactly ``int(time * sample_rate)`` samples.

    Longer signals are truncated, shorter ones are zero-padded at the end,
    and signals that already have the target length are returned unchanged.
    """
    target = int(time * sample_rate)
    current = len(data)
    if current > target:
        return data[:target]
    if current < target:
        return np.pad(data, (0, target - current), "constant")
    return data

In [None]:
def check_samples_category(wav_vector):
    """Classify an audio clip with the ensemble of the four CNN models.

    The clip is brought to a fixed length, converted to a log-mel spectrogram
    and passed through modelA..modelD; their outputs are summed before the
    softmax and the name of the most probable class is returned.

    Args:
        wav_vector: 1-D array of audio samples.

    Returns:
        The predicted entry of ``classes``.
    """
    # fix audio length to be 16000 samples
    wav_vector = FixAudioLength(wav_vector)
    # convert the audio to a mel-spectrogram, the input format of the CNN models
    features = ToMFCC(wav_vector)

    ensemble = (modelA, modelB, modelC, modelD)
    for model in ensemble:
        model.eval()  # evaluation mode (affects batch norm / dropout)

    # add batch and channel dimensions
    batch = torch.unsqueeze(torch.unsqueeze(features, 0), 1).float()
    if use_gpu:
        batch = batch.cuda()

    # The forward passes run inside no_grad so that no autograd graph is
    # built during inference.
    with torch.no_grad():
        outputs_CNN = modelA(batch) + modelB(batch) + modelC(batch) + modelD(batch)
        outputs = torch.nn.functional.softmax(outputs_CNN, dim=1)

    # index of the highest probability, as a plain Python int for list indexing
    pred = outputs.max(1, keepdim=True)[1].item()
    return classes[pred]

# **GAN run**

In [None]:
CATEGORIES

In [None]:
%cd /content/drive/My\ Drive/NLP_Dataset/dataset_gan/
!ls

In [None]:
folder = "dataset_part90"

In [None]:
!mkdir $folder

In [None]:
import os
# One sub-folder per category. makedirs(exist_ok=True) keeps this cell
# re-runnable and also creates `folder` itself when it is missing.
for cat in CATEGORIES:
    os.makedirs(os.path.join(folder, cat), exist_ok=True)

In [None]:
def generate_random_sound(folder, max_try= 1000):
    """Generate sounds with the GAN and save those that pass both checks.

    A sample is kept only when (1) the discriminator accepts it (see
    ``classify_generated``) and (2) the CNN ensemble predicts the same category
    as the discriminator. Accepted samples are written to
    ``{folder}/{category}/sample_{i}.wav``.

    Args:
        folder: output root; one sub-folder per category must already exist.
        max_try: number of samples to SAVE. Despite its name it does not bound
                 the number of attempts: the loop runs until this many samples
                 have been accepted.
    """
    i = 0
    while i < max_try:
        seed = np.random.rand(1, 100)              # random input vector
        w = generator.predict(seed)                # generated 128x128x1 spectrogram
        id_category = classify_generated(w[0])     # category index, or -1 when rejected
        if id_category < 0:
            # Rejected by the discriminator: skip the costly waveform
            # reconstruction and the ensemble check (and never index
            # CATEGORIES with -1).
            continue
        category = CATEGORIES[id_category]         # name of the predicted class

        # Reconstruct the waveform once. Griffin-Lim starts from random phase,
        # so reconstructing again for saving would store a different signal
        # from the one verified here.
        s = denormalize(np.squeeze(w))
        wav = inv_melspectrogram(s)

        # 16-bit version of the first 16000 samples for the classifier; 32767
        # is the int16 maximum (32768 would overflow on a positive peak)
        pcm = np.int16(wav / np.max(np.abs(wav)) * 32767)[0:16000]
        prediction = check_samples_category(pcm)

        if prediction == category:
            save_wav(wav, f"{folder}/{category}/sample_{i}.wav")
            i += 1
            if i % 400 == 0:
                print(f"sample number {i}")



In [None]:
import time

In [None]:
# Time the generation run; the second argument is max_try.
start = time.time()
generate_random_sound(folder, 2500)
end = time.time()
print( (end - start) / (60*60) )  # elapsed time in hours
           

In [None]:
#from scipy.io import wavfile      

In [None]:
#fs, samples = wavfile.read('dataset/up/sample_70.wav')

In [None]:
#check_samples_category(samples)

In [None]:
#folder

In [None]:
#!mv dataset dataset_part56

In [None]:
!zip -r $folder $folder

In [None]:
!zip -r dataset_part74 dataset_part74
!rm -r dataset_part74

In [None]:
!rm -r $folder

In [None]:
#!cp dataset_part56.zip /content/drive/My\ Drive/NLP_Dataset/dataset_gan/.

In [None]:
import os

# Print how many audio files each category folder contains.
dataset_root = f'/content/drive/My Drive/NLP_Dataset/dataset_gan/{folder}'
# `category_dir` rather than `dir`, so the builtin dir() is not shadowed
for category_dir in os.listdir(dataset_root):
    n_audios = len(os.listdir(os.path.join(dataset_root, category_dir)))
    print(f"{category_dir} : {n_audios}")

**You should copy the zipped dataset to the drive**

In [None]:
#!rm -r dataset

trying the function

In [None]:
%cd /content/drive/My\ Drive/NLP_Nwishy/GAN_10_classes/GAN_Outputs/generate_samples/down

In [None]:
# Collect the .wav file names found in `path`.
# NOTE(review): `path` is relative to the current directory, and the preceding %cd already
# ends in .../generate_samples/down - confirm that a nested 'down' folder is intended.
path = 'down'
waves = [f for f in os.listdir(path) if f.endswith('.wav')]

In [None]:
# Count how many of the listed clips the ensemble classifies as 'down'.
counter = 0
for wave_name in waves:
    # Read from the directory that was listed. The second positional argument
    # of wavfile.read is `mmap`, not a sample rate, so it is left out.
    sample_rate, data = wavfile.read(os.path.join(path, wave_name))
    prediction = check_samples_category(data)
    if prediction == 'down':
        counter += 1
    print((wave_name, prediction))
print(counter)