Author: Mark McDonnell, mark.mcdonnell@unisa.edu.au

In [1]:
# Identifier for this experiment / model variant.
# NOTE(review): not referenced by any other cell visible in this notebook.
model_name = 'McDonnell_Task1b_dev_train_val' 

In [2]:
#select a GPU (these environment variables are set before tensorflow is imported below)
#NOTE(review): the device index "4" is specific to the original author's machine - adjust for your host
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

#third-party imports
import sys
import numpy as np
import matplotlib.pyplot as plt
import h5py
import pandas as pd
from sklearn.metrics import confusion_matrix, balanced_accuracy_score
from scipy.io import savemat,loadmat
import soundfile as sound
import librosa
#NOTE(review): duplicate of the matplotlib import above; harmless but redundant
import matplotlib.pyplot as plt

import tensorflow
from tensorflow.keras.optimizers import SGD

#record the library versions this notebook was run with
print("keras version = ",tensorflow.keras.__version__)
print("tensorflow version = ",tensorflow.__version__)

#project-local modules live one directory up, so that directory is put first on the import path
sys.path.insert(0, "../")
from NNets import model_resnet_updated_all_binary
from DCASE_training_functions_v2 import LR_WarmRestart, MixupGenerator
from DCASE_plots import plot_confusion_matrix

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


keras version =  2.2.4-tf
tensorflow version =  1.13.1


  data = yaml.load(f.read()) or {}


In [3]:
# --- source audio attributes ---
sr = 48000                 # samples per second
num_audio_channels = 2
SampleDuration = 10        # seconds

# --- log-mel spectrogram parameters ---
NumFreqBins = 256
NumFFTPoints = 4096
HopLength = NumFFTPoints // 4                          # hop is a quarter of the FFT length
NumTimeBins = -(-(SampleDuration * sr) // HopLength)   # integer ceiling of samples / hop

# --- training parameters ---
init_lr = 0.025
batch_size = 32
num_epochs = 310
mixup_alpha = 0.4
crop_length = 400

# --- model parameters ---
wd = 5e-4                  # passed to the model builder as `wd`
num_filters = 26

In [4]:
#class information
#single definition of the dataset root, used for both the metadata file and the evaluation split
BasePath = '../../Data/TAU-urban-acoustic-scenes-2020-3class-development/'
meta_df = pd.read_csv(BasePath + 'meta.csv',sep='\t', encoding='ASCII')
ClassNames = np.unique(meta_df['scene_label'])  #sorted unique scene labels
NumClasses = len(ClassNames)
print('Classes=',ClassNames)

#get  official DCASE 2020 validation dev split:
ValFile = BasePath + 'evaluation_setup/fold1_evaluate.csv'
dev_val_df = pd.read_csv(ValFile,sep='\t', encoding='ASCII')
wavpaths_val = dev_val_df['filename'].tolist()

#encode the labels against ClassNames explicitly, so that code k always means ClassNames[k]
#even if some class happens to be absent from the validation split (inferring the categories
#from the split itself would silently renumber the remaining classes in that case)
y_val_labels = pd.Categorical(dev_val_df['scene_label'], categories=ClassNames).codes
#pd.Categorical assigns code -1 to any value that is not in `categories`
assert (y_val_labels >= 0).all(), 'validation split contains a scene_label that is not in meta.csv'
y_val = tensorflow.keras.utils.to_categorical(y_val_labels, NumClasses)

Classes= ['indoor' 'outdoor' 'transportation']


In [5]:
#load the stored validation features and take their natural log in one step;
#the 1e-8 offset keeps the argument of the log strictly positive for zero-valued entries
X_val = np.log(np.load('Task1b_LM_val_256_4096.npy') + 1e-8)

In [6]:
#build the network with weight binarisation switched on (the model is not compiled in this cell)
inference_model = model_resnet_updated_all_binary(
    NumClasses,
    input_shape=[NumFreqBins, None, num_audio_channels],
    num_filters=num_filters,
    wd=wd,
    binarise_weights=True,
)

#restore the trained parameters; they are stored at the default 32 bit precision,
#but all conv weights are -1.0 or +1.0
inference_model.load_weights('DCASE2020_Task1b_development_example.h5')

Instructions for updating:
Colocations handled automatically by placer.


In [7]:
#baseline: validation accuracy using the weights exactly as loaded (default 32 bit storage)
y_pred_val = inference_model.predict(X_val)
pred_labels = np.argmax(y_pred_val, axis=-1)
true_labels = np.argmax(y_val, axis=-1)
num_correct = np.sum(pred_labels == true_labels)
print('Val_accuracy (%):', 100*num_correct/y_val.shape[0])


Val_accuracy (%): 96.86977299880526


In [8]:
#Convert model parameters to one bit storage

#Note: the constraint in Task 1B is for 500 kB of convolutional or fully connected weights. Batch norm
#params explicitly do not count, and can be additional memory.
MaxWeightsMemoryKB = 500.0

ZeroOneWeightsDict = {}   #conv layers only: boolean (0/1) weights keyed by layer name
AllParamsDict={}          #conv layers (boolean) plus batch norm layers (floating point)
NumBinaryWeights=0.0
Num32bitWeights=0.0
for layer in inference_model.layers:

    if 'conv2d' in layer.name:
        ww=layer.get_weights()

        #storage using 1 bit booleans: sign -1 -> False, sign 0 or +1 -> True.
        #np.sign is applied to the whole get_weights() list, so the stored array keeps a leading
        #list axis; the loading cells rely on that layout (they index [0]).
        binary_weights = (0.5*(np.sign(ww)+1.0)).astype('bool') #save weights as 0 or 1
        ZeroOneWeightsDict[layer.name]=binary_weights
        AllParamsDict[layer.name]=binary_weights
        NumBinaryWeights+=np.prod(ww[0].shape)
    elif 'batch_normalization' in layer.name:
        #the saved model also needs floating point batch norm params
        ww=layer.get_weights()
        AllParamsDict[layer.name]=ww
        for kk in ww:
            Num32bitWeights+=np.prod(kk.shape)

savemat('FinalModel_01weights.mat',ZeroOneWeightsDict,do_compression=True,long_field_names=True)
savemat('FinalModel_allparams.mat',AllParamsDict,do_compression=True,long_field_names=True)

#memory in kB: bits / 8 -> bytes, bytes / 1024 -> kB
WeightsMemory=NumBinaryWeights/8/1024
BNMemory=32.0*Num32bitWeights/8/1024
if WeightsMemory <= MaxWeightsMemoryKB:
    print('Num binary weights is less than 500kb: ',int(NumBinaryWeights),'conv weights = conv weights memory of ',WeightsMemory,'  kB')
else:
    #only claim compliance with the size limit when it actually holds
    print('WARNING: binary weights exceed 500kb: ',int(NumBinaryWeights),'conv weights = conv weights memory of ',WeightsMemory,'  kB')
print('Num 32-bit weights (all batch norm parameters) = ',int(Num32bitWeights),'; weights memory = ',BNMemory,'  kB')
#both terms are in kB, so the total is in kB as well (it was previously mislabelled as MB)
print('Total memory = ',WeightsMemory+BNMemory,'  kB')


Num binary weights is less than 500kb:  3987000 conv weights = conv weights memory of  486.6943359375   kB
Num 32-bit weights (all batch norm parameters) =  6340 ; weights memory =  24.765625   kB
Total memory =  511.4599609375   MB


In [9]:
#verify that these 0/1 weights when loaded work as expected:

AllParamsDict_loaded=loadmat('FinalModel_allparams.mat')

#loadmat also returns metadata entries (e.g. '__header__'), so keep only the layer entries.
#Entries are matched to model layers by position, so their order must follow the layer order.
conv_names=[m for m in AllParamsDict_loaded if 'conv2d' in m]
bn_names=[m for m in AllParamsDict_loaded if 'batch' in m]

c1=0  #index of the next saved conv entry
c2=0  #index of the next saved batch norm entry
for layer in inference_model.layers:
    if 'conv2d' in layer.name:
        #map stored 0/1 values back to -1/+1
        ww=AllParamsDict_loaded[conv_names[c1]].astype('float32')*2.0-1.0
        #rescale by sqrt(2 / product of the first three kernel dimensions)
        #NOTE(review): presumably this mirrors the scaling used inside the binarising layers - confirm in NNets
        ww=ww*np.sqrt(2.0/np.prod(ww[0].shape[0:3]))
        #the stored array has a leading list axis; ww[0] is the kernel itself
        layer.set_weights([ww[0]])
        print('conv layer ',c1,' has ', len(np.unique(ww)),' unique weight values')
        c1=c1+1
    elif 'batch_normalization' in layer.name:
        ww=AllParamsDict_loaded[bn_names[c2]]
        layer.set_weights(ww)
        c2=c2+1

#positional matching is only valid if every saved entry was consumed by exactly one layer
assert c1==len(conv_names), 'number of conv layers does not match the saved conv entries'
assert c2==len(bn_names), 'number of batch norm layers does not match the saved batch norm entries'

#get accuracy (computed on the validation split, hence labelled Val rather than Test):
y_pred_val_binary_conv = inference_model.predict(X_val)
print('One-bit-per-weight Val accuracy (%):', 100*np.sum(np.argmax(y_pred_val_binary_conv,-1)==np.argmax(y_val,-1))/y_val.shape[0])


conv layer  0  has  2  unique weight values
conv layer  1  has  2  unique weight values
conv layer  2  has  2  unique weight values
conv layer  3  has  2  unique weight values
conv layer  4  has  2  unique weight values
conv layer  5  has  2  unique weight values
conv layer  6  has  2  unique weight values
conv layer  7  has  2  unique weight values
conv layer  8  has  2  unique weight values
conv layer  9  has  2  unique weight values
conv layer  10  has  2  unique weight values
conv layer  11  has  2  unique weight values
conv layer  12  has  2  unique weight values
conv layer  13  has  2  unique weight values
conv layer  14  has  2  unique weight values
conv layer  15  has  2  unique weight values
conv layer  16  has  2  unique weight values
conv layer  17  has  2  unique weight values
conv layer  18  has  2  unique weight values
conv layer  19  has  2  unique weight values
conv layer  20  has  2  unique weight values
conv layer  21  has  2  unique weight values
conv layer  22  has 

In [10]:
#final test: we get the same results if the model uses regular conv layers, and not the binarising version:

inference_model_regular_conv = model_resnet_updated_all_binary(NumClasses,
                         input_shape =[NumFreqBins,None,num_audio_channels], 
                         num_filters =num_filters,
                         wd=wd,binarise_weights=False)


#saved entries are matched to this model's layers by position (conv_names / bn_names order),
#not by name
c1=0  #index of the next saved conv entry
c2=0  #index of the next saved batch norm entry
for layer in inference_model_regular_conv.layers:
    if 'conv2d' in layer.name:
        #map stored 0/1 values back to -1/+1, then rescale by
        #sqrt(2 / product of the first three kernel dimensions)
        ww=AllParamsDict_loaded[conv_names[c1]].astype('float32')*2.0-1.0
        ww=ww*np.sqrt(2.0/np.prod(ww[0].shape[0:3]))
        #the stored array has a leading list axis; ww[0] is the kernel itself
        layer.set_weights([ww[0]])
        print('conv layer ',c1,' has ', len(np.unique(ww)),' unique weight values')
        c1=c1+1
    elif 'batch_normalization' in layer.name:
        ww=AllParamsDict_loaded[bn_names[c2]]
        layer.set_weights(ww)
        c2=c2+1

#positional matching is only valid if every saved entry was consumed by exactly one layer
assert c1==len(conv_names), 'number of conv layers does not match the saved conv entries'
assert c2==len(bn_names), 'number of batch norm layers does not match the saved batch norm entries'

#get accuracy (computed on the validation split, hence labelled Val rather than Test):
y_pred_val_regular_conv = inference_model_regular_conv.predict(X_val)
print('One-bit-per-weight Val accuracy (%):', 100*np.sum(np.argmax(y_pred_val_regular_conv,-1)==np.argmax(y_val,-1))/y_val.shape[0])


conv layer  0  has  2  unique weight values
conv layer  1  has  2  unique weight values
conv layer  2  has  2  unique weight values
conv layer  3  has  2  unique weight values
conv layer  4  has  2  unique weight values
conv layer  5  has  2  unique weight values
conv layer  6  has  2  unique weight values
conv layer  7  has  2  unique weight values
conv layer  8  has  2  unique weight values
conv layer  9  has  2  unique weight values
conv layer  10  has  2  unique weight values
conv layer  11  has  2  unique weight values
conv layer  12  has  2  unique weight values
conv layer  13  has  2  unique weight values
conv layer  14  has  2  unique weight values
conv layer  15  has  2  unique weight values
conv layer  16  has  2  unique weight values
conv layer  17  has  2  unique weight values
conv layer  18  has  2  unique weight values
conv layer  19  has  2  unique weight values
conv layer  20  has  2  unique weight values
conv layer  21  has  2  unique weight values
conv layer  22  has 