In [None]:
import os
import csv
import numpy as np
import sys
import time
import random
import datetime
import tensorflow as tf
import pathlib

from multiprocessing import Process, Pool
from random import shuffle
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.losses import categorical_crossentropy

# Expose only the GPU with index 7 to CUDA-based libraries in this process.
os.environ.update({"CUDA_VISIBLE_DEVICES": "7"})

In [None]:
# Settings for the audio -> feature (.npy) conversion cell.
PATH = '/home/power703/work/cgh/data/student/'  # directory tree searched for audio files
num_threads = 32                                # worker processes used by the conversion pool
IN_PATH = list()                                # filled with the audio file paths to convert

def save_spectrum_to_npy(wavfile):
    """Write a fixed-size, rescaled dB spectrogram of one audio file to ``.npy``.

    The magnitude STFT (``n_fft=512``) is converted to decibels relative to its
    own maximum, cropped or zero-padded into a 256 x 128 array, passed through
    ``(value + 40) / 40`` and saved.

    Args:
        wavfile: Path of the audio file. The output path is this string with
            its last four characters replaced by ``.npy``.

    Returns:
        None. The result is written to disk.
    """
    # Imported inside the function, as in the original code.
    import librosa

    rows, cols = 256, 128

    samples, _ = librosa.load(wavfile)
    magnitude = np.abs(librosa.stft(samples, n_fft=512))
    spec_db = librosa.amplitude_to_db(magnitude, ref=np.max)

    # Copy at most `cols` frames; frames the clip does not have stay at 0
    # (before the rescaling below).
    kept = min(spec_db.shape[1], cols)
    canvas = np.zeros([rows, cols])
    canvas[:rows, :kept] = spec_db[:rows, :kept]

    scaled = (canvas + 40) / 40.0
    np.save(wavfile[:-4] + '.npy', scaled)

def save_mfcc_to_npy(wavfile, n_mfcc=32, n_frames=64):
    """Write a fixed-size MFCC matrix of one audio file to ``.npy``.

    The MFCCs are cropped to the first ``n_frames`` frames, or zero-padded on
    the right when the clip is shorter, so every saved array has shape
    ``(n_mfcc, n_frames)``.

    Args:
        wavfile: Path of the audio file to convert.
        n_mfcc: Number of MFCC coefficients (rows of the saved array).
        n_frames: Number of time frames (columns of the saved array).

    Returns:
        None. The array is saved next to the input, with the input's extension
        replaced by ``.npy``.
    """
    # Imported inside the function, as in the original code.
    import librosa

    y, sr = librosa.load(wavfile)
    data_mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

    # One code path for both long and short clips. The original short-clip
    # branch sliced rows with ":64" although the target only has 32 rows.
    kept = min(data_mfcc.shape[1], n_frames)
    tmp = np.zeros([n_mfcc, n_frames])
    tmp[:, :kept] = data_mfcc[:n_mfcc, :kept]

    # splitext instead of wavfile[:-4]: identical for "*.wav", but does not
    # mangle names whose extension is not exactly four characters long.
    np.save(os.path.splitext(wavfile)[0] + '.npy', tmp)


if __name__ == '__main__':
    # Collect every ".wav" file below PATH. Matching on the extension (rather
    # than on the substring 'wav' anywhere in the name) keeps unrelated files
    # whose names merely contain "wav" out of the conversion list.
    for root2, dirs2, files2 in os.walk(os.path.abspath(PATH)):
        IN_PATH.extend(
            os.path.join(root2, file2)
            for file2 in files2
            if file2.endswith('.wav')
        )

    # Convert the files in parallel. The context manager shuts the worker
    # processes down once the blocking map() call has returned.
    with Pool(num_threads) as pool:
        pool.map(save_mfcc_to_npy, IN_PATH)

In [None]:
# --- Dataset layout for the fold-building cell ---
num_threads = 64                 # worker processes used to load .npy files
dataset = 'student'
ROOT = '/work/power703/cgh/data/'
save_path = ROOT + dataset + '/'
ori_path = save_path + 'ori/'    # original clips
aug_path = save_path + 'aug/'    # augmented clips

# Class names are the last path component of every directory that os.walk
# yields below ori_path. The first entry is the walk root itself and is
# dropped. The names are sorted numerically and stored back as strings.
walked_names = [
    dirpath.split('/')[-1]
    for dirpath, _, _ in os.walk(os.path.abspath(ori_path))
]
classes = [str(label) for label in sorted(int(name) for name in walked_names[1:])]

# Number of cross-validation folds.
numberOfPart = 5

def chunkIt(seq, num):
    """Split ``seq`` into exactly ``num`` consecutive, near-equal slices.

    Chunk boundaries are ``floor(i * len(seq) / num)``, computed with integer
    arithmetic. The previous implementation accumulated a float step, and
    rounding error could leave the running position just below ``len(seq)``,
    which produced an extra chunk (8 items into 6 parts gave 7 chunks).

    Args:
        seq: Any sliceable sequence (list, tuple, string, ...).
        num: Number of chunks to produce; must be at least 1.

    Returns:
        list: ``num`` slices of ``seq`` whose concatenation is ``seq`` and
        whose lengths differ by at most one. An empty ``seq`` yields ``num``
        empty slices, so callers can always index ``0 .. num - 1``.

    Raises:
        ValueError: If ``num`` is smaller than 1.
    """
    num = int(num)
    if num < 1:
        raise ValueError("num must be a positive integer")

    total = len(seq)
    return [
        seq[i * total // num:(i + 1) * total // num]
        for i in range(num)
    ]


def load_data(npy_name):
    """Load one feature file together with its integer class label.

    The label is the position, in the module-level ``classes`` list, of the
    name of the directory that directly contains the file.

    Args:
        npy_name: '/'-separated path of the feature file. Its last four
            characters are replaced by ``.npy`` before loading.

    Returns:
        tuple: ``(data, label)``: the loaded array and the class index.

    Raises:
        ValueError: If the parent directory name is not in ``classes``.
    """
    parent_dir = npy_name.split('/')[-2]
    class_index = classes.index(parent_dir)
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only use it on feature files produced by trusted code.
    features = np.load(npy_name[:-4] + '.npy', allow_pickle=True)
    return features, class_index


# One (initially empty) bucket per fold for the training and test file lists.
train_list = [[] for _ in range(numberOfPart)]
test_list = [[] for _ in range(numberOfPart)]
origin_list = []

# Gather the original feature files class by class.
for n in classes:
    ori_path = ROOT + dataset + '/ori/' + str(n) + '/'
    ori_n = 0  # number of files found for this class

    for root2, _, files2 in os.walk(os.path.abspath(ori_path)):
        found = [os.path.join(root2, file2) for file2 in files2 if 'npy' in file2]
        origin_list.extend(found)
        ori_n += len(found)

# Shuffle the originals (a full-length random.sample is a shuffled copy) and
# cut them into numberOfPart folds. No seed is set here, so the split differs
# from run to run.
shuffled_origin = random.sample(origin_list, len(origin_list))
origin_list = chunkIt(shuffled_origin, numberOfPart)

# Per-fold buckets for the augmented files.
aug_list = [[] for _ in range(numberOfPart)]

# Assign every augmented file to the fold(s) holding the original clip it was
# derived from. The original code walked the whole augmented tree once per
# fold and tested membership in a list; here the tree is walked once and the
# fold is found with a dictionary lookup. The resulting per-fold lists hold
# the same files in the same os.walk order.

# stem of an original file (no directory, no 4-character extension) -> folds
stem_to_folds = {}
for i in range(len(origin_list)):
    for y in origin_list[i]:
        stem = y[:-4]
        stem = stem[stem.rfind('/')+1:]
        folds = stem_to_folds.setdefault(stem, [])
        if i not in folds:
            folds.append(i)

for root1, _, files1 in os.walk(os.path.abspath(aug_path)):
    for file1 in files1:
        if('npy' in file1):
            # Strip the last two '_'-separated fields to recover the stem of
            # the original clip.
            myname = file1[:file1.rfind('_')]
            myname = myname[:myname.rfind('_')]
            for i in stem_to_folds.get(myname, ()):
                aug_list[i].append(os.path.join(root1, file1))

# Count the original clips per class. Only the first fold is counted. The
# class is the part of the file name before the first '_'.
number_ori = np.zeros(len(classes))
for data in origin_list[0]:
    file_name = data.rsplit('/', 1)[-1]
    lable = classes.index(file_name.split('_')[0])
    number_ori[lable] += 1

# For each fold: shuffle the augmented clips, then keep them in that order
# until a class has reached (1500 - originals of that class in fold 0).
for n in range(numberOfPart):
    shuffled = random.sample(aug_list[n], len(aug_list[n]))
    number_aug = np.zeros(len(classes))
    tmp_list = []
    for data in shuffled:
        lable = classes.index(data.rsplit('/', 1)[-1].split('_')[0])
        if number_aug[lable] >= (1500 - number_ori[lable]):
            continue
        number_aug[lable] += 1
        tmp_list.append(data)
    aug_list[n] = tmp_list


# Fold k is tested on its own original clips and trained on the original and
# augmented clips of every other fold.
for select_part in range(numberOfPart):
    test_list[select_part] = origin_list[select_part]
    for other_part in range(numberOfPart):
        if other_part == select_part:
            continue
        train_list[select_part] = (
            train_list[select_part]
            + origin_list[other_part]
            + aug_list[other_part]
        )


# Load the feature files of every fold in parallel and write one .npz per
# fold. Using the pool as a context manager shuts its worker processes down
# when the loop finishes (or fails); the original never closed the pool.
with Pool(num_threads) as pool:
    for n in range(numberOfPart):
        # Cap the test fold at 82 clips per class, keeping the existing order.
        # The class is the part of the file name before the first '_'.
        number_list = np.zeros(len(classes))
        tmp_list = []
        for data in test_list[n]:
            lable = data[data.rfind('/')+1:].split('_')
            lable = classes.index(lable[0])  # +'_'+lable[1]
            if (number_list[lable] < 82):  # or True
                number_list[lable] = number_list[lable]+1
                tmp_list.append(data)
        test_list[n] = tmp_list

        # load_data returns (features, label) for each path.
        train_pairs = pool.map(load_data, train_list[n])
        x_train = [pair[0] for pair in train_pairs]
        y_train = [pair[1] for pair in train_pairs]

        test_pairs = pool.map(load_data, test_list[n])
        x_test = [pair[0] for pair in test_pairs]
        y_test = [pair[1] for pair in test_pairs]

        np.savez(
            save_path+'part_'+str(n),
            x_train=np.asarray(x_train),
            y_train=np.asarray(y_train),
            x_test=np.asarray(x_test),
            y_test=np.asarray(y_test)
        )


In [None]:
# --- Training configuration ---
model_name = 'MobileNetV3Small'
dataset = 'student'
classes = 4          # number of output classes (rebinds the earlier `classes` list)
s_n = '0'            # which fold file (part_<s_n>.npz) to train on
BATCH_SIZE = 32
epochs = 100
load_last = False    # when True, get_compiled_model() loads LOAD_WEIGHT
INPUT_X = 32
INPUT_Y = 64

# Run name: model, dataset, class count, fold, batch size and a month/day_hour/minute stamp.
NAME = f'{model_name}_{dataset}_c{classes}_p{s_n}_bs{BATCH_SIZE}_data_' + \
    datetime.datetime.now().strftime("%m%d_%H%M")

SAVE_PATH = '/work/power703/cgh/weight/'    # prefix for checkpoint files
log_dir = '/work/power703/cgh/logs/' + NAME
# Despite its name, this is the path of the fold's dataset archive.
WEIGHT_PATH = '/work/power703/cgh/data/' + dataset + '/part_' + s_n + '.npz'

LOAD_WEIGHT = '/work/power703/cgh/weight/DenseNet201_chu_7_c2_p0_bs32_data_0506_1054.14-0.67-0.99.hdf5'
class_weight = {}    # passed to model.fit() as class_weight

def get_compiled_model():
    """Build and compile the MobileNetV3Small classifier.

    The network is created without pretrained weights for single-channel
    inputs of shape ``(INPUT_X, INPUT_Y, 1)`` and ``classes`` outputs. When
    ``load_last`` is true, weights are loaded from ``LOAD_WEIGHT`` before
    compiling.

    Returns:
        The Keras model, compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    net = tf.keras.applications.MobileNetV3Small(
        weights=None,
        classes=classes,
        input_shape=(INPUT_X, INPUT_Y, 1),
    )

    if load_last:
        net.load_weights(LOAD_WEIGHT)

    net.compile(
        loss="categorical_crossentropy",
        optimizer=Adam(),
        metrics=['accuracy'],
    )
    return net


def get_dataset():
    """Load the selected fold and return ``(train_dataset, test_dataset)``.

    Reads ``x_train``/``y_train``/``x_test``/``y_test`` from the ``.npz``
    archive at ``WEIGHT_PATH``, reshapes the features to
    ``(N, INPUT_X, INPUT_Y, 1)``, one-hot encodes the labels over ``classes``
    categories and wraps both splits in batched ``tf.data.Dataset`` objects.
    The training split is shuffled with a buffer as large as the split.

    Side effects:
        Rebinds the module-level ``class_weight`` to a ``{class_id: weight}``
        dict computed from the training labels. The original assigned this
        dict to a local variable, so the weights passed to ``model.fit``
        were always the empty module-level dict.

    Returns:
        tuple: ``(train_dataset, test_dataset)``.
    """
    global class_weight

    # The context manager closes the archive once the arrays have been read.
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only point WEIGHT_PATH at archives produced by trusted code.
    with np.load(WEIGHT_PATH, mmap_mode='r', allow_pickle=True) as x:
        x_train = x['x_train']
        y_train = x['y_train']
        x_test = x['x_test']
        y_test = x['y_test']

    # Inverse-frequency class weights: total / (n_classes_present * count).
    # For two classes this equals the original (1 / count) * total / 2.0;
    # dividing by the number of classes present keeps the same scaling when
    # there are more than two.
    total = len(y_train)
    unique, counts = np.unique(y_train, return_counts=True)
    weights = {
        int(label): float(total / (len(unique) * count))
        for label, count in zip(unique, counts)
    }
    # Classes with no training sample get a neutral weight so the keys cover
    # 0 .. classes - 1, which Keras expects of a class_weight dict.
    for label in range(classes):
        weights.setdefault(label, 1.0)
    class_weight = weights

    x_train = x_train.reshape(x_train.shape[0], INPUT_X, INPUT_Y, 1)
    y_train = to_categorical(y_train, num_classes=classes)

    x_test = x_test.reshape(x_test.shape[0], INPUT_X, INPUT_Y, 1)
    y_test = to_categorical(y_test, num_classes=classes)

    SHUFFLE_BUFFER_SIZE = len(x_train)
    return(
        tf.data.Dataset.from_tensor_slices(
            (x_train, y_train)).shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE),
        tf.data.Dataset.from_tensor_slices(
            (x_test, y_test)).batch(BATCH_SIZE)
    )


# Create and compile the model inside the MirroredStrategy scope.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model = get_compiled_model()

train_dataset, test_dataset = get_dataset()

# Turn automatic sharding off for both input pipelines.
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
train_dataset = train_dataset.with_options(options)
test_dataset = test_dataset.with_options(options)

# TensorBoard logging, plus a checkpoint written only when the validation
# accuracy improves.
tensorboard_cb = TensorBoard(log_dir=log_dir)
checkpoint_cb = ModelCheckpoint(
    SAVE_PATH + NAME + '.{epoch:02d}-{val_accuracy:.2f}-{val_loss:.2f}.hdf5',
    monitor='val_accuracy',
    verbose=2,
    save_best_only=True,
)

model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=epochs,
    class_weight=class_weight,
    callbacks=[tensorboard_cb, checkpoint_cb],
    verbose=2,
)



In [None]:
# --- Export a trained checkpoint to TensorFlow Lite with default optimizations ---
LOAD_WEIGHT = '/home/power703/work/cgh/weight/MobileNetV3Small_student_c4_p4_bs32_data_0523_1643.86-0.80-1.26.hdf5'
OUTPUT_MODEL = '/work/power703/cgh/weight/'
TFL_PATH = '/work/power703/cgh/tflite/'+'MobileNetV3Small_student_c4_p4'+'.tflite'

# Destination of the converted model. OUTPUT_MODEL and TFL_PATH above are not
# used by this cell.
tflite_models_dir = pathlib.Path("/home/power703/work/cgh/tflite")
resnet_quantized_tflite_file = tflite_models_dir.joinpath(
    "MobileNetV3Small_student_c4_p4_quantized.tflite"
)

# Load the saved Keras model and convert it.
model = tf.keras.models.load_model(LOAD_WEIGHT)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_model_bytes = converter.convert()

resnet_quantized_tflite_file.write_bytes(quantized_model_bytes)
