## TODO

In [1]:
DATA_PATH = "drive/MyDrive/Colab Notebooks/AdaptiveSound/data/"
MODEL_DIR = "drive/MyDrive/Colab Notebooks/AdaptiveSound/model/"

In [2]:
import tensorflow as tf

# TF version should be 2.7 or higher
print("TensorFlow version:", tf.__version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

TensorFlow version: 2.12.0
Num GPUs Available:  1


In [3]:
import os
import pandas as pd
import random
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import pickle

In [4]:
NUM_CLASSES = 10
FILE_SIZE = 1   #Length of each sample in seconds
SAMPLING_RATE = 44100

In [5]:
# transforming waveform tensor to mel-spectrogram
# copied from https://github.com/tensorflow/models/blob/master/research/audioset/yamnet/features.py
def wav2spec(waveform):
    '''
    waveform: batch_size, num_samples(seconds*sr)
    '''
    # normalize waveform
    mean = tf.reduce_mean(waveform)
    std = tf.math.reduce_std(waveform)
    waveform = (waveform - mean) / (std + 1e-6)

    sr = float(waveform.shape[1])
    window_length_samples = int(
      round(sr * 0.025))
    hop_length_samples = int(
      round(sr * 0.01))
    fft_length = 2 ** int(np.ceil(np.log(window_length_samples) / np.log(2.0)))
    num_spectrogram_bins = fft_length // 2 + 1

    magnitude_spectrogram = tf.abs(tf.signal.stft(
          signals=waveform,
          frame_length=window_length_samples,
          frame_step=hop_length_samples,
          fft_length=fft_length))
    # magnitude_spectrogram has shape [<# STFT frames>, num_spectrogram_bins]

    # Convert spectrogram into log mel spectrogram.
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=128,
        num_spectrogram_bins=num_spectrogram_bins,
        sample_rate=sr,
        lower_edge_hertz=125.0,
        upper_edge_hertz=7500)
    mel_spectrogram = tf.matmul(
      magnitude_spectrogram, linear_to_mel_weight_matrix)
    log_mel_spectrogram = tf.math.log(mel_spectrogram + 0.001)
    # log_mel_spectrogram has shape [<# STFT frames>, <# mel_bins>]

    spectrogram_hop_length_samples = int(
      round(sr * 0.01))
    spectrogram_sample_rate = sr / spectrogram_hop_length_samples
    patch_window_length_samples = int(
      round(spectrogram_sample_rate * 0.96))
    patch_hop_length_samples = int(
      round(spectrogram_sample_rate * 0.48))
    # features = tf.signal.frame(
    #     signal=log_mel_spectrogram,
    #     frame_length=patch_window_length_samples,
    #     frame_step=patch_hop_length_samples,
    #     axis=0)

    return log_mel_spectrogram#, features

def spec_to_image(spec, eps=1e-6):
    mean = tf.reduce_mean(spec)
    std = tf.math.reduce_std(spec)
    spec_norm = (spec - mean) / (std + eps)
    spec_min, spec_max = tf.reduce_min(spec_norm), tf.reduce_max(spec_norm)
    spec_scaled = 255 * (spec_norm - spec_min) / (spec_max - spec_min)
    return tf.expand_dims(spec_scaled,axis=-1)

In [6]:
def get_waveform(file_path, sr=SAMPLING_RATE):
    wav,sr = librosa.load(file_path,sr=sr)
    if wav.shape[0]<FILE_SIZE*sr:
        wav=np.pad(wav,int(np.ceil((FILE_SIZE*sr-wav.shape[0])/2)),mode='reflect')
    wav=wav[:FILE_SIZE*sr]
    return tf.convert_to_tensor(wav)

In [7]:
def categoricalAcc(lbl,pred):
    m = tf.keras.metrics.CategoricalAccuracy()
    m.reset_state()
    m.update_state(lbl,pred)
    return m.result().numpy()

In [8]:
class SoundWatchData(Dataset):
    def __init__(self, data_path_directory, df, in_col, out_col):
        self.df = df
        self.data = []
        self.labels = []
        self.c2i={}
        self.i2c={}
        self.categories = sorted(df[out_col].unique())
        for i, category in enumerate(self.categories):
            self.c2i[category]=i
            self.i2c[i]=category
        for ind in tqdm(range(len(df))):
            row = df.iloc[ind]
            file_path = os.path.join(data_path_directory, row[in_col])
            waveform = get_waveform(file_path)
            if waveform.shape[0] == 44100:
                self.data.append(waveform)
                self.labels.append(self.c2i[row['category']])
            else:
                print(waveform.shape[0])
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [None]:
from sklearn import preprocessing
w = get_waveform(f'{DATA_PATH}maml-data-one-sec/maml_data_one_group/6_354180_2079872-hq_chunk1_chunk0_000.wav')
w = tf.expand_dims(w,axis=0)

mean = tf.reduce_mean(w)
std = tf.math.reduce_std(w)
n_w = (w - mean) / (std + 1e-6)


print(np.mean(w))
print(np.std(w))

print(np.mean(n_w))
print(np.std(n_w))

# w = w / tf.int16.max
# print(w.shape)

# spec = spec_to_image(wav2spec(w))

# print(spec.shape)

2.4711626e-06
0.06250869
0.4965794
0.07499853


### Data Preparation

In [None]:
!cp -r drive/MyDrive/Colab\ Notebooks/AdaptiveSound/data/ /tmp/

cp: cannot stat 'drive/MyDrive/Colab Notebooks/AdaptiveSound/data/train.pkl': No such file or directory
cp: cannot stat 'drive/MyDrive/Colab Notebooks/AdaptiveSound/data/valid.pkl': No such file or directory
cp: cannot stat 'drive/MyDrive/Colab Notebooks/AdaptiveSound/data/test.pkl': No such file or directory


In [None]:
df = pd.read_csv(f'{DATA_PATH}maml-data-one-sec/homesound_data.csv')
valid_test_classes = [NUM_CLASSES - 1, NUM_CLASSES]
train = df[~df.fold.isin(valid_test_classes)]
valid = df[df['fold']==NUM_CLASSES-1]
test = df[df['fold']==NUM_CLASSES]

train_data = SoundWatchData(f'{"/tmp/data/"}maml-data-one-sec/maml_data_one_group', train, 'filename', 'category')
valid_data = SoundWatchData(f'{"/tmp/data/"}maml-data-one-sec/maml_data_one_group', valid, 'filename', 'category')
test_data = SoundWatchData(f'{"/tmp/data/"}maml-data-one-sec/maml_data_one_group', test, 'filename', 'category')

100%|██████████| 4402/4402 [00:04<00:00, 1039.00it/s]
100%|██████████| 548/548 [00:00<00:00, 939.74it/s]
100%|██████████| 547/547 [00:00<00:00, 952.28it/s]


In [None]:
train_data = pickle.load(open(f'{DATA_PATH}train.pkl','rb'))
valid_data = pickle.load(open(f'{DATA_PATH}valid.pkl','rb'))
test_data = pickle.load(open(f'{DATA_PATH}test.pkl','rb'))

In [None]:
print(train_data.i2c)
print(valid_data.i2c)
print(test_data.i2c)

{0: 'appliances', 1: 'baby_cry', 2: 'car_honk', 3: 'cat_meow', 4: 'dog_bark', 5: 'doorbell', 6: 'fire_alarm', 7: 'knocking', 8: 'siren', 9: 'water_running'}
{0: 'appliances', 1: 'baby_cry', 2: 'car_honk', 3: 'cat_meow', 4: 'dog_bark', 5: 'doorbell', 6: 'fire_alarm', 7: 'knocking', 8: 'siren', 9: 'water_running'}
{0: 'appliances', 1: 'baby_cry', 2: 'car_honk', 3: 'cat_meow', 4: 'dog_bark', 5: 'doorbell', 6: 'fire_alarm', 7: 'knocking', 8: 'siren', 9: 'water_running'}


In [None]:
BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 4000

train_ds = tf.data.Dataset.from_tensor_slices((tf.cast(train_data.data,tf.float32),tf.keras.utils.to_categorical(train_data.labels)))
dev_ds = tf.data.Dataset.from_tensor_slices((tf.cast(valid_data.data,tf.float32),tf.keras.utils.to_categorical(valid_data.labels)))
test_ds = tf.data.Dataset.from_tensor_slices((tf.cast(test_data.data,tf.float32),tf.keras.utils.to_categorical(test_data.labels)))

batch_train_ds = train_ds.shuffle(SHUFFLE_BUFFER_SIZE,reshuffle_each_iteration=True).batch(BATCH_SIZE)
one_train_ds = train_ds.shuffle(SHUFFLE_BUFFER_SIZE,reshuffle_each_iteration=True).batch(1)
dev_ds = dev_ds.shuffle(SHUFFLE_BUFFER_SIZE,reshuffle_each_iteration=True).batch(1)
test_ds = test_ds.batch(1)

In [9]:
class SoundClassificationPTModel(tf.keras.Model):
    def __init__(self, model_name):
        super().__init__()
        if model_name == 'mobilenetv2':
            self.extractor = tf.keras.applications.mobilenet.MobileNet(input_shape=[98,128,3],include_top=False)
        elif model_name == 'vgg16':
            self.extractor = tf.keras.applications.vgg16.VGG16(input_shape=[98,128,3],include_top=False)

        self.prediction_head = tf.keras.Sequential([tf.keras.layers.Flatten(input_shape=[3,4,1024],name='flatten_hidden'),
        tf.keras.layers.Dense(10, name='dense_2'),
                                               ])
        self.concatenate = tf.keras.layers.Concatenate()


    def call(self, x):
        # waveform to spectrogram
        x = spec_to_image(wav2spec(x))
        x = self.concatenate([x,x,x])
        h = self.extractor(x)
        return self.prediction_head(h)

### Model Wrapper
This wrapper would be converted to tf-lite, and can be used by kotlin/java in Android App.

The function of this wrapper includes:

+ train
+ finetune
+ inference
+ save
+ load

In [10]:
# Wrapper for the whole system
class SoundClassificationWrapper(tf.Module):
    def __init__(self):
        self.model = SoundClassificationPTModel(model_name='mobilenetv2')

        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            1e-3,
            decay_steps=10000,
            decay_rate=0.96,
            staircase=True)

        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
            loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        )

    @tf.function(input_signature=[
      tf.TensorSpec(shape=[None, 44100], dtype=tf.float32),
      tf.TensorSpec([None, 10], tf.float32),
    ])
    def train(self,x,y):
        with tf.GradientTape() as tape:
            prediction = self.model(x, training=True)
            loss = self.model.loss(y, prediction)
        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.model.optimizer.apply_gradients(
            zip(gradients, self.model.trainable_variables))
        result = {"loss": loss, 'logits': prediction}
        return result

    @tf.function(input_signature=[
      tf.TensorSpec(shape=[None, 44100], dtype=tf.float32),
      tf.TensorSpec([None, 10], tf.float32),
    ])
    def finetune(self,x,y):
        with tf.GradientTape() as tape:
            prediction = self.model(x, training=True)
            loss = self.model.loss(y, prediction)
        self.model.extractor.trainable = False
        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.model.optimizer.apply_gradients(
            zip(gradients, self.model.trainable_variables))
        result = {"loss": loss, 'logits': prediction}
        return result

    @tf.function(input_signature=[
      tf.TensorSpec([None, 44100], tf.float32),
    ])
    def inference(self,x):
        # fix the batch normalization layer
        for layer in SC_Wrapper.model.extractor.layers:
            if isinstance(layer, tf.keras.layers.BatchNormalization):
                layer.training = False
        logits = self.model(x)
        probabilities = tf.nn.softmax(logits, axis=-1)
        return {
            "output": probabilities,
            "logits": logits,
            "class": tf.math.argmax(logits,axis=-1)
        }

    @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.string)])
    def save(self, checkpoint_path):
        tensor_names = [weight.name for weight in self.model.weights]
        tensors_to_save = [weight.read_value() for weight in self.model.weights]
        tf.raw_ops.Save(
            filename=checkpoint_path, tensor_names=tensor_names,
            data=tensors_to_save, name='save')
        return {"checkpoint_path": checkpoint_path}

    @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.string)])
    def load(self, checkpoint_path):
        restored_tensors = {}
        for var in self.model.weights:
            restored = tf.raw_ops.Restore(
              file_pattern=checkpoint_path, tensor_name=var.name, dt=var.dtype,
              name='restore')
            var.assign(restored)
            restored_tensors[var.name] = restored
        return restored_tensors

### Training

In [None]:
NUM_EPOCHS = 100
BATCH_SIZE = 64
PATIENCE = 10
WARMUP = 0

epochs = np.arange(1, NUM_EPOCHS + 1, 1)
loss_lst = np.zeros([NUM_EPOCHS])
gen_loss_lst = np.zeros([NUM_EPOCHS])
disc_loss_lst = np.zeros([NUM_EPOCHS])
acc_lst = np.zeros([NUM_EPOCHS])

SC_Wrapper = SoundClassificationWrapper()

cnt = 0
best_loss = np.inf
best_acc = 0
for i in range(NUM_EPOCHS):
    lbl = []
    pred = []
    for x,y in tqdm(batch_train_ds):
        result = SC_Wrapper.train(x, y)
        lbl.append(y)
        pred.append(result['logits'])
        loss_lst[i] += result['loss']

    acc_lst[i] = categoricalAcc(tf.concat(lbl,axis=0), tf.concat(pred,axis=0))

    if i > WARMUP:
        if loss_lst[i] <= best_loss:
            best_loss = loss_lst[i]
            best_acc = acc_lst[i]
            cnt = 0
            SC_Wrapper.save(f'{MODEL_DIR}model.ckpt')
        else:
            cnt += 1
            if cnt > PATIENCE:
                print('Early stopping')
                print(f'best loss {best_loss}')
                print(f'best acc {best_acc}')
                break

    print(f"{i+1} epochs | train loss: {loss_lst[i]:.3f} | train acc: {acc_lst[i]:.3f}")

100%|██████████| 69/69 [00:21<00:00,  3.19it/s]


1 epochs | train loss: 84.410 | train acc: 0.752


100%|██████████| 69/69 [00:05<00:00, 13.24it/s]


2 epochs | train loss: 28.468 | train acc: 0.898


100%|██████████| 69/69 [00:04<00:00, 14.24it/s]


3 epochs | train loss: 16.742 | train acc: 0.934


100%|██████████| 69/69 [00:05<00:00, 13.70it/s]


4 epochs | train loss: 16.181 | train acc: 0.938


100%|██████████| 69/69 [00:05<00:00, 13.69it/s]


5 epochs | train loss: 9.672 | train acc: 0.961


100%|██████████| 69/69 [00:05<00:00, 13.77it/s]


6 epochs | train loss: 5.816 | train acc: 0.975


100%|██████████| 69/69 [00:05<00:00, 13.33it/s]


7 epochs | train loss: 6.451 | train acc: 0.976


100%|██████████| 69/69 [00:05<00:00, 13.52it/s]


8 epochs | train loss: 4.485 | train acc: 0.981


100%|██████████| 69/69 [00:04<00:00, 14.03it/s]


9 epochs | train loss: 3.330 | train acc: 0.986


100%|██████████| 69/69 [00:06<00:00, 11.43it/s]


10 epochs | train loss: 9.300 | train acc: 0.973


100%|██████████| 69/69 [00:05<00:00, 13.60it/s]


11 epochs | train loss: 26.615 | train acc: 0.924


100%|██████████| 69/69 [00:04<00:00, 14.18it/s]


12 epochs | train loss: 12.142 | train acc: 0.955


100%|██████████| 69/69 [00:04<00:00, 13.96it/s]


13 epochs | train loss: 12.968 | train acc: 0.970


100%|██████████| 69/69 [00:04<00:00, 14.05it/s]


14 epochs | train loss: 21.540 | train acc: 0.939


100%|██████████| 69/69 [00:05<00:00, 13.44it/s]


15 epochs | train loss: 6.418 | train acc: 0.971


100%|██████████| 69/69 [00:04<00:00, 14.24it/s]


16 epochs | train loss: 3.144 | train acc: 0.986


100%|██████████| 69/69 [00:15<00:00,  4.40it/s]


17 epochs | train loss: 1.222 | train acc: 0.995


100%|██████████| 69/69 [00:04<00:00, 14.23it/s]


18 epochs | train loss: 2.346 | train acc: 0.993


100%|██████████| 69/69 [00:04<00:00, 14.05it/s]


19 epochs | train loss: 1.643 | train acc: 0.991


100%|██████████| 69/69 [00:04<00:00, 14.33it/s]


20 epochs | train loss: 1.270 | train acc: 0.995


100%|██████████| 69/69 [00:05<00:00, 13.71it/s]


21 epochs | train loss: 1.330 | train acc: 0.995


100%|██████████| 69/69 [00:05<00:00, 13.52it/s]


22 epochs | train loss: 1.705 | train acc: 0.993


100%|██████████| 69/69 [00:04<00:00, 14.15it/s]


23 epochs | train loss: 0.784 | train acc: 0.997


100%|██████████| 69/69 [00:04<00:00, 14.12it/s]


24 epochs | train loss: 2.145 | train acc: 0.991


100%|██████████| 69/69 [00:05<00:00, 13.51it/s]


25 epochs | train loss: 1.421 | train acc: 0.994


100%|██████████| 69/69 [00:05<00:00, 13.55it/s]


26 epochs | train loss: 0.582 | train acc: 0.998


100%|██████████| 69/69 [00:04<00:00, 14.39it/s]


27 epochs | train loss: 1.711 | train acc: 0.993


100%|██████████| 69/69 [00:04<00:00, 13.85it/s]


28 epochs | train loss: 1.476 | train acc: 0.994


100%|██████████| 69/69 [00:05<00:00, 13.47it/s]


29 epochs | train loss: 1.424 | train acc: 0.994


100%|██████████| 69/69 [00:04<00:00, 13.95it/s]


30 epochs | train loss: 1.709 | train acc: 0.993


100%|██████████| 69/69 [00:05<00:00, 13.52it/s]


31 epochs | train loss: 1.213 | train acc: 0.995


100%|██████████| 69/69 [00:04<00:00, 14.16it/s]


32 epochs | train loss: 2.983 | train acc: 0.988


100%|██████████| 69/69 [00:04<00:00, 14.41it/s]


33 epochs | train loss: 2.463 | train acc: 0.991


100%|██████████| 69/69 [00:04<00:00, 14.13it/s]


34 epochs | train loss: 2.896 | train acc: 0.986


100%|██████████| 69/69 [00:05<00:00, 13.52it/s]


35 epochs | train loss: 6.245 | train acc: 0.977


100%|██████████| 69/69 [00:05<00:00, 13.52it/s]


36 epochs | train loss: 7.827 | train acc: 0.975


100%|██████████| 69/69 [00:05<00:00, 13.52it/s]

Early stopping
best loss 0.5815675258636475
best acc 0.9977282881736755





In [None]:
# Train several epochs with batch size one
NUM_EPOCHS = 10
BATCH_SIZE = 1
PATIENCE = 2
WARMUP = 0

epochs = np.arange(1, NUM_EPOCHS + 1, 1)
loss_lst = np.zeros([NUM_EPOCHS])
gen_loss_lst = np.zeros([NUM_EPOCHS])
disc_loss_lst = np.zeros([NUM_EPOCHS])
acc_lst = np.zeros([NUM_EPOCHS])

SC_Wrapper = SoundClassificationWrapper()
SC_Wrapper.load(f'{MODEL_DIR}model.ckpt')

cnt = 0
best_loss = np.inf
best_acc = 0
for i in range(NUM_EPOCHS):
    lbl = []
    pred = []
    for x,y in tqdm(dev_ds):
        result = SC_Wrapper.finetune(x, y)
        lbl.append(y)
        pred.append(result['logits'])
        loss_lst[i] += result['loss']

    acc_lst[i] = categoricalAcc(tf.concat(lbl,axis=0), tf.concat(pred,axis=0))

    if i > WARMUP:
        if loss_lst[i] <= best_loss:
            best_loss = loss_lst[i]
            best_acc = acc_lst[i]
            cnt = 0
            SC_Wrapper.save(f'{MODEL_DIR}model_finetune.ckpt')
            print("save")
        else:
            cnt += 1
            if cnt > PATIENCE:
                print('Early stopping')
                print(f'best loss {best_loss}')
                print(f'best acc {best_acc}')
                break

    print(f"{i+1} epochs | train loss: {loss_lst[i]:.3f} | train acc: {acc_lst[i]:.3f}")


  0%|          | 0/548 [00:00<?, ?it/s][A
  0%|          | 1/548 [00:02<20:13,  2.22s/it][A
  3%|▎         | 19/548 [00:02<00:46, 11.31it/s][A
  7%|▋         | 38/548 [00:02<00:20, 25.43it/s][A
 10%|█         | 57/548 [00:02<00:11, 42.07it/s][A
 14%|█▍        | 76/548 [00:02<00:07, 60.45it/s][A
 17%|█▋        | 93/548 [00:02<00:05, 77.09it/s][A
 20%|██        | 110/548 [00:02<00:04, 93.28it/s][A
 23%|██▎       | 128/548 [00:02<00:03, 110.83it/s][A
 27%|██▋       | 146/548 [00:03<00:03, 124.16it/s][A
 30%|███       | 165/548 [00:03<00:02, 138.82it/s][A
 33%|███▎      | 183/548 [00:03<00:02, 147.89it/s][A
 37%|███▋      | 202/548 [00:03<00:02, 157.96it/s][A
 40%|████      | 221/548 [00:03<00:01, 164.55it/s][A
 44%|████▎     | 239/548 [00:03<00:01, 163.41it/s][A
 47%|████▋     | 257/548 [00:03<00:01, 155.08it/s][A
 50%|█████     | 274/548 [00:03<00:01, 147.77it/s][A
 53%|█████▎    | 290/548 [00:03<00:01, 146.49it/s][A
 56%|█████▌    | 306/548 [00:04<00:01, 144.83it/s][

1 epochs | train loss: 407.134 | train acc: 0.843



  0%|          | 0/548 [00:00<?, ?it/s][A
  2%|▏         | 11/548 [00:00<00:05, 104.46it/s][A
  4%|▍         | 22/548 [00:00<00:05, 104.10it/s][A
  7%|▋         | 37/548 [00:00<00:04, 123.16it/s][A
  9%|▉         | 50/548 [00:00<00:03, 124.50it/s][A
 11%|█▏        | 63/548 [00:00<00:03, 126.33it/s][A
 15%|█▍        | 81/548 [00:00<00:03, 143.37it/s][A
 18%|█▊        | 99/548 [00:00<00:02, 154.43it/s][A
 22%|██▏       | 118/548 [00:00<00:02, 163.20it/s][A
 25%|██▍       | 136/548 [00:00<00:02, 166.52it/s][A
 28%|██▊       | 155/548 [00:01<00:02, 172.24it/s][A
 32%|███▏      | 173/548 [00:01<00:02, 169.08it/s][A
 35%|███▌      | 192/548 [00:01<00:02, 174.27it/s][A
 39%|███▊      | 211/548 [00:01<00:01, 177.15it/s][A
 42%|████▏     | 229/548 [00:01<00:01, 175.52it/s][A
 45%|████▌     | 247/548 [00:01<00:01, 176.20it/s][A
 48%|████▊     | 265/548 [00:01<00:01, 175.74it/s][A
 52%|█████▏    | 284/548 [00:01<00:01, 178.27it/s][A
 55%|█████▌    | 302/548 [00:01<00:01, 178.24

save
2 epochs | train loss: 63.025 | train acc: 0.969



  0%|          | 0/548 [00:00<?, ?it/s][A
  2%|▏         | 11/548 [00:00<00:05, 106.59it/s][A
  5%|▌         | 28/548 [00:00<00:03, 140.61it/s][A
  9%|▊         | 47/548 [00:00<00:03, 159.42it/s][A
 12%|█▏        | 65/548 [00:00<00:02, 164.09it/s][A
 15%|█▍        | 82/548 [00:00<00:02, 166.04it/s][A
 18%|█▊        | 100/548 [00:00<00:02, 168.08it/s][A
 22%|██▏       | 119/548 [00:00<00:02, 174.82it/s][A
 25%|██▌       | 139/548 [00:00<00:02, 179.89it/s][A
 29%|██▊       | 157/548 [00:00<00:02, 176.42it/s][A
 32%|███▏      | 175/548 [00:01<00:02, 177.47it/s][A
 35%|███▌      | 194/548 [00:01<00:01, 180.07it/s][A
 39%|███▉      | 213/548 [00:01<00:01, 175.58it/s][A
 42%|████▏     | 232/548 [00:01<00:01, 178.37it/s][A
 46%|████▌     | 251/548 [00:01<00:01, 180.23it/s][A
 49%|████▉     | 270/548 [00:01<00:01, 179.31it/s][A
 53%|█████▎    | 288/548 [00:01<00:01, 173.44it/s][A
 56%|█████▌    | 307/548 [00:01<00:01, 177.53it/s][A
 59%|█████▉    | 325/548 [00:01<00:01, 178.

save
3 epochs | train loss: 16.561 | train acc: 0.989



  0%|          | 0/548 [00:00<?, ?it/s][A
  3%|▎         | 16/548 [00:00<00:03, 158.15it/s][A
  6%|▌         | 33/548 [00:00<00:03, 160.66it/s][A
  9%|▉         | 51/548 [00:00<00:02, 167.46it/s][A
 13%|█▎        | 69/548 [00:00<00:02, 170.52it/s][A
 16%|█▌        | 88/548 [00:00<00:02, 176.25it/s][A
 19%|█▉        | 106/548 [00:00<00:02, 171.86it/s][A
 23%|██▎       | 125/548 [00:00<00:02, 175.23it/s][A
 26%|██▋       | 144/548 [00:00<00:02, 178.70it/s][A
 30%|██▉       | 163/548 [00:00<00:02, 179.85it/s][A
 33%|███▎      | 182/548 [00:01<00:02, 181.26it/s][A
 37%|███▋      | 201/548 [00:01<00:01, 182.60it/s][A
 40%|████      | 220/548 [00:01<00:01, 180.55it/s][A
 44%|████▎     | 239/548 [00:01<00:01, 177.05it/s][A
 47%|████▋     | 258/548 [00:01<00:01, 179.79it/s][A
 51%|█████     | 277/548 [00:01<00:01, 175.35it/s][A
 54%|█████▍    | 295/548 [00:01<00:01, 173.47it/s][A
 57%|█████▋    | 313/548 [00:01<00:01, 172.98it/s][A
 60%|██████    | 331/548 [00:01<00:01, 174.

save
4 epochs | train loss: 1.678 | train acc: 1.000



  0%|          | 0/548 [00:00<?, ?it/s][A
  2%|▏         | 12/548 [00:00<00:04, 119.54it/s][A
  5%|▌         | 30/548 [00:00<00:03, 154.76it/s][A
  9%|▉         | 49/548 [00:00<00:02, 168.72it/s][A
 12%|█▏        | 67/548 [00:00<00:02, 171.20it/s][A
 16%|█▌        | 85/548 [00:00<00:02, 158.38it/s][A
 18%|█▊        | 101/548 [00:00<00:02, 152.73it/s][A
 21%|██▏       | 117/548 [00:00<00:02, 149.23it/s][A
 24%|██▍       | 132/548 [00:00<00:02, 149.36it/s][A
 27%|██▋       | 148/548 [00:00<00:02, 150.16it/s][A
 30%|██▉       | 164/548 [00:01<00:02, 149.95it/s][A
 33%|███▎      | 180/548 [00:01<00:02, 147.02it/s][A
 36%|███▌      | 195/548 [00:01<00:02, 145.99it/s][A
 38%|███▊      | 210/548 [00:01<00:02, 144.89it/s][A
 41%|████      | 225/548 [00:01<00:02, 143.67it/s][A
 44%|████▍     | 240/548 [00:01<00:02, 139.10it/s][A
 47%|████▋     | 255/548 [00:01<00:02, 139.42it/s][A
 49%|████▉     | 270/548 [00:01<00:01, 140.80it/s][A
 52%|█████▏    | 285/548 [00:01<00:01, 135.

save
5 epochs | train loss: 0.808 | train acc: 1.000



  0%|          | 0/548 [00:00<?, ?it/s][A
  2%|▏         | 13/548 [00:00<00:04, 128.45it/s][A
  6%|▌         | 31/548 [00:00<00:03, 151.07it/s][A
  9%|▉         | 49/548 [00:00<00:03, 161.34it/s][A
 12%|█▏        | 67/548 [00:00<00:02, 165.84it/s][A
 15%|█▌        | 84/548 [00:00<00:02, 166.15it/s][A
 19%|█▊        | 102/548 [00:00<00:02, 168.01it/s][A
 22%|██▏       | 120/548 [00:00<00:02, 170.53it/s][A
 25%|██▌       | 138/548 [00:00<00:02, 171.97it/s][A
 29%|██▊       | 157/548 [00:00<00:02, 175.88it/s][A
 32%|███▏      | 175/548 [00:01<00:02, 176.21it/s][A
 35%|███▌      | 193/548 [00:01<00:02, 175.12it/s][A
 39%|███▊      | 212/548 [00:01<00:01, 177.34it/s][A
 42%|████▏     | 230/548 [00:01<00:01, 176.95it/s][A
 45%|████▌     | 249/548 [00:01<00:01, 179.79it/s][A
 49%|████▊     | 267/548 [00:01<00:01, 177.73it/s][A
 52%|█████▏    | 286/548 [00:01<00:01, 180.80it/s][A
 56%|█████▌    | 305/548 [00:01<00:01, 161.83it/s][A
 59%|█████▉    | 322/548 [00:01<00:01, 157.

save
6 epochs | train loss: 0.456 | train acc: 1.000



  0%|          | 0/548 [00:00<?, ?it/s][A
  2%|▏         | 10/548 [00:00<00:05, 98.37it/s][A
  5%|▍         | 25/548 [00:00<00:04, 127.76it/s][A
  8%|▊         | 44/548 [00:00<00:03, 152.37it/s][A
 11%|█▏        | 62/548 [00:00<00:02, 162.72it/s][A
 15%|█▍        | 81/548 [00:00<00:02, 170.56it/s][A
 18%|█▊        | 100/548 [00:00<00:02, 175.90it/s][A
 22%|██▏       | 119/548 [00:00<00:02, 173.38it/s][A
 25%|██▌       | 138/548 [00:00<00:02, 178.01it/s][A
 29%|██▊       | 157/548 [00:00<00:02, 179.32it/s][A
 32%|███▏      | 175/548 [00:01<00:02, 174.32it/s][A
 35%|███▌      | 194/548 [00:01<00:02, 176.31it/s][A
 39%|███▉      | 213/548 [00:01<00:01, 179.00it/s][A
 42%|████▏     | 232/548 [00:01<00:01, 180.18it/s][A
 46%|████▌     | 251/548 [00:01<00:01, 180.56it/s][A
 49%|████▉     | 270/548 [00:01<00:01, 181.26it/s][A
 53%|█████▎    | 289/548 [00:01<00:01, 183.70it/s][A
 56%|█████▌    | 308/548 [00:01<00:01, 181.30it/s][A
 60%|█████▉    | 327/548 [00:01<00:01, 182.0

save
7 epochs | train loss: 0.331 | train acc: 1.000



  0%|          | 0/548 [00:00<?, ?it/s][A
  3%|▎         | 16/548 [00:00<00:03, 156.71it/s][A
  6%|▌         | 33/548 [00:00<00:03, 162.87it/s][A
  9%|▉         | 52/548 [00:00<00:02, 173.06it/s][A
 13%|█▎        | 71/548 [00:00<00:02, 178.22it/s][A
 17%|█▋        | 91/548 [00:00<00:02, 183.28it/s][A
 20%|██        | 110/548 [00:00<00:02, 180.15it/s][A
 24%|██▎       | 129/548 [00:00<00:02, 181.19it/s][A
 27%|██▋       | 148/548 [00:00<00:02, 179.42it/s][A
 30%|███       | 166/548 [00:00<00:02, 175.77it/s][A
 34%|███▎      | 184/548 [00:01<00:02, 174.55it/s][A
 37%|███▋      | 202/548 [00:01<00:01, 176.07it/s][A
 40%|████      | 221/548 [00:01<00:01, 179.68it/s][A
 44%|████▍     | 240/548 [00:01<00:01, 181.33it/s][A
 47%|████▋     | 259/548 [00:01<00:01, 182.67it/s][A
 51%|█████     | 278/548 [00:01<00:01, 183.71it/s][A
 54%|█████▍    | 297/548 [00:01<00:01, 184.97it/s][A
 58%|█████▊    | 316/548 [00:01<00:01, 180.68it/s][A
 61%|██████    | 335/548 [00:01<00:01, 182.

save
8 epochs | train loss: 0.263 | train acc: 1.000



  0%|          | 0/548 [00:00<?, ?it/s][A
  3%|▎         | 15/548 [00:00<00:03, 143.44it/s][A
  6%|▌         | 34/548 [00:00<00:03, 168.31it/s][A
 10%|▉         | 53/548 [00:00<00:02, 176.18it/s][A
 13%|█▎        | 71/548 [00:00<00:02, 176.28it/s][A
 16%|█▋        | 90/548 [00:00<00:02, 180.49it/s][A
 20%|█▉        | 109/548 [00:00<00:02, 178.23it/s][A
 23%|██▎       | 128/548 [00:00<00:02, 179.88it/s][A
 27%|██▋       | 147/548 [00:00<00:02, 166.33it/s][A
 30%|███       | 165/548 [00:00<00:02, 168.88it/s][A
 34%|███▎      | 184/548 [00:01<00:02, 173.29it/s][A
 37%|███▋      | 204/548 [00:01<00:01, 178.67it/s][A
 41%|████      | 223/548 [00:01<00:01, 180.51it/s][A
 44%|████▍     | 242/548 [00:01<00:01, 181.85it/s][A
 48%|████▊     | 261/548 [00:01<00:01, 179.89it/s][A
 51%|█████     | 280/548 [00:01<00:01, 179.68it/s][A
 55%|█████▍    | 299/548 [00:01<00:01, 182.02it/s][A
 58%|█████▊    | 318/548 [00:01<00:01, 180.79it/s][A
 61%|██████▏   | 337/548 [00:01<00:01, 175.

save
9 epochs | train loss: 0.215 | train acc: 1.000



  0%|          | 0/548 [00:00<?, ?it/s][A
  3%|▎         | 15/548 [00:00<00:03, 144.72it/s][A
  6%|▌         | 34/548 [00:00<00:03, 168.60it/s][A
 10%|▉         | 54/548 [00:00<00:02, 179.33it/s][A
 13%|█▎        | 72/548 [00:00<00:02, 178.20it/s][A
 17%|█▋        | 91/548 [00:00<00:02, 179.43it/s][A
 20%|█▉        | 109/548 [00:00<00:02, 179.22it/s][A
 23%|██▎       | 127/548 [00:00<00:02, 173.77it/s][A
 27%|██▋       | 146/548 [00:00<00:02, 176.60it/s][A
 30%|██▉       | 164/548 [00:00<00:02, 169.77it/s][A
 33%|███▎      | 182/548 [00:01<00:02, 171.39it/s][A
 36%|███▋      | 200/548 [00:01<00:02, 172.18it/s][A
 40%|███▉      | 219/548 [00:01<00:01, 175.46it/s][A
 43%|████▎     | 238/548 [00:01<00:01, 177.42it/s][A
 47%|████▋     | 256/548 [00:01<00:01, 177.84it/s][A
 50%|█████     | 274/548 [00:01<00:01, 178.18it/s][A
 53%|█████▎    | 293/548 [00:01<00:01, 178.49it/s][A
 57%|█████▋    | 311/548 [00:01<00:01, 173.32it/s][A
 60%|██████    | 329/548 [00:01<00:01, 174.

save
10 epochs | train loss: 0.176 | train acc: 1.000


In [None]:
SC_Wrapper = SoundClassificationWrapper()
SC_Wrapper.load(f'{MODEL_DIR}model_finetune.ckpt')
preds = []
lbls = []
for x,y in test_ds:
    result = SC_Wrapper.inference(x)
    preds.append(result['logits'])
    lbls.append(y)
preds = tf.concat(preds,axis=0)
lbls = tf.concat(lbls,axis=0)
print(f"  test acc: {categoricalAcc(lbls,preds):.3f}")



  test acc: 0.923


### Save TF-lite model

In [None]:
SC_Wrapper = SoundClassificationWrapper()
SC_Wrapper.load(f'{MODEL_DIR}model_finetune.ckpt')
SAVED_MODEL_DIR = f'{MODEL_DIR}model_wrapper'

tf.saved_model.save(
    SC_Wrapper,
    SAVED_MODEL_DIR,
    signatures={
        'train':
            SC_Wrapper.train.get_concrete_function(),
        'finetune':
            SC_Wrapper.finetune.get_concrete_function(),
        'inference':
            SC_Wrapper.inference.get_concrete_function(),
        'save':
            SC_Wrapper.save.get_concrete_function(),
        'load':
            SC_Wrapper.load.get_concrete_function(),
    })


# Convert the model
converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_MODEL_DIR)
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # enable TensorFlow Lite ops.
    tf.lite.OpsSet.SELECT_TF_OPS  # enable TensorFlow ops.
]
converter.experimental_enable_resource_variables = True
tflite_model = converter.convert()

model_file_path = f'{MODEL_DIR}sc_model.tflite'
with open(model_file_path, 'wb') as model_file:
    model_file.write(tflite_model)



In [None]:
interpreter = tf.lite.Interpreter(model_path=f'{MODEL_DIR}sc_model.tflite')
interpreter.allocate_tensors()

infer = interpreter.get_signature_runner("inference")

In [None]:
import scipy
w = get_waveform(f'{DATA_PATH}maml-data-one-sec/maml_data_one_group/6_354180_2079872-hq_chunk1_chunk0_000.wav')
resample_w = tf.expand_dims(scipy.signal.resample(w, 16000),axis=0)
w = tf.expand_dims(w,axis=0)

# print(w.shape)
# print(resample_w.shape)

class_original = SC_Wrapper.inference(x=w)['class']
class_lite = infer(x=w)['class']
print("Check if these two are the same:")
print(class_original)
print(class_lite)

Check if these two are the same:
tf.Tensor([1], shape=(1,), dtype=int64)
[1]


## Experiments

+ The whole model v.s. prediction head only (prediction head only)
+ Batch size of testing data (small but same distribution is okay)
+ Data imbalance problem (it looks okay (maybe no need of cGAN))

### Finetuning

In [None]:
train_ds = tf.data.Dataset.from_tensor_slices((tf.cast(train_data.data,tf.float32),tf.keras.utils.to_categorical(train_data.labels)))
dev_ds = tf.data.Dataset.from_tensor_slices((tf.cast(valid_data.data,tf.float32),tf.keras.utils.to_categorical(valid_data.labels)))
dev1_ds = dev_ds.shuffle(SHUFFLE_BUFFER_SIZE,reshuffle_each_iteration=True).batch(1)
dev64_ds = dev_ds.shuffle(SHUFFLE_BUFFER_SIZE,reshuffle_each_iteration=True).batch(64)
train1_ds = dev_ds.shuffle(SHUFFLE_BUFFER_SIZE,reshuffle_each_iteration=True).batch(1)
train64_ds = dev_ds.shuffle(SHUFFLE_BUFFER_SIZE,reshuffle_each_iteration=True).batch(64)
test_ds = tf.data.Dataset.from_tensor_slices((tf.cast(test_data.data,tf.float32),tf.keras.utils.to_categorical(test_data.labels)))
test_ds = test_ds.batch(1)


In [None]:
# data_sample = [d for i,d in enumerate(test_data.data) if i%40==0]
# lbl_sample = [l for i,l in enumerate(test_data.labels) if i%40==0]
data_sample = test_data.data[-14:]
lbl_sample = test_data.labels[-14:]
print(lbl_sample)
sample_ds = tf.data.Dataset.from_tensor_slices((tf.cast(data_sample,tf.float32),tf.keras.utils.to_categorical(lbl_sample,num_classes=10)))
sample_ds = sample_ds.shuffle(SHUFFLE_BUFFER_SIZE,reshuffle_each_iteration=True).batch(1)

[9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9]


In [None]:
NUM_EPOCHS = 5
BATCH_SIZE = 1
PATIENCE = 1
WARMUP = 0

epochs = np.arange(1, NUM_EPOCHS + 1, 1)
loss_lst = np.zeros([NUM_EPOCHS])
gen_loss_lst = np.zeros([NUM_EPOCHS])
disc_loss_lst = np.zeros([NUM_EPOCHS])
acc_lst = np.zeros([NUM_EPOCHS])

cnt = 0
best_loss = np.inf
best_acc = 0

SC_Wrapper = SoundClassificationWrapper()
SC_Wrapper.load(f'{MODEL_DIR}model_finetune.ckpt')

for i in range(NUM_EPOCHS):
    lbl = []
    pred = []
    for x,y in tqdm(sample_ds):
        # result = SC_Wrapper.train(x, y)
        result = SC_Wrapper.finetune(x, y)
        lbl.append(y)
        pred.append(result['logits'])
        print(result['loss'])

    loss_lst[i] = result['loss']
    acc_lst[i] = categoricalAcc(tf.concat(lbl,axis=0), tf.concat(pred,axis=0))

    if i > WARMUP:
        if loss_lst[i] <= best_loss:
            best_loss = loss_lst[i]
            best_acc = acc_lst[i]
            cnt = 0
            # SC_Wrapper.save(f'{MODEL_DIR}model_finetune.ckpt')
            print("save")
        else:
            cnt += 1
            if cnt > PATIENCE:
                print('Early stopping')
                break

    print(f"{i+1} epochs | finetune loss: {loss_lst[i]:.3f} | finetune acc: {acc_lst[i]:.3f}")
print(f'best loss {best_loss}')
print(f'best acc {best_acc}')

100%|██████████| 14/14 [00:04<00:00,  3.18it/s]


tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(19.409819, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(1.0251946e-05, shape=(), dtype=float32)
tf.Tensor(42.59387, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
1 epochs | finetune loss: 0.000 | finetune acc: 0.857


  0%|          | 0/14 [00:00<?, ?it/s]

tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.03522619, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)


100%|██████████| 14/14 [00:00<00:00, 184.45it/s]


tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
save
2 epochs | finetune loss: 0.000 | finetune acc: 1.000


100%|██████████| 14/14 [00:00<00:00, 187.27it/s]


tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(1.7881378e-06, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
save
3 epochs | finetune loss: 0.000 | finetune acc: 1.000


  0%|          | 0/14 [00:00<?, ?it/s]

tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)


100%|██████████| 14/14 [00:00<00:00, 169.50it/s]


tf.Tensor(0.0, shape=(), dtype=float32)
save
4 epochs | finetune loss: 0.000 | finetune acc: 1.000


  0%|          | 0/14 [00:00<?, ?it/s]

tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)


100%|██████████| 14/14 [00:00<00:00, 162.69it/s]


tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
save
5 epochs | finetune loss: 0.000 | finetune acc: 1.000
best loss 0.0
best acc 1.0


In [None]:
SC_Wrapper = SoundClassificationWrapper()
SC_Wrapper.load(f'{MODEL_DIR}model_finetune.ckpt')
preds = []
lbls = []
for x,y in test_ds:
    result = SC_Wrapper.inference(x)
    preds.append(result['logits'])
    lbls.append(y)
preds = tf.concat(preds,axis=0)
lbls = tf.concat(lbls,axis=0)
print(f"  test acc: {categoricalAcc(lbls,preds):.3f}")



  test acc: 0.824


In [None]:
SC_Wrapper.load(f'{MODEL_DIR}model_finetune.ckpt')
preds = []
lbls = []
for x,y in dev64_ds:
    result = SC_Wrapper.inference(x)
    preds.append(result['logits'])
    lbls.append(y)
preds = tf.concat(preds,axis=0)
lbls = tf.concat(lbls,axis=0)
print(f"  test acc: {categoricalAcc(lbls,preds):.3f}")

  test acc: 0.976


In [21]:
sample_file = open(f"{DATA_PATH}phone_samples/knock.txt",'r')
array_string = sample_file.read()
array = eval(w_string)
knock_waveform = tf.convert_to_tensor(array, dtype=tf.float32)

In [None]:
sample_file = open(f"{DATA_PATH}phone_samples/knock.bin",'rb')
array = np.fromfile(sample_file, dtype=np.float32)
knock_waveform = tf.convert_to_tensor(array, dtype=tf.float32)

In [30]:
w = tf.expand_dims(knock_waveform,axis=0)
l = tf.keras.utils.to_categorical([7.],num_classes=10)
print(w.shape)
print(array)

(1, 44100)
[0.0021362305, 0.0021362305, 0.0024108887, 0.0019226074, 0.002105713, 0.0021362305, 0.0022583008, 0.0016479492, 0.002166748, 0.002319336, 0.0014648438, 0.0020446777, 0.0018310547, 0.0016784668, 0.0016479492, 0.0018005371, 0.0010681152, 0.0010986328, 0.0014343262, 0.0015869141, 0.001739502, 0.0009460449, 0.0012207031, 0.0009460449, 0.0008239746, 0.0014953613, 0.0013122559, 0.0011291504, 0.00048828125, 0.0007324219, 0.0007324219, 0.000579834, 0.0002746582, 0.0009460449, 0.0013427734, 0.0009765625, 0.0010070801, 0.0004272461, 0.00076293945, 0.00061035156, 0.00064086914, 0.00030517578, 0.00064086914, 0.00064086914, 0.00018310547, 0.00012207031, 0.00048828125, 0.0012512207, 0.00024414062, 0.00039672852, 0.0010375977, 0.0008239746, 0.00061035156, 0.0007019043, 0.00079345703, 0.00079345703, 0.0006713867, 0.00088500977, 0.0006713867, 0.00039672852, 0.00024414062, 0.00045776367, 0.00021362305, 0.00076293945, 0.0010375977, 0.0010986328, 0.0009765625, 0.0011291504, 0.001373291, 0.00125

In [31]:
SC_Wrapper = SoundClassificationWrapper()
SC_Wrapper.load(f'{MODEL_DIR}model_finetune.ckpt')

SC_Wrapper.finetune(w,l)




{'loss': <tf.Tensor: shape=(), dtype=float32, numpy=6.7949063e-06>,
 'logits': <tf.Tensor: shape=(1, 10), dtype=float32, numpy=
 array([[-10.466609 , -24.02761  ,  -9.934458 , -23.613977 , -19.20146  ,
          -9.88102  , -15.613243 ,   2.9464667, -14.0475235, -23.545776 ]],
       dtype=float32)>}