In [5]:
# !pip install datasets
# !pip install librosa
# !pip install pydub

In [6]:
import IPython.display as pds
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
import random
import zipfile
import librosa
import os
import re
import json
import subprocess
import tensorflow as tf
import tensorflow_hub as hub
warnings.filterwarnings("ignore")

from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from scipy import signal

In [7]:
GIT_DIR = "IOH-Chat-App/"
GIT_URL = "https://github.com/Bangkit-Capstone-Team/IOH-Chat-App.git"

if not os.path.exists(GIT_DIR):
  subprocess.call(["git", "clone", GIT_URL])

In [8]:
SPEECH_DATA_DIR = "/content/IOH-Chat-App/Machine Learning/datasets/speech/audio.zip"

zf = zipfile.ZipFile(SPEECH_DATA_DIR)
zf.extractall("/tmp")
zf.close()

In [9]:
DATASET_PATH = "/tmp/audio/"

In [10]:
def decode_audio(audio_path):
  # audio, _ = wav.read(audio_path)
  audio, _ = librosa.load(audio_path)
  return audio

def get_label(file_path):
  string_split = os.path.split(file_path)
  result = re.findall(r"^\w*", string_split[-1])[0]
  return result

def audio_signal_and_label(file_path):
  label = get_label(file_path)
  feature = decode_audio(file_path)
  return feature, label

def prepare_dataset(dataset_path):

  json_data = {
      "labels": list(),
      "features": list(),
      "labels_id": list(),
  }

  filenames = os.listdir(dataset_path)
  audio_path = sorted([os.path.join(dataset_path, filename) for filename in filenames])

  for i, path in enumerate(audio_path):
    if os.path.getsize(path) != 0:
      features, labels = audio_signal_and_label(path)

      json_data["labels"].append(labels)
      json_data["features"].append(features)
      json_data["labels_id"].append(i)
      print(f"{labels}: {i}")

      i =+ 1

  return json_data

In [11]:
dataset = prepare_dataset(DATASET_PATH)

1: 0
10: 1
100: 2
100000: 3
15: 4
16: 5
17: 6
18: 7
1910: 8
1974: 9
1989: 10
1990: 11
1994: 12
2: 13
200: 14
2000: 15
2013: 16
22: 17
250: 18
3: 19
30: 20
300: 21
300000: 22
4: 23
40: 24
5: 25
5000: 26
50000: 27
6: 28
60: 29
630: 30
7: 31
8: 32
80: 33
800: 34
810: 35
82: 36
abad: 37
abaikan: 38
abu: 39
ac: 40
acara: 41
acuh: 42
ada: 43
adakah: 44
adalah: 45
adanya: 46
adat: 47
adikku: 48
adikmu: 49
adiknya: 50
adil: 51
afrika: 52
agak: 53
agar: 54
agresif: 55
ahlinya: 56
aids: 57
air: 58
airnya: 59
aja: 60
ajar: 61
akal: 62
akan: 63
akankah: 64
akhir: 65
akhir: 66
akhirnya: 67
akibat: 68
akibatnya: 69
akordion: 70
akrab: 71
aksara: 72
akta: 73
aktor: 74
aktris: 75
aku: 76
akui: 77
akulah: 78
akuntan: 79
akurasi: 80
akurat: 81
alam: 82
alamat: 83
alamatmu: 84
alamatnya: 85
alami: 86
alasan: 87
alasannya: 88
album: 89
alergi: 90
alice: 91
alih: 92
alkitab: 93
aluminium: 94
alunan: 95
aman: 96
amatir: 97
ambil: 98
ambilkan: 99
ambulans: 100
amerika: 101
amplopnya: 102
anak: 103
anak: 104


In [12]:
df = pd.DataFrame(dataset)
df.tail()

Unnamed: 0,labels,features,labels_id
4083,youtube,"[-7.479727e-09, -5.0465445e-09, 2.1381076e-08,...",4084
4084,yunani,"[-7.479727e-09, -5.0465445e-09, 2.1381076e-08,...",4085
4085,zamenhof,"[-3.8278063e-06, -2.630517e-05, 5.958403e-07, ...",4086
4086,zoologi,"[-3.8521825e-06, -2.6262665e-05, 5.318503e-07,...",4087
4087,ésuk,"[-7.479727e-09, -5.0465445e-09, 2.1381076e-08,...",4088


In [13]:
def get_spectrogram(features, sr):
  spectrogram_list = list()

  for feature in features:
    frequencies, times, spectrogram = signal.spectrogram(features[0], sr)
    spectrogram = spectrogram[:, :, np.newaxis]
    spectrogram_list.append(spectrogram)

  return np.array(spectrogram_list)

In [14]:
SAMPLING_RATE = 22050

features = df.features.values
labels = df.labels.values

spectrogram = get_spectrogram(features, SAMPLING_RATE)

random_idx = random.randint(0, len(features))
pds.Audio(features[random_idx], rate=SAMPLING_RATE)

print(spectrogram[0].shape)

(129, 106, 1)


In [15]:
random_idx = random.randint(0, len(features))
pds.Audio(features[random_idx], rate=SAMPLING_RATE)

train_ds = spectrogram, labels

In [16]:
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_layer = hub.KerasLayer(yamnet_model_handle,
                                trainable=False, name='yamnet')

def getModel(n_class, input_shape):
  model = tf.keras.models.Sequential()

  model.add(yamnet_layer)
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(128, activation='relu'))
  model.add(tf.keras.layers.Dropout(0.5))

  model.add(layers.Dense(n_class, activation='softmax'))

  return model

In [17]:
LR = 1e-4
OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LR)
LOSS = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),

input_shape = train_ds[0][0].shape
num_classes = len(labels)

model = getModel(num_classes, input_shape)

model.compile(optimizer=OPTIMIZER,
              loss=LOSS,
              metrics=["accuracy"])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 127, 104, 64)      640       
                                                                 
 batch_normalization (BatchN  (None, 127, 104, 64)     256       
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 64, 52, 64)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 62, 50, 32)        18464     
                                                                 
 batch_normalization_1 (Batc  (None, 62, 50, 32)       128       
 hNormalization)                                                 
                                                        

In [18]:
EPOCHS = 15

model.fit(spectrogram,
          labels,
          epochs=EPOCHS, 
          batch_size=128)

Epoch 1/15


UnimplementedError: ignored