<a href="https://colab.research.google.com/github/Hildone/SecuCare/blob/main/Voice_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Machine Learning Of SecuCare

## Table of Contents

1. Prepare Data
2. Preprocessing Data
3. Training Data
4. Prediction

[Dataset Audio (RAW)](https://drive.google.com/drive/folders/1EBHCdvuKrRydTHwajohGZNZNV10BsdS6)

[Dataset Audio (CSV)](https://github.com/Hildone/SecuCare/tree/main/Machine%20Learning/proper_dataset)

[Model Machine Learning](https://github.com/Hildone/SecuCare/tree/main/Machine%20Learning/saved_model)


### Prepare Data
Kode di bawah ini digunakan untuk membuat dataset dengan merekam audio **"Tolong"** dan **selain "Tolong"** dalam format WAV.

In [None]:
import sounddevice as sd
from scipy.io.wavfile import write


def rekam_wakeword_audio(save_path, n_kali=20, fs=44100, detik=3):
    """Record wake-word ("Tolong") clips and save each one as a WAV file.

    Every take is captured in stereo and written to
    ``save_path + str(i) + ".wav"`` for ``i`` in ``0 .. n_kali - 1``. The
    function waits for Enter before the first take and again after each
    take; the prompt invites the user to abort with Ctrl+C.

    Args:
        save_path: Prefix prepended to the take index, e.g. ``"dataaudio/"``.
        n_kali: Number of takes to record.
        fs: Sample rate in Hz.
        detik: Length of each take in seconds.
    """
    import os  # local import: the surrounding cell does not import os

    # Create the target folder up front so the first take is not lost to a
    # missing-directory error when the WAV file is opened for writing.
    folder = os.path.dirname(save_path)
    if folder:
        os.makedirs(folder, exist_ok=True)

    jumlah_sampel = int(detik * fs)  # loop-invariant, so computed once

    input("Untuk merekam audio Tolong tekan Enter : ")
    for i in range(n_kali):
        rekaman = sd.rec(jumlah_sampel, samplerate=fs, channels=2)
        sd.wait()  # block until the capture has finished
        write(save_path + str(i) + ".wav", fs, rekaman)
        input(f"Tekan Untuk merekam Kembali atau stop dengan ctrl + c ({i+1}/{n_kali})")


def rekam_audio_background(save_path, n_kali=33, fs=44100, detik=3, indeks_awal=120):
    """Record background (non-"Tolong") clips back to back and save them as WAV.

    After a single Enter prompt, ``n_kali`` stereo takes are captured one
    after another with no pause in between. Take ``i`` is written to
    ``save_path + str(indeks_awal + i) + ".wav"`` and a progress line is
    printed after each one.

    Args:
        save_path: Prefix prepended to the file index, e.g. ``"bgaudio/"``.
        n_kali: Number of takes to record.
        fs: Sample rate in Hz.
        detik: Length of each take in seconds.
        indeks_awal: Index used for the first file name; later takes count
            up from it.
    """
    import os  # local import: the surrounding cell does not import os

    # Create the target folder up front so the first take is not lost to a
    # missing-directory error when the WAV file is opened for writing.
    folder = os.path.dirname(save_path)
    if folder:
        os.makedirs(folder, exist_ok=True)

    jumlah_sampel = int(detik * fs)  # loop-invariant, so computed once

    input("Untuk merekam audio background tekan Enter : ")
    for i in range(n_kali):
        rekaman = sd.rec(jumlah_sampel, samplerate=fs, channels=2)
        sd.wait()  # block until the capture has finished
        write(save_path + str(indeks_awal + i) + ".wav", fs, rekaman)
        print(f"Sekarang Sedang Berada Di : {i+1}/{n_kali}")

# Record each class into its own folder: wake-word clips first, then
# background clips.
for judul, perekam, folder_tujuan in (
    ("Merekam Tolong Audio: \n", rekam_wakeword_audio, "dataaudio/"),
    ("Merekam non-Tolong Audio: \n", rekam_audio_background, "bgaudio/"),
):
    print(judul)
    perekam(folder_tujuan)


### Preprocessing Data
Kode di bawah ini mengelompokkan serta melabeli dataset audio yang sudah dikumpulkan sehingga siap untuk tahap training.

In [None]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def _daftar_audio(folder):
    """Return the regular, non-hidden files in ``folder`` as sorted paths.

    Sorting makes the dataset row order reproducible (``os.listdir`` order is
    arbitrary). Hidden entries and sub-directories such as
    ``.ipynb_checkpoints`` are skipped because they cannot be loaded as audio.
    """
    return [
        folder + nama
        for nama in sorted(os.listdir(folder))
        if not nama.startswith(".") and os.path.isfile(folder + nama)
    ]


def _ekstrak_fitur(lokasi_file, n_mfcc=40):
    """Return the MFCC matrix of one audio file averaged over its frames."""
    sinyal, laju = librosa.load(lokasi_file)
    koefisien = librosa.feature.mfcc(y=sinyal, sr=laju, n_mfcc=n_mfcc)
    return np.mean(koefisien.T, axis=0)


# Class label -> list of audio files (0 = background, 1 = wake word).
lokasi_data = {
    0: _daftar_audio("bgaudio/"),
    1: _daftar_audio("dataaudio/"),
}

# Preview one background clip. Fall back to the first available clip when the
# preferred file is absent (the recorder in this notebook numbers background
# clips from 120, so "1.wav" only exists in externally supplied data).
sample = "bgaudio/1.wav"
if not os.path.isfile(sample) and lokasi_data[0]:
    sample = lokasi_data[0][0]
data, sample_rate = librosa.load(sample)

plt.title(" Gelombang Suara ")
librosa.display.waveshow(data, sr=sample_rate)
plt.show()

mfccs = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
print("Bentuk MfCC: ", mfccs.shape)

plt.title('MFCC')
librosa.display.specshow(mfccs, sr=sample_rate, x_axis='time')
plt.show()

# Build one [feature_vector, label] row per clip.
all_data = []
for label_kelas, list_file in lokasi_data.items():
    all_data.extend([_ekstrak_fitur(data_tunggal), label_kelas] for data_tunggal in list_file)
    print(f"Sukses Melabeli Data {label_kelas}")

df = pd.DataFrame(all_data, columns=["fitur", 'label_kelas'])

# NOTE: the file is a pandas pickle despite its ".csv" extension; the name is
# kept because the training step reads this exact path with read_pickle.
os.makedirs("proper_dataset", exist_ok=True)
df.to_pickle('proper_dataset/audio_data.csv')

### Training Data
Kode di bawah ini digunakan untuk tahap training dengan dataset yang sudah proper.

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# Load the labelled feature table written by the preprocessing step (a pandas
# pickle, despite the ".csv" extension).
# SECURITY: unpickling can execute arbitrary code; only load a file you
# produced yourself or obtained from a source you trust.
df = pd.read_pickle("proper_dataset/audio_data.csv")

# Each row holds one per-clip feature vector; stacking them yields an
# (n_clips, n_features) matrix without hard-coding the feature length.
x = np.stack(df['fitur'].tolist())

y = np.array(df["label_kelas"].tolist())
# Pin the class count so the one-hot width always matches the 2-unit output
# layer, even if one class happens to be missing from the table.
y = tf.keras.utils.to_categorical(y, num_classes=2)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Two L2-regularised hidden layers with dropout, followed by one output unit
# per class.
# NOTE(review): the output uses two independent sigmoid units trained with
# binary cross-entropy on one-hot targets, so the two scores are not forced
# to sum to 1. Left unchanged to keep saved models comparable.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=x_train[0].shape, kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(2, activation='sigmoid'),
])

# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would emit a stray "None" line).
model.summary()
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.0001, decay_steps=10000, decay_rate=0.9)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(
    optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy']
)

history = model.fit(x_train, y_train, epochs=1000)
model.save("saved_model/Model.h5")
print(model.evaluate(x_test, y_test))

# Turn per-class scores into class indices for the report.
y_pred = np.argmax(model.predict(x_test), axis=1)
print(classification_report(np.argmax(y_test, axis=1), y_pred))
print(y_pred)

### Prediction
Kode di bawah ini digunakan untuk memprediksi input suara pengguna: model akan menentukan apakah input tersebut termasuk "Tolong" atau tidak.

In [None]:
import sounddevice as sd
from scipy.io.wavfile import write
import librosa
import numpy as np
from keras.models import load_model

# --- Recording settings ---
fs = 44100                    # sample rate in Hz
seconds = 2                   # length of each capture
filename = "prediction.wav"   # scratch file, overwritten on every pass
jumlah_sampel = int(seconds * fs)

# --- Load the trained classifier ---
model = load_model("saved_model/Model.h5")

print("Prediksi Dimulai: ")
i = 0  # number of positive detections so far
while True:
    print("Bicara Sekarang : ")
    myrecording = sd.rec(jumlah_sampel, samplerate=fs, channels=2)
    sd.wait()
    write(filename, fs, myrecording)

    # Reload the clip from disk and reduce it to one frame-averaged MFCC vector.
    audio, sample_rate = librosa.load(filename)
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfcc_processed = mfcc.T.mean(axis=0)

    # The model expects a batch dimension, so feed a batch of one.
    prediction = model.predict(mfcc_processed[np.newaxis, ...])
    skor_tolong = prediction[:, 1]

    if not (skor_tolong > 0.99):
        print("Tolong Not Detected")
        print("Confidence:", prediction[:, 0])
    else:
        print(f"Tolong Detected ({i})")
        print("Confidence:", skor_tolong)
        i += 1

    input("Enter")