<a href="https://colab.research.google.com/github/PUTG/AntiSmokingAssistant/blob/main/lighter_sound_classifier%EC%9D%98_%EC%82%AC%EB%B3%B8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Copyright 2021 The TensorFlow Hub Authors.
Licensed under the Apache License, Version 2.0 (the "License");

In [None]:
from google.colab import drive

# Mount Google Drive so that files under /content/drive are reachable
# from this Colab runtime.
drive.mount('/content/drive')

In [None]:
# Location of the pinned dependency list on the mounted Drive.
file_path = '/content/drive/MyDrive/requirements.txt'

# Read the whole file into memory
with open(file_path, 'r') as file:
    requirements = file.read()

# Show the requirements so they can be checked before installing.
print(requirements)


In [None]:
# Install every package listed in the requirements file stored on Drive.
!pip install -r '/content/drive/MyDrive/requirements.txt'


In [None]:
# Install Model Maker pinned to 0.4.2; --no-dependencies keeps pip from
# installing the package's own dependency list.
!pip install tflite-model-maker==0.4.2 --no-dependencies

In [None]:
# Install tensorflowjs (unpinned).
# NOTE(review): presumably needed for the browser export format — confirm.
!pip install tensorflowjs

In [None]:
import tensorflow as tf
import tflite_model_maker as mm
from tflite_model_maker import audio_classifier
import os

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import itertools
import glob
import random

from IPython.display import Audio, Image
from scipy.io import wavfile

# Report the library versions in use, for reproducibility.
print(f"TensorFlow Version: {tf.__version__}")
print(f"Model Maker Version: {mm.__version__}")

In [None]:
import pathlib

DATASET_PATH =  '/content/drive/MyDrive/lighter'  # folder path of the lighter dataset
data_dir = pathlib.Path(DATASET_PATH)

# Fallback download when the Drive folder is missing.
# NOTE(review): the URL is a Drive *folder* share link rather than a direct
# zip download, and `data_dir` is not re-pointed at the extracted files, so
# this branch probably does not produce a usable dataset — confirm and fix
# with a direct archive link.
if not data_dir.exists():
  lighter_dataset_folder = tf.keras.utils.get_file('lighter.zip',
                                                'https://drive.google.com/drive/folders/1debtw0CY8HdnT1HZsMDa7OhWJDQVIHNr?usp=sharing',
                                                cache_dir='./',
                                                cache_subdir='dataset',
                                                extract=True)

# Build train/validation datasets (70% / 30%) from the class sub-folders.
# Every clip is padded/trimmed to 16000 samples; the fixed seed keeps the
# split reproducible.
train_ds, val_ds = tf.keras.utils.audio_dataset_from_directory(
    directory=data_dir,
    batch_size=1,
    validation_split=0.3,
    seed=0,
    output_sequence_length=16000,
    subset='both')

# Take the class names from the dataset itself: they are the labels the
# loader actually assigned, in label-index order. os.listdir() would return
# every directory entry (including stray files) in arbitrary order.
class_names = list(train_ds.class_names)
print("클래스 목록:", class_names)


In [None]:
import glob
import os

# Root folder of the dataset on the mounted Drive
DATASET_PATH = '/content/drive/MyDrive/lighter'

# Glob patterns for the training split
train_lighter_path = os.path.join(DATASET_PATH, 'train/turbo_train/*.wav')
train_background_path = os.path.join(DATASET_PATH, 'train/background_train/*.wav')

# Glob patterns for the test split
test_lighter_path = os.path.join(DATASET_PATH, 'test/turbo_test/*.wav')
test_background_path = os.path.join(DATASET_PATH, 'test/background_test/*.wav')

# Resolve each pattern to the list of matching .wav files
train_lighter_files = glob.glob(train_lighter_path)
train_background_files = glob.glob(train_background_path)
test_lighter_files = glob.glob(test_lighter_path)
test_background_files = glob.glob(test_background_path)

# Print one heading per folder followed by the files found in it
for heading, wav_files in (
    ("훈련 데이터셋 디렉토리의 파일 목록:", train_lighter_files),
    ("\n훈련 배경음 디렉토리의 파일 목록:", train_background_files),
    ("\n테스트 데이터셋 디렉토리의 파일 목록:", test_lighter_files),
    ("\n테스트 배경음 디렉토리의 파일 목록:", test_background_files),
):
    print(heading)
    for file_path in wav_files:
        print(file_path)


In [None]:
import os
import glob
import random
from scipy.io import wavfile
import matplotlib.pyplot as plt
from IPython.display import Audio, display

# Maps a class folder name (the "code") to the display name printed and used
# as plot title by show_lighter_data.
lighter_code_to_name = {
    'turbo_lighter': 'lighter',
    'background': 'BackgroundSound'
}

# Dataset root as a plain string (used below with os.path.join).
data_dir = '/content/drive/MyDrive/lighter'

# Glob pattern read by get_random_audio_file().
test_files = os.path.join(data_dir, 'turbo_lighter/*.wav')

def get_random_audio_file():
    """Return a random path matching the module-level `test_files` pattern.

    Returns:
        One matching file path chosen at random, or None (after printing a
        notice) when the pattern matches no file.
    """
    candidates = glob.glob(test_files)
    if candidates:
        return random.choice(candidates)

    print("No audio files found.")
    return None

def get_random_audio_by_class(data_dir, class_name, split='train'):
    """Return a random .wav path for one class within one dataset split.

    Files are looked up under `<data_dir>/<split>/<class_name lowercased>_<split>/`.

    Args:
        data_dir: Root folder of the dataset.
        class_name: Class name; it is lower-cased to build the folder name.
        split: Split sub-folder name, 'train' by default.

    Returns:
        One matching file path chosen at random, or None (after printing a
        notice) when the folder holds no .wav file.
    """
    folder = f'{class_name.lower()}_{split}'
    pattern = os.path.join(data_dir, f'{split}/{folder}/*.wav')
    matches = glob.glob(pattern)

    if matches:
        return random.choice(matches)

    print(f'No audio files found for class: {class_name}, split: {split}')
    return None

def show_lighter_data(audio_path):
    """Print a clip's label, plot its waveform and embed an audio player.

    The name of the folder that directly contains the file is used as the
    class code and looked up in `lighter_code_to_name`; codes missing from
    that mapping are shown as "Unknown".

    Args:
        audio_path: Path to a .wav file, or None. None is ignored so the
            result of a failed lookup can be passed straight in.
    """
    if audio_path is None:
        return

    sample_rate, audio_data = wavfile.read(audio_path)

    # The containing folder name is the class code.
    lighter_code = os.path.basename(os.path.dirname(audio_path))
    lighter_name = lighter_code_to_name.get(lighter_code, "Unknown")
    print(f'lighter name: {lighter_name}')
    print(f'lighter code: {lighter_code}')

    # Draw on a fresh figure and show it right away. Plotting on the implicit
    # current axes made successive calls in one cell overlay their waveforms
    # on a single figure carrying only the last title.
    fig, ax = plt.subplots()
    ax.set_title(f'{lighter_name} ({lighter_code})')
    ax.plot(audio_data)
    plt.show()

    # wavfile.read returns multi-channel data as (samples, channels), while
    # IPython's Audio expects (channels, samples) for 2-D input.
    playable = audio_data.T if audio_data.ndim > 1 else audio_data
    display(Audio(playable, rate=sample_rate))

print('Functions and data structures created')

# Pick and show a random clip of the turbo_lighter class from the train split.
# NOTE(review): this call looks in 'train/turbo_lighter_train/*.wav', while
# the file-listing cell globs 'train/turbo_train/*.wav' — confirm which
# folder name actually exists on Drive.
random_audio_turbo_lighter_train = get_random_audio_by_class(data_dir, 'turbo_lighter', split='train')
show_lighter_data(random_audio_turbo_lighter_train)

# Pick and show a random clip of the background class from the test split.
random_audio_background_test = get_random_audio_by_class(data_dir, 'background', split='test')
show_lighter_data(random_audio_background_test)


In [None]:
# Pick a random clip and display it; the surrounding prints bracket the
# call's output.
print('Before function call')
random_audio = get_random_audio_file()
show_lighter_data(random_audio)
print('After function call')



### Playing some audio

To get a better understanding of the data, let's listen to a random audio file from the test split.

Note: later in this notebook you'll run inference on this audio for testing

In [None]:
# Glob pattern read by get_random_audio_file(); rebuilt here from DATASET_PATH.
test_files = os.path.join(DATASET_PATH, 'turbo_lighter/*.wav')


In [None]:
# Choose the clip that the inference cells further down read back through
# `random_audio`, and display it.
random_audio = get_random_audio_file()
show_lighter_data(random_audio)

In [None]:
# YAMNet-based model spec. keep_yamnet_and_custom_heads=True keeps both the
# original YAMNet head and the newly trained one, which is why the serving
# model later returns two outputs. Frame step and length are set to 3x and 6x
# the spec's expected waveform length.
spec = audio_classifier.YamNetSpec(
    keep_yamnet_and_custom_heads=True,
    frame_step=3 * audio_classifier.YamNetSpec.EXPECTED_WAVEFORM_LENGTH,
    frame_length=6 * audio_classifier.YamNetSpec.EXPECTED_WAVEFORM_LENGTH)

## Loading the data

Model Maker has the API to load the data from a folder and have it in the expected format for the model spec.

The train and test split are based on the folders. The validation dataset will be created as 20% of the train split.

Note: The `cache=True` is important to make training later faster but it will also require more RAM to hold the data. For the birds dataset that is not a problem since it's only 300MB, but if you use your own data you have to pay attention to it.


In [None]:
# Load the train and test splits from their folders on Drive.
train_data = audio_classifier.DataLoader.from_folder(
    spec, '/content/drive/MyDrive/lighter/train', cache=True)
# Carve the validation set out of the training data.
# NOTE(review): presumably 80% train / 20% validation, as the text above
# states — confirm against the DataLoader.split documentation.
train_data, validation_data = train_data.split(0.8)
test_data = audio_classifier.DataLoader.from_folder(
    spec, '/content/drive/MyDrive/lighter/test', cache=True)


## Training the model

The `audio_classifier` module has the [`create`](https://www.tensorflow.org/lite/api_docs/python/tflite_model_maker/audio_classifier/create) method, which creates a model and immediately starts training it.

You can customize many parameters; for more information, see the details in the documentation.

On this first try you'll use all the default configurations and train for 100 epochs.

Note: The first epoch takes longer than all the other ones because it's when the cache is created. After that each epoch takes close to 1 second.

In [None]:
# Training hyper-parameters: one example per batch, 100 passes over the data.
batch_size = 1
epochs = 100

print('Training the model')
# create() builds the model from the spec and trains it on train_data,
# using validation_data for validation.
model = audio_classifier.create(
    train_data,
    spec,
    validation_data,
    batch_size=batch_size,
    epochs=epochs)

The accuracy looks good, but it's important to run the evaluation step on the test data and verify that your model achieves good results on unseen data.

In [None]:
print('Evaluating the model')
# Evaluate on the held-out test split, which was not used for training.
model.evaluate(test_data)

In [None]:
def show_confusion_matrix(confusion, test_labels):
  """Plot a row-normalized confusion matrix as an annotated heatmap.

  Each row is divided by its own total, so a cell holds the fraction of that
  row's examples assigned to the column's class. Rows with no examples are
  shown as all zeros instead of NaN.

  Args:
    confusion: Square array of counts; rows are labelled "True label" and
      columns "Predicted label" on the plot.
    test_labels: Class names used as tick labels on both axes.
  """
  counts = np.asarray(confusion, dtype=float)
  # keepdims=True keeps the totals as a column vector so the division is
  # applied per row. A flat vector would broadcast across columns and divide
  # column j by the total of row j.
  row_totals = counts.sum(axis=1, keepdims=True)
  confusion_normalized = np.divide(
      counts, row_totals, out=np.zeros_like(counts), where=row_totals != 0)
  sns.heatmap(
      confusion_normalized, xticklabels=test_labels, yticklabels=test_labels,
      cmap='Blues', annot=True, fmt='.2f', square=True)
  plt.title("Confusion matrix")
  plt.ylabel("True label")
  plt.xlabel("Predicted label")

# Compute the confusion matrix on the test split and plot it, labelled with
# the test data's class names.
confusion_matrix = model.confusion_matrix(test_data)
show_confusion_matrix(confusion_matrix.numpy(), test_data.index_to_label)

In [None]:
# Build the serving model and print its input/output signature; the input
# shape is read back later to size the inference windows.
serving_model = model.create_serving_model()

print(f'Model\'s input shape and type: {serving_model.inputs}')
print(f'Model\'s output shape and type: {serving_model.outputs}')

The model created has a fixed input window.

For a given audio file, you'll have to split it in windows of data of the expected size. The last window might need to be filled with zeros.

In [None]:
# Load the clip picked earlier. The stray 'rb' argument was dropped: the
# second positional parameter of wavfile.read is `mmap`, so the string was
# silently turning on memory-mapping.
sample_rate, audio_data = wavfile.read(random_audio)

# Scale int16 PCM samples to [-1, 1].
# NOTE(review): assumes 16-bit audio at the sample rate the model expects;
# neither is checked here — confirm for your own recordings.
audio_data = np.array(audio_data) / tf.int16.max
if audio_data.ndim > 1:
    # Down-mix (samples, channels) to mono; otherwise framing would run along
    # the channel axis.
    audio_data = audio_data.mean(axis=1)
input_size = serving_model.input_shape[1]

# Cut the waveform into non-overlapping windows of the model's input size,
# zero-padding the last one.
splitted_audio_data = tf.signal.frame(audio_data, input_size, input_size, pad_end=True, pad_value=0)

print(f'Test audio path: {random_audio}')
print(f'Original size of the audio data: {len(audio_data)}')
print(f'Number of windows for inference: {len(splitted_audio_data)}')

You'll loop over all the audio windows and apply the model to each one of them.

The model you've just trained has 2 outputs: The original YAMNet's output and the one you've just trained. This is important because the real world environment is more complicated than just bird sounds. You can use the YAMNet's output to filter out non relevant audio, for example, on the birds use case, if YAMNet is not classifying Birds or Animals, this might show that the output from your model might have an irrelevant classification.


Below, both outputs are printed to make it easier to understand their relation. Most of the mistakes that your model makes occur when YAMNet's prediction is not related to your domain (e.g. birds).

In [None]:
print(random_audio)

# The YAMNet label table does not change between windows, so fetch it once
# instead of twice per iteration.
yamnet_labels = spec._yamnet_labels()

results = []
print('Result of the window ith:  your model class -> score,  (spec class -> score)')
for i, data in enumerate(splitted_audio_data):
  # The serving model returns the original YAMNet scores and the scores of
  # the custom head trained in this notebook.
  yamnet_output, inference = serving_model(data)
  results.append(inference[0].numpy())
  result_index = tf.argmax(inference[0])
  spec_result_index = tf.argmax(yamnet_output[0])
  result_str = f'Result of the window {i}: ' \
  f'\t{test_data.index_to_label[result_index]} -> {inference[0][result_index].numpy():.3f}, ' \
  f'\t({yamnet_labels[spec_result_index]} -> {yamnet_output[0][spec_result_index]:.3f})'
  print(result_str)


# Average the custom-head scores over all windows and report the top class.
results_np = np.array(results)
mean_results = results_np.mean(axis=0)
result_index = mean_results.argmax()
print(f'Mean result: {test_data.index_to_label[result_index]} -> {mean_results[result_index]}')

## Exporting the model

The last step is exporting your model to be used on embedded devices or on the browser.

The `export` method exports both formats for you.

In [None]:
from google.colab import drive
# Mount Drive so that the export directory is writable from this runtime.
drive.mount('/content/drive')

# Directory to export into (example: the "lighters_models" folder on Google Drive)
models_path = '/content/drive/MyDrive/lighters_models'


In [None]:

# Export the trained model into models_path, naming the TFLite file explicitly.
model.export(models_path, tflite_filename='lighter_model.tflite')

You can also export the SavedModel version for serving or using on a Python environment.

## Next Steps

You did it.

Now your new model can be deployed on mobile devices using the [TFLite Audio Tasks API](https://www.tensorflow.org/lite/inference_with_metadata/task_library/overview).

You can also try the same process with your own data with different classes and here is the documentation for [Model Maker for Audio Classification](https://www.tensorflow.org/lite/api_docs/python/tflite_model_maker/audio_classifier).

This notebook is part of the [ODML Learning path: Customizing an Audio Model](https://codelabs.developers.google.com/codelabs/tflite-audio-classification-custom-model-android). If you need more information follow the rest of the codelabs to have a clear understanding on how to use Machine Learning for Audio Classification.

For more information about Model Maker for Audio Classification