In [7]:
import os

from pydub import AudioSegment

def mp3_2_wav(file_path):
    """Convert an mp3 file to a wav file written next to it.

    Args:
        file_path: mp3 file path.
    Returns:
        wav file path: ``file_path`` with its final extension replaced by
        ``.wav``.
    """
    # Swap only the trailing extension. str.replace('.mp3', '.wav') would also
    # rewrite '.mp3' inside directory names, and for a path without a
    # lowercase '.mp3' (e.g. 'song.MP3') it would return the path unchanged,
    # so the export below would overwrite the source file.
    stem, _ext = os.path.splitext(file_path)
    wav_file_path = stem + '.wav'

    sound = AudioSegment.from_mp3(file_path)
    # export() returns the opened output file; close it so the data is flushed
    # deterministically instead of relying on garbage collection.
    sound.export(wav_file_path, format="wav").close()
    return wav_file_path

In [14]:
from pydub import AudioSegment

def stereo2mono(file_path):
    """Split a wav file into separate mono wav files for its first two channels.

    The channels are written next to the input as ``<name>_left.wav`` and
    ``<name>_right.wav``, where ``<name>`` is ``file_path`` without its final
    extension.

    Args:
        file_path: wav file path.
    Returns:
        Path of the right-channel wav file, or ``file_path`` unchanged when
        the audio has fewer than two channels and nothing is written.
    """
    # Open the (expected) stereo sound
    stereo_sound = AudioSegment.from_wav(file_path)

    # split_to_mono() returns a sequence with one mono segment per channel
    mono_audios = stereo_sound.split_to_mono()
    if len(mono_audios) < 2:
        # There is no right channel to export (mono_audios[1] would raise
        # IndexError); the input is already usable as a mono file.
        return file_path

    # Derive the output names from the trailing extension only.
    # str.replace('.wav', ...) would also touch '.wav' inside directory names
    # and, for e.g. 'clip.WAV', would leave the path unchanged so both exports
    # overwrote the input file.
    stem, _ext = os.path.splitext(file_path)
    left_path = stem + '_left.wav'
    right_path = stem + '_right.wav'

    # export() returns the opened output file; close each one explicitly
    # rather than leaving the handles to the garbage collector.
    mono_audios[0].export(left_path, format="wav").close()
    mono_audios[1].export(right_path, format="wav").close()
    return right_path

In [15]:
# Imports
from tflite_support.task import audio
from tflite_support.task import core
from tflite_support.task import processor
import os

# Initialization: build an audio classifier from the TFLite model file.
model_path = "./converted_tflite/soundclassifier_with_metadata.tflite"
base_options = core.BaseOptions(file_name=model_path)
# max_results=2 -- presumably limits the result to the two top-scoring categories
classification_options = processor.ClassificationOptions(max_results=2)
options = audio.AudioClassifierOptions(base_options=base_options, classification_options=classification_options)
classifier = audio.AudioClassifier.create_from_options(options)

# Alternatively, you can create an audio classifier in the following manner:
# classifier = audio.AudioClassifier.create_from_file(model_path)

# Run inference on one sample clip.
audio_path = "./audio/cafe/cafe-ambience-9263.mp3"
# The clip goes mp3 -> wav -> one-channel wav before it is loaded.
# NOTE: audio_path is rebound here and from now on holds the wav path, not the mp3 path.
audio_path = mp3_2_wav(audio_path)
mono_audio_path = stereo2mono(audio_path)
# Load the wav file, passing the classifier's required input buffer size.
audio_file = audio.TensorAudio.create_from_wav_file(mono_audio_path, classifier.required_input_buffer_size)
audio_result = classifier.classify(audio_file)
print(audio_result)

ClassificationResult(classifications=[Classifications(categories=[Category(index=8, score=0.6196929812431335, display_name='', category_name='8 카페'), Category(index=2, score=0.09672852605581284, display_name='', category_name='2 police')], head_index=0, head_name='probability')])
