# Accent Localizer

See README.md for the problem description.

## Imports

In [1]:
import os
import random
import tempfile

import pandas as pd

from pydub import AudioSegment
from pydub.playback import play

from allosaurus.app import read_recognizer

## Play a random audio file from the dataset

In [2]:
# Dataset locations: the tab-separated clip index and the folder of mp3 clips
tsv_path = './__dataset/validated.tsv'
audio_dir = './__dataset/clips/'

# Parse the index; 'sentence_domain' and 'segment' are forced to str
# (presumably to avoid mixed-type inference on those columns - TODO confirm)
tsv = pd.read_csv(
    tsv_path,
    sep='\t',
    dtype={'sentence_domain': str, 'segment': str},
)

# Draw one (path, sentence) pair at random from the index
path, sentence = random.choice(tsv[['path', 'sentence']].to_numpy())

# Resolve the clip's location on disk and decode it
audio_path = os.path.join(audio_dir, path)
audio = AudioSegment.from_file(audio_path, format='mp3')

# Report which clip was picked, then play it back
print(f"Playing audio file: {audio_path}")
print(f"Sentence: {sentence}")
play(audio)

Playing audio file: ./__dataset/clips/common_voice_en_24384091.mp3
Sentence: It is discovered that Orfieu's assistant, Scudamour, has a double in Othertime.


## Attempt to transcribe the audio file into phonemes using allosaurus

In [3]:
def convert_mp3_to_wav(mp3_path, wav_path):
    """Decode an mp3 file and write it back out in wav format.

    Args:
        mp3_path: Path of the mp3 file to read.
        wav_path: Path the wav file is written to.

    Returns:
        None.
    """
    audio = AudioSegment.from_mp3(mp3_path)
    # export() hands back the file object it wrote to; close it explicitly so
    # the handle is released before callers read or delete the wav file.
    exported = audio.export(wav_path, format='wav')
    exported.close()

# Load the default model
recognizer = read_recognizer()

# Do the conversion and recognition inside a throwaway directory. The context
# manager removes the directory and the wav file on exit, even if conversion
# or recognition raises, and nothing is written into the dataset folder.
with tempfile.TemporaryDirectory() as tmp_dir:
    # Temp path which will hold the wav file
    wav_path = os.path.join(tmp_dir, 'tmp.wav')

    # Convert the mp3 file to wav
    convert_mp3_to_wav(audio_path, wav_path)

    # Recognize the audio; with timestamp=True each output line carries a
    # start time, a duration and the recognized phoneme
    text = recognizer.recognize(wav_path, timestamp=True)

print(text)

0.600 0.045 ɪ
0.690 0.045 tʂ
0.750 0.045 ɛ
0.960 0.045 tʲ
1.020 0.045 ʌ
1.110 0.045 s
1.200 0.045 k
1.260 0.045 æ
1.350 0.045 b̞
1.410 0.045 a
1.800 0.045 ð
1.860 0.045 ɛ
2.100 0.045 s
2.580 0.045 ɔ
2.670 0.045 ɹ
2.820 0.045 f
2.940 0.045 i
3.150 0.045 l
3.420 0.045 z
3.750 0.045 ə
3.870 0.045 s
3.960 0.045 ɯ
4.050 0.045 s
4.140 0.045 t
4.230 0.045 ɹ̩
4.320 0.045 n
4.590 0.045 s
5.370 0.045 k
5.430 0.045 ɑ
5.520 0.045 d
5.610 0.045 ɹ̩
5.760 0.045 m
5.820 0.045 uə
6.660 0.045 h
6.720 0.045 æ
6.810 0.045 z
6.900 0.045 ə
7.050 0.045 d
7.110 0.045 ɑ
7.230 0.045 b
7.290 0.045 ə
7.320 0.045 l
7.470 0.045 ɪ
7.560 0.045 n
7.860 0.045 ʔ
7.920 0.045 ʌ
8.010 0.045 v
8.100 0.045 ə
8.250 0.045 t
8.370 0.045 a
8.610 0.045 n
