In [1]:
from IPython.display import display as dp
from IPython.display import Audio
import subprocess
import os

In [2]:
# Piper voice model used for synthesis (Kazakh).
# Voices for other languages are published at:
# https://github.com/rhasspy/piper/releases/tag/v0.0.2
tts_model_path = "piper/voice-kk-issai-high/kk-issai-high.onnx"

# Number of speakers available in the chosen model.
# The per-voice speaker count can be looked up at:
# https://rhasspy.github.io/piper-samples/
n_voices = 6

# Root directory under which the synthetic dataset is written
dataset_path = "synthetic_speech_commands"

In [3]:
# Vocabulary of the dataset: 35 Kazakh command words.
# Each word becomes one class (and one sub-directory) of the dataset.
commands = [
    "артқа", "алға", "оңға", "солға", "төмен",
    "жоғары", "жүр", "тоқта", "қос", "өшір",
    "иә", "жоқ", "үйрен", "орында", "нөл",
    "бір", "екі", "үш", "төрт", "бес",
    "алты", "жеті", "сегіз", "тоғыз", "төсек",
    "құс", "мысық", "ит", "бақытты", "үй",
    "оқы", "жаз", "ағаш", "көрнекі", "мәссаған",
]

In [4]:
# Create one sub-directory per command under the dataset root.
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
# NOTE: if a regular file already occupies one of these paths, makedirs
# raises FileExistsError instead of the path being silently skipped.
for command in commands:
    os.makedirs(os.path.join(dataset_path, command), exist_ok=True)

In [5]:
# Synthesize every command with every speaker of the TTS model and save the
# audio as <dataset_path>/<command>/speaker_<i>.wav.
for i in range(n_voices):
    for command in commands:
        output_wav = "speaker_{}.wav".format(i)
        output_file = os.path.join(dataset_path, command, output_wav)
        # Pass the arguments as a list rather than a shell string: no shell
        # is involved, so spaces or shell metacharacters in the paths or in
        # the command text cannot break (or inject into) the invocation.
        cmd = [
            './piper/piper',
            '--model', tts_model_path,
            '--output_file', output_file,
            '--speaker', str(i),
        ]
        # The text to speak is fed to piper on stdin (what `echo ... |` did
        # before); encode explicitly as UTF-8 so the Cyrillic text does not
        # depend on the locale of the kernel process.
        # check=True aborts the loop if piper exits with a non-zero status.
        subprocess.run(cmd, input=(command + "\n").encode("utf-8"), check=True)
        print("Speaker: {}, command: {}".format(i, command))

Load time: 0.316399 sec
Real-time factor: 0.382177 (infer=0.221854 sec, audio=0.580499 sec)


Speaker: 0, command: артқа


Load time: 0.3158 sec
Real-time factor: 0.359565 (infer=0.196203 sec, audio=0.545669 sec)


Speaker: 0, command: алға


Load time: 0.328611 sec
Real-time factor: 0.35756 (infer=0.190958 sec, audio=0.534059 sec)


Speaker: 0, command: оңға


Load time: 0.314965 sec
Real-time factor: 0.317138 (infer=0.2246 sec, audio=0.708209 sec)


Speaker: 0, command: солға


Load time: 0.316839 sec
Real-time factor: 0.41634 (infer=0.232018 sec, audio=0.557279 sec)


Speaker: 0, command: төмен


Load time: 0.332 sec
Real-time factor: 0.334212 (infer=0.213411 sec, audio=0.638549 sec)


Speaker: 0, command: жоғары


Load time: 0.328218 sec
Real-time factor: 0.44589 (infer=0.150126 sec, audio=0.336689 sec)


Speaker: 0, command: жүр


Load time: 0.333561 sec
Real-time factor: 0.379991 (infer=0.158821 sec, audio=0.417959 sec)


Speaker: 0, command: тоқта


Load time: 0.333503 sec
Real-time factor: 0.455584 (infer=0.15868 sec, audio=0.348299 sec)


Speaker: 0, command: қос


Load time: 0.338194 sec
Real-time factor: 0.333378 (infer=0.255453 sec, audio=0.766259 sec)


Speaker: 0, command: өшір


Load time: 0.333656 sec
Real-time factor: 0.487348 (infer=0.0905297 sec, audio=0.18576 sec)


Speaker: 0, command: иә


Load time: 0.335256 sec
Real-time factor: 0.411107 (infer=0.205236 sec, audio=0.499229 sec)


Speaker: 0, command: жоқ


Load time: 0.322088 sec
Real-time factor: 0.219993 (infer=0.0842858 sec, audio=0.383129 sec)


Speaker: 0, command: үйрен


Load time: 0.314008 sec
Real-time factor: 0.277837 (infer=0.251603 sec, audio=0.905578 sec)


Speaker: 0, command: орында


Load time: 0.320925 sec
Real-time factor: 0.213622 (infer=0.0843248 sec, audio=0.394739 sec)


Speaker: 0, command: нөл


Load time: 0.334466 sec
Real-time factor: 0.48055 (infer=0.128321 sec, audio=0.267029 sec)


Speaker: 0, command: бір


Load time: 0.320819 sec
Real-time factor: 0.326446 (infer=0.204662 sec, audio=0.626939 sec)


Speaker: 0, command: екі


Load time: 0.331949 sec
Real-time factor: 0.388949 (infer=0.058704 sec, audio=0.15093 sec)


Speaker: 0, command: үш


Load time: 0.336377 sec
Real-time factor: 0.368865 (infer=0.209843 sec, audio=0.568889 sec)


Speaker: 0, command: төрт


Load time: 0.335277 sec
Real-time factor: 0.464514 (infer=0.156397 sec, audio=0.336689 sec)


Speaker: 0, command: бес


Load time: 0.309344 sec
Real-time factor: 0.217124 (infer=0.136124 sec, audio=0.626939 sec)


Speaker: 0, command: алты


Load time: 0.325163 sec
Real-time factor: 0.346322 (infer=0.217123 sec, audio=0.626939 sec)


Speaker: 0, command: жеті


Load time: 0.317183 sec
Real-time factor: 0.376222 (infer=0.371274 sec, audio=0.986848 sec)


Speaker: 0, command: сегіз


Load time: 0.320682 sec
Real-time factor: 0.20838 (infer=0.10161 sec, audio=0.487619 sec)


Speaker: 0, command: тоғыз


Load time: 0.318622 sec
Real-time factor: 0.190495 (infer=0.10837 sec, audio=0.568889 sec)


Speaker: 0, command: төсек


Load time: 0.317309 sec
Real-time factor: 0.381124 (infer=0.176993 sec, audio=0.464399 sec)


Speaker: 0, command: құс


Load time: 0.316392 sec
Real-time factor: 0.403934 (infer=0.196966 sec, audio=0.487619 sec)


Speaker: 0, command: мысық


Load time: 0.32241 sec
Real-time factor: 0.514444 (infer=0.167235 sec, audio=0.325079 sec)


Speaker: 0, command: ит


Load time: 0.315601 sec
Real-time factor: 0.377817 (infer=0.337756 sec, audio=0.893968 sec)


Speaker: 0, command: бақытты


Load time: 0.305559 sec
Real-time factor: 0.418141 (infer=0.169911 sec, audio=0.406349 sec)


Speaker: 0, command: үй


Load time: 0.323018 sec
Real-time factor: 0.210342 (infer=0.0952405 sec, audio=0.452789 sec)


Speaker: 0, command: оқы


Load time: 0.306793 sec
Real-time factor: 0.421307 (infer=0.180981 sec, audio=0.429569 sec)


Speaker: 0, command: жаз


Load time: 0.323831 sec
Real-time factor: 0.219263 (infer=0.0865518 sec, audio=0.394739 sec)


Speaker: 0, command: ағаш


Load time: 0.328918 sec
Real-time factor: 0.339966 (infer=0.319707 sec, audio=0.940408 sec)


Speaker: 0, command: көрнекі


Load time: 0.328514 sec
Real-time factor: 0.334597 (infer=0.23308 sec, audio=0.696599 sec)


Speaker: 0, command: мәссаған


Load time: 0.337374 sec
Real-time factor: 0.359611 (infer=0.329831 sec, audio=0.917188 sec)


Speaker: 1, command: артқа


Load time: 0.307787 sec
Real-time factor: 0.168829 (infer=0.137207 sec, audio=0.812698 sec)


Speaker: 1, command: алға


Load time: 0.323165 sec
Real-time factor: 0.243655 (infer=0.183874 sec, audio=0.754649 sec)


Speaker: 1, command: оңға


Load time: 0.316402 sec
Real-time factor: 0.186097 (infer=0.125314 sec, audio=0.673379 sec)


Speaker: 1, command: солға


Load time: 0.3119 sec
Real-time factor: 0.168582 (infer=0.123306 sec, audio=0.731429 sec)


Speaker: 1, command: төмен


Load time: 0.314629 sec
Real-time factor: 0.335251 (infer=0.284135 sec, audio=0.847528 sec)


Speaker: 1, command: жоғары


Load time: 0.327833 sec
Real-time factor: 0.362417 (infer=0.189345 sec, audio=0.522449 sec)


Speaker: 1, command: жүр


Load time: 0.314783 sec
Real-time factor: 0.432619 (infer=0.190862 sec, audio=0.441179 sec)


Speaker: 1, command: тоқта


Load time: 0.33847 sec
Real-time factor: 0.382972 (infer=0.204529 sec, audio=0.534059 sec)


Speaker: 1, command: қос


Load time: 0.334934 sec
Real-time factor: 0.329905 (infer=0.214491 sec, audio=0.650159 sec)


Speaker: 1, command: өшір


Load time: 0.3236 sec
Real-time factor: 0.618634 (infer=0.201105 sec, audio=0.325079 sec)


Speaker: 1, command: иә


Load time: 0.328345 sec
Real-time factor: 0.275589 (infer=0.262366 sec, audio=0.952018 sec)


Speaker: 1, command: жоқ


Load time: 0.334153 sec
Real-time factor: 0.292442 (infer=0.227482 sec, audio=0.777868 sec)


Speaker: 1, command: үйрен


Load time: 0.320472 sec
Real-time factor: 0.324986 (infer=0.230158 sec, audio=0.708209 sec)


Speaker: 1, command: орында


Load time: 0.321302 sec
Real-time factor: 0.554308 (infer=0.1995 sec, audio=0.359909 sec)


Speaker: 1, command: нөл


Load time: 0.335069 sec
Real-time factor: 0.368414 (infer=0.14115 sec, audio=0.383129 sec)


Speaker: 1, command: бір


Load time: 0.315884 sec
Real-time factor: 0.167804 (infer=0.120789 sec, audio=0.719819 sec)


Speaker: 1, command: екі


Load time: 0.316896 sec
Real-time factor: 0.462719 (infer=0.0859545 sec, audio=0.18576 sec)


Speaker: 1, command: үш


Load time: 0.319279 sec
Real-time factor: 0.393169 (infer=0.168893 sec, audio=0.429569 sec)


Speaker: 1, command: төрт


Load time: 0.329796 sec
Real-time factor: 0.186535 (infer=0.116946 sec, audio=0.626939 sec)


Speaker: 1, command: бес


Load time: 0.341801 sec
Real-time factor: 0.189806 (infer=0.101368 sec, audio=0.534059 sec)


Speaker: 1, command: алты


Load time: 0.334327 sec
Real-time factor: 0.379985 (infer=0.185288 sec, audio=0.487619 sec)


Speaker: 1, command: жеті


Load time: 0.338032 sec
Real-time factor: 0.361797 (infer=0.285631 sec, audio=0.789478 sec)


Speaker: 1, command: сегіз


Load time: 0.329909 sec
Real-time factor: 0.201869 (infer=0.0960914 sec, audio=0.476009 sec)


Speaker: 1, command: тоғыз


Load time: 0.318741 sec
Real-time factor: 0.370284 (infer=0.206351 sec, audio=0.557279 sec)


Speaker: 1, command: төсек


Load time: 0.319591 sec
Real-time factor: 0.440628 (infer=0.0869666 sec, audio=0.19737 sec)


Speaker: 1, command: құс


Load time: 0.333494 sec
Real-time factor: 0.386962 (infer=0.17072 sec, audio=0.441179 sec)


Speaker: 1, command: мысық


Load time: 0.333257 sec
Real-time factor: 0.399731 (infer=0.0788948 sec, audio=0.19737 sec)


Speaker: 1, command: ит


Load time: 0.318703 sec
Real-time factor: 0.298353 (infer=0.301356 sec, audio=1.01007 sec)


Speaker: 1, command: бақытты


Load time: 0.320284 sec
Real-time factor: 0.261763 (infer=0.0850938 sec, audio=0.325079 sec)


Speaker: 1, command: үй


Load time: 0.297345 sec
Real-time factor: 0.376851 (infer=0.227512 sec, audio=0.603719 sec)


Speaker: 1, command: оқы


Load time: 0.324956 sec
Real-time factor: 0.382704 (infer=0.226603 sec, audio=0.592109 sec)


Speaker: 1, command: жаз


Load time: 0.333876 sec
Real-time factor: 0.354575 (infer=0.214063 sec, audio=0.603719 sec)


Speaker: 1, command: ағаш


Load time: 0.325362 sec
Real-time factor: 0.388667 (infer=0.306845 sec, audio=0.789478 sec)


Speaker: 1, command: көрнекі


Load time: 0.335514 sec
Real-time factor: 0.353583 (infer=0.303777 sec, audio=0.859138 sec)


Speaker: 1, command: мәссаған


Load time: 0.340567 sec
Real-time factor: 0.352559 (infer=0.249686 sec, audio=0.708209 sec)


Speaker: 2, command: артқа


Load time: 0.333782 sec
Real-time factor: 0.17212 (infer=0.145877 sec, audio=0.847528 sec)


Speaker: 2, command: алға


Load time: 0.317511 sec
Real-time factor: 0.354527 (infer=0.238731 sec, audio=0.673379 sec)


Speaker: 2, command: оңға


Load time: 0.319974 sec
Real-time factor: 0.346011 (infer=0.241031 sec, audio=0.696599 sec)


Speaker: 2, command: солға


Load time: 0.337878 sec
Real-time factor: 0.357992 (infer=0.170407 sec, audio=0.476009 sec)


Speaker: 2, command: төмен


Load time: 0.326786 sec
Real-time factor: 0.168174 (infer=0.146437 sec, audio=0.870748 sec)


Speaker: 2, command: жоғары


Load time: 0.336878 sec
Real-time factor: 0.344601 (infer=0.240049 sec, audio=0.696599 sec)


Speaker: 2, command: жүр


Load time: 0.342899 sec
Real-time factor: 0.384279 (infer=0.214151 sec, audio=0.557279 sec)


Speaker: 2, command: тоқта


Load time: 0.325882 sec
Real-time factor: 0.48069 (infer=0.161843 sec, audio=0.336689 sec)


Speaker: 2, command: қос


Load time: 0.33892 sec
Real-time factor: 0.337443 (infer=0.344758 sec, audio=1.02168 sec)


Speaker: 2, command: өшір


Load time: 0.338981 sec
Real-time factor: 0.3588 (infer=0.183289 sec, audio=0.510839 sec)


Speaker: 2, command: иә


Load time: 0.321456 sec
Real-time factor: 0.389697 (infer=0.167402 sec, audio=0.429569 sec)


Speaker: 2, command: жоқ


Load time: 0.333141 sec
Real-time factor: 0.376585 (infer=0.170514 sec, audio=0.452789 sec)


Speaker: 2, command: үйрен


Load time: 0.334864 sec
Real-time factor: 0.367066 (infer=0.272744 sec, audio=0.743039 sec)


Speaker: 2, command: орында


Load time: 0.331289 sec
Real-time factor: 0.339583 (infer=0.236553 sec, audio=0.696599 sec)


Speaker: 2, command: нөл


Load time: 0.32782 sec
Real-time factor: 0.168836 (infer=0.133293 sec, audio=0.789478 sec)


Speaker: 2, command: бір


Load time: 0.331275 sec
Real-time factor: 0.268337 (infer=0.274154 sec, audio=1.02168 sec)


Speaker: 2, command: екі


Load time: 0.314723 sec
Real-time factor: 0.546381 (infer=0.16493 sec, audio=0.301859 sec)


Speaker: 2, command: үш


Load time: 0.316452 sec
Real-time factor: 0.339232 (infer=0.224493 sec, audio=0.661769 sec)


Speaker: 2, command: төрт


Load time: 0.324731 sec
Real-time factor: 0.375732 (infer=0.183214 sec, audio=0.487619 sec)


Speaker: 2, command: бес


Load time: 0.328182 sec
Real-time factor: 0.398611 (infer=0.268416 sec, audio=0.673379 sec)


Speaker: 2, command: алты


Load time: 0.315214 sec
Real-time factor: 0.394962 (infer=0.215518 sec, audio=0.545669 sec)


Speaker: 2, command: жеті


Load time: 0.322225 sec
Real-time factor: 0.197916 (infer=0.15625 sec, audio=0.789478 sec)


Speaker: 2, command: сегіз


Load time: 0.330938 sec
Real-time factor: 0.375203 (infer=0.243941 sec, audio=0.650159 sec)


Speaker: 2, command: тоғыз


Load time: 0.327651 sec
Real-time factor: 0.340146 (infer=0.229047 sec, audio=0.673379 sec)


Speaker: 2, command: төсек


Load time: 0.342437 sec
Real-time factor: 0.351588 (infer=0.224506 sec, audio=0.638549 sec)


Speaker: 2, command: құс


Load time: 0.336006 sec
Real-time factor: 0.379031 (infer=0.198024 sec, audio=0.522449 sec)


Speaker: 2, command: мысық


Load time: 0.323905 sec
Real-time factor: 0.465122 (infer=0.172802 sec, audio=0.371519 sec)


Speaker: 2, command: ит


Load time: 0.336958 sec
Real-time factor: 0.281436 (infer=0.251595 sec, audio=0.893968 sec)


Speaker: 2, command: бақытты


Load time: 0.31916 sec
Real-time factor: 0.429947 (infer=0.164725 sec, audio=0.383129 sec)


Speaker: 2, command: үй


Load time: 0.312974 sec
Real-time factor: 0.347484 (infer=0.213817 sec, audio=0.615329 sec)


Speaker: 2, command: оқы


Load time: 0.323234 sec
Real-time factor: 0.211689 (infer=0.135174 sec, audio=0.638549 sec)


Speaker: 2, command: жаз


Load time: 0.340771 sec
Real-time factor: 0.373786 (infer=0.195284 sec, audio=0.522449 sec)


Speaker: 2, command: ағаш


Load time: 0.30547 sec
Real-time factor: 0.281347 (infer=0.254782 sec, audio=0.905578 sec)


Speaker: 2, command: көрнекі


Load time: 0.32404 sec
Real-time factor: 0.324749 (infer=0.297856 sec, audio=0.917188 sec)


Speaker: 2, command: мәссаған


Load time: 0.319835 sec
Real-time factor: 0.371591 (infer=0.28042 sec, audio=0.754649 sec)


Speaker: 3, command: артқа


Load time: 0.33033 sec
Real-time factor: 0.174334 (infer=0.119417 sec, audio=0.684989 sec)


Speaker: 3, command: алға


Load time: 0.320946 sec
Real-time factor: 0.178016 (infer=0.119872 sec, audio=0.673379 sec)


Speaker: 3, command: оңға


Load time: 0.323678 sec
Real-time factor: 0.181055 (infer=0.117714 sec, audio=0.650159 sec)


Speaker: 3, command: солға


Load time: 0.326059 sec
Real-time factor: 0.439326 (infer=0.193821 sec, audio=0.441179 sec)


Speaker: 3, command: төмен


Load time: 0.317126 sec
Real-time factor: 0.345713 (infer=0.240823 sec, audio=0.696599 sec)


Speaker: 3, command: жоғары


Load time: 0.331321 sec
Real-time factor: 0.380528 (infer=0.23415 sec, audio=0.615329 sec)


Speaker: 3, command: жүр


Load time: 0.351705 sec
Real-time factor: 0.363656 (infer=0.20688 sec, audio=0.568889 sec)


Speaker: 3, command: тоқта


Load time: 0.315719 sec
Real-time factor: 0.334727 (infer=0.159333 sec, audio=0.476009 sec)


Speaker: 3, command: қос


Load time: 0.323253 sec
Real-time factor: 0.355109 (infer=0.214386 sec, audio=0.603719 sec)


Speaker: 3, command: өшір


Load time: 0.344982 sec
Real-time factor: 0.365157 (infer=0.211973 sec, audio=0.580499 sec)


Speaker: 3, command: иә


Load time: 0.337078 sec
Real-time factor: 0.348334 (infer=0.210296 sec, audio=0.603719 sec)


Speaker: 3, command: жоқ


Load time: 0.322708 sec
Real-time factor: 0.357775 (infer=0.236764 sec, audio=0.661769 sec)


Speaker: 3, command: үйрен


Load time: 0.319155 sec
Real-time factor: 0.177373 (infer=0.125617 sec, audio=0.708209 sec)


Speaker: 3, command: орында


Load time: 0.308606 sec
Real-time factor: 0.448325 (infer=0.176971 sec, audio=0.394739 sec)


Speaker: 3, command: нөл


Load time: 0.311716 sec
Real-time factor: 0.399153 (infer=0.199269 sec, audio=0.499229 sec)


Speaker: 3, command: бір


Load time: 0.335202 sec
Real-time factor: 0.382887 (infer=0.266718 sec, audio=0.696599 sec)


Speaker: 3, command: екі


Load time: 0.325294 sec
Real-time factor: 0.493188 (infer=0.114518 sec, audio=0.2322 sec)


Speaker: 3, command: үш


Load time: 0.318889 sec
Real-time factor: 0.455234 (infer=0.174413 sec, audio=0.383129 sec)


Speaker: 3, command: төрт


Load time: 0.330441 sec
Real-time factor: 0.374251 (infer=0.199872 sec, audio=0.534059 sec)


Speaker: 3, command: бес


Load time: 0.328274 sec
Real-time factor: 0.352577 (infer=0.204671 sec, audio=0.580499 sec)


Speaker: 3, command: алты


Load time: 0.321491 sec
Real-time factor: 0.358848 (infer=0.25414 sec, audio=0.708209 sec)


Speaker: 3, command: жеті


Load time: 0.309729 sec
Real-time factor: 0.173934 (infer=0.131259 sec, audio=0.754649 sec)


Speaker: 3, command: сегіз


Load time: 0.335144 sec
Real-time factor: 0.360484 (infer=0.280409 sec, audio=0.777868 sec)


Speaker: 3, command: тоғыз


Load time: 0.336409 sec
Real-time factor: 0.36658 (infer=0.212799 sec, audio=0.580499 sec)


Speaker: 3, command: төсек


Load time: 0.319197 sec
Real-time factor: 0.263166 (infer=0.0947159 sec, audio=0.359909 sec)


Speaker: 3, command: құс


Load time: 0.308679 sec
Real-time factor: 0.3889 (infer=0.18512 sec, audio=0.476009 sec)


Speaker: 3, command: мысық


Load time: 0.310117 sec
Real-time factor: 0.205735 (infer=0.0931544 sec, audio=0.452789 sec)


Speaker: 3, command: ит


Load time: 0.316583 sec
Real-time factor: 0.300538 (infer=0.244247 sec, audio=0.812698 sec)


Speaker: 3, command: бақытты


Load time: 0.318569 sec
Real-time factor: 0.593119 (infer=0.213469 sec, audio=0.359909 sec)


Speaker: 3, command: үй


Load time: 0.315075 sec
Real-time factor: 0.37132 (infer=0.228484 sec, audio=0.615329 sec)


Speaker: 3, command: оқы


Load time: 0.331275 sec
Real-time factor: 0.193834 (infer=0.11252 sec, audio=0.580499 sec)


Speaker: 3, command: жаз


Load time: 0.326291 sec
Real-time factor: 0.174338 (infer=0.121444 sec, audio=0.696599 sec)


Speaker: 3, command: ағаш


Load time: 0.329309 sec
Real-time factor: 0.220541 (infer=0.176673 sec, audio=0.801088 sec)


Speaker: 3, command: көрнекі


Load time: 0.319153 sec
Real-time factor: 0.170688 (infer=0.150608 sec, audio=0.882358 sec)


Speaker: 3, command: мәссаған


Load time: 0.308307 sec
Real-time factor: 0.357017 (infer=0.236263 sec, audio=0.661769 sec)


Speaker: 4, command: артқа


Load time: 0.318924 sec
Real-time factor: 0.355379 (infer=0.214549 sec, audio=0.603719 sec)


Speaker: 4, command: алға


Load time: 0.312621 sec
Real-time factor: 0.42334 (infer=0.211343 sec, audio=0.499229 sec)


Speaker: 4, command: оңға


Load time: 0.314023 sec
Real-time factor: 0.226493 (infer=0.12359 sec, audio=0.545669 sec)


Speaker: 4, command: солға


Load time: 0.31658 sec
Real-time factor: 0.339442 (infer=0.240395 sec, audio=0.708209 sec)


Speaker: 4, command: төмен


Load time: 0.327661 sec
Real-time factor: 0.193539 (infer=0.155042 sec, audio=0.801088 sec)


Speaker: 4, command: жоғары


Load time: 0.318257 sec
Real-time factor: 0.407983 (infer=0.208414 sec, audio=0.510839 sec)


Speaker: 4, command: жүр


Load time: 0.323366 sec
Real-time factor: 0.379671 (infer=0.220398 sec, audio=0.580499 sec)


Speaker: 4, command: тоқта


Load time: 0.318713 sec
Real-time factor: 0.398081 (infer=0.18949 sec, audio=0.476009 sec)


Speaker: 4, command: қос


Load time: 0.332396 sec
Real-time factor: 0.291125 (infer=0.212937 sec, audio=0.731429 sec)


Speaker: 4, command: өшір


Load time: 0.322355 sec
Real-time factor: 0.543923 (infer=0.164188 sec, audio=0.301859 sec)


Speaker: 4, command: иә


Load time: 0.321767 sec
Real-time factor: 0.400323 (infer=0.218444 sec, audio=0.545669 sec)


Speaker: 4, command: жоқ


Load time: 0.323049 sec
Real-time factor: 0.362824 (infer=0.273804 sec, audio=0.754649 sec)


Speaker: 4, command: үйрен


Load time: 0.334991 sec
Real-time factor: 0.367984 (infer=0.316149 sec, audio=0.859138 sec)


Speaker: 4, command: орында


Load time: 0.34034 sec
Real-time factor: 0.355125 (infer=0.214396 sec, audio=0.603719 sec)


Speaker: 4, command: нөл


Load time: 0.326082 sec
Real-time factor: 0.210922 (infer=0.132235 sec, audio=0.626939 sec)


Speaker: 4, command: бір


Load time: 0.311079 sec
Real-time factor: 0.381969 (infer=0.252775 sec, audio=0.661769 sec)


Speaker: 4, command: екі


Load time: 0.314912 sec
Real-time factor: 0.320731 (infer=0.0446841 sec, audio=0.13932 sec)


Speaker: 4, command: үш


Load time: 0.319598 sec
Real-time factor: 0.340729 (infer=0.209661 sec, audio=0.615329 sec)


Speaker: 4, command: төрт


Load time: 0.32086 sec
Real-time factor: 0.40665 (infer=0.19829 sec, audio=0.487619 sec)


Speaker: 4, command: бес


Load time: 0.317131 sec
Real-time factor: 0.344041 (infer=0.243653 sec, audio=0.708209 sec)


Speaker: 4, command: алты


Load time: 0.323388 sec
Real-time factor: 0.347848 (infer=0.250387 sec, audio=0.719819 sec)


Speaker: 4, command: жеті


Load time: 0.322218 sec
Real-time factor: 0.190667 (infer=0.123964 sec, audio=0.650159 sec)


Speaker: 4, command: сегіз


Load time: 0.312955 sec
Real-time factor: 0.345423 (infer=0.248642 sec, audio=0.719819 sec)


Speaker: 4, command: тоғыз


Load time: 0.317479 sec
Real-time factor: 0.38331 (infer=0.226961 sec, audio=0.592109 sec)


Speaker: 4, command: төсек


Load time: 0.323007 sec
Real-time factor: 0.452705 (infer=0.157677 sec, audio=0.348299 sec)


Speaker: 4, command: құс


Load time: 0.321315 sec
Real-time factor: 0.405839 (infer=0.197895 sec, audio=0.487619 sec)


Speaker: 4, command: мысық


Load time: 0.321239 sec
Real-time factor: 0.383987 (infer=0.169407 sec, audio=0.441179 sec)


Speaker: 4, command: ит


Load time: 0.312003 sec
Real-time factor: 0.337229 (infer=0.266235 sec, audio=0.789478 sec)


Speaker: 4, command: бақытты


Load time: 0.332187 sec
Real-time factor: 0.401146 (infer=0.176977 sec, audio=0.441179 sec)


Speaker: 4, command: үй


Load time: 0.334017 sec
Real-time factor: 0.430925 (infer=0.175106 sec, audio=0.406349 sec)


Speaker: 4, command: оқы


Load time: 0.316934 sec
Real-time factor: 0.166751 (infer=0.12003 sec, audio=0.719819 sec)


Speaker: 4, command: жаз


Load time: 0.319869 sec
Real-time factor: 0.398076 (infer=0.268056 sec, audio=0.673379 sec)


Speaker: 4, command: ағаш


Load time: 0.314173 sec
Real-time factor: 0.32845 (infer=0.266931 sec, audio=0.812698 sec)


Speaker: 4, command: көрнекі


Load time: 0.345068 sec
Real-time factor: 0.377625 (infer=0.284974 sec, audio=0.754649 sec)


Speaker: 4, command: мәссаған


Load time: 0.325526 sec
Real-time factor: 0.281555 (infer=0.22555 sec, audio=0.801088 sec)


Speaker: 5, command: артқа


Load time: 0.320463 sec
Real-time factor: 0.27376 (infer=0.266981 sec, audio=0.975238 sec)


Speaker: 5, command: алға


Load time: 0.323403 sec
Real-time factor: 0.334156 (infer=0.279327 sec, audio=0.835918 sec)


Speaker: 5, command: оңға


Load time: 0.32129 sec
Real-time factor: 0.412576 (infer=0.17723 sec, audio=0.429569 sec)


Speaker: 5, command: солға


Load time: 0.33515 sec
Real-time factor: 0.376391 (infer=0.266563 sec, audio=0.708209 sec)


Speaker: 5, command: төмен


Load time: 0.315783 sec
Real-time factor: 0.236736 (infer=0.156665 sec, audio=0.661769 sec)


Speaker: 5, command: жоғары


Load time: 0.351252 sec
Real-time factor: 0.37684 (infer=0.179379 sec, audio=0.476009 sec)


Speaker: 5, command: жүр


Load time: 0.329555 sec
Real-time factor: 0.422762 (infer=0.265046 sec, audio=0.626939 sec)


Speaker: 5, command: тоқта


Load time: 0.321757 sec
Real-time factor: 0.521601 (infer=0.169562 sec, audio=0.325079 sec)


Speaker: 5, command: қос


Load time: 0.323702 sec
Real-time factor: 0.237205 (infer=0.123928 sec, audio=0.522449 sec)


Speaker: 5, command: өшір


Load time: 0.33213 sec
Real-time factor: 0.543013 (infer=0.132392 sec, audio=0.24381 sec)


Speaker: 5, command: иә


Load time: 0.316824 sec
Real-time factor: 0.280889 (infer=0.254367 sec, audio=0.905578 sec)


Speaker: 5, command: жоқ


Load time: 0.312584 sec
Real-time factor: 0.371543 (infer=0.211366 sec, audio=0.568889 sec)


Speaker: 5, command: үйрен


Load time: 0.321756 sec
Real-time factor: 0.356633 (infer=0.24429 sec, audio=0.684989 sec)


Speaker: 5, command: орында


Load time: 0.326714 sec
Real-time factor: 0.246729 (infer=0.154684 sec, audio=0.626939 sec)


Speaker: 5, command: нөл


Load time: 0.321023 sec
Real-time factor: 0.49347 (infer=0.143229 sec, audio=0.290249 sec)


Speaker: 5, command: бір


Load time: 0.324393 sec
Real-time factor: 0.363117 (infer=0.198142 sec, audio=0.545669 sec)


Speaker: 5, command: екі


Load time: 0.330221 sec
Real-time factor: 0.567388 (infer=0.111985 sec, audio=0.19737 sec)


Speaker: 5, command: үш


Load time: 0.323529 sec
Real-time factor: 0.37941 (infer=0.185007 sec, audio=0.487619 sec)


Speaker: 5, command: төрт


Load time: 0.308844 sec
Real-time factor: 0.381991 (infer=0.177396 sec, audio=0.464399 sec)


Speaker: 5, command: бес


Load time: 0.317812 sec
Real-time factor: 0.348186 (infer=0.238503 sec, audio=0.684989 sec)


Speaker: 5, command: алты


Load time: 0.313547 sec
Real-time factor: 0.395271 (infer=0.165207 sec, audio=0.417959 sec)


Speaker: 5, command: жеті


Load time: 0.321963 sec
Real-time factor: 0.167604 (infer=0.136211 sec, audio=0.812698 sec)


Speaker: 5, command: сегіз


Load time: 0.331435 sec
Real-time factor: 0.35394 (infer=0.189025 sec, audio=0.534059 sec)


Speaker: 5, command: тоғыз


Load time: 0.325971 sec
Real-time factor: 0.382311 (infer=0.204177 sec, audio=0.534059 sec)


Speaker: 5, command: төсек


Load time: 0.331171 sec
Real-time factor: 0.466706 (infer=0.124624 sec, audio=0.267029 sec)


Speaker: 5, command: құс


Load time: 0.330709 sec
Real-time factor: 0.338503 (infer=0.227941 sec, audio=0.673379 sec)


Speaker: 5, command: мысық


Load time: 0.311745 sec
Real-time factor: 0.375408 (infer=0.20049 sec, audio=0.534059 sec)


Speaker: 5, command: ит


Load time: 0.32576 sec
Real-time factor: 0.312894 (infer=0.286983 sec, audio=0.917188 sec)


Speaker: 5, command: бақытты


Load time: 0.306897 sec
Real-time factor: 0.592256 (infer=0.165026 sec, audio=0.278639 sec)


Speaker: 5, command: үй


Load time: 0.332652 sec
Real-time factor: 0.376411 (infer=0.209766 sec, audio=0.557279 sec)


Speaker: 5, command: оқы


Load time: 0.316797 sec
Real-time factor: 0.335322 (infer=0.233585 sec, audio=0.696599 sec)


Speaker: 5, command: жаз


Load time: 0.322894 sec
Real-time factor: 0.370686 (infer=0.189361 sec, audio=0.510839 sec)


Speaker: 5, command: ағаш


Load time: 0.321937 sec
Real-time factor: 0.177411 (infer=0.133883 sec, audio=0.754649 sec)


Speaker: 5, command: көрнекі
Speaker: 5, command: мәссаған


Load time: 0.321304 sec
Real-time factor: 0.207007 (infer=0.139394 sec, audio=0.673379 sec)


In [6]:
# Listen to a couple of the generated commands for every speaker
example_commands = ['артқа', 'алға']

for speaker_id in range(n_voices):
    wav_name = f'speaker_{speaker_id}.wav'
    for word in example_commands:
        print(f'Speaker: {speaker_id}, Command: {word}')
        # Render an inline audio player for the corresponding wav file
        wav_path = os.path.join(dataset_path, word, wav_name)
        dp(Audio(wav_path))

Speaker: 0, Command: артқа


Speaker: 0, Command: алға


Speaker: 1, Command: артқа


Speaker: 1, Command: алға
Speaker: 2, Command: артқа


Speaker: 2, Command: алға


Speaker: 3, Command: артқа


Speaker: 3, Command: алға


Speaker: 4, Command: артқа


Speaker: 4, Command: алға


Speaker: 5, Command: артқа


Speaker: 5, Command: алға
