In [1]:
import librosa
import numpy as np
import pandas as pd
import parselmouth
from parselmouth.praat import call
from scipy.signal import find_peaks, lfilter, hamming
from scipy.io import wavfile
from scipy.fftpack import fft
import features

In [2]:
# Load an audio file
audio_path = '/Users/imdohyeon/Documents/PythonWorkspace/Lieon-ai/Dataset/data2.wav'  # Replace with your audio file path
y, sr = librosa.load(audio_path, sr=44100)  # 다운샘플링 하지 않음

In [4]:
f0 = features.extract_f0(y, sr)
f0

array([  0.        ,   0.        ,   0.        , ..., 165.76853496,
       169.64319079, 168.66611791])

In [5]:
# formants = features.extract_formants(audio_path)
formants = features.extract_formants_for_frames
formants

<function features.extract_formants_for_frames(audio_file, frame_length=0.025, hop_length=0.01)>

In [3]:
# 함수 사용 예시
formants = features.extract_formants_praat(audio_path)
formants

Unnamed: 0,times,F1,F2,F3,F4,F5,F0(pitch),filename
0,0.067449,838.418375,1742.820797,3004.278130,3530.450432,,,data2.wav
1,0.167449,802.914010,1756.187974,2532.414115,3321.675721,,,data2.wav
2,0.267449,550.499866,1506.950439,2149.751029,3431.365438,,,data2.wav
3,0.367449,519.449973,1579.796344,2457.830722,3197.789147,4124.466777,170.629059,data2.wav
4,0.467449,462.718279,1772.549816,2251.231361,3550.314202,,156.181662,data2.wav
...,...,...,...,...,...,...,...,...
3765,376.567449,824.324183,2337.881964,2979.030259,4687.754512,,270.260443,data2.wav
3766,376.667449,862.648479,1073.719199,3041.878223,3831.589163,4936.897093,301.893814,data2.wav
3767,376.767449,1039.120188,2172.697376,2914.820940,4689.116488,,333.447364,data2.wav
3768,376.867449,470.549898,1258.034277,2222.669437,3472.739877,5179.495517,,data2.wav


In [9]:
spectral_flux = features.extract_spectral_flux(y, sr)
spectral_flux

array([0.06530739, 0.06066087, 0.10064038, ..., 0.6623334 , 0.551874  ,
       0.551874  ], dtype=float32)

In [3]:
spectral_entropy = features.extract_spectral_entropy(y, sr)
spectral_entropy

  ps = S / np.sum(S, axis=0)


array([3.4040468, 3.1216648, 3.2640953, ..., 2.2702453, 4.448911 ,
       5.1298194], dtype=float32)

In [11]:
speech_rate, pause_durations = features.extract_prosody_features(y, sr)
speech_rate

array([40., 40., 40., ..., 40., 40., 40.])

In [12]:
pause_durations

array([0, 0, 0, ..., 0, 0, 0])

In [4]:
mfcc = features.extract_mfcc(y, sr)
mfcc

Unnamed: 0,MFCC_1,MFCC_2,MFCC_3,MFCC_4,MFCC_5,MFCC_6,MFCC_7,MFCC_8,MFCC_9,MFCC_10,MFCC_11,MFCC_12,MFCC_13,MFCC_14,MFCC_15,MFCC_16,MFCC_17,MFCC_18,MFCC_19,MFCC_20
0,-495.417206,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,-495.187866,0.324098,0.323434,0.322336,0.320816,0.318881,0.316557,0.313865,0.310836,0.307484,0.303854,0.299978,0.295892,0.291630,0.287234,0.282744,0.278202,0.273638,0.269099,0.264621
2,-494.760254,0.903556,0.834057,0.739228,0.642890,0.565826,0.518680,0.498489,0.490005,0.471398,0.422302,0.331354,0.200634,0.045377,-0.110893,-0.244160,-0.337866,-0.388276,-0.405342,-0.408813
3,-492.381958,3.716152,2.200505,0.305203,-1.275120,-1.972219,-1.556628,-0.220901,1.496771,2.918305,3.481720,2.950285,1.496510,-0.368650,-1.992079,-2.824135,-2.620312,-1.524205,-0.003528,1.333023
4,-490.777588,5.584886,3.028855,-0.131758,-2.699138,-3.715747,-2.830625,-0.426951,2.535915,4.900361,5.757985,4.791626,2.382073,-0.555967,-2.937652,-3.925256,-3.236431,-1.229005,1.261328,3.257974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32471,-476.653046,20.230606,7.351832,-1.064027,-1.029781,2.104553,1.887050,-2.264232,-6.511621,-7.946602,-6.951889,-5.772226,-6.207402,-8.241718,-10.187572,-10.361738,-8.720877,-6.465870,-4.306704,-1.940418
32472,-477.100403,20.072380,8.245855,0.375465,-0.601585,0.473562,-0.636445,-3.034463,-4.390537,-4.895039,-5.895292,-7.249595,-7.945090,-7.961936,-8.044268,-8.361979,-8.368439,-7.531447,-5.729403,-3.326677
32473,-478.840332,17.821251,7.057763,1.148241,1.336689,1.094638,-2.571450,-5.343132,-3.766730,-1.285857,-2.891763,-7.503045,-9.836198,-8.245914,-6.558857,-7.869699,-9.979229,-8.810524,-4.333773,-0.481491
32474,-455.896118,29.438046,-3.439192,-4.192606,1.600491,2.155888,-1.595652,-7.557739,-3.620239,1.014191,-4.226951,-7.853891,-6.946503,-6.590614,-8.422939,-11.324759,-8.956722,-4.992223,-5.797482,-5.584272
