tbd: 
- tune the parameters
    - fine-tune the pitch extraction
    - use extracted pitch for hnr (and jitter & shimmer?)
- record the temporal pitches and hnrs
- post processing (normalization, zero-padding)
- determine which model to use (LSTM?)
- data augmentation?

for SSH

In [None]:
%pip install parselmouth
%pip install tqdm

In [None]:
import parselmouth
import parselmouth.praat as praat
import numpy as np
import os
import glob
from tqdm import tqdm
import pandas as pd

In [None]:
def get_pitch(sound, pitch_floor, pitch_ceiling):
    pitch = sound.to_pitch(
            time_step=0.01,
            pitch_floor=pitch_floor,
            pitch_ceiling=pitch_ceiling,
        )
    pitch_values = pitch.selected_array['frequency']
    pitch_values[pitch_values == 0] = np.nan

    ave_pitch = np.nanmean(pitch_values)
    std_pitch = np.nanstd(pitch_values)

    return ave_pitch, std_pitch

def get_hnr(sound):
    harmonicity = sound.to_harmonicity(time_step=0.01)
    hnr_values = harmonicity.values.T

    ave_hnr = np.nanmean(hnr_values)
    std_hnr = np.nanstd(hnr_values)

    return ave_hnr, std_hnr

def get_gitter_shimmer(sound, pitch_floor, pitch_ceiling):
    pointProcess = praat.call(sound, "To PointProcess (periodic, cc)", pitch_floor, pitch_ceiling)

    # --- Get Jitter Measures ---
    lj = praat.call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3) # returns percentage
    laj = praat.call(pointProcess, "Get jitter (local, absolute)", 0, 0, 0.0001, 0.02, 1.3) # returns seconds
    rap = praat.call(pointProcess, "Get jitter (rap)", 0, 0, 0.0001, 0.02, 1.3) # returns percentage
    ppq5 = praat.call(pointProcess, "Get jitter (ppq5)", 0, 0, 0.0001, 0.02, 1.3) # returns percentage
    ddp = praat.call(pointProcess, "Get jitter (ddp)", 0, 0, 0.0001, 0.02, 1.3) # returns percentage

    jitter_values = np.array([
        lj, laj, rap, ppq5, ddp
    ], dtype=np.float32)

    # --- Get Shimmer Measures ---
    ls = praat.call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6) # returns percentage
    lsdb = praat.call([sound, pointProcess], "Get shimmer (local_dB)", 0, 0, 0.0001, 0.02, 1.3, 1.6) # returns dB
    apq3 = praat.call([sound, pointProcess], "Get shimmer (apq3)", 0, 0, 0.0001, 0.02, 1.3, 1.6) # returns percentage
    apq5 = praat.call([sound, pointProcess], "Get shimmer (apq5)",0, 0, 0.0001, 0.02, 1.3, 1.6) # returns percentage
    apq11 = praat.call([sound, pointProcess], "Get shimmer (apq11)", 0, 0, 0.0001, 0.02, 1.3, 1.6) # returns percentage
    dda = praat.call([sound, pointProcess], "Get shimmer (dda)", 0, 0, 0.0001, 0.02, 1.3, 1.6) # returns percentage

    shimmer_values = np.array([
        ls, lsdb, apq3, apq5, apq11, dda
    ], dtype=np.float32)

    return jitter_values, shimmer_values

### Extracting feature

In [None]:
def process_audios(audio_files):

    file_ids = []
    ave_pitches = []
    std_pitches = []
    ave_hnrs = []
    std_hnrs = []
    jitters = []
    shimmers =[]


    i = 0
    for audio_file in tqdm(audio_files, desc="Processing Audio Files"):
        sound = parselmouth.Sound(audio_file)
        #sound.pre_emphasize()   ## don't need this right?
        pitch_floor=75,
        pitch_ceiling=600,
        
        ave_pitch, std_pitch = get_pitch(sound, pitch_floor, pitch_ceiling)
        ave_hnr, std_hnr = get_hnr(sound)
        jitter, shimmer = get_gitter_shimmer(sound, pitch_floor, pitch_ceiling)

        file_ids.append(os.path.basename(audio_file).replace('.flac', ''))
        ave_pitches.append(ave_pitch)
        std_pitches.append(std_pitch)
        ave_hnrs.append(ave_hnr)
        std_hnrs.append(std_hnr)
        jitters.append(jitter)
        shimmers.append(shimmer)


    data = {'AUDIO_ID':file_ids,
            'AVE_PITCH':ave_pitches,
            'STD_PITCH':std_pitches,
            'AVE_HNR':ave_hnrs,
            'STD_HNR':std_hnrs,
            'JITTER':jitters,
            'SHIMMER':shimmers}

    i += 1
    #if i == 10: break
        
    return data

Processing Audio Files: 100%|██████████| 25379/25379 [29:46<00:00, 14.21it/s]


### train_audios

In [None]:
train_audios = glob.glob(r'C:\Users\ivyap\Desktop\25SU\TEAMLAB\LA\ASVspoof2019_LA_train\flac\*.flac')
train_features = process_audios(train_audios)

df = pd.DataFrame(train_features)

df.to_pickle(r'C:\Users\ivyap\Desktop\25SU\TEAMLAB\toy_feature\prosody_features.pkl')

### validation_audios

In [None]:
dev_audios = glob.glob(r'C:\Users\ivyap\Desktop\25SU\TEAMLAB\LA\ASVspoof2019_LA_dev\flac\*.flac')
dev_features = process_audios(dev_audios)

df = pd.DataFrame(dev_features)

# to be changed
df.to_pickle(r'C:\Users\ivyap\Desktop\25SU\TEAMLAB\toy_feature\prosody_features.pkl')

### evaluation_audios

In [None]:
eval_audios = glob.glob(r'C:\Users\ivyap\Desktop\25SU\TEAMLAB\LA\ASVspoof2019_LA_eval\flac\*.flac')
eval_features = process_audios(eval_audios)

df = pd.DataFrame(eval_features)

# to be changed
df.to_pickle(r'C:\Users\ivyap\Desktop\25SU\TEAMLAB\toy_feature\prosody_features.pkl')

### Inspect saved features

In [19]:
pkl_file_path = r'C:\Users\ivyap\Desktop\25SU\TEAMLAB\toy_feature\prosody_features.pkl'

try:
    if not os.path.exists(pkl_file_path):
        print(f"Error: The file was not found at {pkl_file_path}")
    else:
        loaded_df = pd.read_pickle(pkl_file_path)

        print(f"Successfully loaded DataFrame from: {pkl_file_path}")

        # --- Inspect the loaded DataFrame ---

        # 1. Display the first few rows
        print("\n--- First 5 rows (df.head()) ---")
        print(loaded_df.head())

        # 2. Display the last few rows
        print("\n--- Last 5 rows (df.tail()) ---")
        print(loaded_df.tail())

        # 3. Get concise summary of the DataFrame (index, columns, dtypes, non-null values, memory usage)
        print("\n--- DataFrame Info (df.info()) ---")
        loaded_df.info()

        # 4. Get descriptive statistics for numerical columns (count, mean, std, min, max, quartiles)
        print("\n--- Descriptive Statistics (df.describe()) ---")
        print(loaded_df.describe())

        # 5. Check the shape of the DataFrame (number of rows, number of columns)
        print("\n--- DataFrame Shape (df.shape) ---")
        print(f"Shape: {loaded_df.shape}")
        print(f"Number of files (rows): {loaded_df.shape[0]}")
        print(f"Number of columns (features + metadata): {loaded_df.shape[1]}")

        # 6. List the column names
        print("\n--- Column Names (df.columns) ---")
        print(loaded_df.columns.tolist())

        # 7. Check for missing values (NaNs)
        print("\n--- Missing Values (df.isnull().sum()) ---")
        print(loaded_df.isnull().sum()) # Sum of True (missing) values per column

        # 8. Access a specific column (e.g., 'AVE_PITCH')
        # print("\n--- Example: 'AVE_PITCH' column ---")
        # print(loaded_df['AVE_PITCH'].head())

except Exception as e:
    print(f"An error occurred while loading or inspecting the DataFrame: {e}")


Successfully loaded DataFrame from: C:\Users\ivyap\Desktop\25SU\TEAMLAB\toy_feature\prosody_features.pkl

--- First 5 rows (df.head()) ---
       AUDIO_ID   AVE_PITCH  STD_PITCH     AVE_HNR     STD_HNR  \
0  LA_T_1000137  129.703246  19.223900  -92.736529  104.786689   
1  LA_T_1000406  151.166201  18.754230 -138.341504   97.483901   
2  LA_T_1000648  240.929243  14.829958  -70.618649  104.616700   
3  LA_T_1000824  106.986026  14.755938  -97.834652  104.542879   
4  LA_T_1001074  194.306253  54.477061 -101.088008  106.743714   

                                              JITTER  \
0  [0.03256748, 0.0002516887, 0.013475794, 0.0195...   
1  [0.018233655, 0.00012101376, 0.0048870053, 0.0...   
2  [0.017310124, 7.244774e-05, 0.0066327276, 0.00...   
3  [0.030686794, 0.00028997014, 0.011766828, 0.01...   
4  [0.017426047, 8.990323e-05, 0.0041441307, 0.00...   

                                             SHIMMER  
0  [0.10375512, 1.0035236, 0.030025614, 0.0499179...  
1  [0.06479659, 0