In [6]:
import parselmouth
import numpy as np
from parselmouth.praat import call
import glob
import pandas as pd

def extract_features(audio_path, pitch_floor=75, pitch_ceiling=300):
    """Extract summary voice features from a single audio recording.

    Parameters
    ----------
    audio_path : str
        Path to an audio file readable by ``parselmouth.Sound``.
    pitch_floor, pitch_ceiling : float, optional
        Pitch range (Hz) passed to Praat's "To PointProcess (periodic, cc)",
        which feeds the jitter and shimmer measurements. The defaults (75, 300)
        are the values this function previously hard-coded. They do NOT affect
        the F0 track, which still uses ``Sound.to_pitch()`` defaults.

    Returns
    -------
    dict
        Keys ``meanF0Hz``, ``stdevF0Hz``, ``HNR``, ``localJitter`` and
        ``localShimmer``. The F0 and HNR statistics fall back to 0 when no
        frame passes the ``> 0`` filter.

    Notes
    -----
    No exceptions are caught here; any error raised by parselmouth/Praat
    propagates to the caller.
    """
    snd = parselmouth.Sound(audio_path)

    # Fundamental frequency (F0): keep only positive values
    # (unvoiced frames are expected to be reported as 0 Hz — see Parselmouth docs).
    pitch = snd.to_pitch()
    f0_values = pitch.selected_array['frequency']
    f0_values = f0_values[f0_values > 0]
    has_voiced_frames = len(f0_values) > 0

    meanF0 = np.mean(f0_values) if has_voiced_frames else 0
    stdevF0 = np.std(f0_values) if has_voiced_frames else 0

    # Harmonics-to-Noise Ratio (HNR), averaged over frames with a positive value.
    # NOTE(review): this filter also discards frames whose HNR is <= 0 dB, not only
    # silent-frame placeholders. It is left unchanged so the feature stays comparable
    # with data produced by earlier runs — confirm whether that is intended.
    hnr = snd.to_harmonicity()
    hnr_all = hnr.values
    hnr_values = hnr_all[hnr_all > 0]
    meanHNR = np.mean(hnr_values) if len(hnr_values) > 0 else 0

    # Jitter & shimmer are measured on a periodic point process.
    # Trailing Praat arguments (per the Praat manual): time range (0, 0 = whole
    # sound), shortest period, longest period, maximum period factor and, for
    # shimmer only, maximum amplitude factor.
    pointProcess = call(snd, "To PointProcess (periodic, cc)", pitch_floor, pitch_ceiling)
    localJitter = call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
    localShimmer = call([snd, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)

    return {
        "meanF0Hz": meanF0,
        "stdevF0Hz": stdevF0,
        "HNR": meanHNR,
        "localJitter": localJitter,
        "localShimmer": localShimmer
    }
    

# Example usage
# features = extract_features("CT001_phonationA.wav")
# print(features)

In [163]:
import os
import pandas as pd
from tkinter import Tk
from tkinter.filedialog import askopenfilenames

# Your extract_features function should be defined earlier

# Helper to open file dialog
def select_files():
    """Show a file-open dialog restricted to WAV files and return the selection.

    A hidden Tk root window is created only to host the dialog and is always
    destroyed afterwards, so repeated calls do not accumulate Tk instances.

    Returns
    -------
    The value returned by ``askopenfilenames``: the selected paths, or an
    empty (falsy) value when the dialog is cancelled.
    """
    root = Tk()
    try:
        root.withdraw()  # hide the empty root window; only the dialog should show
        # Keep the dialog above other windows so it is not hidden behind the notebook.
        root.call('wm', 'attributes', '.', '-topmost', True)
        return askopenfilenames(title="Select Audio File", filetypes=[("WAV files", "*.wav")])
    finally:
        # Release the Tk interpreter even if the dialog raises.
        root.destroy()

# Labels for each of the 8 files (based on your dataset)
file_labels = ['A', 'E', 'I', 'O', 'U', 'PA', 'TA', 'KA']

# Collect info and process
if __name__ == "__main__":
    # Collect meta info. Whitespace is stripped and Sex is upper-cased so that
    # inputs such as "m" or " M " still match the later 'M'/'F' encoding step.
    age_text = input("Enter Age: ").strip()
    sex = input("Enter Sex (M/F): ").strip().upper()

    # Store age as a number when possible so the column is numeric, not text.
    try:
        age = int(age_text)
    except ValueError:
        print(f"Warning: age {age_text!r} is not a whole number; keeping it as text")
        age = age_text

    # One flat record per person: meta info plus "<feature>_<label>" columns.
    person_data = {
        "Age (years)": age,
        "Sex": sex
    }

    for label in file_labels:
        print(f"\nUpload file for phonation/rhythm: {label}")
        file_paths = select_files()
        if not file_paths:
            # Dialog was cancelled: make the gap visible instead of skipping silently.
            print(f"No file selected for {label}; its feature columns will be missing")
            continue

        file_path = str(file_paths[0])  # Use only the first selected file
        try:
            features = extract_features(file_path)
        except Exception as e:
            # Best effort: report the failure and carry on with the remaining labels.
            print(f"Error processing {label}: {e}")
            continue

        for feat, val in features.items():
            person_data[f"{feat}_{label}"] = val

    # Convert to DataFrame (a single row for this person)
    final_df = pd.DataFrame([person_data])
    print("\nExtracted Features:")
    print(final_df)

    # Save to CSV
    final_df.to_csv("voice_features_person.csv", index=False)
    print("\nSaved as 'voice_features_person.csv'")


Enter Age:  36
Enter Sex (M/F):  M



Upload file for phonation/rhythm: A

Upload file for phonation/rhythm: E

Upload file for phonation/rhythm: I

Upload file for phonation/rhythm: O

Upload file for phonation/rhythm: U

Upload file for phonation/rhythm: PA

Upload file for phonation/rhythm: TA

Upload file for phonation/rhythm: KA

Extracted Features:
  Age (years) Sex  meanF0Hz_A  stdevF0Hz_A      HNR_A  localJitter_A  \
0          36   M  132.727707     3.394081  13.880769       0.005547   

   localShimmer_A  meanF0Hz_E  stdevF0Hz_E      HNR_E  ...  meanF0Hz_TA  \
0        0.058267   146.16624     3.360383  12.372608  ...   168.362888   

   stdevF0Hz_TA     HNR_TA  localJitter_TA  localShimmer_TA  meanF0Hz_KA  \
0      5.067805  13.260158        0.013345         0.086907   168.201467   

   stdevF0Hz_KA     HNR_KA  localJitter_KA  localShimmer_KA  
0      5.079586  12.261247        0.014937         0.113921  

[1 rows x 42 columns]

Saved as 'voice_features_person.csv'


In [165]:
# Display the single-row feature table built in the previous cell.
final_df

Unnamed: 0,Age (years),Sex,meanF0Hz_A,stdevF0Hz_A,HNR_A,localJitter_A,localShimmer_A,meanF0Hz_E,stdevF0Hz_E,HNR_E,...,meanF0Hz_TA,stdevF0Hz_TA,HNR_TA,localJitter_TA,localShimmer_TA,meanF0Hz_KA,stdevF0Hz_KA,HNR_KA,localJitter_KA,localShimmer_KA
0,36,M,132.727707,3.394081,13.880769,0.005547,0.058267,146.16624,3.360383,12.372608,...,168.362888,5.067805,13.260158,0.013345,0.086907,168.201467,5.079586,12.261247,0.014937,0.113921


In [167]:
# Encode Sex numerically (M -> 0, F -> 1); any other value becomes NaN.
# The identity entries for 0 and 1 make this cell safe to re-run: with a plain
# {'M': 0, 'F': 1} mapping, a second execution would turn the already-encoded
# column into NaN.
final_df['Sex'] = final_df['Sex'].map({'M': 0, 'F': 1, 0: 0, 1: 1})

In [169]:
from sklearn.preprocessing import StandardScaler

In [171]:
# Imported in this cell because pickle is otherwise only imported by a later
# cell, which makes a fresh-kernel top-to-bottom run fail with NameError here.
import pickle

# Columns to leave out of the model input; empty means "use every column".
excluded_columns = []
feature_columns = final_df.columns.drop(excluded_columns)
X = final_df[feature_columns]

# NOTE(security): pickle.load can execute arbitrary code while loading —
# only open scaler files that come from a trusted source.
with open("als_scaler2.pkl", "rb") as f:
    scaler = pickle.load(f)

# Only transform (never fit) here: the scaler is loaded from disk and is
# presumably already fitted on the training data.
X_scaled = scaler.transform(X)

In [173]:
import pandas as pd
import pickle

# Step 1: Load the trained XGBoost model from its pickle file.
# (Unpickling can run arbitrary code, so the file must come from a trusted source.)
with open("xgb_model_rem.pkl", "rb") as model_file:
    model = pickle.load(model_file)

In [153]:
# NOTE(review): this cell also assigns `model`, so in a top-to-bottom run it
# replaces the model loaded from "xgb_model_rem.pkl" in the cell above. Its
# execution count (153) is lower than the surrounding cells, so it may be a
# leftover from an earlier session — confirm which model file is intended.
import joblib
model = joblib.load("model_rem.pkl")

In [175]:
# Predict the class label for the scaled feature row(s) in X_scaled.
prediction = model.predict(X_scaled)

In [177]:
# Display the predicted label array.
prediction

array([0])

In [179]:
# Class probabilities for the same input; presumably one row per sample and
# one column per class, in the model's class order — confirm against the model.
prediction_probs = model.predict_proba(X_scaled)
print(prediction_probs)  # e.g., [[0.85 0.15]]

[[0.54225826 0.45774174]]
