In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

In [3]:
# Load the VOC-ALS data sheet; header=1 takes the column names from the
# second spreadsheet row (row index 1).
df = pd.read_excel(
    "VOC-ALS/VOC-ALS.xlsx",
    sheet_name="VOC-ALS_Data",
    header=1,
)

In [4]:
# Sound labels that appear as the suffix of every acoustic column name.
vowels = list("AEIOU")
syllables = "PA TA KA".split()

# Acoustic measures recorded per sound; columns are named "<metric>_<sound>".
metrics = [
    'meanF0Hz',
    'stdevF0Hz',
    'HNR',
    'localJitter',
    'localShimmer',
]


In [5]:
# One column name per (sound, metric) pair, ordered sound-major so that the
# metrics belonging to a sound stay adjacent.
acoustic_features = [
    f"{metric}_{sound}"
    for sound in vowels + syllables
    for metric in metrics
]

In [6]:
# Sanity check: there must be exactly one feature name per (sound, metric)
# pair. A bare `acoustic_features` on a non-final line displays nothing, so
# the check is made explicit instead.
expected_feature_count = len(vowels + syllables) * len(metrics)
assert len(acoustic_features) == expected_feature_count, (
    f"expected {expected_feature_count} acoustic features, "
    f"got {len(acoustic_features)}"
)
len(acoustic_features)

40

In [7]:
# Feature matrix: an independent copy of the acoustic columns, so later
# edits to X never touch df.
X = df.loc[:, acoustic_features].copy()

# Binary target: 'ALS' -> 1, 'HC' -> 0. Any other label maps to NaN.
label_codes = {'ALS': 1, 'HC': 0}
y = df['Category'].map(label_codes)

In [8]:
# Mean-impute missing acoustic values, column by column.
# NOTE(review): the means are computed over all rows, i.e. before the
# train/test split performed further down.
column_means = X.mean()
X = X.fillna(column_means)

In [9]:
# Append the demographic columns. This happens after the imputation step,
# so these two columns are not mean-filled.
X = X.assign(
    Age=df['Age (years)'],
    Sex=df['Sex'].map({'M': 1, 'F': 0}),  # 'M' -> 1, 'F' -> 0
)

In [10]:
# Derive two ratio features per sound from the raw measures:
#   jitter_shimmer_ratio = localJitter / localShimmer
#   f0_variability       = stdevF0Hz / meanF0Hz
for sound in [*vowels, *syllables]:
    jitter, shimmer = X[f'localJitter_{sound}'], X[f'localShimmer_{sound}']
    f0_std, f0_mean = X[f'stdevF0Hz_{sound}'], X[f'meanF0Hz_{sound}']
    X[f'jitter_shimmer_ratio_{sound}'] = jitter.div(shimmer)
    X[f'f0_variability_{sound}'] = f0_std.div(f0_mean)

In [11]:
# Hold out 20% of the rows for testing; stratify on y so both partitions
# keep the class ratio, and fix the seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [12]:
# Learn the standardisation statistics from the training rows only, then
# apply the same transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
# Candidate classifiers, all seeded with the same random_state.
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
svm_rbf = SVC(kernel='rbf', probability=True, random_state=42)
xgboost_clf = XGBClassifier(random_state=42)
neural_net = MLPClassifier(hidden_layer_sizes=(100,50), max_iter=500, random_state=42)

# Insertion order matters: when accuracies tie in the later comparison,
# max() keeps the entry that appears first.
models = {
    'Random Forest': random_forest,
    'SVM': svm_rbf,
    'XGBoost': xgboost_clf,
    'Neural Network': neural_net,
}

In [14]:
# Fit every candidate on the scaled training data and score it on the
# held-out split; results maps model name -> test accuracy.
results = {}
for name, model in models.items():
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    results.update({name: accuracy})
    print(f"{name} Accuracy: {accuracy:.4f}")
    print(classification_report(y_test, y_pred))

Random Forest Accuracy: 0.7097
              precision    recall  f1-score   support

           0       0.55      0.60      0.57        10
           1       0.80      0.76      0.78        21

    accuracy                           0.71        31
   macro avg       0.67      0.68      0.68        31
weighted avg       0.72      0.71      0.71        31

SVM Accuracy: 0.6774
              precision    recall  f1-score   support

           0       0.50      0.10      0.17        10
           1       0.69      0.95      0.80        21

    accuracy                           0.68        31
   macro avg       0.59      0.53      0.48        31
weighted avg       0.63      0.68      0.60        31

XGBoost Accuracy: 0.6452
              precision    recall  f1-score   support

           0       0.45      0.50      0.48        10
           1       0.75      0.71      0.73        21

    accuracy                           0.65        31
   macro avg       0.60      0.61      0.60        

In [15]:
# Pick the classifier with the highest test accuracy (the first entry wins
# a tie).
best_model_name, _best_score = max(results.items(), key=lambda item: item[1])
print(f"\nTuning hyperparameters for {best_model_name}...")


Tuning hyperparameters for Random Forest...


In [16]:
# Re-derive the winner here so that this cell can run on its own.
best_model_name = max(results, key=results.get)
print(f"\nTuning hyperparameters for {best_model_name}...")

# Search space per model name: (estimator class, constructor kwargs, grid).
search_spaces = {
    'XGBoost': (
        XGBClassifier,
        {'random_state': 42},
        {
            'n_estimators': [50, 100, 200],
            'max_depth': [3, 5, 7],
            'learning_rate': [0.01, 0.1, 0.2],
        },
    ),
    'Random Forest': (
        RandomForestClassifier,
        {'random_state': 42},
        {
            'n_estimators': [50, 100, 200],
            'max_depth': [None, 10, 20],
            'min_samples_split': [2, 5, 10],
        },
    ),
    'SVM': (
        SVC,
        {'kernel': 'rbf', 'probability': True, 'random_state': 42},
        {
            'C': [0.1, 1, 10, 100],
            'gamma': ['scale', 'auto', 0.1, 0.01],
        },
    ),
    'Neural Network': (
        MLPClassifier,
        {'max_iter': 500, 'random_state': 42},
        {
            'hidden_layer_sizes': [(50,), (100,), (100,50)],
            'alpha': [0.0001, 0.001, 0.01],
            'learning_rate_init': [0.001, 0.01],
        },
    ),
}

# Any name without its own entry falls back to the neural-network space.
estimator_cls, estimator_kwargs, param_grid = search_spaces.get(
    best_model_name, search_spaces['Neural Network']
)
grid_search = GridSearchCV(
    estimator_cls(**estimator_kwargs),
    param_grid,
    cv=5,
    scoring='accuracy',
)

# 5-fold cross-validated search on the training data, then a single
# evaluation of the refitted best estimator on the held-out split.
grid_search.fit(X_train_scaled, y_train)
print(f"Best parameters: {grid_search.best_params_}")
best_model = grid_search.best_estimator_
best_pred = best_model.predict(X_test_scaled)
best_accuracy = accuracy_score(y_test, best_pred)
print(f"Tuned {best_model_name} Accuracy: {best_accuracy:.4f}")
print(classification_report(y_test, best_pred))


Tuning hyperparameters for Random Forest...
Best parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Tuned Random Forest Accuracy: 0.6774
              precision    recall  f1-score   support

           0       0.50      0.60      0.55        10
           1       0.79      0.71      0.75        21

    accuracy                           0.68        31
   macro avg       0.64      0.66      0.65        31
weighted avg       0.70      0.68      0.68        31



In [17]:
import joblib

# Persist the tuned classifier and the fitted scaler side by side; the
# prediction helper in this notebook uses both objects.
MODEL_PATH = 'als_detection_model.pkl'
SCALER_PATH = 'als_detection_scaler.pkl'
joblib.dump(best_model, MODEL_PATH)
joblib.dump(scaler, SCALER_PATH)

['als_detection_scaler.pkl']

In [29]:
def predict_als(new_data):
    """
    Predict ALS from acoustic features

    Relies on the notebook-level objects ``X`` (the feature frame used for
    training), ``vowels`` / ``syllables`` (sound labels), ``scaler`` and
    ``best_model``.

    Parameters:
    new_data (dict): Maps feature names (e.g. ``meanF0Hz_A``, ``Age``) to
        values. A feature whose key is absent, or whose value is missing
        (NaN/None), is replaced by the corresponding column mean of ``X``.
        Keys that are not columns of ``X`` are ignored.

    Returns:
    tuple: (prediction, probability, result_string) where prediction is the
        class label returned by the model (1 means ALS), probability is the
        model's probability for class 1, and result_string is a readable
        verdict.
    """
    sounds = vowels + syllables

    # Names of the derived columns; every other column of X is a raw input.
    engineered_cols = {
        f'{prefix}_{sound}'
        for sound in sounds
        for prefix in ('jitter_shimmer_ratio', 'f0_variability')
    }
    base_cols = [col for col in X.columns if col not in engineered_cols]
    reference_means = X.mean()

    # Align to the raw input columns BEFORE deriving features: absent keys
    # become NaN columns here, so the ratio computation below can never hit
    # a KeyError, and every gap is then filled with the mean from X.
    new_df = pd.DataFrame([new_data]).reindex(columns=base_cols)
    new_df = new_df.fillna(reference_means)

    # Add engineered features for each sound (same formulas as in training)
    for sound in sounds:
        new_df[f'jitter_shimmer_ratio_{sound}'] = (
            new_df[f'localJitter_{sound}'] / new_df[f'localShimmer_{sound}']
        )
        new_df[f'f0_variability_{sound}'] = (
            new_df[f'stdevF0Hz_{sound}'] / new_df[f'meanF0Hz_{sound}']
        )

    # Reorder columns to match the layout the scaler and model were fit on
    new_df = new_df[X.columns]

    # Scale features
    new_scaled = scaler.transform(new_df)

    # Predict
    prediction = best_model.predict(new_scaled)[0]
    probability = best_model.predict_proba(new_scaled)[0][1]

    # Generate result string
    result = "ALS detected" if prediction == 1 else "No ALS detected"

    return prediction, probability, result

In [19]:
import parselmouth
import glob
import os
import pandas as pd
from parselmouth.praat import call

In [20]:
def measurePitch(sound, f0min=75, f0max=500, unit="Hertz"):
    """Extract acoustic features from a sound object.

    Parameters:
    sound: Sound object handed to the Praat commands (callers in this
        notebook pass a ``parselmouth.Sound``).
    f0min: Pitch floor passed to the pitch, harmonicity and point-process
        analyses.
    f0max: Pitch ceiling passed to the pitch and point-process analyses.
    unit: Unit requested for the F0 mean and standard deviation.

    Returns:
    tuple: (meanF0, stdevF0, hnr, localJitter, localShimmer)
    """
    pitch = call(sound, "To Pitch", 0.0, f0min, f0max)
    meanF0 = call(pitch, "Get mean", 0, 0, unit)
    stdevF0 = call(pitch, "Get standard deviation", 0, 0, unit)

    # Use the caller's pitch floor here too. It used to be hard-coded to 75,
    # so a non-default f0min was silently ignored by the HNR analysis only.
    harmonicity = call(sound, "To Harmonicity (cc)", 0.01, f0min, 0.1, 1.0)
    hnr = call(harmonicity, "Get mean", 0, 0)

    # Jitter and shimmer are both measured on the same periodic point process.
    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
    localJitter = call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
    localShimmer = call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)

    return meanF0, stdevF0, hnr, localJitter, localShimmer

In [48]:
# Folder whose *.wav recordings are analysed below (relative path).
folder_path = "test_voice/no_als"

In [49]:
# Collect every .wav file located directly inside folder_path.
wav_pattern = os.path.join(folder_path, "*.wav")
wav_files = glob.glob(wav_pattern)

In [50]:
# Accumulates "<metric>_<sound>" -> value across the processed recordings.
features = {}

# Filename fragment -> sound label used as the feature-name suffix.
sound_mapping = {
    **{f'phonation{vowel}': vowel for vowel in 'AEIOU'},
    **{f'rhythm{syllable}': syllable for syllable in ('KA', 'PA', 'TA')},
}


In [51]:
# Measure every recording and file its five acoustic values under the sound
# label found in its filename.
for wav_file in wav_files:
    base_name = os.path.basename(wav_file)

    # Resolve the label afresh for each file. Without this, a file with no
    # recognised fragment would reuse the label left over from the previous
    # iteration and overwrite that sound's features.
    sound_type = next(
        (label for fragment, label in sound_mapping.items() if fragment in base_name),
        None,
    )
    if sound_type is None:
        print(f"Skipping {base_name}: no known sound type in filename")
        continue

    try:
        sound = parselmouth.Sound(wav_file)
        meanF0, stdevF0, hnr, localJitter, localShimmer = measurePitch(sound)
    except Exception as e:
        # Best effort: report the failure and carry on with the other files.
        print(f"Error processing {wav_file}: {e}")
        continue

    # Store features in dictionary
    features.update({
        f'meanF0Hz_{sound_type}': meanF0,
        f'stdevF0Hz_{sound_type}': stdevF0,
        f'HNR_{sound_type}': hnr,
        f'localJitter_{sound_type}': localJitter,
        f'localShimmer_{sound_type}': localShimmer,
    })

    print(f"Processed {base_name} successfully")

Processed CT001_phonationA.wav successfully
Processed CT001_phonationE.wav successfully
Processed CT001_phonationI.wav successfully
Processed CT001_phonationO.wav successfully
Processed CT001_phonationU.wav successfully
Processed CT001_rhythmKA.wav successfully
Processed CT001_rhythmPA.wav successfully
Processed CT001_rhythmTA.wav successfully


In [52]:
# Add after existing feature extraction code
features['Age'] = 50  # Default age value
features['Sex'] = 1   # Default to male (1) or 0 for female

In [53]:
# Print every extracted feature as "name: value", one per line.
feature_lines = (f"{key}: {value}" for key, value in features.items())
print("\nExtracted features:", *feature_lines, sep="\n")


Extracted features:
meanF0Hz_A: 178.21550084384813
stdevF0Hz_A: 1.7800228581107524
HNR_A: 17.10114757449144
localJitter_A: 0.003916510772002931
localShimmer_A: 0.10771010582614468
meanF0Hz_E: 188.70480320952183
stdevF0Hz_E: 5.1026171350033716
HNR_E: 15.753448452274073
localJitter_E: 0.004096614037388618
localShimmer_E: 0.0809132701141244
meanF0Hz_I: 195.08696386765288
stdevF0Hz_I: 2.5452836578815172
HNR_I: 24.789677439188686
localJitter_I: 0.0022795998295045954
localShimmer_I: 0.038205745571530746
meanF0Hz_O: 193.33143706379306
stdevF0Hz_O: 1.6491704069794209
HNR_O: 20.352912413989788
localJitter_O: 0.002307045670463888
localShimmer_O: 0.04459131193962331
meanF0Hz_U: 199.12141891503242
stdevF0Hz_U: 5.086864722879646
HNR_U: 30.954966643168046
localJitter_U: 0.00159987416843971
localShimmer_U: 0.023504165214585638
meanF0Hz_KA: 171.1639262574513
stdevF0Hz_KA: 17.065878907071614
HNR_KA: 9.622981708901271
localJitter_KA: 0.01764665708456529
localShimmer_KA: 0.1015362704135749
meanF0Hz_PA: 

In [46]:
# With all 8 recordings processed this is 42: 8 sounds x 5 metrics, plus Age and Sex.
len(features)

42

In [54]:
# Score the extracted feature set and report the verdict, the probability
# of ALS and the raw class label.
prediction, probability, result = predict_als(features)
print(
    f"\nPrediction Result: {result}\n"
    f"Probability of ALS: {probability:.2f}\n"
    f"Binary Prediction: {prediction}"
)


Prediction Result: No ALS detected
Probability of ALS: 0.18
Binary Prediction: 0
