In [None]:
%pip install --upgrade \
    librosa scikit-learn numpy pandas matplotlib \
    gensim torch torchaudio torchvision transformers==4.40.0 \
    accelerate evaluate

In [1]:
import os, glob, random, math, json, warnings
import numpy as np
import pandas as pd
import librosa, librosa.display
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from gensim.models import Word2Vec, KeyedVectors
from transformers import (
    Wav2Vec2Processor,
    Wav2Vec2Model,
    Wav2Vec2ForSequenceClassification,
)

warnings.filterwarnings("ignore")

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
RNG_SEED = 42
random.seed(RNG_SEED)
np.random.seed(RNG_SEED)
torch.manual_seed(RNG_SEED)


2025-05-19 05:04:47.764020: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747631087.790530     148 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747631087.798550     148 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


<torch._C.Generator at 0x7d564809df70>

In [2]:
# Folders
DATA_DIR = "/kaggle/input/voc-als/VOC-ALS/content/VOC-ALS"
       # dataset/{als,control}/*.wav
# Audio
SR = 16_000               # Hz
N_MFCC = 40
MAX_MFCC_FRAMES = 250     # pad/trim frames

# Training
BATCH_SIZE_CNN = 16
EPOCHS_CNN = 10
EPOCHS_W2V2 = 4

In [3]:
meta_df = pd.read_excel("/kaggle/input/voc-als/VOC-ALS (1).xlsx")
meta_df['ID'] = meta_df['ID'].astype(str)

y, paths = [], []
for subdir, dirs, files in os.walk(DATA_DIR):
    for file in files:
        if file.endswith('.wav'):
            filepath = os.path.join(subdir, file)
            participant_id = file.split('_')[0]

            row = meta_df[meta_df['ID'] == participant_id]
            if not row.empty:
                label = 0 if row['Category'].values[0] != 'ALS' else 1
            else:
                label = None

            if label is None:
                continue

            print('ID' ,participant_id, 'label' ,row['Category'].values[0])
            y.append(label)
            paths.append(filepath)

ID PZ017 label ALS
ID CT013 label HC
ID CT053 label HC
ID PZ087 label ALS
ID PZ043 label ALS
ID PZ013 label ALS
ID PZ056 label ALS
ID PZ020 label ALS
ID PZ057 label ALS
ID PZ044 label ALS
ID PZ052 label ALS
ID PZ048 label ALS
ID PZ014 label ALS
ID PZ025 label ALS
ID PZ100 label ALS
ID PZ010 label ALS
ID PZ091 label ALS
ID PZ035 label ALS
ID PZ090 label ALS
ID PZ033 label ALS
ID PZ088 label ALS
ID PZ039 label ALS
ID CT069 label HC
ID PZ110 label ALS
ID PZ108 label ALS
ID CT004 label HC
ID PZ028 label ALS
ID PZ051 label ALS
ID PZ019 label ALS
ID CT039 label HC
ID PZ037 label ALS
ID CT035 label HC
ID PZ103 label ALS
ID CT052 label HC
ID CT061 label HC
ID PZ092 label ALS
ID CT001 label HC
ID PZ060 label ALS
ID CT020 label HC
ID PZ063 label ALS
ID PZ068 label ALS
ID CT037 label HC
ID PZ036 label ALS
ID PZ102 label ALS
ID CT021 label HC
ID CT029 label HC
ID PZ061 label ALS
ID CT050 label HC
ID PZ009 label ALS
ID CT040 label HC
ID PZ112 label ALS
ID PZ089 label ALS
ID CT033 label HC
ID PZ012 

In [4]:
path = np.array(paths)
y = np.array(y)

In [5]:
df = pd.DataFrame(path)
df['label'] = y

In [6]:
df.columns=['path','y']
df

Unnamed: 0,path,y
0,/kaggle/input/voc-als/VOC-ALS/content/VOC-ALS/...,1
1,/kaggle/input/voc-als/VOC-ALS/content/VOC-ALS/...,0
2,/kaggle/input/voc-als/VOC-ALS/content/VOC-ALS/...,0
3,/kaggle/input/voc-als/VOC-ALS/content/VOC-ALS/...,1
4,/kaggle/input/voc-als/VOC-ALS/content/VOC-ALS/...,1
...,...,...
1219,/kaggle/input/voc-als/VOC-ALS/content/VOC-ALS/...,1
1220,/kaggle/input/voc-als/VOC-ALS/content/VOC-ALS/...,0
1221,/kaggle/input/voc-als/VOC-ALS/content/VOC-ALS/...,1
1222,/kaggle/input/voc-als/VOC-ALS/content/VOC-ALS/...,0


In [23]:
def extract_mfcc(path, sr=SR, n_mfcc=N_MFCC, max_frames=MAX_MFCC_FRAMES):
    y, sr = librosa.load(path, sr=sr)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    delta = librosa.feature.delta(mfcc)
    delta2 = librosa.feature.delta(mfcc, order=2)
    feat = np.vstack([mfcc, delta, delta2])       # (3*n_mfcc, T)
    # Pad/trim
    if feat.shape[1] < max_frames:
        feat = np.pad(feat, ((0, 0), (0, max_frames - feat.shape[1])), mode="constant")
    else:
        feat = feat[:, :max_frames]
    return feat.astype(np.float32)                # (120, max_frames)

In [24]:
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
w2v2_base = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h").to(DEVICE)
w2v2_base.eval()

@torch.inference_mode()
def wav2vec_embedding(path):
    speech, _ = librosa.load(path, sr=SR)
    inputs = processor(speech, sampling_rate=SR, return_tensors="pt", padding=True)
    for k in inputs:
        inputs[k] = inputs[k].to(DEVICE)
    hidden = w2v2_base(**inputs).last_hidden_state      # (1, T, 768)
    return hidden.mean(dim=1).cpu().squeeze().numpy()   # (768,)

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [25]:
%%time
mfcc_features = np.stack([extract_mfcc(p).flatten() for p in df.path])
w2v2_features = np.stack([wav2vec_embedding(p)        for p in df.path])

np.save("mfcc_flat.npy", mfcc_features)
np.save("w2v2_emb.npy", w2v2_features)
np.save("labels.npy", df.y.values)

CPU times: user 1min 30s, sys: 1.25 s, total: 1min 31s
Wall time: 1min 31s


In [26]:
X_mfcc = np.load("mfcc_flat.npy",allow_pickle=True)
X_w2v2 = np.load("w2v2_emb.npy",allow_pickle=True)
y = np.load("labels.npy",allow_pickle=True)

X_mfcc_tr, X_mfcc_te, y_tr, y_te = train_test_split(
    X_mfcc, y, test_size=0.2, random_state=RNG_SEED, stratify=y
)
X_w2v_tr, X_w2v_te, _, _ = train_test_split(
    X_w2v2, y, test_size=0.2, random_state=RNG_SEED, stratify=y
)

In [27]:
rf_mfcc = RandomForestClassifier(n_estimators=400, random_state=RNG_SEED, n_jobs=-1)
rf_mfcc.fit(X_mfcc_tr, y_tr)
pred_rf_mfcc = rf_mfcc.predict(X_mfcc_te)

print("RF MFCC  Accuracy:", accuracy_score(y_te, pred_rf_mfcc))
print(classification_report(y_te, pred_rf_mfcc))

RF MFCC  Accuracy: 0.6653061224489796
              precision    recall  f1-score   support

           0       0.50      0.05      0.09        82
           1       0.67      0.98      0.79       163

    accuracy                           0.67       245
   macro avg       0.59      0.51      0.44       245
weighted avg       0.61      0.67      0.56       245



In [29]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Define parameter grid for Random Forest
rf_params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

# Create and fit GridSearchCV
rf_grid = GridSearchCV(
    RandomForestClassifier(),
    rf_params,
    cv=5,
    n_jobs=-1,
    verbose=1
)
rf_grid.fit(X_mfcc_tr, y_tr)

print("Random Forest Best params:", rf_grid.best_params_)
print("Random Forest Best score:", rf_grid.best_score_)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits


KeyboardInterrupt: 

In [30]:
lr = LogisticRegression(max_iter=1000)
lr.fit(X_mfcc_tr, y_tr)
pred_lr = lr.predict(X_mfcc_te)

print("RF MFCC  Accuracy:", accuracy_score(y_te, pred_lr))
print(classification_report(y_te, pred_lr))

RF MFCC  Accuracy: 0.6
              precision    recall  f1-score   support

           0       0.40      0.38      0.39        82
           1       0.69      0.71      0.70       163

    accuracy                           0.60       245
   macro avg       0.55      0.54      0.55       245
weighted avg       0.60      0.60      0.60       245



In [31]:
svm_w2v = SVC(kernel="rbf", C=10, gamma="auto")
svm_w2v.fit(X_w2v_tr, y_tr)
pred_svm_w2v = svm_w2v.predict(X_w2v_te)

print("SVM wav2vec  Accuracy:", accuracy_score(y_te, pred_svm_w2v))
print(classification_report(y_te, pred_svm_w2v))

SVM wav2vec  Accuracy: 0.6653061224489796
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        82
           1       0.67      1.00      0.80       163

    accuracy                           0.67       245
   macro avg       0.33      0.50      0.40       245
weighted avg       0.44      0.67      0.53       245



In [None]:
from imblearn.over_sampling import SMOTE

# Apply on training sets ONLY (never on test set!)
smote = SMOTE(random_state=RNG_SEED)

X_w2v_tr_bal, y_tr_bal = smote.fit_resample(X_w2v_tr, y_tr)
X_mfcc_tr_bal, y_tr_bal_mfcc = smote.fit_resample(X_mfcc_tr, y_tr)

In [None]:
# Re-train SVM on Wav2Vec2 features
svm_w2v = SVC(kernel="rbf", C=10, gamma="auto")
svm_w2v.fit(X_w2v_tr_bal, y_tr_bal)
pred_svm_w2v = svm_w2v.predict(X_w2v_te)

print("SVM wav2vec + SMOTE Accuracy:", accuracy_score(y_te, pred_svm_w2v))
print(classification_report(y_te, pred_svm_w2v))

# Re-train RF on MFCC features
rf_mfcc = RandomForestClassifier(n_estimators=400, random_state=RNG_SEED, n_jobs=-1)
rf_mfcc.fit(X_mfcc_tr_bal, y_tr_bal_mfcc)
pred_rf_mfcc = rf_mfcc.predict(X_mfcc_te)

print("RF MFCC + SMOTE Accuracy:", accuracy_score(y_te, pred_rf_mfcc))
print(classification_report(y_te, pred_rf_mfcc))

In [36]:
def extract_mfcc_augmented(filepath, n_mfcc=40, max_len=120):
    y, sr = librosa.load(filepath, sr=None)
    if np.random.rand() < 0.5:  # 50% chance to apply noise
        noise = 0.005 * np.random.randn(len(y))
        y = y + noise
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    if mfcc.shape[1] < max_len:
        pad = max_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, ((0,0),(0,pad)), mode='constant')
    else:
        mfcc = mfcc[:, :max_len]
    return mfcc


In [37]:
df_tr, df_val = train_test_split(df, test_size=0.2, stratify=df.y, random_state=RNG_SEED)

In [9]:
class MFCC2DDataset(Dataset):
    def __init__(self, df, augment=False):
        self.filepaths = df.path.values
        self.labels = df.y.values
        self.augment = augment

    def __len__(self): return len(self.filepaths)

    def __getitem__(self, idx):
        mfcc = extract_mfcc_augmented(self.filepaths[idx]) if self.augment else extract_mfcc_augmented(self.filepaths[idx])
        mfcc = (mfcc - mfcc.mean()) / (mfcc.std() + 1e-6)
        return torch.tensor(mfcc).unsqueeze(0).float(), torch.tensor(self.labels[idx]).float()


class CNNMFCC(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(), nn.BatchNorm2d(32), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.BatchNorm2d(64), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.BatchNorm2d(128), nn.AdaptiveAvgPool2d((1,1)),
            nn.Flatten(), nn.Dropout(0.3),
            nn.Linear(128, 1)
        )
    def forward(self, x): return self.net(x).squeeze(1)

def train_cnn(df_train, df_val, epochs=EPOCHS_CNN):
    model = CNNMFCC().to(DEVICE)
    train_ds = MFCC2DDataset(df_train, augment=True)
    val_ds   = MFCC2DDataset(df_val, augment=False)
    train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE_CNN, shuffle=True)
    val_dl   = DataLoader(val_ds, batch_size=BATCH_SIZE_CNN)

    # Compute class weights
    pos_weight = torch.tensor([len(df_train[df_train.y == 0]) / len(df_train[df_train.y == 1])]).to(DEVICE)
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    opt = optim.Adam(model.parameters(), lr=1e-3)
    for ep in range(epochs):
        model.train()
        for xb, yb in train_dl:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            loss = loss_fn(model(xb), yb)
            opt.zero_grad(); loss.backward(); opt.step()
        # quick val
        model.eval(); preds, gts = [], []
        with torch.no_grad():
            for xb, yb in val_dl:
                preds += (torch.sigmoid(model(xb.to(DEVICE)))>0.5).cpu().tolist()
                gts   += yb.tolist()
        print(f"Epoch {ep+1}: val acc {accuracy_score(gts,preds):.3f}")
    return model

cnn_model = train_cnn(df_tr, df_val)   # uncomment to train


Epoch 1: val acc 0.531
Epoch 2: val acc 0.633
Epoch 3: val acc 0.604
Epoch 4: val acc 0.604
Epoch 5: val acc 0.690
Epoch 6: val acc 0.665
Epoch 7: val acc 0.629
Epoch 8: val acc 0.682
Epoch 9: val acc 0.653
Epoch 10: val acc 0.637


In [None]:
%pip install accelerate

In [None]:
pip install -i https://pypi.org/simple/ bitsandbytes

In [None]:
pip install transformers==4.30.0

In [None]:
%pip install datasets

In [38]:
from datasets import Dataset
data = pd.DataFrame({
    'file_path': paths,
    'labels': y
})
data=Dataset.from_pandas(data)

In [None]:
data

In [None]:
from transformers import Wav2Vec2ForSequenceClassification

model = Wav2Vec2ForSequenceClassification.from_pretrained(
    "facebook/wav2vec2-base-960h",
    num_labels=2
)
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model=model.to(DEVICE)

In [None]:
import torchaudio
def preprocess(dataset):
    waveform, sr = torchaudio.load(dataset['file_path'])
    if sr != 16000:
        resampler = torchaudio.transforms.Resample(sr, 16000)
        waveform = resampler(waveform)

    dataset['input_values'] = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values[0]
    return dataset

data=data.map(preprocess)

In [None]:
data = data.train_test_split(test_size=0.2)
data

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding
import os
data_collator = DataCollatorWithPadding(tokenizer=processor, padding=True)


training_args = TrainingArguments(
    output_dir="/content/wav2vec2-als-finetuned",
    eval_strategy="epoch",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    num_train_epochs=5,
    save_steps=10,
    logging_steps=10,
    learning_rate=1e-5,
    fp16=True,
    report_to="none"
)



trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=data["train"],
    eval_dataset=data["test"],
    tokenizer=processor,
    data_collator=data_collator
)

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"


trainer.train()


In [21]:
# LSTM for Wav2Vec
class Wav2Vec_LSTM(nn.Module):
    def __init__(self, input_dim=768):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, 256, bidirectional=True, batch_first=True)
        self.head = nn.Sequential(
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )
    
    def forward(self, x):
        x = x.unsqueeze(1)  # Add time dim
        lstm_out, _ = self.lstm(x)
        return self.head(lstm_out[:, -1, :]).squeeze(1)

# Simple MLP (for both)
class SimpleMLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )
    
    def forward(self, x):
        return self.net(x).squeeze(1)

In [33]:
from torch.utils.data import TensorDataset, DataLoader
import torch
from sklearn.preprocessing import StandardScaler
import numpy as np

# 1. Normalize Wav2Vec2 features (critical)
w2v_scaler = StandardScaler()
X_w2v_tr = w2v_scaler.fit_transform(X_w2v_tr)
X_w2v_te = w2v_scaler.transform(X_w2v_te)

# Convert numpy arrays to PyTorch tensors
X_mfcc_tr_t = torch.FloatTensor(X_mfcc_tr)
X_mfcc_te_t = torch.FloatTensor(X_mfcc_te)
X_w2v_tr_t = torch.FloatTensor(X_w2v_tr)
X_w2v_te_t = torch.FloatTensor(X_w2v_te)
y_tr_t = torch.FloatTensor(y_tr)
y_te_t = torch.FloatTensor(y_te)



# Create datasets
mfcc_train_ds = TensorDataset(X_mfcc_tr_t, y_tr_t)
mfcc_test_ds = TensorDataset(X_mfcc_te_t, y_te_t)
w2v_train_ds = TensorDataset(X_w2v_tr_t, y_tr_t)
w2v_test_ds = TensorDataset(X_w2v_te_t, y_te_t)

# Create dataloaders
batch_size = 64
mfcc_train_loader = DataLoader(mfcc_train_ds, batch_size=batch_size, shuffle=True)
mfcc_test_loader = DataLoader(mfcc_test_ds, batch_size=batch_size)
w2v_train_loader = DataLoader(w2v_train_ds, batch_size=batch_size, shuffle=True)
w2v_test_loader = DataLoader(w2v_test_ds, batch_size=batch_size)

In [34]:
from sklearn.metrics import accuracy_score, f1_score
import torch.optim as optim

def train_and_evaluate(model, train_loader, test_loader, feature_name):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.BCEWithLogitsLoss()
    
    # Training
    for epoch in range(15):
        model.train()
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
    
    # Evaluation
    model.eval()
    all_preds, all_true = [], []
    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(device), y.to(device)
            preds = torch.sigmoid(model(X)) > 0.5
            all_preds.extend(preds.cpu().numpy())
            all_true.extend(y.cpu().numpy())
    
    acc = accuracy_score(all_true, all_preds)
    f1 = f1_score(all_true, all_preds)
    print(f"{feature_name} - Test Acc: {acc:.4f}, F1: {f1:.4f}")
    return acc, f1

In [35]:
# MFCC Features
print("=== MFCC Features ===")
mlp_mfcc = SimpleMLP(input_dim=X_mfcc_tr.shape[1])
acc_mlp_mfcc, f1_mlp_mfcc = train_and_evaluate(mlp_mfcc, mfcc_train_loader, mfcc_test_loader, "MLP")

# Wav2Vec Features
print("\n=== Wav2Vec Features ===")
models = {
    "MLP": SimpleMLP(input_dim=X_w2v_tr.shape[1]),
    "LSTM": Wav2Vec_LSTM(input_dim=X_w2v_tr.shape[1]),
}

results = {}
for name, model in models.items():
    acc, f1 = train_and_evaluate(model, w2v_train_loader, w2v_test_loader, name)
    results[name] = (acc, f1)

=== MFCC Features ===
MLP - Test Acc: 0.6653, F1: 0.7588

=== Wav2Vec Features ===
MLP - Test Acc: 0.6245, F1: 0.7401
LSTM - Test Acc: 0.6041, F1: 0.7188


In [10]:
# Save BOTH the state_dict AND the model class info
torch.save({
    'model_state_dict': cnn_model.state_dict(),
    'model_class': CNNMFCC  # Store the actual class
}, 'als_model_safe.pt')

In [None]:
pip install gradio

In [40]:
import gradio as gr
import torch
import librosa
import numpy as np
from torch import nn

# 1. Define your model class (must match exactly)
class CNNMFCC(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(), nn.BatchNorm2d(32), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.BatchNorm2d(64), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.BatchNorm2d(128), nn.AdaptiveAvgPool2d((1,1)),
            nn.Flatten(), nn.Dropout(0.3),
            nn.Linear(128, 1)
        )
    def forward(self, x): return self.net(x).squeeze(1)

# 2. Safe loading function
def load_model_safely(path='als_model_safe.pt'):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    checkpoint = torch.load(path, map_location=device, weights_only=False)  # weights_only=False for trusted sources
    
    # Recreate model
    model = checkpoint['model_class']().to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    return model

# 3. Load the model
cnn_model = load_model_safely()

def extract_mfcc(filepath, n_mfcc=40, max_len=120):
    y, sr = librosa.load(filepath, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    if mfcc.shape[1] < max_len:
        pad = max_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, ((0,0),(0,pad)), mode='constant')
    return (mfcc - mfcc.mean()) / (mfcc.std() + 1e-6)

def predict_als(audio_path):
    mfcc = extract_mfcc(audio_path)
    inputs = torch.tensor(mfcc).unsqueeze(0).unsqueeze(0).float().to(DEVICE)
    with torch.no_grad():
        prob = torch.sigmoid(cnn_model(inputs)).item()
    return "ALS Detected" if prob > 0.5 else "Healthy Control"

gr.Interface(
    fn=predict_als,
    inputs=gr.Audio(type="filepath", label="Record or Upload Audio"),
    outputs="text",
    title="ALS Voice Classifier",
    description="Speak or upload a short audio sample to check for ALS indicators.",
    live=True
).launch()

* Running on local URL:  http://127.0.0.1:7866
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://7d38290b77a9cb8d9c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


