In [3]:
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
import librosa.display

# Paths
INPUT_ROOT = "PARK_DATA"  # contains parkinson/ and healthy/
OUTPUT_ROOT = "dataset_spectrograms"
TARGET_DURATION = 5.0   # seconds
SR = 22050              # sampling rate for spectrogram
N_MELS = 128
IMG_SIZE = (224, 224)

os.makedirs(OUTPUT_ROOT, exist_ok=True)

def save_melspec(y, sr, out_path):
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=N_MELS)
    mel_db = librosa.power_to_db(mel, ref=np.max)

    plt.figure(figsize=(3, 3))
    librosa.display.specshow(mel_db, sr=sr, cmap="magma")
    plt.axis('off')
    plt.tight_layout()
    plt.savefig(out_path, dpi=100, bbox_inches='tight', pad_inches=0)
    plt.close()

for label in ["PD_segments", "HC_segments"]:
    input_dir = os.path.join(INPUT_ROOT, label)
    output_dir = os.path.join(OUTPUT_ROOT, label)
    os.makedirs(output_dir, exist_ok=True)

    for fname in os.listdir(input_dir):
        if not fname.endswith(".wav"):
            continue

        fpath = os.path.join(input_dir, fname)
        y, _ = librosa.load(fpath, sr=SR)

        # segment into 5-second windows
        window_len = int(TARGET_DURATION * SR)
        total_len = len(y)
        num_segments = total_len // window_len

        for i in range(num_segments):
            start = i * window_len
            end = start + window_len
            segment = y[start:end]

            # Skip segments shorter than 5 seconds
            if len(segment) < window_len:
                continue

            out_name = fname.replace(".wav", f"_{i}.png")
            out_path = os.path.join(output_dir, out_name)

            save_melspec(segment, SR, out_path)

print("✨ Spectrogram generation complete!")


✨ Spectrogram generation complete!


### DATA PREPROCESSING

In [4]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam

# Paths
DATASET_DIR = "dataset_spectrograms"  # contains Healthy_Control/ and Parkinson/
BATCH_SIZE = 16
IMG_SIZE = (224, 224)
EPOCHS = 10   # increase later if needed
LR = 1e-4

# Image data generator with augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
)

train_generator = train_datagen.flow_from_directory(
    DATASET_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training',
    shuffle=True
)

val_generator = train_datagen.flow_from_directory(
    DATASET_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',
    shuffle=False
)





Found 516 images belonging to 2 classes.
Found 128 images belonging to 2 classes.


### TRAINING

In [5]:
# Load pretrained MobileNetV2 without top
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(*IMG_SIZE, 3))

# Freeze the base model
base_model.trainable = False

# Add custom classification head
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.2)(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Compile
model.compile(optimizer=Adam(LR), loss='binary_crossentropy', metrics=['accuracy'])

# Train
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step 


  self._warn_if_super_not_called()


Epoch 1/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 926ms/step - accuracy: 0.5795 - loss: 0.7211 - val_accuracy: 0.5703 - val_loss: 0.7037
Epoch 2/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 735ms/step - accuracy: 0.6202 - loss: 0.6563 - val_accuracy: 0.6016 - val_loss: 0.6473
Epoch 3/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 766ms/step - accuracy: 0.6550 - loss: 0.6481 - val_accuracy: 0.5547 - val_loss: 0.6559
Epoch 4/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 789ms/step - accuracy: 0.6977 - loss: 0.5720 - val_accuracy: 0.6250 - val_loss: 0.6320
Epoch 5/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 708ms/step - accuracy: 0.6977 - loss: 0.5722 - val_accuracy: 0.6406 - val_loss: 0.6230
Epoch 6/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 855ms/step - accuracy: 0.7461 - loss: 0.5393 - val_accuracy: 0.6641 - val_loss: 0.6359
Epoch 7/10
[1m33/33[

### FIE TUNING

In [6]:
# Unfreeze some layers for fine-tuning
base_model.trainable = True
for layer in base_model.layers[:-50]:  # freeze early layers, fine-tune later layers
    layer.trainable = False

# Recompile for fine-tuning
model.compile(optimizer=Adam(LR/10), loss='binary_crossentropy', metrics=['accuracy'])

# Fine-tune
history_ft = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS
)



Epoch 1/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 1s/step - accuracy: 0.4593 - loss: 1.2713 - val_accuracy: 0.6094 - val_loss: 0.6664
Epoch 2/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 1s/step - accuracy: 0.5097 - loss: 0.9063 - val_accuracy: 0.5859 - val_loss: 0.6899
Epoch 3/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 1s/step - accuracy: 0.6298 - loss: 0.6589 - val_accuracy: 0.5625 - val_loss: 0.6841
Epoch 4/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 1s/step - accuracy: 0.6996 - loss: 0.5692 - val_accuracy: 0.6250 - val_loss: 0.7081
Epoch 5/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 1s/step - accuracy: 0.6783 - loss: 0.5621 - val_accuracy: 0.6016 - val_loss: 0.7185
Epoch 6/10
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - accuracy: 0.7267 - loss: 0.5422 - val_accuracy: 0.5859 - val_loss: 0.7757
Epoch 7/10
[1m33/33[0m [32m━━━━━━━━━━

### YAMNET INITIALIZATION

In [8]:
!pip install tensorflow tensorflow-hub



  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
label-studio 1.21.0 requires numpy<2.0.0,>=1.26.4, but you have numpy 2.3.5 which is incompatible.
gradio 5.49.1 requires pillow<12.0,>=8.0, but you have pillow 12.0.0 which is incompatible.
gradio 5.49.1 requires pydantic<2.12,>=2.0, but you have pydantic 2.12.4 which is incompatible.
numba 0.61.0 requires numpy<2.2,>=1.24, but you have numpy 2.3.5 which is incompatible.
streamlit 1.45.1 requires cachetools<6,>=4.0, but you have cachetools 6.2.2 which is incompatible.
streamlit 1.45.1 requires packaging<25,>=20, but you have packaging 25.0 which is incompatible.
streamlit 1.45.1 requires pillow<12,>=7.1.0, but you have pillow 12.0.0 which is incompatible.



Collecting tensorflow-hub
  Downloading tensorflow_hub-0.16.1-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting numpy>=1.26.0 (from tensorflow)
  Downloading numpy-2.3.5-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting tf-keras>=2.14.1 (from tensorflow-hub)
  Downloading tf_keras-2.20.1-py3-none-any.whl.metadata (1.8 kB)
Collecting tensorflow
  Downloading tensorflow-2.20.0-cp313-cp313-win_amd64.whl.metadata (4.6 kB)
Downloading tensorflow_hub-0.16.1-py2.py3-none-any.whl (30 kB)
Downloading numpy-2.3.5-cp313-cp313-win_amd64.whl (12.8 MB)
   ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
    --------------------------------------- 0.3/12.8 MB ? eta -:--:--
   --- ------------------------------------ 1.0/12.8 MB 3.7 MB/s eta 0:00:04
   ---- ----------------------------------- 1.6/12.8 MB 3.4 MB/s eta 0:00:04
   ------- -------------------------------- 2.4/12.8 MB 3.6 MB/s eta 0:00:03
   --------- ------------------------------ 3.1/12.8 MB 3.7 MB/s eta 0:00:03
  

In [9]:
import tensorflow as tf
import tensorflow_hub as hub
import librosa
import numpy as np
import os

# Paths
  # Healthy_Control/ and Parkinson/
SR = 16000  # YAMNet sample rate
BATCH_SIZE = 16
EPOCHS = 10

# Load YAMNet from TF Hub
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

# Helper: extract embeddings from YAMNet
def extract_embedding(waveform):
    """Returns embeddings for the audio waveform using YAMNet"""
    scores, embeddings, spectrogram = yamnet_model(waveform)
    return embeddings  # shape: (num_patches, 1024)




  from pkg_resources import parse_version















### DATA LOADING

In [12]:
# Load your audio dataset and labels
DATASET_DIR = "PARK_DATA"
def load_dataset(dataset_dir):
    X = []
    y = []
    label_map = {'HC_segments': 0, 'PD_segments': 1}
    for label_name, label_id in label_map.items():
        folder = os.path.join(dataset_dir, label_name)
        for fname in os.listdir(folder):
            if not fname.endswith('.wav'):
                continue
            fpath = os.path.join(folder, fname)
            waveform, _ = librosa.load(fpath, sr=SR, mono=True)
            embedding = extract_embedding(waveform)
            X.append(embedding)
            y.append(np.full((embedding.shape[0],), label_id))  # one label per frame
    X = np.concatenate(X, axis=0)
    y = np.concatenate(y, axis=0)
    return X, y

print("Extracting embeddings...")
X, y = load_dataset(DATASET_DIR)
print("Embedding shape:", X.shape)

Extracting embeddings...
Embedding shape: (6956, 1024)


### MODEL TRAINING

In [14]:

# Build a simple classifier on top
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1024,)),  # YAMNet embeddings
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train
model.fit(X, y, batch_size=BATCH_SIZE, epochs=30, validation_split=0.2)

# Save for mobile deployment
model.save("yamnet_parkinsons_classifier.h5")


Epoch 1/20
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.7761 - loss: 0.5182 - val_accuracy: 0.2730 - val_loss: 1.4785
Epoch 2/20
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.8145 - loss: 0.4376 - val_accuracy: 0.3455 - val_loss: 1.4671
Epoch 3/20
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.8242 - loss: 0.4134 - val_accuracy: 0.3585 - val_loss: 1.3068
Epoch 4/20
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.8280 - loss: 0.3884 - val_accuracy: 0.3182 - val_loss: 1.3881
Epoch 5/20
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8427 - loss: 0.3601 - val_accuracy: 0.4684 - val_loss: 1.0886
Epoch 6/20
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.8456 - loss: 0.3547 - val_accuracy: 0.4181 - val_loss: 1.2544
Epoch 7/20
[1m348/348[0m 



In [15]:
import os
import random
import numpy as np
import librosa
from sklearn.metrics import accuracy_score

# ----------------------------
# CONFIG
# ----------------------------
DATASET_DIR = "PARK_DATA"  # contains Healthy_Control/ and Parkinson/
SR = 16000  # YAMNet sample rate
BATCH_SIZE = 16
EPOCHS = 10
TEST_SPLIT = 0.1
VAL_SPLIT = 0.1
RANDOM_SEED = 42

# ----------------------------
# LOAD YAMNET
# ----------------------------
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

def extract_embedding(waveform):
    """Return embeddings from YAMNet for a waveform"""
    scores, embeddings, spectrogram = yamnet_model(waveform)
    return embeddings.numpy()  # shape: (num_patches, 1024)

# ----------------------------
# LOAD AUDIO FILES & SPLIT
# ----------------------------
label_map = {'HC_segments': 0, 'PD_segments': 1}

# Collect files
all_files = []
for label_name, label_id in label_map.items():
    folder = os.path.join(DATASET_DIR, label_name)
    for fname in os.listdir(folder):
        if fname.endswith('.wav'):
            all_files.append((os.path.join(folder, fname), label_id))

# Shuffle
random.seed(RANDOM_SEED)
random.shuffle(all_files)

# Split
num_total = len(all_files)
num_test = int(TEST_SPLIT * num_total)
num_val = int(VAL_SPLIT * num_total)
num_train = num_total - num_val - num_test

train_files = all_files[:num_train]
val_files = all_files[num_train:num_train+num_val]
test_files = all_files[num_train+num_val:]

print(f"Train: {len(train_files)}, Val: {len(val_files)}, Test: {len(test_files)}")

# ----------------------------
# HELPER: Prepare data per file
# ----------------------------
def prepare_data(file_list):
    X = []
    y = []
    for fpath, label_id in file_list:
        waveform, _ = librosa.load(fpath, sr=SR, mono=True)
        embedding = extract_embedding(waveform)  # (num_patches, 1024)
        X.append(embedding)
        y.append(np.full((embedding.shape[0],), label_id))
    X = np.concatenate(X, axis=0)
    y = np.concatenate(y, axis=0)
    return X, y

# ----------------------------
X_train, y_train = prepare_data(train_files)
X_val, y_val = prepare_data(val_files)

# ----------------------------
# SAVE MODEL
# ----------------------------



Train: 44, Val: 5, Test: 5
Epoch 1/10
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.7175 - loss: 0.5826 - val_accuracy: 0.5335 - val_loss: 0.8082
Epoch 2/10
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.7702 - loss: 0.4897 - val_accuracy: 0.5296 - val_loss: 0.8508
Epoch 3/10
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7886 - loss: 0.4618 - val_accuracy: 0.5112 - val_loss: 0.9076
Epoch 4/10
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.7931 - loss: 0.4421 - val_accuracy: 0.4060 - val_loss: 1.1606
Epoch 5/10
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8094 - loss: 0.4165 - val_accuracy: 0.5821 - val_loss: 0.7904
Epoch 6/10
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8163 - loss: 0.4008 - val_accuracy: 0.6347 - val_loss: 0.7060
E

In [16]:
# TRAINING
# ----------------------------


model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1024,)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=BATCH_SIZE,
    epochs=30
)

Epoch 1/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.7231 - loss: 0.5761 - val_accuracy: 0.5913 - val_loss: 0.7113
Epoch 2/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7663 - loss: 0.4973 - val_accuracy: 0.5138 - val_loss: 0.8846
Epoch 3/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7839 - loss: 0.4545 - val_accuracy: 0.5821 - val_loss: 0.7591
Epoch 4/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.7956 - loss: 0.4353 - val_accuracy: 0.3876 - val_loss: 1.1575
Epoch 5/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8078 - loss: 0.4169 - val_accuracy: 0.5230 - val_loss: 0.8827
Epoch 6/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8191 - loss: 0.3946 - val_accuracy: 0.5558 - val_loss: 0.9115
Epoch 7/30
[1m348/348[0m 

<keras.src.callbacks.history.History at 0x26b43487770>

In [17]:

# ----------------------------
# EVALUATION PER AUDIO FILE
# ----------------------------
def evaluate_per_file(model, file_list):
    y_true = []
    y_pred = []
    for fpath, label_id in file_list:
        waveform, _ = librosa.load(fpath, sr=SR, mono=True)
        embedding = extract_embedding(waveform)
        preds = model.predict(embedding)
        file_pred = float(np.mean(preds))  # aggregate over patches
        y_pred.append(int(file_pred >= 0.5))
        y_true.append(label_id)
    acc = accuracy_score(y_true, y_pred)
    return acc

test_acc = evaluate_per_file(model, test_files)
val_acc = evaluate_per_file(model, val_files)

print(f"✅ File-level Validation Accuracy: {val_acc:.4f}")
print(f"✅ File-level Test Accuracy: {test_acc:.4f}")


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
✅ File-level Validation Accuracy: 0.8000
✅ File-level Test Accuracy: 0.8000


In [19]:
# ----------------------------
DATASET_DIR = "PARK_DATA"  # contains Healthy_Control/ and Parkinson/
SR = 16000  # YAMNet sample rate
BATCH_SIZE = 16
EPOCHS = 30
TEST_SPLIT = 0.1
VAL_SPLIT = 0.1
RANDOM_SEED = 42

# ----------------------------
# LOAD YAMNET
# ----------------------------

def extract_embedding(waveform):
    """Return embeddings from YAMNet for a waveform"""
    scores, embeddings, spectrogram = yamnet_model(waveform)
    return embeddings.numpy()  # shape: (num_patches, 1024)

# ----------------------------
# LOAD AUDIO FILES & SPLIT
# ----------------------------
label_map = {'HC_segments': 0, 'PD_segments': 1}

# Collect files
all_files = []
for label_name, label_id in label_map.items():
    folder = os.path.join(DATASET_DIR, label_name)
    for fname in os.listdir(folder):
        if fname.endswith('.wav'):
            all_files.append((os.path.join(folder, fname), label_id))

# Shuffle
random.seed(RANDOM_SEED)
random.shuffle(all_files)

# Split
num_total = len(all_files)
num_test = int(TEST_SPLIT * num_total)
num_val = int(VAL_SPLIT * num_total)
num_train = num_total - num_val - num_test

train_files = all_files[:num_train]
val_files = all_files[num_train:num_train+num_val]
test_files = all_files[num_train+num_val:]

print(f"Train: {len(train_files)}, Val: {len(val_files)}, Test: {len(test_files)}")

# ----------------------------
# HELPER: Prepare embeddings per file
# ----------------------------
def prepare_embeddings(file_list):
    X_list, y_list, file_lengths = [], [], []
    for fpath, label_id in file_list:
        waveform, _ = librosa.load(fpath, sr=SR, mono=True)
        embeddings = extract_embedding(waveform)  # (num_patches, 1024)
        X_list.append(embeddings)
        y_list.append(np.full((embeddings.shape[0],), label_id))
        file_lengths.append(embeddings.shape[0])
    X = np.concatenate(X_list, axis=0)
    y = np.concatenate(y_list, axis=0)
    return X, y, file_lengths
X_train, y_train, train_lengths = prepare_embeddings(train_files)
X_val, y_val, val_lengths = prepare_embeddings(val_files)
# ----------------------------
# BUILD MODEL
# ----------------------------

# ----------------------------



Train: 44, Val: 5, Test: 5


In [20]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1024,)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# ----------------------------
# TRAIN
# ----------------------------


history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS
)


Epoch 1/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.7177 - loss: 0.5899 - val_accuracy: 0.6255 - val_loss: 0.7361
Epoch 2/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7648 - loss: 0.4972 - val_accuracy: 0.5072 - val_loss: 0.8426
Epoch 3/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7825 - loss: 0.4588 - val_accuracy: 0.5204 - val_loss: 0.8041
Epoch 4/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.7923 - loss: 0.4403 - val_accuracy: 0.5913 - val_loss: 0.7421
Epoch 5/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.8064 - loss: 0.4249 - val_accuracy: 0.6018 - val_loss: 0.7281
Epoch 6/30
[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.8164 - loss: 0.4019 - val_accuracy: 0.5401 - val_loss: 0.8518
Epoch 7/30
[1m348/348[0m 

In [21]:
# EVALUATE PER AUDIO FILE
# ----------------------------
def evaluate_per_file(model, file_list):
    y_true, y_pred = [], []
    for fpath, label_id in file_list:
        waveform, _ = librosa.load(fpath, sr=SR, mono=True)
        embeddings = extract_embedding(waveform)
        preds = model.predict(embeddings)
        file_pred = float(np.mean(preds))  # aggregate predictions per file
        y_pred.append(int(file_pred >= 0.5))
        y_true.append(label_id)
    acc = accuracy_score(y_true, y_pred)
    return acc

train_acc_file = evaluate_per_file(model, train_files)
val_acc_file = evaluate_per_file(model, val_files)
test_acc_file = evaluate_per_file(model, test_files)

print(f"✅ File-level Train Accuracy: {train_acc_file:.4f}")
print(f"✅ File-level Validation Accuracy: {val_acc_file:.4f}")
print(f"✅ File-level Test Accuracy: {test_acc_file:.4f}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10

In [22]:
model.save("yamnet_parkinsons_classifier.h5")
print("Model saved! Ready for TFLite conversion.")



Model saved! Ready for TFLite conversion.


In [23]:
model = tf.keras.models.load_model("yamnet_parkinsons_classifier.h5")

# Convert to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TFLite model
with open("yamnet_parkinsons_classifier.tflite", "wb") as f:
    f.write(tflite_model)

print("✅ TFLite model saved (no quantization).")



INFO:tensorflow:Assets written to: C:\Users\Felix\AppData\Local\Temp\tmps2zawer6\assets


INFO:tensorflow:Assets written to: C:\Users\Felix\AppData\Local\Temp\tmps2zawer6\assets


Saved artifact at 'C:\Users\Felix\AppData\Local\Temp\tmps2zawer6'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 1024), dtype=tf.float32, name='input_layer_5')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  2658929628304: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2658929636944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2658929630992: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2658929627728: TensorSpec(shape=(), dtype=tf.resource, name=None)
✅ TFLite model saved (no quantization).


In [24]:
import tensorflow as tf
import tensorflow_hub as hub

# Load YAMNet
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')

# Function to get embeddings from waveform
def yamnet_embeddings(waveform):
    scores, embeddings, spectrogram = yamnet_model(waveform)
    return embeddings

# Create a Keras model for TFLite conversion
class YAMNetEmbeddingModel(tf.keras.Model):
    def __init__(self, yamnet):
        super(YAMNetEmbeddingModel, self).__init__()
        self.yamnet = yamnet

    @tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.float32)])
    def call(self, waveform):
        _, embeddings, _ = self.yamnet(waveform)
        return embeddings

# Instantiate
model = YAMNetEmbeddingModel(yamnet_model)

# Convert to TFLite
converter = tf.lite.TFLiteConverter.from_concrete_functions([model.call.get_concrete_function()])
converter.optimizations = [tf.lite.Optimize.DEFAULT]  # Optional: optimize size/performance
tflite_model = converter.convert()

# Save TFLite model
with open('yamnet_embeddings.tflite', 'wb') as f:
    f.write(tflite_model)

print("✅ YAMNet embeddings TFLite model saved!")




✅ YAMNet embeddings TFLite model saved!
