In [None]:
import os
import librosa
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import collections

# Build a fixed-length feature vector (MFCC, Chroma, Mel Spectrogram, Spectral Contrast)
def extract_features(file_path, max_pad_length=100):
    """Load an audio file and return a 1-D feature vector of fixed length.

    The file is loaded at 22050 Hz. Four librosa feature matrices are
    computed (13 MFCCs, chroma, mel spectrogram, spectral contrast), each is
    flattened, and the results are joined in that order. The joined vector is
    then zero-padded at the end or cut down so that it holds exactly
    ``max_pad_length`` values.

    Args:
        file_path: Path of the audio file to load.
        max_pad_length: Number of values in the returned vector.

    Returns:
        A 1-D NumPy array with ``max_pad_length`` entries.
    """
    signal, sample_rate = librosa.load(file_path, sr=22050)

    feature_blocks = [
        librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13),
        librosa.feature.chroma_stft(y=signal, sr=sample_rate),
        librosa.feature.melspectrogram(y=signal, sr=sample_rate),
        librosa.feature.spectral_contrast(y=signal, sr=sample_rate),
    ]

    # Flatten every matrix and join them into a single vector (MFCC values first).
    flat = np.concatenate([block.flatten() for block in feature_blocks])

    missing = max_pad_length - flat.shape[0]
    if missing <= 0:
        # Truncation keeps only the leading entries of the joined vector.
        # NOTE(review): with the default of 100 this is likely to keep MFCC
        # values only and drop the chroma/mel/contrast part — confirm intended.
        return flat[:max_pad_length]

    # Too short: append zeros until the target length is reached.
    return np.pad(flat, (0, missing), mode='constant')


# One source folder per class; the dictionary key is used as the class label.
data_dirs = {
    "healthy": r"E:\dverse\singam\healthy\healthy_in",  # Update with correct paths
    "parkinson": r"E:\dverse\singam\parkinson\parkinson_in"
}

# Collect one feature vector and one label for every file that can be processed.
data, labels = [], []

for label, directory in data_dirs.items():
    for file in os.listdir(directory):
        file_path = os.path.join(directory, file)
        try:
            feature = extract_features(file_path)
        except Exception as e:
            # Best effort: report the file that failed and continue with the rest.
            print(f"Error processing {file}: {e}")
        else:
            data.append(feature)
            labels.append(label)

# Turn the collected lists into numpy arrays.
data = np.asarray(data)
labels = np.asarray(labels)

# Report how many samples were loaded per class.
print("Dataset Distribution:", collections.Counter(labels))

# Encode labels
encoder = LabelEncoder()
labels = encoder.fit_transform(labels)  # 0 = Healthy, 1 = Parkinson

# Split dataset (80% training, 20% testing) BEFORE scaling, so that the
# held-out samples cannot influence the normalization statistics.
# stratify keeps the class ratio the same in both partitions, which matters
# for a dataset this small.
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42, stratify=labels
)

# Normalize features: fit the scaler on the training partition only, then
# apply the same transformation to the test partition. `scaler` is reused
# later to transform new samples at prediction time.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define an improved neural network model
# The input size is declared with an explicit Input object as the first
# element; passing `input_shape=` to the first Dense layer makes Keras emit a
# warning for Sequential models.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(data.shape[1],)),

    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; the held-out split is passed as validation data so its
# loss and accuracy are reported after every epoch.
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

# Function to predict new audio samples
def predict_audio(file_path):
    """Classify one audio file with the trained model.

    The file is converted to a feature vector with ``extract_features``,
    scaled with the module-level ``scaler`` and passed to the module-level
    ``model``.

    Args:
        file_path: Path of the audio file to classify.

    Returns:
        ``"Parkinson Present"`` when the predicted probability is above 0.5,
        otherwise ``"Healthy"``.
    """
    # Shape the single feature vector as a batch containing one row.
    feature = extract_features(file_path).reshape(1, -1)
    feature = scaler.transform(feature)
    prediction = model.predict(feature)

    # One input row and a single sigmoid output unit give a (1, 1) result;
    # take the scalar explicitly instead of relying on the truth value of an array.
    probability = float(prediction[0][0])
    return "Parkinson Present" if probability > 0.5 else "Healthy"

# Demo: classify one recording from the Parkinson folder and print the verdict
new_audio = r"E:\dverse\singam\parkinson\parkinson_in\AH_264Z_593C20CD-0A54-4177-B031-26EE147080A3.wav"
print(predict_audio(file_path=new_audio))


Dataset Distribution: Counter({'parkinson': 41, 'healthy': 40})
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 222ms/step - accuracy: 0.4271 - loss: 0.9672 - val_accuracy: 0.5294 - val_loss: 0.6849
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.5521 - loss: 0.6864 - val_accuracy: 0.7059 - val_loss: 0.6167
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.7083 - loss: 0.5832 - val_accuracy: 0.7059 - val_loss: 0.5802
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.7812 - loss: 0.4421 - val_accuracy: 0.7059 - val_loss: 0.5564
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.8021 - loss: 0.3876 - val_accuracy: 0.7647 - val_loss: 0.5378
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.8021 - loss: 0.4145 - val_accuracy: 0.7647 - val_loss: 0.5272
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[