In [3]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Function to extract features from audio files
def extract_features(file_path):
    """Summarise one audio file as a single 1-D feature vector.

    The file is loaded with librosa's default settings, then three
    spectral representations are computed: 13 MFCCs, a chromagram and a
    mel spectrogram (the number of chroma and mel bands is left at
    librosa's defaults). Each representation is averaged along axis 1
    and the three resulting vectors are joined end to end in the order
    MFCC, chroma, mel.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A 1-D numpy array holding the concatenated per-band means.
    """
    signal, sample_rate = librosa.load(file_path)

    mfcc_frames = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    chroma_frames = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
    mel_frames = librosa.feature.melspectrogram(y=signal, sr=sample_rate)

    # Collapse axis 1 of every representation so recordings of any
    # duration yield a vector of the same length.
    band_means = [
        frames.mean(axis=1)
        for frames in (mfcc_frames, chroma_frames, mel_frames)
    ]
    return np.concatenate(band_means)

# Function to load data from a folder
def load_data(folder_path, label, feature_fn=None):
    """Build a feature matrix from the .wav files one level below a folder.

    Every immediate sub-directory of ``folder_path`` is scanned for files
    whose name ends in ``.wav`` (matched case-insensitively, so ``.WAV``
    is accepted too). Entries directly inside ``folder_path`` that are not
    directories are ignored. Sub-directories and files are visited in
    sorted name order: ``os.listdir`` gives no ordering guarantee, and a
    seeded train/test split is only reproducible when the row order is.

    Args:
        folder_path: Directory whose sub-directories hold the audio files.
        label: Value recorded once per loaded file in the label array.
        feature_fn: Optional callable mapping a file path to a feature
            vector. Defaults to the module-level ``extract_features``.

    Returns:
        A ``(features, labels)`` tuple of numpy arrays with one entry per
        loaded file. When no file matches, both arrays are empty with
        shape ``(0,)``.
    """
    if feature_fn is None:
        # Resolved at call time so a later redefinition of
        # extract_features in the notebook is picked up.
        feature_fn = extract_features

    features = []
    labels = []

    for subfolder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subfolder)
        if not os.path.isdir(subfolder_path):
            continue
        for file_name in sorted(os.listdir(subfolder_path)):
            if not file_name.lower().endswith('.wav'):
                continue
            file_path = os.path.join(subfolder_path, file_name)
            features.append(feature_fn(file_path))
            labels.append(label)

    return np.array(features), np.array(labels)

# Input folders
read_text_folder = r"C:\Users\Lenovo\Downloads\PD AUDIO DATA\26-29_09_2017_KCL\ReadText"
spontaneous_dialogue_folder = r"C:\Users\Lenovo\Downloads\PD AUDIO DATA\26-29_09_2017_KCL\SpontaneousDialogue"

# Load data from ReadText folder
read_text_features, read_text_labels = load_data(read_text_folder, label='ReadText')

# Load data from SpontaneousDialogue folder
spontaneous_dialogue_features, spontaneous_dialogue_labels = load_data(spontaneous_dialogue_folder, label='SpontaneousDialogue')

# Fail early with a readable message: an empty result would otherwise
# surface further down as a shape-mismatch error inside np.vstack.
for _folder, _loaded in (
    (read_text_folder, read_text_features),
    (spontaneous_dialogue_folder, spontaneous_dialogue_features),
):
    if len(_loaded) == 0:
        raise ValueError(f"No .wav files were loaded from {_folder!r}")

# Combine features and labels for both datasets
all_features = np.vstack((read_text_features, spontaneous_dialogue_features))
all_labels = np.hstack((read_text_labels, spontaneous_dialogue_labels))

# Model Selection (Random Forest Classifier)
# The forest is seeded as well as the split; otherwise the reported
# accuracy changes from run to run even though the split is fixed.
model = RandomForestClassifier(random_state=42)

# Train-Test Split
# Stratify so both recording types keep the same proportion in the
# training and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    all_features,
    all_labels,
    test_size=0.2,
    random_state=42,
    stratify=all_labels,
)

# Training
model.fit(X_train, y_train)

# Evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


KeyboardInterrupt: 

In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
import numpy as np
import os
import librosa

# Function to extract features from audio files
def extract_features(file_path):
    """Turn one audio file into a fixed-length 1-D feature vector.

    The recording is read with librosa's default loading settings. Three
    representations are derived from it: 13 MFCCs, a chromagram and a mel
    spectrogram (band counts for the latter two are librosa's defaults).
    Each is reduced to its mean along axis 1 and the three means are
    concatenated in the order MFCC, chroma, mel.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A 1-D numpy array of concatenated per-band means.
    """
    waveform, rate = librosa.load(file_path)

    representations = (
        librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=13),
        librosa.feature.chroma_stft(y=waveform, sr=rate),
        librosa.feature.melspectrogram(y=waveform, sr=rate),
    )

    # Averaging along axis 1 makes the output length independent of how
    # long the recording is.
    averaged = [matrix.mean(axis=1) for matrix in representations]
    return np.concatenate(averaged)

# Function to load data from a folder
def load_data(folder_path, label, feature_fn=None):
    """Build a feature matrix from the .wav files directly inside a folder.

    Only entries of ``folder_path`` itself are considered; sub-directories
    are not descended into. A name qualifies when it ends in ``.wav``,
    matched case-insensitively so ``.WAV`` is accepted too. Files are
    visited in sorted name order: ``os.listdir`` gives no ordering
    guarantee, and a seeded train/test split is only reproducible when
    the row order is.

    Args:
        folder_path: Directory containing the audio files.
        label: Value recorded once per loaded file in the label array.
        feature_fn: Optional callable mapping a file path to a feature
            vector. Defaults to the module-level ``extract_features``.

    Returns:
        A ``(features, labels)`` tuple of numpy arrays with one entry per
        loaded file. When no file matches, both arrays are empty with
        shape ``(0,)``.
    """
    if feature_fn is None:
        # Resolved at call time so a later redefinition of
        # extract_features in the notebook is picked up.
        feature_fn = extract_features

    features = []
    labels = []

    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith('.wav'):
            continue
        file_path = os.path.join(folder_path, file_name)
        features.append(feature_fn(file_path))
        labels.append(label)

    return np.array(features), np.array(labels)

# Input folders
pd_folder = r"C:\Users\Lenovo\Downloads\PD AUDIO DATA\26-29_09_2017_KCL\ReadText\PD"
healthy_controls_folder = r"C:\Users\Lenovo\Downloads\PD AUDIO DATA\26-29_09_2017_KCL\ReadText\HC"

# Load data from PD patients
pd_features, pd_labels = load_data(pd_folder, label=1)

# Load data from healthy controls
healthy_controls_features, healthy_controls_labels = load_data(healthy_controls_folder, label=0)

# Fail early with a readable message: an empty result would otherwise
# surface further down as a shape-mismatch error inside np.vstack.
for _folder, _loaded in (
    (pd_folder, pd_features),
    (healthy_controls_folder, healthy_controls_features),
):
    if len(_loaded) == 0:
        raise ValueError(f"No .wav files were loaded from {_folder!r}")

# Combine features and labels for both classes
all_features = np.vstack((pd_features, healthy_controls_features))
all_labels = np.hstack((pd_labels, healthy_controls_labels))

# Train-test split
# Stratify so the PD / healthy-control ratio is the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    all_features,
    all_labels,
    test_size=0.2,
    random_state=42,
    stratify=all_labels,
)

# Standardise every feature column using statistics of the training
# partition only, so nothing about the test set leaks into preprocessing.
# The vector mixes MFCC, chroma and mel-power means, which are presumably
# on very different numeric scales (verify on the actual data); a dense
# network trains poorly on such unscaled input.
feature_mean = X_train.mean(axis=0)
feature_std = X_train.std(axis=0)
feature_std[feature_std == 0] = 1.0  # constant column: avoid division by zero
X_train_scaled = (X_train - feature_mean) / feature_std
X_test_scaled = (X_test - feature_mean) / feature_std

# Seed TensorFlow before the model is built so weight initialisation and
# dropout masks are repeatable between runs.
tf.random.set_seed(42)

# Model definition
model = models.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
# NOTE(review): the test partition doubles as validation data here. No
# callback acts on it, so it only drives the per-epoch log, but a separate
# validation split would be cleaner.
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_data=(X_test_scaled, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print(f'Accuracy on test set: {accuracy * 100:.2f}%')


ModuleNotFoundError: No module named 'tensorflow.python'