In [None]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization, Conv1D, MaxPooling1D # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore
from tensorflow.keras.losses import BinaryCrossentropy # type: ignore
from tensorflow.keras.metrics import BinaryAccuracy # type: ignore
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau # type: ignore
from sincnet_tensorflow import SincConv1D, LayerNorm
import random
import tensorflow_hub as hub 

# Seed Python's `random`, NumPy and TensorFlow with one shared value so that
# stochastic steps (augmentation masks, weight initialisation) repeat across runs.
seed = 42
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
def augment_audio(audio_data, sr):
    """Return a time-stretched copy of ``audio_data`` with two short silent gaps.

    The signal is stretched with ``rate=0.8`` (slowed down), then two random
    spans of samples are set to zero: a 50-sample span anywhere in the signal
    and a 10-sample span starting in its first half. Both masks act on
    time-domain samples. Pitch shifting and resampling are not applied.

    Args:
        audio_data: 1-D waveform array as returned by ``librosa.load``.
        sr: Sampling rate of ``audio_data``. Currently unused; kept so callers
            do not need to change.

    Returns:
        The augmented waveform (a new array produced by the time stretch).

    Notes:
        A mask is skipped when the stretched signal is too short to hold it.
        Previously such signals made ``np.random.randint`` raise ``ValueError``
        because its upper bound was not positive. For longer signals the random
        draws are unchanged, so seeded runs give the same output as before.
    """
    audio_data_augmented = librosa.effects.time_stretch(audio_data, rate=0.8)
    n_samples = len(audio_data_augmented)

    # 50-sample gap anywhere in the signal.
    wide_mask_limit = n_samples - 50
    if wide_mask_limit > 0:
        mask_start = np.random.randint(0, wide_mask_limit)
        audio_data_augmented[mask_start:mask_start + 50] = 0

    # 10-sample gap whose start lies in the first half of the signal.
    narrow_mask_limit = int(n_samples / 2) - 10
    if narrow_mask_limit > 0:
        narrow_mask_start = np.random.randint(0, narrow_mask_limit)
        audio_data_augmented[narrow_mask_start:narrow_mask_start + 10] = 0

    return audio_data_augmented

In [None]:
def extract_features(file_path, augmentation=False):
    """Build one fixed-length feature vector for an audio file.

    The file is loaded at its native sampling rate (``sr=None``) and, when
    ``augmentation`` is true, passed through ``augment_audio`` first. Six
    librosa descriptors are then computed and each is averaged over its second
    axis, giving one value per feature row.

    Args:
        file_path: Path of the audio file to load.
        augmentation: If true, augment the waveform before feature extraction.

    Returns:
        1-D array concatenating, in order: MFCC (40 coefficients), chroma,
        mel spectrogram in dB, spectral contrast, tonnetz of the harmonic
        component, and zero-crossing rate.
    """
    def _row_means(matrix):
        # Collapse the second axis so that one value per feature row remains.
        return np.mean(matrix.T, axis=0)

    signal, rate = librosa.load(file_path, sr=None)
    if augmentation:
        signal = augment_audio(signal, rate)

    # All STFT-based descriptors share the same 512-sample FFT window.
    mfcc = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40, n_fft=512)
    chroma = librosa.feature.chroma_stft(y=signal, sr=rate, n_fft=512)
    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(y=signal, sr=rate, n_fft=512)
    )
    contrast = librosa.feature.spectral_contrast(y=signal, sr=rate, n_fft=512)
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(signal), sr=rate)
    zcr = librosa.feature.zero_crossing_rate(y=signal)

    descriptors = (mfcc, chroma, mel_db, contrast, tonnetz, zcr)
    return np.hstack([_row_means(block) for block in descriptors])

In [None]:
def load_data(dataset_path):
    """Extract features for every ``.wav`` file under ``healthy/`` and ``disease/``.

    Args:
        dataset_path: Directory containing the two class sub-folders
            ``healthy`` and ``disease``.

    Returns:
        ``(df_original, df_augmented)``, both with columns ``feature`` and
        ``class``:

        * ``df_original`` has one row per file (un-augmented features).
        * ``df_augmented`` has two consecutive rows per file: the un-augmented
          features followed by the augmented ones. Row ``2k`` therefore matches
          row ``k`` of ``df_original``.

    Notes:
        * Files are processed in sorted name order. ``os.listdir`` returns
          entries in arbitrary order, which would make downstream seeded
          splits differ between machines.
        * Un-augmented features are extracted once per file and reused in both
          frames instead of being computed twice.
        * The columns are declared explicitly so an empty dataset still yields
          frames with ``feature`` and ``class`` columns.
    """
    features = []
    features_original = []

    for label in ['healthy', 'disease']:
        folder_path = os.path.join(dataset_path, label)
        files = sorted(file for file in os.listdir(folder_path) if file.endswith('.wav'))
        for file_name in tqdm(files, desc=f'Processing {label} files'):
            file_path = os.path.join(folder_path, file_name)
            clean_feature = extract_features(file_path, augmentation=False)
            features_original.append({'feature': clean_feature, 'class': label})
            features.append({'feature': clean_feature, 'class': label})
            features.append({'feature': extract_features(file_path, augmentation=True), 'class': label})

    columns = ['feature', 'class']
    df_original = pd.DataFrame(features_original, columns=columns)
    df_augmented = pd.DataFrame(features, columns=columns)

    return df_original, df_augmented

# Loading Data

In [None]:
# Path to the dataset folder of vowel /a/ files (relative to the working directory)
dataset_path_a = 'VCC_a'
# load_data returns two frames: un-augmented features only, and un-augmented + augmented features
df_original_a, df_augmented_a = load_data(dataset_path_a)

In [None]:
# Path to the dataset folder of vowel /i/ files (relative to the working directory)
dataset_path_i = 'VCC_i'
# load_data returns two frames: un-augmented features only, and un-augmented + augmented features
df_original_i, df_augmented_i = load_data(dataset_path_i)

In [None]:
# Path to the dataset folder of vowel /u/ files (relative to the working directory)
dataset_path_u = 'VCC_u'
# load_data returns two frames: un-augmented features only, and un-augmented + augmented features
df_original_u, df_augmented_u = load_data(dataset_path_u)

In [None]:
# Path to the dataset folder of vowel /iau/ files (relative to the working directory)
dataset_path_iau = 'VCC_iau'
# Loading is disabled for /iau/; every later /iau/ cell is commented out as well.
# df_original_iau, df_augmented_iau = load_data(dataset_path_iau)

# Split the data into train and test sets

In [None]:
# Prepare the data for vowel /a/
# Split by recording first, then attach augmented rows to their own side only.
# Two independent splits (one on df_original_a, one on df_augmented_a) put most
# evaluation recordings, clean and augmented, into the training set and so
# inflate test accuracy.
# Relies on load_data's layout: recording k is row k of df_original_a and
# rows 2k (clean) / 2k+1 (augmented) of df_augmented_a.
assert len(df_augmented_a) == 2 * len(df_original_a), 'expected one clean and one augmented row per recording'
train_idx_a, test_idx_a = train_test_split(
    np.arange(len(df_original_a)),
    test_size=0.3,  # same training share as the previous augmented split
    stratify=df_original_a['class'].values,
    random_state=42,
)
# Keep each clean/augmented pair adjacent, in the shuffled recording order.
train_rows_a = np.column_stack([2 * train_idx_a, 2 * train_idx_a + 1]).ravel()
test_rows_a = np.column_stack([2 * test_idx_a, 2 * test_idx_a + 1]).ravel()

X_train_original_a = df_original_a['feature'].iloc[train_idx_a].tolist()
X_test_original_a = df_original_a['feature'].iloc[test_idx_a].tolist()
y_train_original_a = df_original_a['class'].values[train_idx_a]
y_test_original_a = df_original_a['class'].values[test_idx_a]

X_train_a = df_augmented_a['feature'].iloc[train_rows_a].tolist()
X_test_a = df_augmented_a['feature'].iloc[test_rows_a].tolist()
y_train_a = df_augmented_a['class'].values[train_rows_a]
y_test_a = df_augmented_a['class'].values[test_rows_a]

In [None]:
# Prepare the data for vowel /i/
# Split by recording first, then attach augmented rows to their own side only.
# Two independent splits (one on df_original_i, one on df_augmented_i) put most
# evaluation recordings, clean and augmented, into the training set and so
# inflate test accuracy.
# Relies on load_data's layout: recording k is row k of df_original_i and
# rows 2k (clean) / 2k+1 (augmented) of df_augmented_i.
assert len(df_augmented_i) == 2 * len(df_original_i), 'expected one clean and one augmented row per recording'
train_idx_i, test_idx_i = train_test_split(
    np.arange(len(df_original_i)),
    test_size=0.3,  # same training share as the previous augmented split
    stratify=df_original_i['class'].values,
    random_state=42,
)
# Keep each clean/augmented pair adjacent, in the shuffled recording order.
train_rows_i = np.column_stack([2 * train_idx_i, 2 * train_idx_i + 1]).ravel()
test_rows_i = np.column_stack([2 * test_idx_i, 2 * test_idx_i + 1]).ravel()

X_train_original_i = df_original_i['feature'].iloc[train_idx_i].tolist()
X_test_original_i = df_original_i['feature'].iloc[test_idx_i].tolist()
y_train_original_i = df_original_i['class'].values[train_idx_i]
y_test_original_i = df_original_i['class'].values[test_idx_i]

X_train_i = df_augmented_i['feature'].iloc[train_rows_i].tolist()
X_test_i = df_augmented_i['feature'].iloc[test_rows_i].tolist()
y_train_i = df_augmented_i['class'].values[train_rows_i]
y_test_i = df_augmented_i['class'].values[test_rows_i]

In [None]:
# Prepare the data for vowel /u/
# Split by recording first, then attach augmented rows to their own side only.
# Two independent splits (one on df_original_u, one on df_augmented_u) put most
# evaluation recordings, clean and augmented, into the training set and so
# inflate test accuracy.
# Relies on load_data's layout: recording k is row k of df_original_u and
# rows 2k (clean) / 2k+1 (augmented) of df_augmented_u.
assert len(df_augmented_u) == 2 * len(df_original_u), 'expected one clean and one augmented row per recording'
train_idx_u, test_idx_u = train_test_split(
    np.arange(len(df_original_u)),
    test_size=0.3,  # same training share as the previous augmented split
    stratify=df_original_u['class'].values,
    random_state=42,
)
# Keep each clean/augmented pair adjacent, in the shuffled recording order.
train_rows_u = np.column_stack([2 * train_idx_u, 2 * train_idx_u + 1]).ravel()
test_rows_u = np.column_stack([2 * test_idx_u, 2 * test_idx_u + 1]).ravel()

X_train_original_u = df_original_u['feature'].iloc[train_idx_u].tolist()
X_test_original_u = df_original_u['feature'].iloc[test_idx_u].tolist()
y_train_original_u = df_original_u['class'].values[train_idx_u]
y_test_original_u = df_original_u['class'].values[test_idx_u]

X_train_u = df_augmented_u['feature'].iloc[train_rows_u].tolist()
X_test_u = df_augmented_u['feature'].iloc[test_rows_u].tolist()
y_train_u = df_augmented_u['class'].values[train_rows_u]
y_test_u = df_augmented_u['class'].values[test_rows_u]

In [None]:
# # Prepare the data for vowel /iau/
# X_train_original_iau, X_test_original_iau, y_train_original_iau, y_test_original_iau = train_test_split(df_original_iau['feature'].values.tolist(), df_original_iau['class'].values, test_size=0.5, random_state=42)
# X_train_iau, X_test_iau, y_train_iau, y_test_iau = train_test_split(df_augmented_iau['feature'].values.tolist(), df_augmented_iau['class'].values, test_size=0.2, stratify=df_augmented_iau['class'].values, random_state=42)

# Data conversion to numpy arrays

In [None]:
# Turn the four vowel /a/ feature lists into numpy arrays in one pass
X_train_original_a, X_test_original_a, X_train_a, X_test_a = (
    np.array(split)
    for split in (X_train_original_a, X_test_original_a, X_train_a, X_test_a)
)
# Displayed as the cell output: (n_samples, n_features) of the training set
X_train_a.shape

In [None]:
# Turn the four vowel /i/ feature lists into numpy arrays in one pass
X_train_original_i, X_test_original_i, X_train_i, X_test_i = (
    np.array(split)
    for split in (X_train_original_i, X_test_original_i, X_train_i, X_test_i)
)
# Displayed as the cell output: (n_samples, n_features) of the training set
X_train_i.shape

In [None]:
# Turn the four vowel /u/ feature lists into numpy arrays in one pass
X_train_original_u, X_test_original_u, X_train_u, X_test_u = (
    np.array(split)
    for split in (X_train_original_u, X_test_original_u, X_train_u, X_test_u)
)
# Displayed as the cell output: (n_samples, n_features) of the training set
X_train_u.shape

In [None]:
# # Convert the data for vowel /iau/ to numpy arrays 
# X_train_original_iau = np.array(X_train_original_iau)
# X_test_original_iau = np.array(X_test_original_iau)
# X_train_iau = np.array(X_train_iau)
# X_test_iau = np.array(X_test_iau)
# X_train_iau.shape

# Label Encoding 

In [None]:
# Binary-encode the vowel /a/ labels: 'disease' -> 1, anything else -> 0.
# Run once per kernel: re-running on already-encoded integers compares them
# with the string 'disease' and loses the original labels.
y_train_original_a, y_test_original_a, y_train_a, y_test_a = (
    (np.array(labels) == 'disease').astype(int)
    for labels in (y_train_original_a, y_test_original_a, y_train_a, y_test_a)
)

In [None]:
# Binary-encode the vowel /i/ labels: 'disease' -> 1, anything else -> 0.
# Run once per kernel: re-running on already-encoded integers compares them
# with the string 'disease' and loses the original labels.
y_train_original_i, y_test_original_i, y_train_i, y_test_i = (
    (np.array(labels) == 'disease').astype(int)
    for labels in (y_train_original_i, y_test_original_i, y_train_i, y_test_i)
)

In [None]:
# Binary-encode the vowel /u/ labels: 'disease' -> 1, anything else -> 0.
# Run once per kernel: re-running on already-encoded integers compares them
# with the string 'disease' and loses the original labels.
y_train_original_u, y_test_original_u, y_train_u, y_test_u = (
    (np.array(labels) == 'disease').astype(int)
    for labels in (y_train_original_u, y_test_original_u, y_train_u, y_test_u)
)

In [None]:
# # Encode labels for the vowel /iau/ data
# y_train_original_iau = (np.array(y_train_original_iau) == 'disease').astype(int)
# y_test_original_iau = (np.array(y_test_original_iau) == 'disease').astype(int)
# y_train_iau = (np.array(y_train_iau) == 'disease').astype(int)
# y_test_iau = (np.array(y_test_iau) == 'disease').astype(int)

# Logistic Regression Model

In [None]:
from sklearn.linear_model import LogisticRegression

# Vowel /a/: train on the augmented training split (fit returns the estimator itself).
# Must run before the SincNet reshape cells, which make the X arrays 3-D.
logreg_model_a = LogisticRegression(max_iter=1000).fit(X_train_a, y_train_a)

# Mean accuracy on the un-augmented /a/ test split
logreg_accuracy_a = logreg_model_a.score(X_test_original_a, y_test_original_a)
print(f'Logistic Regression Test Accuracy for vowel /a/ dataset: {logreg_accuracy_a:.4f}')

In [None]:
# Vowel /i/: train on the augmented training split (fit returns the estimator itself).
# Must run before the SincNet reshape cells, which make the X arrays 3-D.
logreg_model_i = LogisticRegression(max_iter=1000).fit(X_train_i, y_train_i)

# Mean accuracy on the un-augmented /i/ test split
logreg_accuracy_i = logreg_model_i.score(X_test_original_i, y_test_original_i)
print(f'Logistic Regression Test Accuracy for vowel /i/ dataset: {logreg_accuracy_i:.4f}')


In [None]:
# Vowel /u/: train on the augmented training split (fit returns the estimator itself).
# Must run before the SincNet reshape cells, which make the X arrays 3-D.
logreg_model_u = LogisticRegression(max_iter=1000).fit(X_train_u, y_train_u)

# Mean accuracy on the un-augmented /u/ test split
logreg_accuracy_u = logreg_model_u.score(X_test_original_u, y_test_original_u)
print(f'Logistic Regression Test Accuracy for vowel /u/ dataset: {logreg_accuracy_u:.4f}')

In [None]:
# # Initialize and fit Logistic Regression model for vowel /iau/ data
# logreg_model_iau = LogisticRegression(max_iter=1000)
# logreg_model_iau.fit(X_train_iau, y_train_iau)

# # Evaluate on test set for vowel /iau/ data
# logreg_accuracy_iau = logreg_model_iau.score(X_test_original_iau, y_test_original_iau)
# print(f'Logistic Regression Test Accuracy for vowel /iau/ dataset: {logreg_accuracy_iau:.4f}')

# Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Vowel /a/: 100-tree forest with a fixed random_state, trained on the augmented training split
rf_model_a = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_a, y_train_a)

# Mean accuracy on the un-augmented /a/ test split
rf_accuracy_a = rf_model_a.score(X_test_original_a, y_test_original_a)
print(f'Random Forest Test Accuracy for vowel /a/ dataset: {rf_accuracy_a:.4f}')


In [None]:
# Vowel /i/: 100-tree forest with a fixed random_state, trained on the augmented training split
rf_model_i = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_i, y_train_i)

# Mean accuracy on the un-augmented /i/ test split
rf_accuracy_i = rf_model_i.score(X_test_original_i, y_test_original_i)
print(f'Random Forest Test Accuracy for vowel /i/ dataset: {rf_accuracy_i:.4f}')

In [None]:
# Vowel /u/: 100-tree forest with a fixed random_state, trained on the augmented training split
rf_model_u = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_u, y_train_u)

# Mean accuracy on the un-augmented /u/ test split
rf_accuracy_u = rf_model_u.score(X_test_original_u, y_test_original_u)
print(f'Random Forest Test Accuracy for vowel /u/ dataset: {rf_accuracy_u:.4f}')

In [None]:
# # Initialize and fit Random Forest model for vowel /iau/ data
# rf_model_iau = RandomForestClassifier(n_estimators=100, random_state=42)
# rf_model_iau.fit(X_train_iau, y_train_iau)

# # Evaluate on test set for vowel /iau/ data
# rf_accuracy_iau = rf_model_iau.score(X_test_original_iau, y_test_original_iau)
# print(f'Random Forest Test Accuracy for vowel /iau/ dataset: {rf_accuracy_iau:.4f}')

# Support Vector Classifier

In [None]:
from sklearn.svm import SVC

# Vowel /a/: RBF-kernel SVM (C=1.0, gamma='scale') trained on the augmented training split
svm_model_a = SVC(kernel='rbf', C=1.0, gamma='scale').fit(X_train_a, y_train_a)

# Mean accuracy on the un-augmented /a/ test split
svm_accuracy_a = svm_model_a.score(X_test_original_a, y_test_original_a)
print(f'SVM Test Accuracy for vowel /a/ dataset: {svm_accuracy_a:.4f}')


In [None]:
# Vowel /i/: RBF-kernel SVM (C=1.0, gamma='scale') trained on the augmented training split
svm_model_i = SVC(kernel='rbf', C=1.0, gamma='scale').fit(X_train_i, y_train_i)

# Mean accuracy on the un-augmented /i/ test split
svm_accuracy_i = svm_model_i.score(X_test_original_i, y_test_original_i)
print(f'SVM Test Accuracy for vowel /i/ dataset: {svm_accuracy_i:.4f}')

In [None]:
# Vowel /u/: RBF-kernel SVM (C=1.0, gamma='scale') trained on the augmented training split
svm_model_u = SVC(kernel='rbf', C=1.0, gamma='scale').fit(X_train_u, y_train_u)

# Mean accuracy on the un-augmented /u/ test split
svm_accuracy_u = svm_model_u.score(X_test_original_u, y_test_original_u)
print(f'SVM Test Accuracy for vowel /u/ dataset: {svm_accuracy_u:.4f}')

In [None]:
# # Initialize and fit SVM model for vowel /iau/ data
# svm_model_iau = SVC(kernel='rbf', C=1.0, gamma='scale')
# svm_model_iau.fit(X_train_iau, y_train_iau)

# # Evaluate on test set for vowel /iau/ data
# svm_accuracy_iau = svm_model_iau.score(X_test_original_iau, y_test_original_iau)
# print(f'SVM Test Accuracy for vowel /iau/ dataset: {svm_accuracy_iau:.4f}')

# Array reshaping for SincNet Model

In [None]:
# Give the /a/ feature matrices a trailing channel axis, (n, d) -> (n, d, 1),
# as expected by the 1-D convolutional SincNet input.
# The scikit-learn cells above need the 2-D arrays, so run them before this one.
X_train_a, X_test_a, X_test_original_a = (
    matrix.reshape(-1, matrix.shape[1], 1)
    for matrix in (X_train_a, X_test_a, X_test_original_a)
)

In [None]:
# Give the /i/ feature matrices a trailing channel axis, (n, d) -> (n, d, 1),
# as expected by the 1-D convolutional SincNet input.
# The scikit-learn cells above need the 2-D arrays, so run them before this one.
X_train_i, X_test_i, X_test_original_i = (
    matrix.reshape(-1, matrix.shape[1], 1)
    for matrix in (X_train_i, X_test_i, X_test_original_i)
)

In [None]:
# Give the /u/ feature matrices a trailing channel axis, (n, d) -> (n, d, 1),
# as expected by the 1-D convolutional SincNet input.
# The scikit-learn cells above need the 2-D arrays, so run them before this one.
X_train_u, X_test_u, X_test_original_u = (
    matrix.reshape(-1, matrix.shape[1], 1)
    for matrix in (X_train_u, X_test_u, X_test_original_u)
)

In [None]:
# # Reshape /iau/ vowel data for SincNet
# X_train_iau = X_train_iau.reshape(-1, X_train_iau.shape[1], 1)
# X_test_iau = X_test_iau.reshape(-1, X_test_iau.shape[1], 1)
# X_test_original_iau = X_test_original_iau.reshape(-1, X_test_original_iau.shape[1], 1)

# SincNet Model

In [None]:
def build_sincnet_model(input_shape):
    """Assemble and compile the SincNet-style binary classifier.

    Layer stack, in order:

    * ``SincConv1D`` front end (80 filters of width 11, ``fs=16000``), followed
      by batch normalisation and max pooling (pool size 3);
    * two ``Conv1D`` blocks (60 filters, kernel 5, ReLU), each followed by
      batch normalisation and max pooling (pool size 3);
    * ``Flatten``, then ``Dense(256)`` and ``Dense(128)`` with ReLU, each
      followed by ``Dropout(0.4)``;
    * a single sigmoid output unit.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.

    Returns:
        The ``Sequential`` model, compiled with Adam (learning rate 1e-4),
        binary cross-entropy loss and binary accuracy as the metric.
    """
    # Sinc-filter front end
    layers = [
        SincConv1D(N_filt=80, Filt_dim=11, fs=16000, stride=1, padding="VALID", input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(pool_size=3),
    ]

    # Two identical convolutional blocks
    for _ in range(2):
        layers += [
            Conv1D(60, kernel_size=5, padding="valid", activation="relu"),
            BatchNormalization(),
            MaxPooling1D(pool_size=3),
        ]

    # Classifier head
    layers.append(Flatten())
    for units in (256, 128):
        layers += [Dense(units, activation="relu"), Dropout(0.4)]
    layers.append(Dense(1, activation="sigmoid"))

    model = Sequential(layers)
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss=BinaryCrossentropy(),
        metrics=[BinaryAccuracy()],
    )
    return model

In [None]:
# One independent SincNet model per vowel; each sample is (n_features, 1)
model_a, model_i, model_u = (
    build_sincnet_model((train_matrix.shape[1], 1))
    for train_matrix in (X_train_a, X_train_i, X_train_u)
)
# model_iau = build_sincnet_model((X_train_iau.shape[1], 1))

# Model Summary

In [None]:
# Print the layer-by-layer summary of each per-vowel model
# (the models are already compiled inside build_sincnet_model).
model_a.summary()
model_i.summary()
model_u.summary()
# model_iau.summary()

In [None]:
# Callbacks (both watch the validation loss)
# Stop after 10 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
# After 5 stagnant epochs multiply the learning rate by 0.2, never going below 5e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=5e-6,
)

In [None]:
# Train the model
# One fit per vowel with identical settings: at most 100 epochs, batches of 32,
# validation_split=0.2 of the training arrays held out to compute val_loss, and the
# shared early_stopping / reduce_lr callbacks (both monitor val_loss).
# The History object returned by the last fit is shown as the cell output.
model_a.fit(X_train_a, y_train_a, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping, reduce_lr])
model_i.fit(X_train_i, y_train_i, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping, reduce_lr])
model_u.fit(X_train_u, y_train_u, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping, reduce_lr])
# model_iau.fit(X_train_iau, y_train_iau, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping, reduce_lr])

# Evaluating the model performance

In [None]:
# /a/ vowel: loss and binary accuracy on the un-augmented test split
loss_a, accuracy_a = model_a.evaluate(X_test_original_a, y_test_original_a)
print(f'Test accuracy on original test set of /a/ vowel dataset: {accuracy_a:.4f}', end='\n\n')

# Same metrics on the training split, for comparison against the test figures
train_loss_a, train_accuracy_a = model_a.evaluate(X_train_a, y_train_a)
print(f'Train accuracy on /a/ vowel dataset: {train_accuracy_a:.4f}')

In [None]:
# /i/ vowel: loss and binary accuracy on the un-augmented test split
loss_i, accuracy_i = model_i.evaluate(X_test_original_i, y_test_original_i)
print(f'Test accuracy on original test set of /i/ vowel dataset: {accuracy_i:.4f}', end='\n\n')

# Same metrics on the training split, for comparison against the test figures
train_loss_i, train_accuracy_i = model_i.evaluate(X_train_i, y_train_i)
print(f'Train accuracy on /i/ vowel dataset: {train_accuracy_i:.4f}')

In [None]:
# /u/ vowel: loss and binary accuracy on the un-augmented test split
loss_u, accuracy_u = model_u.evaluate(X_test_original_u, y_test_original_u)
print(f'Test accuracy on original test set of /u/ vowel dataset: {accuracy_u:.4f}', end='\n\n')

# Same metrics on the training split, for comparison against the test figures
train_loss_u, train_accuracy_u = model_u.evaluate(X_train_u, y_train_u)
print(f'Train accuracy on /u/ vowel dataset: {train_accuracy_u:.4f}')

In [None]:
# # Evaluate on the test set of /iau/ vowel dataset
# loss_iau, accuracy_iau = model_iau.evaluate(X_test_original_iau, y_test_original_iau)
# print(f'Test accuracy on original test set of /iau/ vowel dataset: {accuracy_iau:.4f}')
# print()
# # Evaluate model on training data of /iau/ vowel dataset
# train_loss_iau, train_accuracy_iau = model_iau.evaluate(X_train_iau, y_train_iau)
# print(f'Train accuracy on /iau/ vowel dataset: {train_accuracy_iau:.4f}')

# Confusion Matrix

### Test Data

In [None]:
# Vowel /a/, un-augmented test split: threshold the sigmoid outputs at 0.5
# and tabulate true vs. predicted classes (0 = healthy, 1 = disease).
y_pred_test_a = model_a.predict(X_test_original_a)
y_pred_binary_a = (y_pred_test_a > 0.5).astype(int)
cm_a = confusion_matrix(y_test_original_a, y_pred_binary_a)

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm_a,
    annot=True,
    fmt='g',
    cmap='Blues',
    xticklabels=['Healthy', 'Disease'],
    yticklabels=['Healthy', 'Disease'],
)
plt.gca().set(
    xlabel='Predicted',
    ylabel='True',
    title='Confusion Matrix - Original Test Set of vowel /a/ dataset',
)
plt.show()

In [None]:
# Vowel /i/, un-augmented test split: threshold the sigmoid outputs at 0.5
# and tabulate true vs. predicted classes (0 = healthy, 1 = disease).
y_pred_test_i = model_i.predict(X_test_original_i)
y_pred_binary_i = (y_pred_test_i > 0.5).astype(int)
cm_i = confusion_matrix(y_test_original_i, y_pred_binary_i)

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm_i,
    annot=True,
    fmt='g',
    cmap='Blues',
    xticklabels=['Healthy', 'Disease'],
    yticklabels=['Healthy', 'Disease'],
)
plt.gca().set(
    xlabel='Predicted',
    ylabel='True',
    title='Confusion Matrix - Original Test Set of vowel /i/ dataset',
)
plt.show()

In [None]:
# Vowel /u/, un-augmented test split: threshold the sigmoid outputs at 0.5
# and tabulate true vs. predicted classes (0 = healthy, 1 = disease).
y_pred_test_u = model_u.predict(X_test_original_u)
y_pred_binary_u = (y_pred_test_u > 0.5).astype(int)
cm_u = confusion_matrix(y_test_original_u, y_pred_binary_u)

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm_u,
    annot=True,
    fmt='g',
    cmap='Blues',
    xticklabels=['Healthy', 'Disease'],
    yticklabels=['Healthy', 'Disease'],
)
plt.gca().set(
    xlabel='Predicted',
    ylabel='True',
    title='Confusion Matrix - Original Test Set of vowel /u/ dataset',
)
plt.show()

In [None]:
# # Confusion Matrix for original test set of vowel /iau/ dataset
# y_pred_test_iau = model_iau.predict(X_test_original_iau)
# y_pred_binary_iau = (y_pred_test_iau > 0.5).astype(int)
# cm_iau = confusion_matrix(y_test_original_iau, y_pred_binary_iau)

# plt.figure(figsize=(10, 8))
# sns.heatmap(cm_iau, annot=True, cmap='Blues', fmt='g', 
#             xticklabels=['Healthy', 'Disease'], yticklabels=['Healthy', 'Disease'])
# plt.xlabel('Predicted')
# plt.ylabel('True')
# plt.title('Confusion Matrix - Original Test Set of vowel /iau/ dataset')
# plt.show()

### Train data

In [None]:
# Vowel /a/, training split: threshold the sigmoid outputs at 0.5
# and tabulate true vs. predicted classes (0 = healthy, 1 = disease).
y_pred_train_a = (model_a.predict(X_train_a) > 0.5).astype(int)
cm_train_a = confusion_matrix(y_train_a, y_pred_train_a)

classes = ['Healthy', 'Disease']
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm_train_a,
    annot=True,
    fmt='g',
    cmap='Blues',
    xticklabels=classes,
    yticklabels=classes,
)
plt.gca().set(
    xlabel='Predicted',
    ylabel='True',
    title='Confusion Matrix - Training Data of vowel /a/ dataset',
)
plt.show()

In [None]:
# Vowel /i/, training split: threshold the sigmoid outputs at 0.5
# and tabulate true vs. predicted classes (0 = healthy, 1 = disease).
y_pred_train_i = (model_i.predict(X_train_i) > 0.5).astype(int)
cm_train_i = confusion_matrix(y_train_i, y_pred_train_i)

classes = ['Healthy', 'Disease']
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm_train_i,
    annot=True,
    fmt='g',
    cmap='Blues',
    xticklabels=classes,
    yticklabels=classes,
)
plt.gca().set(
    xlabel='Predicted',
    ylabel='True',
    title='Confusion Matrix - Training Data of vowel /i/ dataset',
)
plt.show()

In [None]:
# Vowel /u/, training split: threshold the sigmoid outputs at 0.5
# and tabulate true vs. predicted classes (0 = healthy, 1 = disease).
y_pred_train_u = (model_u.predict(X_train_u) > 0.5).astype(int)
cm_train_u = confusion_matrix(y_train_u, y_pred_train_u)

classes = ['Healthy', 'Disease']
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm_train_u,
    annot=True,
    fmt='g',
    cmap='Blues',
    xticklabels=classes,
    yticklabels=classes,
)
plt.gca().set(
    xlabel='Predicted',
    ylabel='True',
    title='Confusion Matrix - Training Data of vowel /u/ dataset',
)
plt.show()

In [None]:
# # Confusion matrix for training data of vowel /iau/ dataset
# y_pred_train_iau = (model_iau.predict(X_train_iau) > 0.5).astype(int)
# cm_train_iau = confusion_matrix(y_train_iau, y_pred_train_iau)

# classes = ['Healthy', 'Disease']
# plt.figure(figsize=(10, 8))
# sns.heatmap(cm_train_iau, annot=True, cmap='Blues', fmt='g', xticklabels=classes, yticklabels=classes)
# plt.xlabel('Predicted')
# plt.ylabel('True')
# plt.title('Confusion Matrix - Training Data of vowel /iau/ dataset')
# plt.show()