## Data Visualizations

In [None]:
# Installing the necessary libraries for audio visualization
!pip install librosa
!pip install IPython

In [None]:
# Path of the sample recording used by the visualization cells below
filename = 'Healthy/1.wav'

In [None]:
# Importing the necessary libraries for visualizations
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load one recording, draw its waveform and show an inline audio player.
# The player expression stays last so the notebook renders it as the cell output.
data, sample_rate = librosa.load(filename)
plt.figure(figsize=(8, 2))
librosa.display.waveshow(data, sr=sample_rate)
ipd.Audio(filename)

In [None]:
# Sample rate returned by librosa.load for the file loaded above
sample_rate

In [None]:
# Read the same file with scipy.io.wavfile so its sample rate and samples
# can be compared with what librosa returned above
from scipy.io import wavfile as wav
wave_sample_rate, wave_audio = wav.read(filename)

In [None]:
# Sample rate reported by scipy.io.wavfile for the same file
wave_sample_rate

In [None]:
# Sample data as read by scipy.io.wavfile
wave_audio

In [None]:
# Sample data as loaded by librosa, for comparison with the scipy output above
data

## Data Pre-processing

In [None]:
# Importing the necessary libraries
import os
import numpy as np
import soundfile as sf
import shutil

# Function to load and play the audio
def load_play(filename):
    """Load an audio file with librosa and show an inline player for it.

    Args:
        filename: Path of the audio file to load and play.

    Returns:
        The ``(data, sample_rate)`` pair produced by ``librosa.load``.
    """
    samples, rate = librosa.load(filename)
    player = ipd.Audio(filename)
    display(player)
    return samples, rate

# Adding gaussian noise to the audio
def add_gaussian_noise(audio_data, stddev=0.01):
    """Return ``audio_data`` with zero-mean Gaussian noise added to every sample.

    Args:
        audio_data: Array of audio samples (must expose ``.shape``).
        stddev: Standard deviation of the noise drawn from ``np.random.normal``.

    Returns:
        A new array with the same shape as ``audio_data`` holding signal plus noise.
    """
    return audio_data + np.random.normal(loc=0, scale=stddev, size=audio_data.shape)

# Saving the audio files
def save_audio(file_path, audio_data, sample_rate):
    """Write ``audio_data`` to ``file_path`` with ``soundfile.write``.

    Args:
        file_path: Destination path of the audio file.
        audio_data: Samples to write.
        sample_rate: Sample rate passed on to the writer.
    """
    sf.write(file=file_path, data=audio_data, samplerate=sample_rate)

# Function to copy the original audio files to the new folder
def copy(folder_path, output_folder):
    """Copy every ``.wav`` file found directly in ``folder_path`` to ``output_folder``.

    Files keep their original names; entries without a ``.wav`` suffix are
    ignored. ``output_folder`` is expected to exist already.
    """
    for entry in os.listdir(folder_path):
        if not entry.endswith('.wav'):
            continue
        shutil.copy(
            os.path.join(folder_path, entry),
            os.path.join(output_folder, entry),
        )


# Processing all audio files
def process_audio(folder_path, output_folder, stddev=0.01):
    """Create, save and play a noisy copy of every ``.wav`` file in ``folder_path``.

    For each file the original is loaded and played, Gaussian noise is added,
    the result is written to ``output_folder`` as ``n_<original name>`` and
    that saved file is then loaded and played back.

    Args:
        folder_path: Folder holding the source ``.wav`` files; its base name
            is used as the category shown in the printed captions.
        output_folder: Folder that receives the noisy files.
        stddev: Standard deviation handed to ``add_gaussian_noise``.
    """
    category = os.path.basename(folder_path)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.wav'):
            continue

        # Original recording: caption, load and play
        print(f"o_audio ({category}): {file_name}")
        clean_samples, rate = load_play(os.path.join(folder_path, file_name))

        # Noisy version goes next to the copied originals under an `n_` prefix
        noisy_file_name = f'n_{file_name}'
        noisy_target = os.path.join(output_folder, noisy_file_name)
        save_audio(noisy_target, add_gaussian_noise(clean_samples, stddev), rate)

        # Play back what was actually written to disk
        print(f"n_audio ({category}): {noisy_file_name}")
        load_play(noisy_target)


# Example usage: source folders holding the raw recordings ...
healthy_data, unhealthy_data = 'Healthy', 'Unhealthy'
# ... and the folders that receive the originals plus their noisy copies
healthy_audio, unhealthy_audio = 'new_healthy_data', 'new_unhealthy_data'

# Creating the destination folders in case they don't exist
for destination in (healthy_audio, unhealthy_audio):
    os.makedirs(destination, exist_ok=True)

folder_pairs = [(healthy_data, healthy_audio), (unhealthy_data, unhealthy_audio)]

# Copying the original audio files to the new folders first ...
for source, destination in folder_pairs:
    copy(source, destination)

# ... then processing the audio files and saving the noisy (`n_`-prefixed) versions
for source, destination in folder_pairs:
    process_audio(source, destination, stddev=0.01)


## Feature Extraction using MFCC

In [None]:
import pandas as pd
from tqdm import tqdm

# Folders to read features from: the pre-processing output (originals plus noisy copies).
# Note that these rebind the names used earlier for the raw input folders.
healthy_data, unhealthy_data = 'new_healthy_data', 'new_unhealthy_data'

# Extracting MFCC features
def features_extractor(file_path, n_mfcc=40):
    """Summarise an audio file as the mean of its MFCC matrix over axis 1.

    Args:
        file_path: Path of the audio file, loaded with ``librosa.load`` using
            ``res_type='kaiser_fast'``.
        n_mfcc: Number of coefficients requested from ``librosa.feature.mfcc``.

    Returns:
        The MFCC matrix averaged via ``np.mean(mfcc.T, axis=0)``. No further
        standardisation is applied to the averaged vector.
    """
    signal, rate = librosa.load(file_path, res_type='kaiser_fast')
    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)
    return np.mean(mfcc_matrix.T, axis=0)

# Initializing an empty list to store [feature_vector, label] rows
extracted_features = []

# Both classes go through the same loop; only the folder and the label differ.
for folder, label in ((healthy_data, 'healthy'), (unhealthy_data, 'unhealthy')):
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore the seeded splits further down) reproducible.
    wav_files = sorted(name for name in os.listdir(folder) if name.endswith('.wav'))
    for file_name in tqdm(wav_files, desc=label):
        file_path = os.path.join(folder, file_name)
        features = features_extractor(file_path)
        extracted_features.append([features, label])

# Convert extracted features to a DataFrame
# (one array-valued 'feature' column plus the 'label' column)
extracted_features_df = pd.DataFrame(extracted_features, columns=['feature', 'label'])

# Save to CSV with one numeric column per coefficient. Writing the
# array-valued 'feature' column directly would store every vector as its
# printed string form, which cannot be read back as numbers.
output_csv = 'audio_features_df.csv'
feature_table = pd.DataFrame(extracted_features_df['feature'].tolist())
feature_table.columns = [f'mfcc_{position}' for position in feature_table.columns]
feature_table['label'] = extracted_features_df['label']
feature_table.to_csv(output_csv, index=False)

print(f"Features and labels saved to {output_csv}")


In [None]:
### Separating the feature matrix (X) from the class labels (y)
X = np.asarray(extracted_features_df['feature'].tolist())
y = np.asarray(extracted_features_df['label'].tolist())

In [None]:
# Shape of the full feature matrix built from the 'feature' column
X.shape

In [None]:
### Label Encoding
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# Encode straight from the DataFrame column rather than from `y` itself:
# with `y = f(y)` a second run of this cell would feed the already one-hot
# array back into the encoder instead of the original string labels.
labelencoder = LabelEncoder()
y = to_categorical(labelencoder.fit_transform(extracted_features_df['label']))

## Splitting the dataset

In [None]:
### Train Test Split
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as the test set (fixed seed for repeatability).
# NOTE(review): each feature folder holds both the original clips and their
# noisy `n_` copies, so a purely random split can place the two versions of
# one recording on opposite sides — consider splitting by recording instead.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [None]:
# Training feature matrix shape after the 80/20 train/test split
X_train.shape

In [None]:
# Test feature matrix shape (the 20% held out by the split above)
X_test.shape

In [None]:
# One-hot training labels; the row count should match X_train
y_train.shape

In [None]:
# One-hot test labels; the row count should match X_test
y_test.shape

In [None]:
# Carve a validation set (20%) out of the training split; the test split is untouched.
# Re-running this cell on its own would split the already reduced training
# data again, so re-run the train/test split cell first.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=0
)

In [None]:
# Training features remaining after the validation subset was split off
X_train.shape

In [None]:
# Validation feature matrix shape
X_val.shape

## Building the model

In [None]:
from tensorflow.keras import layers, models
from tensorflow.keras.regularizers import l2

# Building the model
def build_light_vgg11(input_shape=(40, 1, 1), num_classes=2):
    """Build and compile a small VGG11-style CNN over a column of features.

    Args:
        input_shape: Shape of a single sample as passed to ``layers.Input``.
        num_classes: Number of output units. With more than one unit the
            targets are expected to be one-hot rows; a single unit keeps the
            plain sigmoid / binary cross-entropy setup.

    Returns:
        A compiled Keras ``Model`` (Adam optimizer, accuracy metric).
    """
    inputs = layers.Input(shape=input_shape)

    # Feature extraction (VGG11-like): one 3x3 convolution per stage, each
    # followed by a (2, 1) max-pool. Only the first convolution carries an
    # L2 kernel regularizer.
    x = layers.Conv2D(64, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.01))(inputs)
    x = layers.MaxPooling2D(pool_size=(2, 1))(x)

    x = layers.Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=(2, 1))(x)

    x = layers.Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=(2, 1))(x)

    x = layers.Conv2D(512, (3, 3), padding='same', activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=(2, 1))(x)

    # Custom lightweight block: 1x1 convolutions squeeze 512 -> 128 -> 1 channel
    x = layers.Conv2D(128, (1, 1), activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(1, (1, 1), activation='relu')(x)
    x = layers.BatchNormalization()(x)

    # Global average pooling followed by dropout.
    # NOTE(review): after the 1-filter convolution the pooled tensor holds a
    # single value per sample, so Dropout(0.5) zeroes the classifier's only
    # input half of the time during training — confirm this is intended.
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.5)(x)

    if num_classes == 1:
        # A single unit is an ordinary binary classifier with 0/1 targets.
        head_activation, loss_name = 'sigmoid', 'binary_crossentropy'
    else:
        # One-hot targets describe mutually exclusive classes, so the outputs
        # should form one probability distribution. Independent sigmoids with
        # binary cross-entropy treat each class as a separate yes/no label,
        # and (depending on the Keras version) the 'accuracy' metric may then
        # be computed per output unit instead of per sample.
        head_activation, loss_name = 'softmax', 'categorical_crossentropy'
    outputs = layers.Dense(num_classes, activation=head_activation)(x)

    # Build and compile the model
    model = models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss=loss_name, metrics=['accuracy'])

    return model

# Instantiate the model for (40, 1, 1) inputs and two classes, then list its layers
model = build_light_vgg11(
    input_shape=(40, 1, 1),
    num_classes=2,
)
model.summary()

In [None]:
# Reshaping the data: every split becomes (samples, 40, 1, 1) to match the model input
X_train, X_val, X_test = (
    split.reshape(-1, 40, 1, 1) for split in (X_train, X_val, X_test)
)

In [None]:
# More data augmentation
# NOTE(review): `datagen` is never passed to `model.fit` in the training cell
# (training uses X_train / y_train directly), so these transforms currently
# have no effect on training.
# NOTE(review): shifts, zoom and flips are image-style transforms; presumably
# they are not meaningful for a (40, 1, 1) column of MFCC means — confirm
# before wiring this generator into training.
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)
datagen.fit(X_train)

## Training the model

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Callbacks: keep the best checkpoint (lowest val_loss) on disk and stop
# once val_loss has not improved for 5 epochs, restoring the best weights
checkpoint_cb = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
early_stopping_cb = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)
callbacks = [checkpoint_cb, early_stopping_cb]

# Training the model, validating on the held-out validation split each epoch
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=40,
    batch_size=32,
    callbacks=callbacks,
    verbose=1,
)

# Evaluating the trained model on the untouched test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=1)
print(f"Test loss: {test_loss}")
print(f"Test accuracy: {test_acc}")

## Getting the report

In [None]:
from sklearn.metrics import classification_report
import numpy as np

# Predicted class index per test sample (arg-max over the model outputs)
y_pred = np.argmax(model.predict(X_test), axis=1)
# True class index per test sample (arg-max over the one-hot labels)
y_true = np.argmax(y_test, axis=1)

# Per-class precision / recall / F1, reported with four decimals
report = classification_report(y_true, y_pred, digits=4)

print("Classification Report:\n", report)


## Doing some predictions

In [None]:
# Defining class labels. The order must follow the integer codes produced by
# the label encoder, which sorts its classes ('healthy' -> 0, 'unhealthy' -> 1).
class_labels = ["Healthy", "Unhealthy"]

# Selecting (up to) the first three test samples
X_test_sample = X_test[:3]  # MFCC features
y_true_sample = np.argmax(y_test[:3], axis=1)  # Convert one-hot to class index

# Get predictions
y_pred_probs = model.predict(X_test_sample)
y_pred = np.argmax(y_pred_probs, axis=1)

# Print out predictions. Iterate over the samples actually available: a fixed
# range(3) raised IndexError whenever the test set held fewer than three rows.
for i in range(len(y_pred)):
    print(f"audio {i+1}: Predicted = {class_labels[y_pred[i]]}, Actual = {class_labels[y_true_sample[i]]}")
