In [1]:
# Global imports
import os
import numpy as np
import pandas as pd
import imageio
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization, Conv2D, LeakyReLU, MaxPooling2D, Flatten, Dense
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import tensorflow_addons as tfa
import torch
import torch.nn as nn
import torch.optim as optim
import torchaudio
from sklearn.model_selection import KFold
import numpy as np


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.10.1 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
import tensorflow as tf
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Load dataset


In [4]:
import pickle
def load_data(filename):
    """
    Load data from a pickle file.

    Args:
        filename (str): The path to the pickle file.

    Returns:
        dict: The loaded data dictionary.
    """
    with open(filename, 'rb') as file:
        data_dict_loaded = pickle.load(file)
    return data_dict_loaded

filename = "../Data/D3TEC.pkl"
data_dict_loaded = load_data(filename)
data_dict_loaded

{1: {'Marca temporal': '2023/10/11 11:13:07 a.\xa0m. GMT-6',
  'PHQ-9 Score': 8,
  'Age': 36,
  'Gender': 'Female',
  'Lugar de Residencia': 'Santa Catarina, Nuevo León',
  'Lugar de Procedencia': nan,
  'Social Class': 'Working Class',
  'Institution': 'CAABI',
  'Medicine': 'Forxiga. Atrovastatina.',
  'Physical Condition': 'Diabetes. Fatty Liver.',
  'Mental Health Condition': nan,
  'Depression Diagnosis (level)': nan,
  'PHQ-Binary': 0,
  'audios': {'sm': {'positive': [],
    'negative': [],
    'neutral': [],
    0: {'file_path': 'D:/Github/Improving-deep-neural-networks-to-identify-mental-disorders-using-Neural-Architecture-Search/D3T3C/D3TEC Dataset/SM-27\\153.wav',
     'waveform': array([-3.4575351e-07, -3.7821010e-06, -4.8473012e-06, ...,
            -7.4757336e-06, -4.6938076e-06, -1.4753023e-06], dtype=float32),
     'sample_rate': 22050,
     'spectrogram': array([[-41.41863 , -35.586212, -42.963806, ..., -50.34841 , -42.829468,
             -59.43257 ],
            [-42.

In [3]:
import os
import datetime
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization, Conv2D, LeakyReLU, Flatten, Dense
from sklearn.model_selection import KFold
import pandas as pd

class KFoldCNNTester:
    def __init__(self, data_dict, filename):
        self.X_train = data_dict['X_train']
        self.y_train = data_dict['y_train']
        self.new_df_train = data_dict['new_df_train']
        self.filename = filename
        self.mean_acc_per_fold = []
        self.mean_loss_per_fold = []
        self.mean_precision_per_fold = []
        self.kfold_list = []

    def run_kfold_test(self):
        for fold_it in range(3, 11):
            acc_per_fold = []
            loss_per_fold = []
            precision_per_fold = []

            batch_size = 1
            no_epochs = 100
            verbosity = 2
            num_folds = fold_it
            kfold = KFold(n_splits=num_folds, shuffle=True)

            fold_no = 1

            for train, test in kfold.split(self.X_train, self.y_train):
                model = self.define_model()
                print('------------------------------------------------------------------------')
                print(f'Training for fold {fold_no} ...')

                callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3, min_delta=0.0001)
                history = model.fit(self.X_train[train], self.y_train[train], batch_size=batch_size, epochs=no_epochs, verbose=verbosity, callbacks=[callback])

                scores = model.evaluate(self.X_train[test], self.y_train[test], verbose=0)
                print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%; {model.metrics_names[2]}: {scores[2]}')
                acc_per_fold.append(scores[1] * 100)
                loss_per_fold.append(scores[0])
                precision_per_fold.append(scores[2])

                if scores[1] * 100 > 60:
                    self.save_model_and_data(model, fold_no, scores, num_folds, train)

                fold_no += 1

            self.calculate_means(acc_per_fold, loss_per_fold, precision_per_fold, num_folds)

    def define_model(self):
        model = Sequential([
            BatchNormalization(),
            Conv2D(32, kernel_size=(3, 3), padding='same'),
            LeakyReLU(alpha=0.01),
            BatchNormalization(),
            Conv2D(8, (3, 3), padding='same'),
            LeakyReLU(alpha=0.01),
            BatchNormalization(),
            Conv2D(8, (3, 3), padding='same'),
            LeakyReLU(alpha=0.01),
            BatchNormalization(),
            Conv2D(8, (3, 3), padding='same'),
            LeakyReLU(alpha=0.01),
            Flatten(),
            Dense(64),
            LeakyReLU(alpha=0.01),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.SpecificityAtSensitivity(sensitivity=1)])
        return model

    def save_model_and_data(self, model, fold_no, scores, num_folds, train):
        current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        folder_path = f'{self.filename}_{current_time}/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        subfolder_path = folder_path + f'{num_folds}-fold_{fold_no}-{scores[1]*100}/'
        if not os.path.exists(subfolder_path):
            os.makedirs(subfolder_path)

        model.save(subfolder_path + f'fold-{fold_no}.h5')
        df_to_save = self.new_df_train.iloc[train].copy()
        df_to_save.drop('Spectrogram', axis=1, inplace=True)
        df_to_save.to_csv(subfolder_path + f'train-data-fold-{fold_no}.csv', index=False)

    def calculate_means(self, acc_per_fold, loss_per_fold, precision_per_fold, num_folds):
        mean_acc = sum(acc_per_fold) / len(acc_per_fold)
        mean_loss = sum(loss_per_fold) / len(loss_per_fold)
        mean_precision = sum(precision_per_fold) / len(precision_per_fold)
        self.mean_acc_per_fold.append(mean_acc)
        self.mean_loss_per_fold.append(mean_loss)
        self.mean_precision_per_fold.append(mean_precision)
        self.kfold_list.append(num_folds)

        current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        folder_path = f'{self.filename}_{current_time}/'
        with open(folder_path + 'score.txt', 'w') as file:
            file.write(f'accuracy: {mean_acc}. loss: {mean_loss}. precision: {mean_precision}')