# Imports

In [3]:
# Global imports
import os
import numpy as np
import pandas as pd
import imageio.v2 as imageio
import tensorflow as tf
import tensorflow_addons as tfa
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
import glob
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, KFold
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import BatchNormalization, Conv2D, LeakyReLU, MaxPooling2D, Flatten, Dense
import torchaudio


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.10.1 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [21]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [22]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os
from io import BytesIO

class AudioProcessor:
    def __init__(self, num_channels=3):
        self.num_channels = num_channels
    
    def generate_image_from_spectrogram(self, spectrogram):
        fig, ax = plt.subplots(figsize=(7.92, 2.52))  # Ajustar tamaño de figura a 792x252 píxeles
        img = librosa.display.specshow(spectrogram, x_axis='time', y_axis='mel', ax=ax)
        plt.subplots_adjust(left=0, right=1, top=1, bottom=0)  # Ajustar los márgenes
        ax.margins(0)
        ax.set_axis_off()
        plt.axis('off')  # Turn off axis labels
        buf = BytesIO()
        plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0, transparent=True)
        buf.seek(0)
        img = Image.open(buf)
        
        if self.num_channels == 1:
            img = img.convert('L')  # Convertir a grayscale
        elif self.num_channels == 2:
            img = img.convert('LA')  # Convertir a Luminance + Alpha
        else:
            img = img.convert('RGB')  # Convertir a RGB
        
        img_array = np.array(img.resize((792, 252)))  # Asegurar tamaño de imagen
        
        img.save('path_to_save_spectrogram.png')  # Guardar imagen de espectrograma
        plt.close(fig)
        
        return img_array

    def process_audio_files(self, audio_dir, output_dir):
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        
        for root, _, files in os.walk(audio_dir):
            for file in files:
                if file.endswith('.wav'):
                    audio_path = os.path.join(root, file)
                    y, sr = librosa.load(audio_path, sr=None)
                    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
                    log_spectrogram = librosa.power_to_db(spectrogram, ref=np.max)
                    
                    output_path = os.path.join(output_dir, os.path.splitext(file)[0] + '.png')
                    self.generate_image_from_spectrogram(log_spectrogram)
                    print(f'Generated spectrogram for {file} and saved to {output_path}')

# Ejemplo de uso
audio_directory = 'path_to_audio_files'
output_directory = 'path_to_save_spectrograms'
processor = AudioProcessor(num_channels=3)
processor.process_audio_files(audio_directory, output_directory)


# Loading the data

## Global data

We first load the CSV file with the data of all participants, then we will use it to make a wider dataset with all the recordings

In [23]:
df = pd.read_csv('../D3TEC Dataset\Dataset.csv')
df.head()

Unnamed: 0,Marca temporal,Participant_ID,PHQ-9 Score,Age,Gender,Lugar de Residencia,Lugar de Procedencia,Social Class,Institution,Medicine,Physical Condition,Mental Health Condition,Depression Diagnosis (level)
0,2023/10/11 11:13:07 a. m. GMT-6,1,8,36,Female,"Santa Catarina, Nuevo León",,Working Class,CAABI,Forxiga. Atrovastatina.,Diabetes. Fatty Liver.,,
1,2023/10/11 1:13:47 p. m. GMT-6,2,1,57,Female,"Santa Catarina, Nuevo León",,Working Class,CAABI,Metformina,,,
2,2023/10/11 1:57:08 p. m. GMT-6,3,3,62,Female,"San Pedro, Nuevo León",,Upper Class,CAABI,Sertrialina,Estenosis Espinal,Ansiedad,
3,2023/10/11 3:02:28 p. m. GMT-6,4,13,48,Female,"Santa Catarina, Nuevo León",,Working Class,CAABI,,,,
4,2023/10/11 4:11:32 p. m. GMT-6,5,7,40,Female,"Santa Catarina, Nuevo León",,Middle Class,CAABI,Homeopatía.,,Moderate Anxiety.,


### Checking types

In [24]:
df.dtypes

Marca temporal                  object
Participant_ID                   int64
PHQ-9 Score                      int64
Age                              int64
Gender                          object
Lugar de Residencia             object
Lugar de Procedencia            object
Social Class                    object
Institution                     object
Medicine                        object
Physical Condition              object
Mental Health Condition         object
Depression Diagnosis (level)    object
dtype: object

### Adding binary classification

In [25]:
# Crear la nueva columna "PHQ-Binary" basada en "PHQ-9 Score"
df['PHQ-Binary'] = df['PHQ-9 Score'].apply(lambda x: 1 if x >= 10 else 0)
df

Unnamed: 0,Marca temporal,Participant_ID,PHQ-9 Score,Age,Gender,Lugar de Residencia,Lugar de Procedencia,Social Class,Institution,Medicine,Physical Condition,Mental Health Condition,Depression Diagnosis (level),PHQ-Binary
0,2023/10/11 11:13:07 a. m. GMT-6,1,8,36,Female,"Santa Catarina, Nuevo León",,Working Class,CAABI,Forxiga. Atrovastatina.,Diabetes. Fatty Liver.,,,0
1,2023/10/11 1:13:47 p. m. GMT-6,2,1,57,Female,"Santa Catarina, Nuevo León",,Working Class,CAABI,Metformina,,,,0
2,2023/10/11 1:57:08 p. m. GMT-6,3,3,62,Female,"San Pedro, Nuevo León",,Upper Class,CAABI,Sertrialina,Estenosis Espinal,Ansiedad,,0
3,2023/10/11 3:02:28 p. m. GMT-6,4,13,48,Female,"Santa Catarina, Nuevo León",,Working Class,CAABI,,,,,1
4,2023/10/11 4:11:32 p. m. GMT-6,5,7,40,Female,"Santa Catarina, Nuevo León",,Middle Class,CAABI,Homeopatía.,,Moderate Anxiety.,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,2024/04/09 5:28:51 p. m. GMT-6,149,12,19,Male,"Monterrey, Nuevo León.","Bogotá, Colombia.",Upper Class,TEC,,,,,1
58,2024/04/10 4:17:26 p. m. GMT-6,150,12,20,Female,"Monterrey, Nuevo León.",San Luis Potosí,Middle Class,TEC,,,,,1
59,2024/04/10 4:18:06 p. m. GMT-6,151,4,20,Male,"Monterrey, Nuevo León.","Monterrey, Nuevo León.",Upper Class,TEC,,,,,0
60,2024/04/10 4:18:50 p. m. GMT-6,152,5,22,Male,"Monterrey, Nuevo León.",Oaxaca de Juárez,Upper Class,TEC,,,,,0


### Treat data frame as dict

In [26]:
df_dict = df.set_index('Participant_ID').to_dict('index')
df_dict

{1: {'Marca temporal': '2023/10/11 11:13:07 a.\xa0m. GMT-6',
  'PHQ-9 Score': 8,
  'Age': 36,
  'Gender': 'Female',
  'Lugar de Residencia': 'Santa Catarina, Nuevo León',
  'Lugar de Procedencia': nan,
  'Social Class': 'Working Class',
  'Institution': 'CAABI',
  'Medicine': 'Forxiga. Atrovastatina.',
  'Physical Condition': 'Diabetes. Fatty Liver.',
  'Mental Health Condition': nan,
  'Depression Diagnosis (level)': nan,
  'PHQ-Binary': 0},
 2: {'Marca temporal': '2023/10/11 1:13:47 p.\xa0m. GMT-6',
  'PHQ-9 Score': 1,
  'Age': 57,
  'Gender': 'Female',
  'Lugar de Residencia': 'Santa Catarina, Nuevo León',
  'Lugar de Procedencia': nan,
  'Social Class': 'Working Class',
  'Institution': 'CAABI',
  'Medicine': 'Metformina',
  'Physical Condition': nan,
  'Mental Health Condition': nan,
  'Depression Diagnosis (level)': nan,
  'PHQ-Binary': 0},
 3: {'Marca temporal': '2023/10/11 1:57:08 p.\xa0m. GMT-6',
  'PHQ-9 Score': 3,
  'Age': 62,
  'Gender': 'Female',
  'Lugar de Residencia': '

### Helper function definition

#### Spectrogram generator

In [27]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from io import BytesIO

def extract_question_number(file_name):
    parts = file_name.split('_')
    if len(parts) >= 2:
        try:
            return int(parts[-1].split('.')[0])
        except ValueError:
            return 0
    return 0

def generate_and_save_spectrogram(spectrogram, save_path, num_channels=3):
    fig, ax = plt.subplots(figsize=(7.92, 2.52))  # Ajustar tamaño de figura a 792x252 píxeles
    img = librosa.display.specshow(spectrogram, x_axis='time', y_axis='mel', ax=ax)
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)  # Ajustar los márgenes
    ax.margins(0)
    ax.set_axis_off()
    plt.axis('off')  # Turn off axis labels
    buf = BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0, transparent=True)
    buf.seek(0)
    img = Image.open(buf)
    
    if num_channels == 1:
        img = img.convert('L')  # Convertir a grayscale
    elif num_channels == 2:
        img = img.convert('LA')  # Convertir a Luminance + Alpha
    else:
        img = img.convert('RGB')  # Convertir a RGB
    
    img_array = np.array(img.resize((792, 252)))  # Asegurar tamaño de imagen
    
    img.save(save_path)  # Guardar imagen de espectrograma
    plt.close(fig)

def load_audios_multiple_sources(data_dict, sources):
    """
    Load audios from multiple sources and types into the data_dict using librosa,
    and generate both standard and Mel spectrograms.
    
    Args:
    - data_dict (dict): Dictionary with the participants' information.
    - sources (dict): Dictionary where keys are audio types and values are their respective base paths.
    
    Returns:
    - data_dict (dict): Updated dictionary with the loaded audios and their spectrograms.
    """
    total_sources = len(sources)
    source_count = 0
    for audio_type, base_path in sources.items():
        source_count += 1
        print(f"Processing source {source_count} of {total_sources}: {audio_type}")
        
        audio_files = os.listdir(base_path)
        total_files = len(audio_files)
        processed_files = 0
        
        for file in audio_files:
            if file == '.DS_Store':
                continue
            processed_files += 1
            if processed_files % 10 == 0:
                print(f"Processed {processed_files} of {total_files} files in {audio_type}")
            
            file_path = os.path.join(base_path, file)
            question_number = extract_question_number(file)
            
            audio_time_series, sample_rate = librosa.load(file_path)
            
            # Generate the STFT spectrogram
            stft_spectrogram = librosa.stft(audio_time_series)
            spectrogram_db = librosa.amplitude_to_db(np.abs(stft_spectrogram), ref=np.max)
            
            # Generate the Mel spectrogram
            mel_spectrogram = librosa.feature.melspectrogram(y=audio_time_series, sr=sample_rate)
            mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
            
            # Generate and save the spectrogram image
            spectrogram_save_path = os.path.join(base_path, f"{os.path.splitext(file)[0]}_spectrogram.png")
            generate_and_save_spectrogram(mel_spectrogram_db, spectrogram_save_path)
            
            # Update data_dict with audio and spectrogram info
            for participant_id, info in data_dict.items():
                if 'audios' not in info:
                    info['audios'] = {}
                if audio_type not in info['audios']:
                    info['audios'][audio_type] = {}
                
                info['audios'][audio_type][question_number] = {
                    'file_path': file_path,
                    'waveform': audio_time_series,
                    'sample_rate': sample_rate,
                    'spectrogram': spectrogram_db,
                    'mel_spectrogram': mel_spectrogram_db,
                    'spectrogram_image_path': spectrogram_save_path  # Add the path to the saved spectrogram image
                }
    print("All sources processed.")
    return data_dict



In [None]:

# Define the sources
sources = {
    'sm': '../D3TEC Dataset/SM-27',
    'mobile': '../D3TEC Dataset/iPhoneSE2020'
}

# Load audios from both sources
data_dict_updated = load_audios_multiple_sources(df_dict, sources)

# Verify the content of the updated dictionary
data_dict_updated

In [1]:
data_dict_updated

NameError: name 'data_dict_updated' is not defined

#### Export to pickle to save and load the data

In [4]:
import pickle

def save_data_dict_to_file(filename, data_dict):
    """
    Save the data_dict dictionary to a file using pickle.

    Parameters:
    - filename (str): The name of the file to save the data_dict to.
    - data_dict (dict): The dictionary containing the data to be saved.

    Returns:
    None
    """
    # Open a file in write-binary (wb) mode
    with open(filename, 'wb') as file:
        # Use pickle.dump() to write the data_dict dictionary to the file
        pickle.dump(data_dict, file)

    print(f'data_dict has been saved to {filename}')

# Specify the filename for saving
filename = 'D3TEC.pkl'

# Call the save_data_dict_to_file function to save the data_dict_updated dictionary
save_data_dict_to_file(filename, data_dict_updated)


NameError: name 'data_dict_updated' is not defined

In [5]:
def load_data(filename):
    """
    Load data from a pickle file.

    Args:
        filename (str): The path to the pickle file.

    Returns:
        dict: The loaded data dictionary.
    """
    with open(filename, 'rb') as file:
        data_dict_loaded = pickle.load(file)
    return data_dict_loaded

data_dict_loaded = load_data(filename)
data_dict_loaded

{1: {'Marca temporal': '2023/10/11 11:13:07 a.\xa0m. GMT-6',
  'PHQ-9 Score': 8,
  'Age': 36,
  'Gender': 'Female',
  'Lugar de Residencia': 'Santa Catarina, Nuevo León',
  'Lugar de Procedencia': nan,
  'Social Class': 'Working Class',
  'Institution': 'CAABI',
  'Medicine': 'Forxiga. Atrovastatina.',
  'Physical Condition': 'Diabetes. Fatty Liver.',
  'Mental Health Condition': nan,
  'Depression Diagnosis (level)': nan,
  'PHQ-Binary': 0,
  'audios': {'sm': {0: {'file_path': '../D3TEC Dataset/SM-27\\153.wav',
     'waveform': array([-3.4575351e-07, -3.7821010e-06, -4.8473012e-06, ...,
            -7.4757336e-06, -4.6938076e-06, -1.4753023e-06], dtype=float32),
     'sample_rate': 22050,
     'spectrogram': array([[-41.41863 , -35.586212, -42.963806, ..., -50.34841 , -42.829468,
             -59.43257 ],
            [-42.578403, -34.543472, -38.21392 , ..., -37.266647, -38.4431  ,
             -47.42988 ],
            [-45.220688, -38.50419 , -31.796242, ..., -31.12186 , -36.444393,


#### Testing the spectrograms

In [None]:
import matplotlib.pyplot as plt
import numpy as np

participant_id = 2  # Replace with the actual participant ID
audio_type = 'sm'  # Replace with 'sm' or 'mobile'
question_number = 1  # Replace with the actual question number

# Retrieve both spectrograms
mel_spectrogram_data = data_dict_updated[participant_id]['audios'][audio_type][question_number]['mel_spectrogram']
spectrogram_data = data_dict_updated[participant_id]['audios'][audio_type][question_number]['spectrogram']

# Convert the Mel spectrogram to a numpy array if it's a tensor
if not isinstance(mel_spectrogram_data, np.ndarray):
    mel_spectrogram_data = mel_spectrogram_data.numpy()

# Convert the standard spectrogram to a numpy array if it's a tensor
if not isinstance(spectrogram_data, np.ndarray):
    spectrogram_data = spectrogram_data.numpy()

# Squeeze both spectrograms
mel_spectrogram_data = np.squeeze(mel_spectrogram_data)
spectrogram_data = np.squeeze(spectrogram_data)

# Plot the Mel spectrogram
plt.figure(figsize=(10, 4))
plt.imshow(mel_spectrogram_data, aspect='auto', origin='lower')
plt.title(f'Mel Spectrogram for Participant {participant_id}, {audio_type}, Question {question_number}')
plt.ylabel('Mel Frequency bins')
plt.xlabel('Time frames')
plt.colorbar(format='%+2.0f dB')
plt.show()

# Plot the standard spectrogram
plt.figure(figsize=(10, 4))
plt.imshow(spectrogram_data, aspect='auto', origin='lower')
plt.title(f'Spectrogram for Participant {participant_id}, {audio_type}, Question {question_number}')
plt.ylabel('Frequency bins')
plt.xlabel('Time frames')
plt.colorbar(format='%+2.0f dB')
plt.show()