In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel
%pip install malaya-speech

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel
%pip install tensorflow

In [None]:
import pandas as pd
import numpy as np
import librosa as lb
import torch
import os

# **Extract Data**

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Both paths are relative to the current working directory; the Drive mount point
# used above is /content/drive, so they resolve when the notebook runs from /content.
# Drive folder holding the dataset archive and the participant spreadsheet
DRIVE_PATH = 'drive/MyDrive'
# Folder (created by extracting the archive) that contains the .wav recordings
DATA_PATH = 'psychiatric.disorders.ML/wav files'

In [None]:
from zipfile import ZipFile

# Unpack the dataset archive from Drive into the current working directory
archive_path = f'{DRIVE_PATH}/psychiatric.disorders.ML.zip'
with ZipFile(archive_path, mode='r') as archive:
    archive.extractall()

In [None]:
# Count the extracted recordings via DATA_PATH so this cell cannot drift from the
# folder every other cell reads from
print("{} recordings total".format(len(os.listdir(DATA_PATH))))

814 recordings total


In [None]:
# Load the per-participant metadata spreadsheet stored next to the archive on Drive
participants_xlsx = os.path.join(DRIVE_PATH, 'PsychiatricDiscourse_participant.data.xlsx')
participants = pd.read_excel(participants_xlsx)

In [None]:
# Participants with a non-zero depression score and a zero thought-disorder score.
# The selection is by symptom scores only, so it contains both 'patient' and
# 'control' rows of the `group` column.
# NOTE(review): a missing depression score also satisfies `!= 0` — confirm the
# column has no NaNs.
no_thought_disorder = participants['thought.disorder.symptoms'] == 0.
has_depression = participants['depression.symptoms'] != 0.
depression_only = participants.loc[no_thought_disorder & has_depression]
depression_only

Unnamed: 0,ID,group,diagnosis,sex,age,education.level,education.years,depression.symptoms,thought.disorder.symptoms
0,PD-001,patient,schizotypal.disorder,female,19.0,secondary,11,1,0
1,PD-002,patient,bipolar.affective.disorder,female,26.0,higher,17,1,0
3,PD-004,patient,borderline.personality.disorder,female,16.0,secondary,9,1,0
7,PD-008,patient,bipolar.affective.disorder,female,19.0,higher.unfinished,12,1,0
12,PD-013,patient,recurrent.depressive.disorder,female,20.0,higher.unfinished,12,1,0
...,...,...,...,...,...,...,...,...,...
262,PN-313,control,,female,28.0,higher,17,2,0
265,PN-316,control,,female,30.0,higher,16,2,0
268,PN-319,control,,female,27.0,higher,19,1,0
269,PN-320,control,,female,27.0,higher,18,3,0


In [None]:
# Symptom-free participants: both the depression and the thought-disorder score are 0.
# Despite the name, rows are selected by scores, not by the `group` column, so
# diagnosed patients without symptoms are included as well.
symptom_free = (
    participants['depression.symptoms'].eq(0.)
    & participants['thought.disorder.symptoms'].eq(0.)
)
control_group = participants.loc[symptom_free]

control_group

Unnamed: 0,ID,group,diagnosis,sex,age,education.level,education.years,depression.symptoms,thought.disorder.symptoms
2,PD-003,patient,schizotypal.disorder,female,17.0,secondary,10,0,0
4,PD-005,patient,schizotypal.disorder,female,19.0,secondary,11,0,0
5,PD-006,patient,borderline.personality.disorder,female,22.0,higher.unfinished,15,0,0
8,PD-009,patient,schizotypal.disorder,female,17.0,secondary,10,0,0
9,PD-010,patient,bipolar.affective.disorder,female,20.0,higher.unfinished,12,0,0
...,...,...,...,...,...,...,...,...,...
264,PN-315,control,,female,37.0,ученая степень,17,0,0
266,PN-317,control,,male,34.0,ученая степень,18,0,0
267,PN-318,control,,female,31.0,higher,15,0,0
271,PN-322,control,,female,41.0,vocational,13,0,0


# **Process Data**

In [None]:
# Stack both cohorts and drop the columns that are not used as features.
df = (
    pd.concat([depression_only, control_group])
    .drop(columns=['education.level', 'diagnosis', 'thought.disorder.symptoms', 'group'])
)

# Assign the transformed columns back explicitly: calling `replace`/`fillna` with
# inplace=True on `df.sex` / `df.age` operates on a column selection, which warns on
# recent pandas and does not modify `df` at all under Copy-on-Write.
df['sex'] = df['sex'].replace({'female': 0, 'male': 1})  # encode sex as 0/1

# Impute missing ages with the mean, then standardise to zero mean / unit variance.
df['age'] = df['age'].fillna(df['age'].mean())
df['age'] = (df['age'] - df['age'].mean()) / df['age'].std()

In [None]:
def get_patient_audio(row, data_folder=DATA_PATH, return_uncomplete=False):
    """
    Find a participant's recordings.

    Parameters
    ----------
    row : object with an ``ID`` attribute (e.g. a DataFrame row)
        The participant; ``row.ID`` is the key searched for in file names.
    data_folder : str
        Directory whose entries are scanned.
    return_uncomplete : bool
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    list of str
        Names of all entries in ``data_folder`` that contain ``row.ID``,
        sorted alphabetically.
    """
    key = row.ID
    # os.listdir returns entries in arbitrary order; sort so the result (and
    # everything derived from it) is reproducible between runs and machines.
    return sorted(name for name in os.listdir(data_folder) if key in name)

# list of recording file names for every participant
df['audio'] = df.apply(get_patient_audio, axis=1)

# Exclude participants with no recordings and renumber the rows.
# Chaining reset_index (instead of calling it in place on the filtered slice)
# yields a fresh frame, so the column assignment below cannot trigger a
# SettingWithCopyWarning.
df = df[df['audio'].apply(len) > 0].reset_index(drop=True)

# binary target: 1 if any depression symptoms were recorded, 0 otherwise
df['depressed'] = (df['depression.symptoms'] != 0).astype(int)

In [None]:
# Number of participants left after dropping those without recordings
print(f"{len(df)} participants with recordings")

243 participants with recordings


In [None]:
# Fixed random_state so the preview shows the same rows on every run
df.sample(5, random_state=0)

Unnamed: 0,ID,sex,age,education.years,depression.symptoms,audio,depressed
162,PD-145,0,1.007173,16,0,"[PD-145-pic-1-winterday.wav, PD-145-pers-1-par...",0
138,PD-079,0,-0.543967,12,0,"[PD-079-pers-1-trip.wav, PD-079-instr-1-chair....",0
21,PD-061,0,-0.543967,12,1,"[PD-061-instr-1-chair.wav, PD-061-pic-1-sports...",1
159,PD-142,0,0.076489,17,0,"[PD-142-instr-1-table.wav, PD-142-pic-1-advent...",0
77,PN-258,0,-0.440558,15,1,"[PN-258-pers-1-trip.wav, PN-258-pic-1-adventur...",1


## **Noise Reduction Demo**

In [None]:
import malaya_speech
import numpy as np
from malaya_speech import Pipeline
import IPython.display as ipd



In [None]:
# List the GPUs reported by malaya-speech to confirm the runtime has an accelerator
malaya_speech.utils.available_gpu()

[('GPU:0', '13.929 GB')]

In [None]:
# NOTE(review): `sign_eager_fallback` is not referenced anywhere in the visible
# notebook; this import from a private TensorFlow module looks like an accidental
# editor auto-import — confirm and remove.
from tensorflow.python.ops.gen_math_ops import sign_eager_fallback
# filename = 'PN-251-pers-1-present.wav'
filename = 'PD-016-pic-1-adventure.wav'

# Load the demo recording, requesting a 44100 Hz sample rate
signal, sr = malaya_speech.load(os.path.join(DATA_PATH, filename), sr = 44100)
# Play the first 20 seconds (20 * sr samples) of the unprocessed recording
ipd.Audio(signal[:20 * sr], rate = sr)

In [None]:
# Load the quantized 'resnet-unet' noise-reduction model and run it on the whole
# recording; the result is indexed with 'voice' and 'noise' in the playback cells below.
quantized_model = malaya_speech.noise_reduction.deep_model(model = 'resnet-unet', quantized = True)
output = quantized_model(signal)



In [None]:
# First 20 seconds of the 'voice' component returned by the noise-reduction model
ipd.Audio(output['voice'][:20 * sr], rate = sr)

In [None]:
# First 20 seconds of the 'noise' component returned by the noise-reduction model
ipd.Audio(output['noise'][:20 * sr], rate = sr)

## **Processing Demo**

In [None]:
# Output directory for spectrogram images; must match the directory that
# get_spectrogram_images writes to ('spec_images').
spec_img_dir = 'spec_images'
show_progress = False

# exist_ok=True replaces the separate isdir check: idempotent on re-run and free of
# the check-then-create race.
os.makedirs(spec_img_dir, exist_ok=True)

In [None]:
from torchvision.utils import save_image
from math import ceil
from tensorflow.errors import ResourceExhaustedError


# Demo of the full preprocessing chain on a single recording:
# load -> noise reduction -> silence trimming -> log-mel spectrogram -> image file(s).
whole_audio = True  # True: one image per recording; False: one image per 15 s fragment

noise_reduction_model = malaya_speech.noise_reduction.deep_model(model = 'resnet-unet', quantized = True)
p = Pipeline()

fragments = []  # names of the image files written by this cell

filename = 'PN-251-pers-1-present.wav'

signal, sr = malaya_speech.load(os.path.join(DATA_PATH, filename), sr = 44100)

try:
    noise_reduced_signal = noise_reduction_model(signal)['voice']
except ResourceExhaustedError:
    # TensorFlow ran out of resources on the full recording: denoise it in
    # 15 s frames instead and concatenate the 'voice' parts.
    (
        p.map(malaya_speech.generator.frames, frame_duration_ms = 15000, sample_rate = sr)
        .foreach_map(noise_reduction_model)
        .foreach_map(lambda x: x['voice'])
        .map(np.concatenate)
    )
    noise_reduced_signal = p(signal)['concatenate']

# STFT framing. NOTE: earlier comments called this a 20 ms window, but the original
# expression `sr * 20 / 100` yields 0.2 s; the numeric behaviour is kept and the
# units are stated correctly here.
# NOTE(review): if a 20 ms window was really intended, change window_ms — this
# changes the size of every generated image.
window_ms = 200   # analysis window length in milliseconds
hop_factor = 0.5  # slide the window by half of its length (100 ms hop)

# number of samples in one FFT window
n_fft = int(sr * window_ms / 1000)

# number of samples the window is shifted by between consecutive FFTs
hop_length = int(n_fft * hop_factor)

# remove leading and trailing silence (everything more than 25 dB below the peak)
trimmed_signal, _ = lb.effects.trim(noise_reduced_signal, top_db=25, frame_length=n_fft, hop_length=hop_length)

# mel power spectrogram (already non-negative), converted to dB relative to its maximum
mel_power = lb.feature.melspectrogram(
    y=trimmed_signal, sr=sr, hop_length=hop_length, n_fft=n_fft, n_mels=128
)
log_mel = lb.power_to_db(mel_power, ref=np.max)

# Rescale from the [-80; 0] dB range to [0.0; 1.0] for torch/torchvision.
# Plain array arithmetic replaces np.vectorize, which loops in Python per element.
spec_rescaled = (log_mel + 80) / 80

# splitext keeps dots that are part of the file name (split('.')[0] would cut there)
image_stem = os.path.splitext(filename)[0]

if whole_audio:
    img_name = image_stem + '.jpg'
    save_image(
        torch.from_numpy(spec_rescaled).float(),
        fp=os.path.join(spec_img_dir, img_name))
    fragments.append(img_name)
else:
    # split the recording into 15 s (15000 ms) intervals: 150 columns of 100 ms each
    cols_in_interval = int(15000 / (window_ms * hop_factor))

    # Only complete intervals are kept. Counting them explicitly (instead of
    # hsplit(...)[:-1]) no longer throws away the last interval when the length
    # is an exact multiple of cols_in_interval.
    for idx in range(spec_rescaled.shape[1] // cols_in_interval):
        split = spec_rescaled[:, idx * cols_in_interval:(idx + 1) * cols_in_interval]

        # create a torch tensor from the numpy slice
        spec_tensor = torch.from_numpy(split).float()

        split_name = image_stem + f'-{str(idx)}' + '.jpg'

        # save tensor as image for further use
        save_image(
            spec_tensor,
            fp=os.path.join(spec_img_dir, split_name))

        fragments.append(split_name)

fragments



['PN-251-pers-1-present.jpg']

# **Processing**

In [None]:
import os
import torch
from torchvision.utils import save_image
from math import ceil
from tensorflow.errors import ResourceExhaustedError


# Quantized 'resnet-unet' noise-reduction model; get_spectrogram_images reads it as a global.
# NOTE(review): gpu_limit=0.7 presumably caps the share of GPU memory the model may
# claim — confirm against the malaya-speech documentation.
noise_reduction_model = malaya_speech.noise_reduction.deep_model(model = 'resnet-unet', quantized = True, gpu_limit=0.7)
# Module-level malaya-speech Pipeline instance (no processing nodes attached yet)
p = Pipeline()


def get_spectrogram_images(files, data_folder, whole_audio, output_dir='spec_images'):
    """
    Denoise recordings and save their log-mel spectrograms as JPEG images.

    For every file: load at 44.1 kHz, run the global ``noise_reduction_model``
    (falling back to 15 s chunks when TensorFlow raises ResourceExhaustedError),
    trim leading/trailing silence, compute a 128-band mel spectrogram in dB,
    rescale it to [0, 1] and write it with ``torchvision.utils.save_image``.

    Parameters
    ----------
    files : iterable of str
        Recording file names, relative to ``data_folder``.
    data_folder : str
        Directory that contains the recordings.
    whole_audio : bool
        True: save one image per recording. False: cut the spectrogram into
        consecutive 15 s fragments and save one image per complete fragment
        (an incomplete trailing fragment is dropped).
    output_dir : str, optional
        Existing directory the images are written to.

    Returns
    -------
    list of str
        File names (without directory) of the saved images, in processing order.
    """
    # NOTE: earlier comments called this a 20 ms window, but the original
    # expression `sr * 20 / 100` yields 0.2 s. The numeric behaviour is kept;
    # only the units are now stated correctly.
    window_ms = 200      # analysis window length in milliseconds
    hop_factor = 0.5     # slide the window by half of its length (100 ms hop)
    fragment_ms = 15000  # fragment length used when whole_audio is False

    fragments = []

    for filename in files:
        # honour data_folder (the global DATA_PATH was used here before, which
        # silently ignored the argument)
        signal, sr = malaya_speech.load(os.path.join(data_folder, filename), sr = 44100)
        try:
            noise_reduced_signal = noise_reduction_model(signal)['voice']
        except ResourceExhaustedError:
            # Denoise in 15 s frames and concatenate the 'voice' parts. A fresh
            # Pipeline is built per fallback so that repeated failures do not keep
            # attaching additional nodes to one shared pipeline.
            chunked = Pipeline()
            (
                chunked.map(malaya_speech.generator.frames, frame_duration_ms = 15000, sample_rate = sr)
                .foreach_map(noise_reduction_model)
                .foreach_map(lambda x: x['voice'])
                .map(np.concatenate)
            )
            noise_reduced_signal = chunked(signal)['concatenate']

        # number of samples in one FFT window
        n_fft = int(sr * window_ms / 1000)

        # number of samples the window is shifted by between consecutive FFTs
        hop_length = int(n_fft * hop_factor)

        # remove leading and trailing silence (more than 25 dB below the peak)
        trimmed, _ = lb.effects.trim(noise_reduced_signal, top_db=25, frame_length=n_fft, hop_length=hop_length)

        # mel power spectrogram (already non-negative), in dB relative to its maximum
        mel_power = lb.feature.melspectrogram(
            y=trimmed, sr=sr, hop_length=hop_length, n_fft=n_fft, n_mels=128
        )
        log_mel = lb.power_to_db(mel_power, ref=np.max)

        # Rescale from the [-80; 0] dB range to [0.0; 1.0] for torch/torchvision.
        # Plain array arithmetic replaces np.vectorize, which loops in Python.
        spec_rescaled = (log_mel + 80) / 80

        # splitext keeps dots inside the file name (split('.')[0] would cut there)
        image_stem = os.path.splitext(filename)[0]

        if whole_audio:
            img_name = image_stem + '.jpg'
            save_image(
                torch.from_numpy(spec_rescaled).float(),
                fp=os.path.join(output_dir, img_name))

            fragments.append(img_name)
        else:
            # 15 s intervals: 150 spectrogram columns of 100 ms each
            cols_in_interval = int(fragment_ms / (window_ms * hop_factor))

            # Only complete intervals are kept. Counting them explicitly (instead
            # of hsplit(...)[:-1]) keeps the last interval when the length is an
            # exact multiple of cols_in_interval. The former `del splits` inside
            # the loop, which raised UnboundLocalError on the second fragment,
            # is gone as well.
            for idx in range(spec_rescaled.shape[1] // cols_in_interval):
                split = spec_rescaled[:, idx * cols_in_interval:(idx + 1) * cols_in_interval]

                # create a torch tensor from the numpy slice
                spec_tensor = torch.from_numpy(split).float()

                split_name = image_stem + f'-{str(idx)}' + '.jpg'

                # save tensor as image for further use
                save_image(
                    spec_tensor,
                    fp=os.path.join(output_dir, split_name))

                fragments.append(split_name)

        # release the large per-file arrays before the next recording is loaded
        del signal, noise_reduced_signal, trimmed, mel_power, log_mel, spec_rescaled

    return fragments



In [None]:
# Start from a clean slate; -f keeps these commands quiet and successful when the
# directory or archive does not exist yet (e.g. on a fresh runtime)
!rm -rf /content/spec_images/
!rm -f /content/spec_images.zip

rm: cannot remove '/content/spec_images.zip': No such file or directory


In [None]:
from tqdm import tqdm

# Output directory for spectrogram images; must match the directory that
# get_spectrogram_images writes to ('spec_images').
spec_img_dir = 'spec_images'
show_progress = False

# idempotent: no separate existence check needed
os.makedirs(spec_img_dir, exist_ok=True)

# showing the progress bar heavily slows down the cycle
if show_progress:
    tqdm.pandas()  # registers Series.progress_apply
    apply_to_audio = df['audio'].progress_apply
else:
    apply_to_audio = df['audio'].apply

# One call site for both modes: for every participant, turn the list of recordings
# into the list of saved spectrogram image names.
df['audio.fragments'] = apply_to_audio(get_spectrogram_images, data_folder=DATA_PATH, whole_audio=True)

In [None]:
# Recursively archive the generated spectrogram images into /content/spec_images.zip
!zip -r /content/spec_images.zip /content/spec_images

  adding: content/spec_images/ (stored 0%)
  adding: content/spec_images/PN-301-pers-1-trip.jpg (deflated 2%)
  adding: content/spec_images/PD-061-pic-1-sportsman.jpg (deflated 2%)
  adding: content/spec_images/PD-073-pers-1-present.jpg (deflated 3%)
  adding: content/spec_images/PD-055-pic-1-winterday.jpg (deflated 1%)
  adding: content/spec_images/PD-122-instr-1-chair.jpg (deflated 2%)
  adding: content/spec_images/PN-011-pers-1-present.jpg (deflated 2%)
  adding: content/spec_images/PD-017-pic-1-sportsman.jpg (deflated 2%)
  adding: content/spec_images/PD-071-pers-1-trip.jpg (deflated 1%)
  adding: content/spec_images/PD-088-instr-1-bench.jpg (deflated 1%)
  adding: content/spec_images/PD-016-pers-1-present.jpg (deflated 2%)
  adding: content/spec_images/PD-013-pers-1-present.jpg (deflated 3%)
  adding: content/spec_images/PN-033-pic-1-sportsman.jpg (deflated 2%)
  adding: content/spec_images/PN-264-pers-1-present.jpg (deflated 1%)
  adding: content/spec_images/PN-004-pic-1-adventur

In [None]:
# Persist the participant table (including the 'audio' and 'audio.fragments' columns).
# NOTE: pickle files must only be loaded from trusted sources.
df.to_pickle('/content/fragment_df.pkl')