# Installations

In [1]:
!pip install torch
!pip install pytorch_forecasting
!pip install dtaidistance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch_forecasting
  Downloading pytorch_forecasting-1.0.0-py3-none-any.whl (140 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.4/140.4 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi>=0.80 (from pytorch_forecasting)
  Downloading fastapi-0.95.2-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.0/57.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lightning<3.0.0,>=2.0.0 (from pytorch_forecasting)
  Downloading lightning-2.0.2-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
Collecting optuna<4.0.0,>=3.1.0 (from pytorch_forecasting)
  Downloading optuna-3.1.1-py3-none-any.whl (365 kB

# Main part

In [2]:
import os
import numpy as np
import pandas as pd
import librosa as lb
import matplotlib.pyplot as plt

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
#Global Sample Rate
# SR = 22000
SR = 8000

SEED = 42
np.random.seed(SEED)

DRIVE_PATH = 'drive/MyDrive/psychiatric.disorders.ML'

In [5]:
# participants_info
# participants = pd.read_excel("../Datasets/psychiatric_disorders_data/PsychiatricDiscourse_participant_data.xlsx")

participants = pd.read_excel(os.path.join(DRIVE_PATH, 'PsychiatricDiscourse_participant.data.xlsx'))

In [6]:
# depression_only
depression_only = participants.loc[
    (participants['thought.disorder.symptoms'] == 0.) &
    (participants['depression.symptoms'] != 0.)
]

In [7]:
control_group = participants.loc[
    (participants['depression.symptoms'] == 0.) &
    (participants['thought.disorder.symptoms'] == 0.)
]

In [8]:
df = pd.concat([depression_only, control_group])

In [9]:
def get_patient_audio(row, data_folder=os.path.join(DRIVE_PATH, 'wav files'), return_uncomplete=False):
    key = row.ID
    audio_files = []
    for filename in os.listdir(data_folder):
        if filename.find(key) != -1:
            audio_files.append(filename)
    return audio_files

df['audio'] = df.apply(get_patient_audio, axis=1)

# exclude patients with num of recordings other than 3
df = df[df.audio.apply(len) == 3]

In [10]:
os.path.join(DRIVE_PATH, 'wav files')

'drive/MyDrive/psychiatric.disorders.ML/wav files'

In [11]:
task_mapping = {
    'narrative': ['sportsman', 'adventure', 'winterday'], 
    'story': ['present', 'trip', 'party'], 
    'instruction': ['chair', 'table', 'bench']
}

def get_domain_audio(row, domain):
    files = []
    for topic in task_mapping[domain]:
        for file_name in row.audio:
            if file_name.find(topic) != -1:
                files.append(file_name)
                
    if len(files) > 1:
        print(files)
    # assert len(files) < 2
    return files[0] if len(files) else None
    
    
    
for domain in task_mapping:
    df[f'audio.{domain}'] = df.apply(get_domain_audio, axis=1, domain=domain)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'audio.{domain}'] = df.apply(get_domain_audio, axis=1, domain=domain)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'audio.{domain}'] = df.apply(get_domain_audio, axis=1, domain=domain)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'audio.{domain}'] = df.apply(get_domain_audio, axis=1

In [12]:
df.head()

Unnamed: 0,ID,group,diagnosis,sex,age,education.level,education.years,depression.symptoms,thought.disorder.symptoms,audio,audio.narrative,audio.story,audio.instruction
0,PD-001,patient,schizotypal.disorder,female,19.0,secondary,11,1,0,"[PD-001-pers-1-present.wav, PD-001-instr-1-cha...",PD-001-pic-1-sportsman.wav,PD-001-pers-1-present.wav,PD-001-instr-1-chair.wav
1,PD-002,patient,bipolar.affective.disorder,female,26.0,higher,17,1,0,"[PD-002-pers-1-present.wav, PD-002-instr-1-cha...",PD-002-pic-1-adventure.wav,PD-002-pers-1-present.wav,PD-002-instr-1-chair.wav
3,PD-004,patient,borderline.personality.disorder,female,16.0,secondary,9,1,0,"[PD-004-pic-1-adventure.wav, PD-004-pers-1-pre...",PD-004-pic-1-adventure.wav,PD-004-pers-1-present.wav,PD-004-instr-1-chair.wav
7,PD-008,patient,bipolar.affective.disorder,female,19.0,higher.unfinished,12,1,0,"[PD-008-instr-1-chair.wav, PD-008-pic-1-advent...",PD-008-pic-1-adventure.wav,PD-008-pers-1-present.wav,PD-008-instr-1-chair.wav
12,PD-013,patient,recurrent.depressive.disorder,female,20.0,higher.unfinished,12,1,0,"[PD-013-pic-1-sportsman.wav, PD-013-pers-1-pre...",PD-013-pic-1-sportsman.wav,PD-013-pers-1-present.wav,PD-013-instr-1-chair.wav


In [13]:
df['depression.symptoms'].value_counts()

0    142
1     62
2     22
3     10
Name: depression.symptoms, dtype: int64

In [14]:
#80% training data and 20% test data. Split so that test data will include all types of depression severity

from sklearn.model_selection import StratifiedShuffleSplit

sss = StratifiedShuffleSplit(n_splits=1, test_size = 0.2, train_size = 0.8, random_state = 42)

for (train_index, test_index) in sss.split(df, df['depression.symptoms']):
  train_df = df.iloc[train_index]
  test_df = df.iloc[test_index]

In [15]:
train_df.head()

Unnamed: 0,ID,group,diagnosis,sex,age,education.level,education.years,depression.symptoms,thought.disorder.symptoms,audio,audio.narrative,audio.story,audio.instruction
189,PN-075,control,,male,23.0,vocational,14,2,0,"[PN-075-instr-1-table.wav, PN-075-pers-1-trip....",PN-075-pic-1-adventure.wav,PN-075-pers-1-trip.wav,PN-075-instr-1-table.wav
32,PD-034,patient,bipolar.affective.disorder,female,20.0,higher.unfinished,13,0,0,"[PD-034-instr-1-chair.wav, PD-034-pic-1-advent...",PD-034-pic-1-adventure.wav,PD-034-pers-1-present.wav,PD-034-instr-1-chair.wav
259,PN-306,control,,male,53.0,higher,20,0,0,"[PN-306-instr-1-bench.wav, PN-306-pic-1-winter...",PN-306-pic-1-winterday.wav,PN-306-pers-1-party.wav,PN-306-instr-1-bench.wav
179,PN-054,control,,female,24.0,higher,17,2,0,"[PN-054-pers-1-party.wav, PN-054-instr-1-chair...",PN-054-pic-1-adventure.wav,PN-054-pers-1-party.wav,PN-054-instr-1-chair.wav
84,PD-089,patient,recurrent.depressive.disorder,female,17.0,vocational.unfinished,10,0,0,"[PD-089-pers-1-present.wav, PD-089-instr-1-cha...",PD-089-pic-1-adventure.wav,PD-089-pers-1-present.wav,PD-089-instr-1-chair.wav


In [16]:
test_df.shape

(48, 13)

## Data and train

In [17]:
# from tqdm import tqdm
from tqdm.auto import tqdm
import pytorch_forecasting as ptf
import torch
from pytorch_forecasting import NBeats, TimeSeriesDataSet
from pytorch_forecasting.data import NaNLabelEncoder
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping
from pytorch_forecasting.metrics import RMSE, 
from pytorch_forecasting.metrics import MASE
from dtaidistance import dtw
from itertools import product
import sklearn.metrics as skm 


import warnings
warnings.filterwarnings('ignore')


pl.seed_everything(42)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42


cpu


In [18]:
BASE_LEN = 5
BATCH_SIZE = 2

In [19]:
def cut_recordings(data, audio_dur, cutoff_len, min_len = 0):
  res = []
  cnt = 1
  res.append(data[:int(cutoff_len * SR)])
  audio_dur -= cutoff_len

  while(audio_dur > min_len and cnt < 100): # for test less cuts (and cnt < 7)
    res.append(data[int(cutoff_len * SR)*cnt:int(cutoff_len * SR)*(cnt+1)])
    audio_dur -= cutoff_len
    cnt += 1

  return res


def pad_ts(data, max_dur):
  for i in range(len(data)):
      if (len(data[i]) < max_dur):
        data[i] = np.pad(data[i], (max_dur - len(data[i]), 0), 'constant', constant_values=(0,))

  return data


def load_and_preprocess(files, data_folder, cutoff_len = None, min_len=0):
  audio_ts = []

  for filename in files:
      signal, sr = lb.load(os.path.join(data_folder, filename), sr=SR)
      signal, _ = lb.effects.trim(signal, top_db=40)
      audio_ts.append(signal)


  for i in range(len(audio_ts)):
    audio_dur = len(audio_ts[i]) / SR

    audio_ts[i] = cut_recordings(data = audio_ts[i], audio_dur = audio_dur, 
                                 cutoff_len = cutoff_len, min_len = min_len)
    audio_ts[i] = pad_ts(audio_ts[i], cutoff_len*SR)

  upd_df = pd.DataFrame(columns=[f'observations', 'time_idx', 'group'])
  audio_len = len(audio_ts[0][0])


  for i in range(len(audio_ts)):
    for j in range(len(audio_ts[i])):
      if j >= 10:
        tmp_df = pd.DataFrame({'observations':audio_ts[i][j], 'time_idx' : np.arange(audio_len), 'group':[f'{j}_' + files.iloc[i]] * audio_len})
      else:
        tmp_df = pd.DataFrame({'observations':audio_ts[i][j], 'time_idx' : np.arange(audio_len), 'group':[f'0{j}_' + files.iloc[i]] * audio_len})

      upd_df = pd.concat([upd_df, tmp_df], axis=0, ignore_index=True)

  upd_df['time_idx'] = upd_df['time_idx'].astype(int)

  return upd_df


def create_timeSeriesDataSet(df, encoder_len = 60, prediction_len = 60):
  # Replace "." with "_"
  df.columns = [col.replace(".", "_") for col in df.columns]

  # Define the TimeSeriesDataSet object
  training_cutoff = df["time_idx"].max() - prediction_len

  training = TimeSeriesDataSet(
      data=df.loc[lambda x: x.time_idx <= training_cutoff],
      time_idx="time_idx",
      target = "observations",
      group_ids= ["group"],
      max_encoder_length=encoder_len,
      max_prediction_length=prediction_len,
      time_varying_unknown_reals=["observations"],
  )

  validation = TimeSeriesDataSet.from_dataset(training, df, min_prediction_idx=training_cutoff+1)

  return (training, validation)


def create_timeSeriesDataSet_test(df, encoder_len = 60, prediction_len = 60):
  # Replace "." with "_"
  df.columns = [col.replace(".", "_") for col in df.columns]

  # Define the TimeSeriesDataSet object
  training_cutoff = df["time_idx"].max() - prediction_len

  test = TimeSeriesDataSet(
      data=df,
      time_idx="time_idx",
      target = "observations",
      group_ids= ["group"],
      max_encoder_length=encoder_len,
      max_prediction_length=prediction_len,
      time_varying_unknown_reals=["observations"],
      min_prediction_idx=training_cutoff+1,
  )

  return test


def classify_obs(pid, pred, df, data_folder, size_of_pred, audio_ts, stimuli_type):
  min_dist = float('inf')
  most_similar = None

  for filename in df[f'audio.{stimuli_type}']:
    distance_dtw = dtw.distance_fast(pred, audio_ts[filename][-size_of_pred:].astype(np.double))

    if distance_dtw < min_dist:
      min_dist = distance_dtw
      most_similar = filename
    
  return (pid, df[df[f'audio.{stimuli_type}'] == most_similar]['depression.symptoms'].iloc[0], min_dist)

### Baseline

In [None]:
# Fit first 4 observations
files = train_df['audio.narrative'][:4]
data_folder=os.path.join(DRIVE_PATH, 'wav files')

new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)
train_val = create_timeSeriesDataSet(new_df)

train_dataloader = train_val[0].to_dataloader(
    train=True, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
)
# NBeats model
net_nbeats = NBeats.from_dataset(
    train_val[0],
    learning_rate=1e-3,
    optimizer="AdamW",
    backcast_loss_ratio = 0.5,
)
# net_nbeats.to(device)

# Trainer
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=50, verbose=False, mode="min")

In [None]:
# Fit for the rest observations
for i in tqdm(range(0, len(train_df['audio.narrative']), 4)):
  files = train_df['audio.narrative'][i:i+4]
  data_folder=os.path.join(DRIVE_PATH, 'wav files')

  new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)

  train_val = create_timeSeriesDataSet(new_df)

  train_dataloader = train_val[0].to_dataloader(
      train=True, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
  )

  val_dataloader = train_val[1].to_dataloader(
      train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
  )

  trainer = pl.Trainer(
    max_epochs=2, #10 FOR NOW
    accelerator="gpu",
    enable_model_summary=False,
    gradient_clip_val=0.1,
    limit_train_batches=2000,
    callbacks=[early_stop_callback],
    enable_checkpointing=True,
    check_val_every_n_epoch=1,
    # reload_dataloaders_every_n_epochs = 10
  )

  trainer.fit(
    net_nbeats,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
  )

  0%|          | 0/47 [00:00<?, ?it/s]

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

KeyboardInterrupt: ignored

In [None]:
# net1 = DeepAR.load_from_checkpoint("/content/lightning_logs/version_56/checkpoints/epoch=1-step=4000.ckpt")
# net1.to(device)

# files = df['audio.narrative'][53*4:54*4]
# data_folder=os.path.join(DRIVE_PATH, 'wav files')

# new_df = load_and_preprocess(files, data_folder)
# train_val = create_timeSeriesDataSet(new_df)

# train_dataloader = train_val[0].to_dataloader(
#     train=True, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
# )
# val_dataloader = train_val[1].to_dataloader(
#     train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
# )

# # Trainer
# early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=50, verbose=False, mode="min")
# trainer = pl.Trainer(
#     max_epochs=2, #10 FOR NOW
#     accelerator="gpu",
#     enable_model_summary=False,
#     gradient_clip_val=0.1,
#     limit_train_batches=2000,
#     callbacks=[early_stop_callback],
#     enable_checkpointing=True,
#     check_val_every_n_epoch=1,
#     # reload_dataloaders_every_n_epochs = 10
# )

# trainer.fit(
#   net1,
#   train_dataloaders=train_dataloader,
#   val_dataloaders=val_dataloader,
# )

### Fine-tuning N-Beats

Save models here: "/content/drive/MyDrive/Grid_search_thesis/NBeats_models"

In [None]:
data_folder=os.path.join(DRIVE_PATH, 'wav files')

In [None]:
BATCH_SIZE = 8
LIMIT_TRAIN_BATCHES = 3000
EPOCHS = 3
BASE_LEN_f = 5

In [None]:
PREDICTION_LENGTH = 400 # [200, 400], [400, 800], [800, 1600]. 1 - pred le. 2 - co text le
CONTEXT_LENGTH = 800 
LR = 1e-3
N_BLOCK_LAYERS = [4] # [8] 
N_BLOCKS = [1] # [2], [4]
WIDTH = [512] # [1024], [2048]
BACKCAST_LOSS_RATIO = 0.5 # 0.1

In [None]:
def train_model(model, epochs, stimuli_type):
  # Fit for the rest observations
  criterion = MASE()
  optimizer = torch.optim.AdamW(model.parameters(), lr=LR)

  for i in tqdm(range(0, len(train_df[f'audio.{stimuli_type}']), 4), desc='Files loaded'):
    if (i+4 > len(train_df[f'audio.{stimuli_type}'])):
      files = train_df[f'audio.{stimuli_type}'][i:]
    else:
      files = train_df[f'audio.{stimuli_type}'][i:i+4]
    data_folder=os.path.join(DRIVE_PATH, 'wav files')

    new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)
    train_val = create_timeSeriesDataSet(new_df, prediction_len=model.hparams.prediction_length, encoder_len=model.hparams.context_length)

    train_dataloader = train_val[0].to_dataloader(
        train=True, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
    )

    val_dataloader = train_val[1].to_dataloader(
        train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
    )
    for epoch in range(epochs):

      model.train()
      train_it = iter(train_dataloader)
      train_loss = 0
      for n_batches in range(LIMIT_TRAIN_BATCHES):
        batch = next(train_it)
        x = {k: v.to(device) for k, v in batch[0].items()}
        y = batch[1][0].to(device)
        
        backcast_weight = model.hparams.backcast_loss_ratio * model.hparams.prediction_length / model.hparams.context_length
        backcast_weight = backcast_weight / (backcast_weight + 1) 
        forecast_weight = 1 - backcast_weight

        out = model.forward(x)
        backcast_loss = criterion(out['backcast'], x['encoder_target'], x["decoder_target"])
        loss = criterion(out['prediction'], target = y, encoder_target = x['encoder_target'])
        loss = loss * forecast_weight + backcast_loss * backcast_weight
        train_loss += loss.detach()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # n_batches += 1


      model.eval()
      iteration = 0
      overall_loss = 0
      for val_batch in val_dataloader:
        x_val = {k: v.to(device) for k, v in val_batch[0].items()}
        y_val = val_batch[1][0].to(device)
        out = model.forward(x_val)
        loss = criterion(out['prediction'], target = y_val, encoder_target = x_val['encoder_target'])
        iteration += 1
        overall_loss += loss.detach()

      # print(f"Epoch {epoch} Train loss: ", round(float(train_loss / LIMIT_TRAIN_BATCHES), 4), "\tVal loss: ",  round(float(overall_loss / iteration), 4))
  return model 

In [None]:
grid_v2 = { 'PREDICTION_CONTEXT_LENGTH': [[400, 800]],
        'N_BLOCKS_AND_LAYERS' : [[[2], [4]], [[2], [8]], [[4], [4]], [[4], [8]]],
        'WIDTH' : [[512], [1024], [2048]],
        'BACKCAST_LOSS_RATIO' : [0.2] 
}

In [None]:
nbeats_paths = "/content/drive/MyDrive/Grid_search_thesis/NBeats_models"
# stimuli_type = 'narrative'
# stimuli_type = 'story'
stimuli_type = 'instruction'

for i, params in tqdm(enumerate(product(*grid_v2.values())), total = len(list(product(*grid_v2.values()))), desc='Grid search progress'):
  if os.path.isfile(os.path.join(nbeats_paths, f'Nbeats_{stimuli_type}_{i}.pt')):
    continue
  
  files = train_df[f'audio.{stimuli_type}'][:4]

  new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)
  train_val = create_timeSeriesDataSet(new_df, prediction_len=params[0][0], encoder_len=params[0][1])

  # NBeats model
  net_nbeats = NBeats.from_dataset(
      train_val[0],
      stack_types = ['generic'],
      widths = params[2],
      num_blocks = params[1][0],
      num_block_layers = params[1][1],
      expansion_coefficient_lengths = [32],
      sharing = [False],
      learning_rate=LR,
      optimizer="AdamW",
      backcast_loss_ratio = params[3],
  )
  net_nbeats.to(device)

  net_nbeats = train_model(net_nbeats, epochs=3)

  torch.save({'model_state_dict' : net_nbeats.state_dict(),
            'hyperparameters' : net_nbeats.hparams}, 
           os.path.join(nbeats_paths, f'Nbeats_{stimuli_type}_{i}.pt'))

Grid search progress:   0%|          | 0/12 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

### Predictions

In [None]:
from itertools import product

BATCH_SIZE = 8
LIMIT_TRAIN_BATCHES = 300
EPOCHS = 2
BASE_LEN_f = 5

PREDICTION_LENGTH = 400
CONTEXT_LENGTH = 800 
LR = 5e-3

data_folder=os.path.join(DRIVE_PATH, 'wav files')

In [None]:
nbeats_paths = "/content/drive/MyDrive/Grid_search_thesis/NBeats_models"
nbeats_predictions_path = '/content/drive/MyDrive/Grid_search_thesis/NBeats_models/Predictions'

# stimuli_type = 'narrative'
# stimuli_type = 'story'
stimuli_type = 'instruction'

files = test_df[f'audio.{stimuli_type}']
new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)
test = create_timeSeriesDataSet_test(new_df, prediction_len=PREDICTION_LENGTH, encoder_len=CONTEXT_LENGTH)

test_dataloader = test.to_dataloader(
    train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
)

for i in tqdm(range(12)):
  checkpoint = torch.load(os.path.join(nbeats_paths, f'Nbeats_{stimuli_type}_{i}.pt'), map_location=torch.device(device))

  # NBeats model
  net_nbeats = NBeats.from_dataset(
      test,
      stack_types = checkpoint['hyperparameters']['stack_types'],
      widths = checkpoint['hyperparameters']['widths'],
      num_blocks = checkpoint['hyperparameters']['num_blocks'],
      num_block_layers = checkpoint['hyperparameters']['num_block_layers'],
      expansion_coefficient_lengths = [32],
      sharing = [False],
      learning_rate=LR,
      optimizer="AdamW",
      backcast_loss_ratio = checkpoint['hyperparameters']['backcast_loss_ratio'],
  )
  net_nbeats.load_state_dict(checkpoint['model_state_dict'])
  net_nbeats.to(device)


  raw_predictions = net_nbeats.predict(
    test_dataloader, mode="raw", return_x=True,
  )

  predicted_classes = []
  size_of_pred = raw_predictions[0][0].shape[1]
  groups = new_df['group'].unique()

  audio_ts = {}

  for filename in train_df[f'audio.{stimuli_type}']:
      signal, sr = lb.load(os.path.join(data_folder, filename), sr=SR)
      signal, _ = lb.effects.trim(signal, top_db=35)
      audio_ts[filename] = signal

  for m in range(len(raw_predictions[0][0])):
    tmp = classify_obs(pid = groups[m], 
                      pred = raw_predictions[0][0][m].detach().cpu().numpy().astype(np.double),
                      df = train_df, 
                      data_folder = data_folder,
                      size_of_pred = size_of_pred,
                      audio_ts = audio_ts,
                      stimuli_type = stimuli_type)
    predicted_classes.append(tmp)


  pred_df = pd.DataFrame(predicted_classes, columns=['id', 'pred_severity', 'min_dist'])
  pred_df['actual_severity'] = [test_df[test_df[f'audio.{stimuli_type}'] == j[3:]]['depression.symptoms'].iloc[0] for j in pred_df['id']]

  pred_df.to_csv(os.path.join(nbeats_predictions_path,f'NBeats_{stimuli_type}_{i}_pred_multiple.csv'))
  # pred_df

  0%|          | 0/12 [00:00<?, ?it/s]

INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: GPU available:

In [None]:
sum(pred_df['pred_severity'] == pred_df['actual_severity']) / len(pred_df)

0.42374727668845313

In [None]:
pred_df['pred_severity'].value_counts()

0    592
2    167
1    122
3     37
Name: pred_severity, dtype: int64

### Best model for each type of stimuli

Narrative: 3 

Story: 1

Instruction: 7

In [402]:
nbeats_paths = "/content/drive/MyDrive/Grid_search_thesis/NBeats_models"
nbeats_predictions_path = '/content/drive/MyDrive/Grid_search_thesis/NBeats_models/Predictions'

# stimuli_type = 'narrative'
# stimuli_type = 'story'
stimuli_type = 'instruction'

best_score = 0
model_n = 0

for j in range(12):
  predictions = pd.read_csv(os.path.join(nbeats_predictions_path, f'NBeats_{stimuli_type}_{j}_pred_multiple.csv'), index_col=0)
  predictions['id'] = predictions['id'].apply(lambda x: x[3:])

  # predictions["pred_severity"].loc[(predictions['pred_severity'] == 2) | (predictions['pred_severity'] == 3)] = 1
  # predictions['actual_severity'].loc[(predictions['actual_severity'] == 2) | (predictions['actual_severity'] == 3)] = 1
  
  best_guess = {}

  for i in range(len(predictions)):
    cnt = 1
    if predictions['id'][i] not in best_guess:
      best_guess[predictions['id'][i]] = [predictions['pred_severity'][i], predictions['min_dist'][i]]
    elif best_guess[predictions['id'][i]][1] > predictions['min_dist'][i]:
      best_guess[predictions['id'][i]] = [predictions['pred_severity'][i], predictions['min_dist'][i]]

  tsdad = list(best_guess.keys())
  tsdada = np.array(list(best_guess.values()))
  tsdada[:, 0].astype(int)

  single_pred = pd.DataFrame([tsdad, tsdada[:, 0].astype(int), tsdada[:, 1]]).T
  single_pred.rename({0: 'id', 1 : 'pred_severity', 2 : 'DWT_dist'}, axis=1, inplace=True)
  single_pred['actual_severity'] = [test_df[test_df[f'audio.{stimuli_type}'] == i]['depression.symptoms'].iloc[0] for i in single_pred['id']]
  single_pred['pred_severity'] = single_pred['pred_severity'].astype(int)
  report_nbeats = skm.classification_report(single_pred['actual_severity'], single_pred['pred_severity'], output_dict=True)

  if report_nbeats['weighted avg']['f1-score'] > best_score:
    model_n = j
    best_score = report_nbeats['weighted avg']['f1-score']

print(best_score)
print(model_n)

0.43307593307593306
7


In [403]:
predictions = pd.read_csv(os.path.join(nbeats_predictions_path, f'NBeats_{stimuli_type}_{7}_pred_multiple.csv'), index_col=0)
predictions['id'] = predictions['id'].apply(lambda x: x[3:])

In [404]:
best_guess = {}

for i in range(len(predictions)):
  cnt = 1
  if predictions['id'][i] not in best_guess:
    best_guess[predictions['id'][i]] = [predictions['pred_severity'][i], predictions['min_dist'][i]]
  elif best_guess[predictions['id'][i]][1] > predictions['min_dist'][i]:
    best_guess[predictions['id'][i]] = [predictions['pred_severity'][i], predictions['min_dist'][i]]

tsdad = list(best_guess.keys())
tsdada = np.array(list(best_guess.values()))
tsdada[:, 0].astype(int)

single_pred = pd.DataFrame([tsdad, tsdada[:, 0].astype(int), tsdada[:, 1]]).T
single_pred.rename({0: 'id', 1 : 'pred_severity', 2 : 'DWT_dist'}, axis=1, inplace=True)
single_pred['actual_severity'] = [test_df[test_df[f'audio.{stimuli_type}'] == i]['depression.symptoms'].iloc[0] for i in single_pred['id']]
single_pred['pred_severity'] = single_pred['pred_severity'].astype(int)
report_nbeats = skm.classification_report(single_pred['actual_severity'], single_pred['pred_severity'], output_dict=True)

In [405]:
report_nbeats

{'0': {'precision': 0.7,
  'recall': 0.4827586206896552,
  'f1-score': 0.5714285714285714,
  'support': 29},
 '1': {'precision': 0.25,
  'recall': 0.46153846153846156,
  'f1-score': 0.32432432432432434,
  'support': 13},
 '2': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4},
 '3': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 2},
 'accuracy': 0.4166666666666667,
 'macro avg': {'precision': 0.2375,
  'recall': 0.23607427055702918,
  'f1-score': 0.22393822393822393,
  'support': 48},
 'weighted avg': {'precision': 0.4906249999999999,
  'recall': 0.4166666666666667,
  'f1-score': 0.43307593307593306,
  'support': 48}}

### 10-fold CV of results

In [29]:
BATCH_SIZE = 8
LIMIT_TRAIN_BATCHES = 300
EPOCHS = 2
BASE_LEN_f = 5

PREDICTION_LENGTH = 400
CONTEXT_LENGTH = 800 
LR = 3e-3

data_folder=os.path.join(DRIVE_PATH, 'wav files')

In [25]:
from sklearn.model_selection import StratifiedShuffleSplit

sss = StratifiedShuffleSplit(n_splits=10, test_size = 0.1, train_size = 0.9, random_state = 42)
cv_splits = {}


for i, (train_index, test_index) in enumerate(sss.split(df, df['depression.symptoms'])):
  cv_splits[i] = (train_index, test_index)

In [None]:
nbeats_paths = "/content/drive/MyDrive/Grid_search_thesis/NBeats_models"
nbeats_cv = "/content/drive/MyDrive/Grid_search_thesis/NBeats_models/CV_results"


# stimuli_type = 'narrative'
stimuli_type = 'story'
# stimuli_type = 'instruction'

checkpoint = torch.load(os.path.join(nbeats_paths, f'Nbeats_{stimuli_type}_{3}.pt'), map_location=torch.device(device))

for i, idx in enumerate(cv_splits.values()):
    # Obtain new train and test dataframes
    train_df = df.iloc[idx[0]]
    test_df = df.iloc[idx[1]]

    files = train_df[f'audio.{stimuli_type}'][:2]
    new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)
    train_val = create_timeSeriesDataSet(new_df, prediction_len=PREDICTION_LENGTH, encoder_len=CONTEXT_LENGTH)

    # Training the model
    net_nbeats = NBeats.from_dataset(
        train_val[0],
        stack_types = checkpoint['hyperparameters']['stack_types'],
        widths = checkpoint['hyperparameters']['widths'],
        num_blocks = checkpoint['hyperparameters']['num_blocks'],
        num_block_layers = checkpoint['hyperparameters']['num_block_layers'],
        expansion_coefficient_lengths = [32],
        sharing = [False],
        learning_rate=LR,
        optimizer="AdamW",
        backcast_loss_ratio = checkpoint['hyperparameters']['backcast_loss_ratio'],
    )
    net_nbeats.load_state_dict(checkpoint['model_state_dict'])
    net_nbeats.to(device)

    net_nbeats = train_model(net_nbeats, epochs=EPOCHS, stimuli_type = stimuli_type)

    # Get predictions from it
    files = test_df[f'audio.{stimuli_type}']
    new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)
    test = create_timeSeriesDataSet_test(new_df, prediction_len=PREDICTION_LENGTH, encoder_len=CONTEXT_LENGTH)

    test_dataloader = test.to_dataloader(
        train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
    )

    raw_predictions = net_nbeats.predict(
    test_dataloader, mode="raw", return_x=True,
    )

    predicted_classes = []
    size_of_pred = raw_predictions[0][0].shape[1]
    groups = new_df['group'].unique()

    audio_ts = {}

    for filename in train_df[f'audio.{stimuli_type}']:
        signal, sr = lb.load(os.path.join(data_folder, filename), sr=SR)
        signal, _ = lb.effects.trim(signal, top_db=35)
        audio_ts[filename] = signal

    for m in range(len(raw_predictions[0][0])):
      tmp = classify_obs(pid = groups[m], 
                        pred = raw_predictions[0][0][m].detach().cpu().numpy().astype(np.double),
                        df = train_df, 
                        data_folder = data_folder,
                        size_of_pred = size_of_pred,
                        audio_ts = audio_ts,
                        stimuli_type = stimuli_type)
      predicted_classes.append(tmp)

    pred_df = pd.DataFrame(predicted_classes, columns=['id', 'pred_severity', 'min_dist'])
    pred_df['actual_severity'] = [test_df[test_df[f'audio.{stimuli_type}'] == j[3:]]['depression.symptoms'].iloc[0] for j in pred_df['id']]

    pred_df['id'] = pred_df['id'].apply(lambda x: x[3:])

    best_guess = {}

    for i in range(len(pred_df)):
      cnt = 1
      if pred_df['id'][i] not in best_guess:
        best_guess[pred_df['id'][i]] = [pred_df['pred_severity'][i], pred_df['min_dist'][i]]
      elif best_guess[pred_df['id'][i]][1] > pred_df['min_dist'][i]:
        best_guess[pred_df['id'][i]] = [pred_df['pred_severity'][i], pred_df['min_dist'][i]]

    tsdad = list(best_guess.keys())
    tsdada = np.array(list(best_guess.values()))
    tsdada[:, 0].astype(int)

    single_pred = pd.DataFrame([tsdad, tsdada[:, 0].astype(int), tsdada[:, 1]]).T
    single_pred.rename({0: 'id', 1 : 'pred_severity', 2 : 'DWT_dist'}, axis=1, inplace=True)
    single_pred['actual_severity'] = [test_df[test_df[f'audio.{stimuli_type}'] == i]['depression.symptoms'].iloc[0] for i in single_pred['id']]
    single_pred['pred_severity'] = single_pred['pred_severity'].astype(int)
    report_nbeats = skm.classification_report(single_pred['actual_severity'], single_pred['pred_severity'], output_dict=True)

    single_pred.to_csv(os.path.join(nbeats_cv, f'NBeats_{stimuli_type}_fold_{i}_pred.csv'))

KeyboardInterrupt: ignored

In [68]:
skm.classification_report(single_pred['actual_severity'], single_pred['pred_severity'], output_dict=True)

{'0': {'precision': 0.6896551724137931,
  'recall': 0.6896551724137931,
  'f1-score': 0.6896551724137931,
  'support': 29},
 '1': {'precision': 0.4,
  'recall': 0.46153846153846156,
  'f1-score': 0.42857142857142855,
  'support': 13},
 '2': {'precision': 0.5,
  'recall': 0.25,
  'f1-score': 0.3333333333333333,
  'support': 4},
 '3': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 2},
 'accuracy': 0.5625,
 'macro avg': {'precision': 0.39741379310344827,
  'recall': 0.3502984084880637,
  'f1-score': 0.36288998357963875,
  'support': 48},
 'weighted avg': {'precision': 0.5666666666666667,
  'recall': 0.5625,
  'f1-score': 0.5605158730158729,
  'support': 48}}

In [69]:
single_pred.to_csv(os.path.join(nbeats_cv, f'NBeats_{stimuli_type}_fold_{1}_pred.csv'))

# **Playground**

In [24]:
nbeats_paths = "/content/drive/MyDrive/Grid_search_thesis/NBeats_models"

stimuli_type = 'narrative'
# stimuli_type = 'story'
# stimuli_type = 'instruction'

checkpoint = torch.load(os.path.join(nbeats_paths, f'Nbeats_{stimuli_type}_{7}.pt'), map_location=torch.device(device))

In [25]:
checkpoint['hyperparameters']

"backcast_loss_ratio":           0.2
"context_length":                800
"dropout":                       0.1
"expansion_coefficient_lengths": [32]
"learning_rate":                 0.001
"log_gradient_flow":             False
"log_interval":                  -1
"log_val_interval":              -1
"logging_metrics":               ModuleList(
  (0): SMAPE()
  (1): MAE()
  (2): RMSE()
  (3): MAPE()
  (4): MASE()
)
"loss":                          MASE()
"monotone_constaints":           {}
"num_block_layers":              [4]
"num_blocks":                    [4]
"optimizer":                     AdamW
"optimizer_params":              None
"output_transformer":            EncoderNormalizer(
	method='standard',
	center=True,
	max_length=None,
	transformation=None,
	method_kwargs={}
)
"prediction_length":             400
"reduce_on_plateau_min_lr":      1e-05
"reduce_on_plateau_patience":    1000
"reduce_on_plateau_reduction":   2.0
"sharing":                       [False]
"stack_types":     