# Installations

In [1]:
!pip install torch
!pip install pytorch_forecasting
!pip install dtaidistance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch_forecasting
  Downloading pytorch_forecasting-1.0.0-py3-none-any.whl (140 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.4/140.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi>=0.80 (from pytorch_forecasting)
  Downloading fastapi-0.95.2-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.0/57.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lightning<3.0.0,>=2.0.0 (from pytorch_forecasting)
  Downloading lightning-2.0.2-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
Collecting optuna<4.0.0,>=3.1.0 (from pytorch_forecasting)
  Downloading optuna-3.1.1-py3-none-any.whl (365 k

# Main part

In [2]:
import os
import numpy as np
import pandas as pd
import librosa as lb
import matplotlib.pyplot as plt

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Global sample rate: every recording is loaded with `sr=SR`.
# SR = 22000
SR = 8000

# Seed for NumPy's global random generator.
SEED = 42
np.random.seed(SEED)

# Project folder on the mounted Drive. The path is relative, so it only
# resolves when the working directory is the parent of `drive/`.
DRIVE_PATH = 'drive/MyDrive/psychiatric.disorders.ML'

In [5]:
# Participant metadata spreadsheet (ID, diagnosis, symptom scores, ...).
participants = pd.read_excel(os.path.join(DRIVE_PATH, 'PsychiatricDiscourse_participant.data.xlsx'))

In [6]:
# Participants with a non-zero depression score and no thought-disorder symptoms.
no_thought_disorder = participants['thought.disorder.symptoms'] == 0.
has_depression = participants['depression.symptoms'] != 0.
depression_only = participants.loc[no_thought_disorder & has_depression]

In [7]:
# Participants whose depression AND thought-disorder scores are both zero.
symptom_free = (
    participants[['depression.symptoms', 'thought.disorder.symptoms']] == 0.
).all(axis=1)
control_group = participants.loc[symptom_free]

In [8]:
# Study cohort: both groups stacked; the original row labels are kept.
df = pd.concat([depression_only, control_group])

In [9]:
def get_patient_audio(row, data_folder=os.path.join(DRIVE_PATH, 'wav files'), return_uncomplete=False):
    """List the files in `data_folder` whose name contains the participant ID.

    Args:
        row: object exposing the participant identifier as `row.ID`.
        data_folder: directory that is scanned with `os.listdir`.
        return_uncomplete: accepted for compatibility; not used.

    Returns:
        List of matching file names, in the order `os.listdir` yields them.
    """
    patient_id = row.ID
    return [name for name in os.listdir(data_folder) if patient_id in name]

# Attach to every participant the list of recording file names that contain their ID.
df['audio'] = df.apply(get_patient_audio, axis=1)

# exclude patients with num of recordings other than 3
df = df[df.audio.apply(len) == 3]

In [10]:
os.path.join(DRIVE_PATH, 'wav files')

'drive/MyDrive/psychiatric.disorders.ML/wav files'

In [11]:
# Topic keywords (as they appear in file names) for each task domain.
task_mapping = {
    'narrative': ['sportsman', 'adventure', 'winterday'],
    'story': ['present', 'trip', 'party'],
    'instruction': ['chair', 'table', 'bench'],
}


def get_domain_audio(row, domain):
    """Pick the recording in `row.audio` that belongs to `domain`.

    A file matches when its name contains one of the domain's topic keywords.
    Matches are ordered by topic first (order of `task_mapping[domain]`), then
    by position in `row.audio`. If several files match, the full list is
    printed and the first one is returned.

    Returns:
        The first matching file name, or None when nothing matches.
    """
    matches = [
        file_name
        for topic in task_mapping[domain]
        for file_name in row.audio
        if topic in file_name
    ]

    if len(matches) > 1:
        print(matches)
    # assert len(matches) < 2
    if not matches:
        return None
    return matches[0]
    
    
    
# One column per task domain (`audio.narrative`, `audio.story`, `audio.instruction`)
# holding the matching file name, or None when the participant has no such recording.
for domain in task_mapping:
    df[f'audio.{domain}'] = df.apply(get_domain_audio, axis=1, domain=domain)

In [12]:
df.head()

Unnamed: 0,ID,group,diagnosis,sex,age,education.level,education.years,depression.symptoms,thought.disorder.symptoms,audio,audio.narrative,audio.story,audio.instruction
0,PD-001,patient,schizotypal.disorder,female,19.0,secondary,11,1,0,"[PD-001-pers-1-present.wav, PD-001-instr-1-cha...",PD-001-pic-1-sportsman.wav,PD-001-pers-1-present.wav,PD-001-instr-1-chair.wav
1,PD-002,patient,bipolar.affective.disorder,female,26.0,higher,17,1,0,"[PD-002-pers-1-present.wav, PD-002-instr-1-cha...",PD-002-pic-1-adventure.wav,PD-002-pers-1-present.wav,PD-002-instr-1-chair.wav
3,PD-004,patient,borderline.personality.disorder,female,16.0,secondary,9,1,0,"[PD-004-pic-1-adventure.wav, PD-004-pers-1-pre...",PD-004-pic-1-adventure.wav,PD-004-pers-1-present.wav,PD-004-instr-1-chair.wav
7,PD-008,patient,bipolar.affective.disorder,female,19.0,higher.unfinished,12,1,0,"[PD-008-instr-1-chair.wav, PD-008-pic-1-advent...",PD-008-pic-1-adventure.wav,PD-008-pers-1-present.wav,PD-008-instr-1-chair.wav
12,PD-013,patient,recurrent.depressive.disorder,female,20.0,higher.unfinished,12,1,0,"[PD-013-pic-1-sportsman.wav, PD-013-pers-1-pre...",PD-013-pic-1-sportsman.wav,PD-013-pers-1-present.wav,PD-013-instr-1-chair.wav


In [13]:
df['depression.symptoms'].value_counts()

0    142
1     62
2     22
3     10
Name: depression.symptoms, dtype: int64

In [14]:
# 80% training / 20% test, stratified on the depression score so that every
# severity level is represented in the test set.

from sklearn.model_selection import StratifiedShuffleSplit

sss = StratifiedShuffleSplit(n_splits=1, test_size = 0.2, train_size = 0.8, random_state = 42)

# n_splits=1, so the generator yields exactly one (train, test) pair of positions.
train_index, test_index = next(sss.split(df, df['depression.symptoms']))
train_df = df.iloc[train_index]
test_df = df.iloc[test_index]

In [15]:
train_df.head()

Unnamed: 0,ID,group,diagnosis,sex,age,education.level,education.years,depression.symptoms,thought.disorder.symptoms,audio,audio.narrative,audio.story,audio.instruction
189,PN-075,control,,male,23.0,vocational,14,2,0,"[PN-075-instr-1-table.wav, PN-075-pers-1-trip....",PN-075-pic-1-adventure.wav,PN-075-pers-1-trip.wav,PN-075-instr-1-table.wav
32,PD-034,patient,bipolar.affective.disorder,female,20.0,higher.unfinished,13,0,0,"[PD-034-instr-1-chair.wav, PD-034-pic-1-advent...",PD-034-pic-1-adventure.wav,PD-034-pers-1-present.wav,PD-034-instr-1-chair.wav
259,PN-306,control,,male,53.0,higher,20,0,0,"[PN-306-instr-1-bench.wav, PN-306-pic-1-winter...",PN-306-pic-1-winterday.wav,PN-306-pers-1-party.wav,PN-306-instr-1-bench.wav
179,PN-054,control,,female,24.0,higher,17,2,0,"[PN-054-pers-1-party.wav, PN-054-instr-1-chair...",PN-054-pic-1-adventure.wav,PN-054-pers-1-party.wav,PN-054-instr-1-chair.wav
84,PD-089,patient,recurrent.depressive.disorder,female,17.0,vocational.unfinished,10,0,0,"[PD-089-pers-1-present.wav, PD-089-instr-1-cha...",PD-089-pic-1-adventure.wav,PD-089-pers-1-present.wav,PD-089-instr-1-chair.wav


In [16]:
test_df.shape

(48, 13)

## Data preparation and training

In [17]:
# from tqdm import tqdm
from tqdm.auto import tqdm
import pytorch_forecasting as ptf
import torch
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import NaNLabelEncoder
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping
from pytorch_forecasting.metrics import RMSE, MultivariateNormalDistributionLoss, QuantileLoss
from dtaidistance import dtw
from itertools import product
import sklearn.metrics as skm


import warnings
warnings.filterwarnings('ignore')


pl.seed_everything(42)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

INFO: Global seed set to 42
INFO:lightning.fabric.utilities.seed:Global seed set to 42


cpu


In [18]:
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger

In [19]:
# NOTE(review): BASE_LEN is not referenced by any visible cell; presumably it was
# meant as `cutoff_len` (segment length in seconds) for the baseline
# `load_and_preprocess` calls — confirm.
BASE_LEN = 83
# Batch size passed to `to_dataloader` (re-assigned by later configuration cells).
BATCH_SIZE = 2

In [20]:
def cut_recordings(data, audio_dur, cutoff_len, min_len = 0):
  """Split a signal into consecutive segments of `cutoff_len` seconds.

  The first segment is always produced. Further segments are added while the
  duration still left after the previous cut exceeds `min_len`, up to a
  hard cap of 100 segments. The last segment may be shorter than the others.

  Args:
    data: sliceable 1-D signal sampled at `SR`.
    audio_dur: duration of `data` in seconds.
    cutoff_len: segment length in seconds.
    min_len: minimum remaining duration (seconds) required to emit another segment.

  Returns:
    List of slices of `data`.
  """
  seg_samples = int(cutoff_len * SR)
  segments = [data[:seg_samples]]
  remaining = audio_dur - cutoff_len
  seg_idx = 1

  while remaining > min_len and seg_idx < 100:
    start = seg_samples * seg_idx
    segments.append(data[start:start + seg_samples])
    remaining -= cutoff_len
    seg_idx += 1

  return segments


def pad_ts(data, max_dur):
  """Left-pad every segment shorter than `max_dur` samples with zeros.

  The list is modified in place and also returned; segments that are already
  `max_dur` samples or longer are left untouched.
  """
  for idx, segment in enumerate(data):
    missing = max_dur - len(segment)
    if missing > 0:
      # Zeros go in front, so the real samples stay at the end of the series.
      data[idx] = np.pad(segment, (missing, 0), 'constant', constant_values=(0,))

  return data


def load_and_preprocess(files, data_folder, cutoff_len = None, min_len=0):
  """Load recordings and turn them into one long-format frame of fixed-length series.

  Every file is loaded at `SR`, trimmed of leading/trailing silence
  (`top_db=40`), cut into `cutoff_len`-second segments with `cut_recordings`
  and left-padded with zeros to a common length with `pad_ts`. Each segment
  becomes one series with one row per audio sample.

  Args:
    files: file names relative to `data_folder` (pandas Series or any iterable).
    data_folder: directory containing the recordings.
    cutoff_len: segment length in seconds. Required; the `None` default is
      kept only for signature compatibility.
    min_len: forwarded to `cut_recordings`.

  Returns:
    DataFrame with columns `observations` (sample value), `time_idx`
    (0-based position inside the segment) and `group`
    (`"<two-digit segment number>_<file name>"`).

  Raises:
    ValueError: if `cutoff_len` is None.
  """
  if cutoff_len is None:
    # Previously this surfaced later as an opaque "NoneType * int" TypeError.
    raise ValueError("cutoff_len (segment length in seconds) must be provided")

  file_names = list(files)
  # Same integer sample count that cut_recordings uses; np.pad needs an int width.
  segment_samples = int(cutoff_len * SR)

  frames = []
  for filename in file_names:
    signal, _ = lb.load(os.path.join(data_folder, filename), sr=SR)
    signal, _ = lb.effects.trim(signal, top_db=40)

    segments = cut_recordings(data = signal, audio_dur = len(signal) / SR,
                              cutoff_len = cutoff_len, min_len = min_len)
    segments = pad_ts(segments, segment_samples)

    for j, segment in enumerate(segments):
      frames.append(pd.DataFrame({
          'observations': segment,
          'time_idx': np.arange(len(segment)),
          # Zero-padded prefix: downstream code strips it with `[3:]`.
          'group': f'{j:02d}_' + filename,
      }))

  if not frames:
    return pd.DataFrame(columns=['observations', 'time_idx', 'group'])

  # A single concat instead of growing the frame once per segment.
  upd_df = pd.concat(frames, axis=0, ignore_index=True)
  upd_df['time_idx'] = upd_df['time_idx'].astype(int)

  return upd_df


def create_timeSeriesDataSet(df, encoder_len = 60, prediction_len = 60):
  """Build the training and validation TimeSeriesDataSets for one chunk.

  The last `prediction_len` time steps of the frame are held out: the training
  set only sees rows up to the cutoff, the validation set is derived from the
  training set's configuration and predicts from the step after the cutoff.

  Note: the column names of `df` are rewritten in place ("." -> "_").

  Returns:
    Tuple `(training, validation)`.
  """
  # pytorch_forecasting does not accept dots in column names.
  df.columns = [name.replace(".", "_") for name in df.columns]

  training_cutoff = df["time_idx"].max() - prediction_len
  train_rows = df[df["time_idx"] <= training_cutoff]

  training = TimeSeriesDataSet(
      data=train_rows,
      time_idx="time_idx",
      target="observations",
      group_ids=["group"],
      max_encoder_length=encoder_len,
      max_prediction_length=prediction_len,
      time_varying_unknown_reals=["observations"],
      time_varying_known_reals=["time_idx"],
  )

  validation = TimeSeriesDataSet.from_dataset(
      training, df, min_prediction_idx=training_cutoff + 1
  )
  return (training, validation)


def create_timeSeriesDataSet_test(df, encoder_len = 60, prediction_len = 60):
  """Build a TimeSeriesDataSet over the whole frame for inference.

  All rows are kept; predictions are restricted to start after
  `max(time_idx) - prediction_len`, i.e. within the last `prediction_len` steps.

  Note: the column names of `df` are rewritten in place ("." -> "_").
  """
  # pytorch_forecasting does not accept dots in column names.
  df.columns = [name.replace(".", "_") for name in df.columns]

  first_prediction_idx = df["time_idx"].max() - prediction_len + 1

  dataset_kwargs = dict(
      time_idx="time_idx",
      target="observations",
      group_ids=["group"],
      max_encoder_length=encoder_len,
      max_prediction_length=prediction_len,
      time_varying_unknown_reals=["observations"],
      time_varying_known_reals=["time_idx"],
      min_prediction_idx=first_prediction_idx,
  )
  return TimeSeriesDataSet(data=df, **dataset_kwargs)


def classify_obs(pid, pred, df, data_folder, size_of_pred, audio_ts, stimuli_type):
  """Label a forecast with the severity of its nearest reference recording.

  The forecast is compared, using DTW distance, with the last `size_of_pred`
  samples of every recording listed in `df['audio.<stimuli_type>']`; the
  depression score of the closest recording is returned.

  Args:
    pid: identifier passed through unchanged into the result.
    pred: 1-D array of doubles (the forecast).
    df: frame with the `audio.<stimuli_type>` and `depression.symptoms` columns.
    data_folder: accepted for compatibility; not used.
    size_of_pred: number of trailing samples of each reference to compare against.
    audio_ts: mapping file name -> loaded signal for every file in `df`.
    stimuli_type: suffix of the audio column to use.

  Returns:
    Tuple `(pid, depression score of the nearest recording, DTW distance)`.
  """
  audio_col = f'audio.{stimuli_type}'
  min_dist = float('inf')
  most_similar = None

  for filename in df[audio_col]:
    reference_tail = audio_ts[filename][-size_of_pred:].astype(np.double)
    distance_dtw = dtw.distance_fast(pred, reference_tail)

    # Strict comparison: on ties the earlier recording is kept.
    if distance_dtw < min_dist:
      min_dist = distance_dtw
      most_similar = filename

  label = df.loc[df[audio_col] == most_similar, 'depression.symptoms'].iloc[0]
  return (pid, label, min_dist)

### Training the baseline

In [None]:
# Baseline TFT: the network is created from the first 4 narrative recordings and
# then fitted further on the remaining recordings, 4 at a time.

# Fit first 4 observations
files = train_df['audio.narrative'][:4]
data_folder=os.path.join(DRIVE_PATH, 'wav files')

# cutoff_len has to be given explicitly (the None default cannot be multiplied
# by SR); BASE_LEN is the segment length configured for the baseline.
new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN, min_len = 24)
train_val = create_timeSeriesDataSet(new_df)

train_dataloader = train_val[0].to_dataloader(
    train=True, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
)
val_dataloader = train_val[1].to_dataloader(
    train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
)

# TemporalFusionTransformer model
net = TemporalFusionTransformer.from_dataset(
    train_val[0],
    learning_rate=1e-3,
    hidden_size = 16,
    lstm_layers = 2,
    dropout = 0.2,
    attention_head_size = 4,
    optimizer="AdamW",
    # optimizer="Ranger",
    loss=QuantileLoss(),
    # log_interval=10,
)
net.to(device)

# Trainer
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=2, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

trainer = pl.Trainer(
    max_epochs=2, #10 FOR NOW
    # "auto" uses the GPU when there is one and falls back to CPU otherwise;
    # a hard-coded "gpu" fails on a CPU-only runtime.
    accelerator="auto",
    enable_model_summary=False,
    gradient_clip_val=0.1,
    limit_train_batches=2000,
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
    enable_checkpointing=True,
    check_val_every_n_epoch=1,
    # reload_dataloaders_every_n_epochs = 10
)

trainer.fit(
  net,
  train_dataloaders=train_dataloader,
  val_dataloaders=val_dataloader,
)

# Fit for the rest observations
for i in tqdm(range(4,len(train_df['audio.narrative']), 4)):
  files = train_df['audio.narrative'][i:i+4]
  data_folder=os.path.join(DRIVE_PATH, 'wav files')

  new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN)

  train_val = create_timeSeriesDataSet(new_df)

  train_dataloader = train_val[0].to_dataloader(
      train=True, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
  )

  val_dataloader = train_val[1].to_dataloader(
      train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
  )

  # A fresh callback per chunk: EarlyStopping keeps its best score and patience
  # counter on the instance, so a shared one would carry them over from the
  # previous chunks and stop later fits prematurely.
  early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=2, verbose=False, mode="min")

  trainer = pl.Trainer(
    max_epochs=2, #10 FOR NOW
    accelerator="auto",
    enable_model_summary=False,
    gradient_clip_val=0.1,
    limit_train_batches=2000,
    callbacks=[early_stop_callback],
    enable_checkpointing=True,
    check_val_every_n_epoch=1,
    # reload_dataloaders_every_n_epochs = 10
  )

  trainer.fit(
    net,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
  )

In [None]:
# Resume the baseline run from a saved checkpoint and continue with the
# remaining chunks of 4 recordings.
# NOTE(review): the checkpoint path and the chunk offsets (41, 42) are tied to
# one particular interrupted run — adjust both before re-using this cell.
net1 = TemporalFusionTransformer.load_from_checkpoint("/content/lightning_logs/version_41/checkpoints/epoch=0-step=2000.ckpt")
net1.to(device)

files = train_df['audio.narrative'][41*4:42*4]
data_folder=os.path.join(DRIVE_PATH, 'wav files')

# cutoff_len has to be given explicitly (the None default cannot be multiplied
# by SR); BASE_LEN is the segment length configured for the baseline.
new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN)
train_val = create_timeSeriesDataSet(new_df)

train_dataloader = train_val[0].to_dataloader(
    train=True, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
)
val_dataloader = train_val[1].to_dataloader(
    train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
)

# Trainer
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=2, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

trainer = pl.Trainer(
    max_epochs=2, #10 FOR NOW
    # "auto" uses the GPU when there is one and falls back to CPU otherwise;
    # a hard-coded "gpu" fails on a CPU-only runtime.
    accelerator="auto",
    enable_model_summary=False,
    gradient_clip_val=0.1,
    limit_train_batches=2000,
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
    enable_checkpointing=True,
    check_val_every_n_epoch=1,
    # reload_dataloaders_every_n_epochs = 10
)

trainer.fit(
  net1,
  train_dataloaders=train_dataloader,
  val_dataloaders=val_dataloader,
)


# Fit for the rest observations
for i in tqdm(range(4*42,len(train_df['audio.narrative']), 4)):
  files = train_df['audio.narrative'][i:i+4]
  data_folder=os.path.join(DRIVE_PATH, 'wav files')

  new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN)

  train_val = create_timeSeriesDataSet(new_df)

  train_dataloader = train_val[0].to_dataloader(
      train=True, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
  )

  val_dataloader = train_val[1].to_dataloader(
      train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
  )

  # A fresh callback per chunk: EarlyStopping keeps its best score and patience
  # counter on the instance, so a shared one would carry them over from the
  # previous chunks and stop later fits prematurely.
  early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=2, verbose=False, mode="min")

  trainer = pl.Trainer(
    max_epochs=2, #10 FOR NOW
    accelerator="auto",
    enable_model_summary=False,
    gradient_clip_val=0.1,
    limit_train_batches=2000,
    callbacks=[early_stop_callback],
    enable_checkpointing=True,
    check_val_every_n_epoch=1,
    # reload_dataloaders_every_n_epochs = 10
  )

  trainer.fit(
    net1,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
  )

### Fine-tuning

In [None]:
# Folder with the .wav recordings.
data_folder=os.path.join(DRIVE_PATH, 'wav files')

# Settings read as globals by `train_model` and the grid-search cell.
# BATCH_SIZE: batch size for every dataloader.
BATCH_SIZE = 8
# LIMIT_TRAIN_BATCHES: number of training batches drawn per epoch in `train_model`.
LIMIT_TRAIN_BATCHES = 1300
# EPOCHS: epochs per chunk of recordings.
EPOCHS = 2
# BASE_LEN_f: passed as both `cutoff_len` and `min_len` (seconds) to `load_and_preprocess`.
BASE_LEN_f = 5

# Forecast horizon and encoder window, in time steps (one step = one audio sample).
PREDICTION_LENGTH = 200
CONTEXT_LENGTH = 200 
# Learning rate for AdamW and for `TemporalFusionTransformer.from_dataset`.
LR = 5e-3

In [None]:
def train_model(model, EPOCHS, stimuli_type):
  """Fine-tune `model` on the training recordings of one stimulus type.

  The recordings listed in the module-level `train_df` are processed four at a
  time: each chunk is cut into `BASE_LEN_f`-second segments, wrapped in
  training/validation TimeSeriesDataSets and fitted for `EPOCHS` epochs of at
  most `LIMIT_TRAIN_BATCHES` batches, each followed by a validation pass.
  The mean losses of the latest epoch are shown on the progress bar.

  Args:
    model: network to fit; it is updated in place and is not moved between
      devices here (only the batches are moved to `device`).
    EPOCHS: number of epochs per chunk.
    stimuli_type: suffix of the `audio.<stimuli_type>` column to read.

  Returns:
    The same `model` object.
  """
  criterion = QuantileLoss()
  optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
  data_folder = os.path.join(DRIVE_PATH, 'wav files')
  audio_files = train_df[f'audio.{stimuli_type}']

  progress = tqdm(range(0, len(audio_files), 4), desc='Files loaded')
  for i in progress:
    # Slicing past the end just yields the shorter tail chunk.
    files = audio_files[i:i+4]

    new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)
    train_val = create_timeSeriesDataSet(new_df, prediction_len=PREDICTION_LENGTH, encoder_len=CONTEXT_LENGTH)

    train_dataloader = train_val[0].to_dataloader(
        train=True, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
    )

    val_dataloader = train_val[1].to_dataloader(
        train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
    )

    for epoch in range(EPOCHS):
      model.train()
      train_loss, n_train = 0, 0
      # zip() stops at whichever ends first, so a loader with fewer than
      # LIMIT_TRAIN_BATCHES batches ends the epoch instead of raising StopIteration.
      for _, batch in zip(range(LIMIT_TRAIN_BATCHES), train_dataloader):
        x = {k: v.to(device) for k, v in batch[0].items()}
        y = batch[1][0].to(device)

        out = model(x)
        loss = criterion(out['prediction'], target = y)
        train_loss += loss.detach()
        n_train += 1

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

      model.eval()
      val_loss, n_val = 0, 0
      # No gradients are needed for validation; skipping them saves memory.
      with torch.no_grad():
        for val_batch in val_dataloader:
          x_val = {k: v.to(device) for k, v in val_batch[0].items()}
          y_val = val_batch[1][0].to(device)
          out = model(x_val)
          val_loss += criterion(out['prediction'], target = y_val).detach()
          n_val += 1

      progress.set_postfix(
          epoch=epoch,
          train_loss=round(float(train_loss) / max(n_train, 1), 4),
          val_loss=round(float(val_loss) / max(n_val, 1), 4),
      )
  return model

In [None]:
# Hyper-parameter grid for the TFT search (1 x 3 x 2 x 2 = 12 combinations).
# Only the VALUES are consumed, positionally, through `product(*grid_v2.values())`:
# (prediction/context lengths, hidden size, LSTM layers, attention heads).
# NOTE(review): the last three keys end with a trailing space; harmless while
# only `.values()` is used, but a lookup by name would need the exact key.
grid_v2 = { 'PREDICTION_CONTEXT_LENGTH': [[PREDICTION_LENGTH, CONTEXT_LENGTH]],
        'hidden_size ' : [32, 64, 128],
        'lstm_layers ' : [2, 4],
        'attention_head_size ' : [4, 6],
}

In [None]:
from itertools import product

tft_paths = "/content/drive/MyDrive/Grid_search_thesis/TFT_models"
stimuli_type = 'narrative'
# stimuli_type = 'story'
# stimuli_type = 'instruction'

# torch.save does not create missing directories.
os.makedirs(tft_paths, exist_ok=True)

# Materialise the grid once; model numbers are positions in this list.
param_grid = list(product(*grid_v2.values()))

for i, params in tqdm(enumerate(param_grid), total = len(param_grid), desc='Grid search progress'):
  model_path = os.path.join(tft_paths, f'TFT_{stimuli_type}_{i}.pt')
  # Models that were already trained and saved are skipped, so the cell can be re-run.
  if os.path.isfile(model_path):
    continue

  (prediction_len, encoder_len), hidden_size, lstm_layers, attention_head_size = params

  # The network's input/output layout is derived from a dataset built on the
  # first 4 recordings; train_model() then iterates over all of them.
  files = train_df[f'audio.{stimuli_type}'][:4]

  new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)
  train_val = create_timeSeriesDataSet(new_df, prediction_len=prediction_len, encoder_len=encoder_len)

  # TFT model
  # NOTE(review): output_size=20 while train_model() optimises a default
  # QuantileLoss(); confirm that the number of outputs is meant to differ from
  # the number of quantiles (changing it would invalidate saved checkpoints).
  net_tft = TemporalFusionTransformer.from_dataset(
    train_val[0],
    learning_rate=LR,
    hidden_size = hidden_size,
    lstm_layers = lstm_layers,
    attention_head_size = attention_head_size,
    dropout = 0.2,
    output_size = 20
  )
  net_tft.to(device)

  net_tft = train_model(net_tft, EPOCHS=EPOCHS, stimuli_type=stimuli_type)

  torch.save({'model_state_dict' : net_tft.state_dict(),
            'hyperparameters' : net_tft.hparams},
           model_path)

Grid search progress:   0%|          | 0/12 [00:00<?, ?it/s]

Files loaded:   0%|          | 0/47 [00:00<?, ?it/s]

### Predictions

In [455]:
# Settings for the prediction cells. These names are the same globals the
# fine-tuning section defines, so running this cell overrides those values
# (LIMIT_TRAIN_BATCHES and LR differ from the fine-tuning configuration).
BATCH_SIZE = 8
LIMIT_TRAIN_BATCHES = 300
EPOCHS = 2
# Segment length in seconds, passed as `cutoff_len` / `min_len` to `load_and_preprocess`.
BASE_LEN_f = 5

# Forecast horizon and encoder window, in time steps.
PREDICTION_LENGTH = 200
CONTEXT_LENGTH = 200 
LR = 4e-3

# Folder with the .wav recordings.
data_folder=os.path.join(DRIVE_PATH, 'wav files')

In [None]:
# Where the grid-search models are stored and where per-model predictions are written.
tft_paths = "/content/drive/MyDrive/Grid_search_thesis/TFT_models"
tft_predictions_paths = '/content/drive/MyDrive/Grid_search_thesis/TFT_models/Predictions'

# Stimulus type to evaluate; switch by (un)commenting.
# stimuli_type = 'narrative'
# stimuli_type = 'story'
stimuli_type = 'instruction'


# Segment the test recordings and wrap them in an inference dataset;
# `new_df['group']` later identifies the segment each prediction belongs to.
files = test_df[f'audio.{stimuli_type}']
new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)
test = create_timeSeriesDataSet_test(new_df, prediction_len=PREDICTION_LENGTH, encoder_len=CONTEXT_LENGTH)

test_dataloader = test.to_dataloader(
    train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
)

In [None]:
# Reference recordings for the nearest-neighbour lookup. They do not depend on
# the model, so they are loaded once instead of once per checkpoint.
# NOTE(review): silence is trimmed with top_db=35 here but top_db=40 in
# load_and_preprocess — confirm the difference is intended.
audio_col = f'audio.{stimuli_type}'
audio_ts = {}
for filename in train_df[audio_col]:
    signal, sr = lb.load(os.path.join(data_folder, filename), sr=SR)
    signal, _ = lb.effects.trim(signal, top_db=35)
    audio_ts[filename] = signal

# Segment identifiers ("NN_<file name>"), in order of first appearance in new_df.
# NOTE(review): prediction row m is assumed to belong to groups[m] — confirm
# that predict() returns exactly one row per group in this order.
groups = new_df['group'].unique()

# DataFrame.to_csv does not create missing directories.
os.makedirs(tft_predictions_paths, exist_ok=True)

# 12 = number of hyper-parameter combinations saved by the grid search.
for i in tqdm(range(12)):
  checkpoint = torch.load(os.path.join(tft_paths, f'TFT_{stimuli_type}_{i}.pt'), map_location=torch.device(device))

  # TFT model
  net_tft = TemporalFusionTransformer.from_dataset(
      test,
      learning_rate=LR,
      hidden_size = checkpoint['hyperparameters']['hidden_size'],
      lstm_layers = checkpoint['hyperparameters']['lstm_layers'],
      attention_head_size = checkpoint['hyperparameters']['attention_head_size'],
      dropout = 0.2,
      output_size = 20
    )
  net_tft.load_state_dict(checkpoint['model_state_dict'])
  net_tft.to(device)


  raw_predictions = net_tft.predict(
    test_dataloader, mode="raw",
  )

  predicted_classes = []
  size_of_pred = raw_predictions[0].shape[1]

  for m in range(raw_predictions[0].size()[0]):
    # The output channels of each time step are averaged into a single 1-D series for DTW.
    tmp = classify_obs(pid = groups[m],
                      pred = raw_predictions[0][m].mean(axis=1).detach().cpu().numpy().astype(np.double),
                      df = train_df,
                      data_folder = data_folder,
                      size_of_pred = size_of_pred,
                      audio_ts = audio_ts,
                      stimuli_type = stimuli_type)
    predicted_classes.append(tmp)


  pred_df = pd.DataFrame(predicted_classes, columns=['id', 'pred_severity', 'min_dist'])
  # group[3:] strips the "NN_" segment prefix and leaves the file name.
  pred_df['actual_severity'] = [test_df[test_df[audio_col] == group[3:]]['depression.symptoms'].iloc[0] for group in pred_df['id']]

  pred_df.to_csv(os.path.join(tft_predictions_paths,f'TFT_{stimuli_type}_{i}_pred_multiple.csv'))
  # pred_df
  # pred_df

  0%|          | 0/2 [00:00<?, ?it/s]

INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


### Best model for each stimulus type

Narrative: 8

Story: 8

Instruction: 0

In [350]:
import sklearn.metrics as skm 

tft_paths = "/content/drive/MyDrive/Grid_search_thesis/TFT_models"
tft_predictions_paths = '/content/drive/MyDrive/Grid_search_thesis/TFT_models/Predictions'


# stimuli_type = 'narrative'
# stimuli_type = 'story'
stimuli_type = 'instruction'

# Pick the grid-search model with the highest weighted F1 on the saved predictions.
best_score = 0
model_n = 0

for j in range(12):
  predictions = pd.read_csv(os.path.join(tft_predictions_paths, f'TFT_{stimuli_type}_{j}_pred_multiple.csv'), index_col=0)
  # Strip the "NN_" segment prefix so that 'id' is the recording's file name.
  predictions['id'] = predictions['id'].str[3:]

  # predictions["pred_severity"].loc[(predictions['pred_severity'] == 2) | (predictions['pred_severity'] == 3)] = 1
  # predictions['actual_severity'].loc[(predictions['actual_severity'] == 2) | (predictions['actual_severity'] == 3)] = 1

  # One prediction per recording: the segment with the smallest DTW distance
  # (the first such segment on ties). The true label comes from the CSV itself,
  # so this cell does not depend on the current contents of `test_df`.
  closest_rows = predictions.groupby('id', sort=False)['min_dist'].idxmin()
  single_pred = (
      predictions.loc[closest_rows, ['id', 'pred_severity', 'min_dist', 'actual_severity']]
      .rename(columns={'min_dist': 'DWT_dist'})
      .reset_index(drop=True)
  )
  single_pred['pred_severity'] = single_pred['pred_severity'].astype(int)
  report_nbeats = skm.classification_report(single_pred['actual_severity'], single_pred['pred_severity'], output_dict=True)

  # maj_voting_pred = predictions.groupby(['id']).agg(lambda x:x.value_counts().index[0])
  # report_nbeats = skm.classification_report(maj_voting_pred['actual_severity'], maj_voting_pred['pred_severity'], output_dict=True)

  if report_nbeats['weighted avg']['f1-score'] > best_score:
    model_n = j
    best_score = report_nbeats['weighted avg']['f1-score']

print(stimuli_type, model_n, best_score)

instruction 0 0.5007716049382716


### CV

In [36]:
# Settings for the cross-validation cells. These names are the globals that
# `train_model` reads, so running this cell changes its behaviour
# (longer context/horizon and more batches than the fine-tuning configuration).
BATCH_SIZE = 8
LIMIT_TRAIN_BATCHES = 3000
EPOCHS = 2
# Segment length in seconds, passed as `cutoff_len` / `min_len` to `load_and_preprocess`.
BASE_LEN_f = 5

# Forecast horizon and encoder window, in time steps.
PREDICTION_LENGTH = 400
CONTEXT_LENGTH = 800 
LR = 3e-3

# Folder with the .wav recordings.
data_folder=os.path.join(DRIVE_PATH, 'wav files')

In [37]:
from sklearn.model_selection import StratifiedShuffleSplit

# Ten random 90/10 splits, stratified on the depression score.
sss = StratifiedShuffleSplit(n_splits=10, test_size = 0.1, train_size = 0.9, random_state = 42)

# fold number -> (train positions, test positions) into `df`
cv_splits = dict(enumerate(sss.split(df, df['depression.symptoms'])))

In [None]:
tft_paths = "/content/drive/MyDrive/Grid_search_thesis/TFT_models"
tft_predictions_paths = '/content/drive/MyDrive/Grid_search_thesis/TFT_models/Predictions'
tft_cv = '/content/drive/MyDrive/Grid_search_thesis/TFT_models/CV_results' 

# stimuli_type = 'narrative'
# stimuli_type = 'story'
stimuli_type = 'instruction'

# NOTE(review): model 8 is loaded although the selection cell reported model 0
# as best for 'instruction' — confirm which checkpoint is intended.
# NOTE(review): every fold starts from weights that were already fitted on the
# original training split, which may contain this fold's test recordings.
checkpoint = torch.load(os.path.join(tft_paths, f'TFT_{stimuli_type}_{8}.pt'), map_location=torch.device(device))

audio_col = f'audio.{stimuli_type}'

# DataFrame.to_csv does not create missing directories.
os.makedirs(tft_cv, exist_ok=True)

for fold, idx in enumerate(cv_splits.values()):
    # Obtain new train and test dataframes.
    # train_model() reads the module-level `train_df`, so the fold's frames
    # have to be bound to these global names.
    train_df = df.iloc[idx[0]]
    test_df = df.iloc[idx[1]]

    files = train_df[audio_col][:4]
    new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)
    train_val = create_timeSeriesDataSet(new_df, prediction_len=PREDICTION_LENGTH, encoder_len=CONTEXT_LENGTH)

    net_tft = TemporalFusionTransformer.from_dataset(
      train_val[0],
      learning_rate=LR,
      hidden_size = checkpoint['hyperparameters']['hidden_size'],
      lstm_layers = checkpoint['hyperparameters']['lstm_layers'],
      attention_head_size = checkpoint['hyperparameters']['attention_head_size'],
      dropout = 0.2,
      output_size = 20
    )
    net_tft.load_state_dict(checkpoint['model_state_dict'])
    net_tft.to(device)

    net_tft = train_model(net_tft, EPOCHS=EPOCHS, stimuli_type=stimuli_type)

    # Get predictions from it
    files = test_df[audio_col]
    new_df = load_and_preprocess(files, data_folder, cutoff_len = BASE_LEN_f, min_len = BASE_LEN_f)
    test = create_timeSeriesDataSet_test(new_df, prediction_len=PREDICTION_LENGTH, encoder_len=CONTEXT_LENGTH)

    test_dataloader = test.to_dataloader(
        train=False, batch_size=BATCH_SIZE, num_workers=0, batch_sampler="synchronized"
    )

    raw_predictions = net_tft.predict(
    test_dataloader, mode="raw", return_x=True,
    )

    # With return_x=True the network output is the first element of the result
    # and the prediction tensor is the first element of that output.
    prediction_tensor = raw_predictions[0][0]
    size_of_pred = prediction_tensor.shape[1]
    groups = new_df['group'].unique()

    # Reference recordings of this fold's training participants.
    audio_ts = {}
    for filename in train_df[audio_col]:
        signal, sr = lb.load(os.path.join(data_folder, filename), sr=SR)
        signal, _ = lb.effects.trim(signal, top_db=35)
        audio_ts[filename] = signal

    predicted_classes = []
    for m in range(len(prediction_tensor)):
      # Average the output channels into a 1-D series, as the prediction cell
      # does: the DTW routine compares 1-D sequences.
      tmp = classify_obs(pid = groups[m],
                        pred = prediction_tensor[m].mean(axis=1).detach().cpu().numpy().astype(np.double),
                        df = train_df,
                        data_folder = data_folder,
                        size_of_pred = size_of_pred,
                        audio_ts = audio_ts,
                        stimuli_type = stimuli_type)
      predicted_classes.append(tmp)

    pred_df = pd.DataFrame(predicted_classes, columns=['id', 'pred_severity', 'min_dist'])
    # Strip the "NN_" segment prefix so that 'id' is the recording's file name.
    pred_df['id'] = pred_df['id'].str[3:]
    pred_df['actual_severity'] = [test_df[test_df[audio_col] == name]['depression.symptoms'].iloc[0] for name in pred_df['id']]

    # One prediction per recording: the segment with the smallest DTW distance
    # (the first such segment on ties).
    closest_rows = pred_df.groupby('id', sort=False)['min_dist'].idxmin()
    single_pred = (
        pred_df.loc[closest_rows, ['id', 'pred_severity', 'min_dist', 'actual_severity']]
        .rename(columns={'min_dist': 'DWT_dist'})
        .reset_index(drop=True)
    )
    single_pred['pred_severity'] = single_pred['pred_severity'].astype(int)
    report_tft = skm.classification_report(single_pred['actual_severity'], single_pred['pred_severity'], output_dict=True)

    # The file is named after the fold. Previously an inner loop reused the
    # fold variable, so the name carried a row index instead.
    single_pred.to_csv(os.path.join(tft_cv, f'TFT_{stimuli_type}_fold_{fold}_pred.csv'))

# **Playground**

In [23]:
# Scratch cell: load one saved grid-search model to inspect its stored hyper-parameters.
tft_paths = "/content/drive/MyDrive/Grid_search_thesis/TFT_models"
stimuli_type = 'narrative'
# stimuli_type = 'story'
# stimuli_type = 'instruction'


# The file holds the dict written by the grid search: 'model_state_dict' and 'hyperparameters'.
checkpoint = torch.load(os.path.join(tft_paths, f'TFT_{stimuli_type}_{0}.pt'), map_location=torch.device(device))

In [24]:
checkpoint['hyperparameters']

"attention_head_size":               4
"categorical_groups":                {}
"causal_attention":                  True
"dropout":                           0.2
"embedding_labels":                  {}
"embedding_paddings":                []
"embedding_sizes":                   {}
"hidden_continuous_size":            8
"hidden_continuous_sizes":           {}
"hidden_size":                       32
"learning_rate":                     0.003
"log_gradient_flow":                 False
"log_interval":                      -1
"log_val_interval":                  -1
"logging_metrics":                   ModuleList(
  (0): SMAPE()
  (1): MAE()
  (2): RMSE()
  (3): MAPE()
)
"loss":                              QuantileLoss(quantiles=[0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98])
"lstm_layers":                       2
"max_encoder_length":                200
"monotone_constaints":               {}
"optimizer":                         Ranger
"optimizer_params":                  None
"output_size":     