In [1]:
! pip install deep-utils
! pip install datasets

! pip install transformers[torch]
! pip install accelerate -U
! pip install audiomentations

Collecting deep-utils
  Downloading deep_utils-1.3.19-py3-none-any.whl.metadata (22 kB)
Downloading deep_utils-1.3.19-py3-none-any.whl (529 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m529.1/529.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep-utils
Successfully installed deep-utils-1.3.19
Collecting accelerate
  Downloading accelerate-0.27.2-py3-none-any.whl.metadata (18 kB)
Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.26.1
    Uninstalling accelerate-0.26.1:
      Successfully uninstalled accelerate-0.26.1
Successfully installed accelerate-0.27.2
Collecting audiomentations
  Downloading audiomentations-0.34.1-py3-none-any.whl.metadata (10 kB)
Downloa

In [2]:
import os
import numpy as np
import pandas as pd
from scipy.io import wavfile
import matplotlib.pyplot as plt
from pydub import AudioSegment
import librosa

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import (
    train_test_split,
    StratifiedShuffleSplit,
    KFold,
    StratifiedKFold,
)
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    recall_score,
    precision_score,
    confusion_matrix,
)
from deep_utils import warmup_cosine

from datasets import load_dataset, Audio, Dataset
from transformers import AutoFeatureExtractor
from transformers import AutoModelForAudioClassification, TrainingArguments, Trainer

import warnings

warnings.filterwarnings("ignore")

2024-02-20 04:35:02.408958: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-20 04:35:02.409086: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-20 04:35:02.568118: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
BASE_URL = "/kaggle/input/biomed-datathon-bmefest2/train/"
BASE_URL_test = "/kaggle/input/biomed-datathon-bmefest2/test/"

In [4]:
os.makedirs('train_merged', exist_ok=True)

In [5]:
train = pd.read_csv("/kaggle/input/biomed-datathon-bmefest2/train.csv")
train.head()

Unnamed: 0,patient_id,AS,AR,MR,MS,N,recording_1,recording_2,recording_3,recording_4,recording_5,recording_6,recording_7,recording_8
0,patient_052,0,1,0,0,0,052_sup_Mit,052_sup_Tri,052_sup_Pul,052_sup_Aor,052_sit_Mit,052_sit_Tri,052_sit_Pul,052_sit_Aor
1,patient_058,0,1,0,0,0,058_sup_Mit,058_sup_Tri,058_sup_Pul,058_sup_Aor,058_sit_Mit,058_sit_Tri,058_sit_Pul,058_sit_Aor
2,patient_068,0,1,0,0,0,068_sup_Mit,068_sup_Tri,068_sup_Pul,068_sup_Aor,068_sit_Mit,068_sit_Tri,068_sit_Pul,068_sit_Aor
3,patient_074,0,1,0,0,0,074_sup_Mit,074_sup_Tri,074_sup_Pul,074_sup_Aor,074_sit_Mit,074_sit_Tri,074_sit_Pul,074_sit_Aor
4,patient_054,1,0,0,0,0,054_sup_Mit,054_sup_Tri,054_sup_Pul,054_sup_Aor,054_sit_Mit,054_sit_Tri,054_sit_Pul,054_sit_Aor


In [6]:
def merge_audio(files):
    merged = AudioSegment.empty()

    for file in files:
        audio = AudioSegment.from_file(file)
        merged += audio

    return merged


def save_merged_audio(merged, output_file):
    merged.export(output_file, format="wav")

In [7]:
patient_id_to_drop = "patient_085"
train = train[train["patient_id"] != patient_id_to_drop]

In [8]:
train["recording_1"] = train["recording_1"].apply(lambda x: BASE_URL + x + ".wav")
train["recording_2"] = train["recording_2"].apply(lambda x: BASE_URL + x + ".wav")
train["recording_3"] = train["recording_3"].apply(lambda x: BASE_URL + x + ".wav")
train["recording_4"] = train["recording_4"].apply(lambda x: BASE_URL + x + ".wav")
train["recording_5"] = train["recording_5"].apply(lambda x: BASE_URL + x + ".wav")
train["recording_6"] = train["recording_6"].apply(lambda x: BASE_URL + x + ".wav")
train["recording_7"] = train["recording_7"].apply(lambda x: BASE_URL + x + ".wav")
train["recording_8"] = train["recording_8"].apply(lambda x: BASE_URL + x + ".wav")

In [9]:
for i in range(len(train)):
    files_to_merge = list(train.iloc[i][6:])
    merged_audio = merge_audio(files_to_merge)
    save_merged_audio(
        merged_audio,
        f"{'train_merged/patient_' + list(train.iloc[i][6:])[0].split('/')[-1][:3]}.wav",
    )

In [10]:
BASE_URL_merged = "/kaggle/working/train_merged/"
train["path"] = train["patient_id"].apply(lambda x: BASE_URL_merged + x + ".wav")
train["audio"] = train["patient_id"].apply(lambda x: BASE_URL_merged + x + ".wav")

In [11]:
label_encoder = LabelEncoder()
train["labels"] = label_encoder.fit_transform(
    np.argmax(train[["AS", "AR", "MR", "MS", "N"]], axis=1)
)
train.drop(
    [
        "AS",
        "AR",
        "MR",
        "MS",
        "N",
        "recording_1",
        "recording_2",
        "recording_3",
        "recording_4",
        "recording_5",
        "recording_6",
        "recording_7",
        "recording_8",
    ],
    axis=1,
    inplace=True,
)
train.head()

Unnamed: 0,patient_id,path,audio,labels
0,patient_052,/kaggle/working/train_merged/patient_052.wav,/kaggle/working/train_merged/patient_052.wav,1
1,patient_058,/kaggle/working/train_merged/patient_058.wav,/kaggle/working/train_merged/patient_058.wav,1
2,patient_068,/kaggle/working/train_merged/patient_068.wav,/kaggle/working/train_merged/patient_068.wav,1
3,patient_074,/kaggle/working/train_merged/patient_074.wav,/kaggle/working/train_merged/patient_074.wav,1
4,patient_054,/kaggle/working/train_merged/patient_054.wav,/kaggle/working/train_merged/patient_054.wav,0


In [12]:
n_fold = 5
MODEL_NAME = "MIT/ast-finetuned-audioset"
train_bs = 4
epochs = 50
lr = 1e-6
lrf = lr
output_dir = "./results"

In [13]:
labels = ["AS", "AR", "MR", "MS", "N"]

label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = str(i)
    id2label[str(i)] = label
label2id

{'AS': '0', 'AR': '1', 'MR': '2', 'MS': '3', 'N': '4'}

In [14]:
from transformers import AutoFeatureExtractor, ASTForAudioClassification

feature_extractor = AutoFeatureExtractor.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593"
)
model = ASTForAudioClassification.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593"
)

np.object = object

preprocessor_config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

In [15]:
def preprocess_function(examples):
    audio_arrays = [x["array"] for x in examples["audio"]]
    inputs = feature_extractor(
        audio_arrays,
        sampling_rate=feature_extractor.sampling_rate,
        max_length=16000,
        truncation=True,
    )
    return inputs

In [16]:
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)
    acc = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average="weighted")
    recall = recall_score(labels, predictions, average="weighted")
    precision = precision_score(labels, predictions, average="weighted")
    macro_f1 = f1_score(labels, predictions, average="macro")
    cm = confusion_matrix(labels, predictions)

    tn = cm[0, 0]
    fp = cm[0, 1]
    fn = cm[1, 0]
    tp = cm[1, 1]

    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    icbhi_score = (sensitivity + specificity) / 2

    return {
        "accuracy": acc,
        "f1": f1,
        "macro-f1": macro_f1,
        "recall": recall,
        "precision": precision,
        "sensitivity": sensitivity,
        "specificity": specificity,
        "icbhi": icbhi_score,
    }

In [17]:
import math
from transformers import EarlyStoppingCallback
from transformers import AdamW, get_linear_schedule_with_warmup

early_stopping = EarlyStoppingCallback(early_stopping_patience=5)

total_steps = int((np.ceil(len(train) / train_bs) * epochs))
num_labels = len(id2label)

In [18]:
aug_output_dir = "aug_files"
os.makedirs(aug_output_dir, exist_ok=True)

In [19]:
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift

augment1 = Compose(
    [
        AddGaussianNoise(min_amplitude=0.01, max_amplitude=0.025, p=1),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.75),
        PitchShift(min_semitones=-4, max_semitones=4, p=0.25),
        Shift(min_shift=-0.5, max_shift=0.5, p=0.5),
    ]
)

augment2 = Compose(
    [
        AddGaussianNoise(min_amplitude=0.02, max_amplitude=0.05, p=0.5),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.35),
        PitchShift(min_semitones=-4, max_semitones=4, p=0.75),
        Shift(min_shift=-0.5, max_shift=0.5, p=0.9),
    ]
)


def load_audio(file_path):
    audio, sr = librosa.load(file_path, sr=4000)
    return audio, sr


def save_audio(audio, file_path, sr):
    wavfile.write(file_path, sr, (audio * 32767).astype(np.int16))


def augment_and_update_df(df, augmenter, pre_name="aug_"):
    augmented_data = []
    for index, row in df.iterrows():
        audio_file = row["path"]
        new_name = pre_name + audio_file.split("/")[-1]
        label = row["labels"]
        audio, sample_rate = load_audio(audio_file)
        augmented_audio = augmenter(samples=audio, sample_rate=sample_rate)
        new_file_name = f"/kaggle/working/aug_files/{new_name}"
        augmented_data.append((new_name, new_file_name, label))
        save_audio(augmented_audio, new_file_name, sample_rate)

    augmented_df = pd.DataFrame(
        augmented_data, columns=["patient_id", "path", "labels"]
    )

    df = pd.concat([df, augmented_df], ignore_index=True)
    return df


train = augment_and_update_df(train, augment1, "aug1_")
train = augment_and_update_df(train, augment2, "aug2_")
train["audio"] = train["path"]

In [20]:
train.shape

(232, 4)

In [21]:
training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=epochs,
    load_best_model_at_end=True,
    save_total_limit=1,
    metric_for_best_model="loss",
    per_device_train_batch_size=train_bs,
    per_device_eval_batch_size=8,
    logging_steps=1,
    report_to="none",
    greater_is_better=False,
)

optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer,
    warmup_cosine(
        epochs // 10, max_lr=2e-6, total_steps=total_steps, optimizer_lr=lr, min_lr=1e-7
    ),
)

In [22]:
train_df = train
kf = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=42)

fold_macro_f1_scores = []
for fold, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df["labels"])):

    print(f"----------------{MODEL_NAME} - Fold: {fold}-----------")

    train_set, val_set = train_df.iloc[train_idx], train_df.iloc[val_idx]
    train_ds = Dataset.from_pandas(train_set)
    val_ds = Dataset.from_pandas(val_set)

    train_ds = train_ds.cast_column("audio", Audio(sampling_rate=16_000))
    val_ds = val_ds.cast_column("audio", Audio(sampling_rate=16_000))

    encoded_train = train_ds.map(
        preprocess_function, remove_columns="audio", batched=True
    )
    encoded_val = val_ds.map(preprocess_function, remove_columns="audio", batched=True)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=encoded_train,
        eval_dataset=encoded_val,
        tokenizer=feature_extractor,
        compute_metrics=compute_metrics,
        optimizers=(optimizer, scheduler),
        callbacks=[early_stopping],
    )

    trainer.train()

    eval_results = trainer.evaluate()
    print("Evaluation results:", eval_results)
    fold_macro_f1_scores.append(eval_results["eval_macro-f1"])

avg_macro_f1 = sum(fold_macro_f1_scores) / len(fold_macro_f1_scores)
print("Average Macro f1 score:", avg_macro_f1)

----------------MIT/ast-finetuned-audioset - Fold: 0-----------


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro-f1,Recall,Precision,Sensitivity,Specificity,Icbhi
1,3.0076,2.913307,0.340426,0.172915,0.101587,0.340426,0.11589,0.0,1.0,0.5
2,2.4598,1.748061,0.340426,0.231397,0.166249,0.340426,0.368794,0.0,1.0,0.5
3,1.9078,1.573122,0.404255,0.286538,0.215311,0.404255,0.430721,0.0,1.0,0.5
4,0.6719,1.532124,0.382979,0.258743,0.177011,0.382979,0.342452,0.0,1.0,0.5
5,1.5216,1.528107,0.297872,0.228063,0.167974,0.297872,0.316785,0.166667,1.0,0.583333
6,1.9575,1.489615,0.382979,0.355174,0.297041,0.382979,0.423404,0.4,1.0,0.7
7,2.0134,1.475062,0.382979,0.357876,0.292213,0.382979,0.375029,0.4,0.916667,0.658333
8,1.541,1.455071,0.404255,0.371544,0.308194,0.404255,0.372009,0.4,0.916667,0.658333
9,0.4185,1.441633,0.404255,0.356953,0.286377,0.404255,0.408916,0.166667,0.928571,0.547619
10,1.6428,1.424392,0.468085,0.424401,0.355338,0.468085,0.444449,0.428571,0.928571,0.678571


Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}


Evaluation results: {'eval_loss': 1.4243923425674438, 'eval_accuracy': 0.46808510638297873, 'eval_f1': 0.42440113078410946, 'eval_macro-f1': 0.3553379953379953, 'eval_recall': 0.46808510638297873, 'eval_precision': 0.4444485204206407, 'eval_sensitivity': 0.42857142857142855, 'eval_specificity': 0.9285714285714286, 'eval_icbhi': 0.6785714285714286, 'eval_runtime': 7.1898, 'eval_samples_per_second': 6.537, 'eval_steps_per_second': 0.417, 'epoch': 15.0}
----------------MIT/ast-finetuned-audioset - Fold: 1-----------


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro-f1,Recall,Precision,Sensitivity,Specificity,Icbhi
1,1.2459,0.77381,0.723404,0.71779,0.702778,0.723404,0.737082,0.714286,0.933333,0.82381
2,0.5695,0.792207,0.765957,0.764289,0.749216,0.765957,0.787234,0.75,0.933333,0.841667
3,0.4479,0.771828,0.744681,0.74037,0.723399,0.744681,0.754559,0.714286,0.933333,0.82381
4,0.1509,0.761497,0.787234,0.785565,0.768301,0.787234,0.804311,0.75,0.933333,0.841667
5,0.4183,0.719139,0.787234,0.778783,0.740556,0.787234,0.786845,0.75,0.933333,0.841667
6,0.0634,0.747099,0.744681,0.737762,0.706732,0.744681,0.758105,0.75,0.933333,0.841667
7,2.5485,0.717912,0.744681,0.735206,0.700963,0.744681,0.742086,0.714286,0.933333,0.82381
8,0.2072,0.695658,0.787234,0.778783,0.740556,0.787234,0.786845,0.75,0.933333,0.841667
9,1.1507,0.688739,0.765957,0.759574,0.723333,0.765957,0.763378,0.75,0.928571,0.839286
10,0.0221,0.702493,0.787234,0.778383,0.743674,0.787234,0.78617,0.75,0.933333,0.841667


Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length'

Evaluation results: {'eval_loss': 0.6762505769729614, 'eval_accuracy': 0.7872340425531915, 'eval_f1': 0.7782918173952383, 'eval_macro-f1': 0.7347804996361872, 'eval_recall': 0.7872340425531915, 'eval_precision': 0.7796882195818367, 'eval_sensitivity': 0.7142857142857143, 'eval_specificity': 0.9333333333333333, 'eval_icbhi': 0.8238095238095238, 'eval_runtime': 7.1613, 'eval_samples_per_second': 6.563, 'eval_steps_per_second': 0.419, 'epoch': 22.0}
----------------MIT/ast-finetuned-audioset - Fold: 2-----------


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro-f1,Recall,Precision,Sensitivity,Specificity,Icbhi
1,0.0724,0.14415,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,0.2488,0.145544,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,0.2758,0.146287,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,0.1531,0.146085,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,0.0775,0.148457,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,0.2007,0.150306,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}


Evaluation results: {'eval_loss': 0.14414986968040466, 'eval_accuracy': 1.0, 'eval_f1': 1.0, 'eval_macro-f1': 1.0, 'eval_recall': 1.0, 'eval_precision': 1.0, 'eval_sensitivity': 1.0, 'eval_specificity': 1.0, 'eval_icbhi': 1.0, 'eval_runtime': 7.1531, 'eval_samples_per_second': 6.431, 'eval_steps_per_second': 0.419, 'epoch': 6.0}
----------------MIT/ast-finetuned-audioset - Fold: 3-----------


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro-f1,Recall,Precision,Sensitivity,Specificity,Icbhi
1,0.1527,0.13896,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,0.2422,0.140331,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,0.8105,0.141231,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,0.1903,0.1408,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,1.9782,0.14241,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,0.1027,0.14239,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Checkpoint destination directory ./results/checkpoint-24 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}


Evaluation results: {'eval_loss': 0.13896015286445618, 'eval_accuracy': 1.0, 'eval_f1': 1.0, 'eval_macro-f1': 1.0, 'eval_recall': 1.0, 'eval_precision': 1.0, 'eval_sensitivity': 1.0, 'eval_specificity': 1.0, 'eval_icbhi': 1.0, 'eval_runtime': 7.1993, 'eval_samples_per_second': 6.39, 'eval_steps_per_second': 0.417, 'epoch': 6.0}
----------------MIT/ast-finetuned-audioset - Fold: 4-----------


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro-f1,Recall,Precision,Sensitivity,Specificity,Icbhi
1,0.1813,0.133854,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,0.1737,0.135506,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,0.2932,0.135385,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,0.1812,0.136151,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,0.5785,0.138159,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,0.455,0.137828,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Checkpoint destination directory ./results/checkpoint-24 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}


Evaluation results: {'eval_loss': 0.13385362923145294, 'eval_accuracy': 1.0, 'eval_f1': 1.0, 'eval_macro-f1': 1.0, 'eval_recall': 1.0, 'eval_precision': 1.0, 'eval_sensitivity': 1.0, 'eval_specificity': 1.0, 'eval_icbhi': 1.0, 'eval_runtime': 7.2052, 'eval_samples_per_second': 6.384, 'eval_steps_per_second': 0.416, 'epoch': 6.0}
Average Macro f1 score: 0.8180236989948366


# Test

In [23]:
output_dir = "test_files"
os.makedirs(output_dir, exist_ok=True)

In [24]:
test = pd.read_csv("/kaggle/input/biomed-datathon-bmefest2/test_files.csv")
test.head()

Unnamed: 0,patient_id,recording_1,recording_2,recording_3,recording_4,recording_5,recording_6,recording_7,recording_8
0,patient_001,001_sit_Aor,001_sit_Mit,001_sit_Pul,001_sit_Tri,001_sup_Aor,001_sup_Mit,001_sup_Pul,001_sup_Tri
1,patient_003,003_sit_Aor,003_sit_Mit,003_sit_Pul,003_sit_Tri,003_sup_Aor,003_sup_Mit,003_sup_Pul,003_sup_Tri
2,patient_005,005_sit_Aor,005_sit_Mit,005_sit_Pul,005_sit_Tri,005_sup_Aor,005_sup_Mit,005_sup_Pul,005_sup_Tri
3,patient_007,007_sit_Aor,007_sit_Mit,007_sit_Pul,007_sit_Tri,007_sup_Aor,007_sup_Mit,007_sup_Pul,007_sup_Tri
4,patient_008,008_sit_Aor,008_sit_Mit,008_sit_Pul,008_sit_Tri,008_sup_Aor,008_sup_Mit,008_sup_Pul,008_sup_Tri


In [25]:
test["recording_1"] = test["recording_1"].apply(lambda x: BASE_URL_test + x + ".wav")
test["recording_2"] = test["recording_2"].apply(lambda x: BASE_URL_test + x + ".wav")
test["recording_3"] = test["recording_3"].apply(lambda x: BASE_URL_test + x + ".wav")
test["recording_4"] = test["recording_4"].apply(lambda x: BASE_URL_test + x + ".wav")
test["recording_5"] = test["recording_5"].apply(lambda x: BASE_URL_test + x + ".wav")
test["recording_6"] = test["recording_6"].apply(lambda x: BASE_URL_test + x + ".wav")
test["recording_7"] = test["recording_7"].apply(lambda x: BASE_URL_test + x + ".wav")
test["recording_8"] = test["recording_8"].apply(lambda x: BASE_URL_test + x + ".wav")

In [26]:
for i in range(len(test)):
    files_to_merge = list(test.iloc[i][6:])
    merged_audio = merge_audio(files_to_merge)
    save_merged_audio(
        merged_audio,
        f"{'test_files/patient_' + list(test.iloc[i][6:])[0].split('/')[-1][:3]}.wav",
    )

In [27]:
test_merged = "/kaggle/working/test_files/"
test["path"] = test["patient_id"].apply(lambda x: test_merged + x + ".wav")
test["audio"] = test["patient_id"].apply(lambda x: test_merged + x + ".wav")

test.drop(
    [
        "recording_1",
        "recording_2",
        "recording_3",
        "recording_4",
        "recording_5",
        "recording_6",
        "recording_7",
        "recording_8",
    ],
    axis=1,
    inplace=True,
)

In [28]:
test_ds = Dataset.from_pandas(test)
test_ds = test_ds.cast_column("audio", Audio(sampling_rate=16_000))

In [29]:
test_ds

Dataset({
    features: ['patient_id', 'path', 'audio'],
    num_rows: 49
})

In [30]:
encoded_test = test_ds.map(preprocess_function, remove_columns="audio", batched=True)

  0%|          | 0/1 [00:00<?, ?ba/s]

In [31]:
test_preds = trainer.predict(encoded_test)
logits = test_preds.predictions
class_predictions_logits = np.argmax(logits, axis=-1)

print("Class predictions from logits:", class_predictions_logits)

Class predictions from logits: [1 3 2 0 0 0 0 1 0 0 0 0 4 0 2 0 2 1 1 0 3 0 0 3 3 2 2 0 2 0 1 1 0 2 2 0 0
 0 0 2 4 4 4 4 0 4 3 4 4]


In [32]:
predicted_original_labels = label_encoder.inverse_transform(class_predictions_logits)
predicted_int_labels = predicted_original_labels.astype(int)
predicted_one_hot = np.eye(5)[predicted_original_labels]

print("One-hot encoded predictions:")
print(predicted_one_hot)

One-hot encoded predictions:
[[0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]]


In [33]:
submission_df = pd.DataFrame(predicted_one_hot, columns=["AS", "AR", "MR", "MS", "N"])

submission_df["patient_id"] = test.patient_id
submission_df = submission_df[["patient_id", "AS", "AR", "MR", "MS", "N"]]


submission_df.head()

Unnamed: 0,patient_id,AS,AR,MR,MS,N
0,patient_001,0.0,1.0,0.0,0.0,0.0
1,patient_003,0.0,0.0,0.0,1.0,0.0
2,patient_005,0.0,0.0,1.0,0.0,0.0
3,patient_007,1.0,0.0,0.0,0.0,0.0
4,patient_008,1.0,0.0,0.0,0.0,0.0


In [34]:
submission_df.to_csv("submission-fold-aug.csv", index=False)