In [1]:
! pip install deep-utils
! pip install datasets

! pip install transformers[torch]
! pip install accelerate -U

Collecting deep-utils
  Downloading deep_utils-1.3.19-py3-none-any.whl.metadata (22 kB)
Downloading deep_utils-1.3.19-py3-none-any.whl (529 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m529.1/529.1 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep-utils
Successfully installed deep-utils-1.3.19
Collecting accelerate
  Downloading accelerate-0.27.2-py3-none-any.whl.metadata (18 kB)
Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.26.1
    Uninstalling accelerate-0.26.1:
      Successfully uninstalled accelerate-0.26.1
Successfully installed accelerate-0.27.2


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import librosa
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from deep_utils import warmup_cosine

from datasets import load_dataset, Audio, Dataset
from transformers import AutoFeatureExtractor
from transformers import AutoModelForAudioClassification, TrainingArguments, Trainer

import warnings

warnings.filterwarnings("ignore")

2024-02-13 20:59:03.517842: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-13 20:59:03.517947: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-13 20:59:03.802893: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
train = pd.read_csv('/kaggle/input/biomed-datathon-bmefest2/train.csv')
train.head()

Unnamed: 0,patient_id,AS,AR,MR,MS,N,recording_1,recording_2,recording_3,recording_4,recording_5,recording_6,recording_7,recording_8
0,patient_052,0,1,0,0,0,052_sup_Mit,052_sup_Tri,052_sup_Pul,052_sup_Aor,052_sit_Mit,052_sit_Tri,052_sit_Pul,052_sit_Aor
1,patient_058,0,1,0,0,0,058_sup_Mit,058_sup_Tri,058_sup_Pul,058_sup_Aor,058_sit_Mit,058_sit_Tri,058_sit_Pul,058_sit_Aor
2,patient_068,0,1,0,0,0,068_sup_Mit,068_sup_Tri,068_sup_Pul,068_sup_Aor,068_sit_Mit,068_sit_Tri,068_sit_Pul,068_sit_Aor
3,patient_074,0,1,0,0,0,074_sup_Mit,074_sup_Tri,074_sup_Pul,074_sup_Aor,074_sit_Mit,074_sit_Tri,074_sit_Pul,074_sit_Aor
4,patient_054,1,0,0,0,0,054_sup_Mit,054_sup_Tri,054_sup_Pul,054_sup_Aor,054_sit_Mit,054_sit_Tri,054_sit_Pul,054_sit_Aor


In [4]:
patient_id_to_drop = 'patient_085'
train = train[train['patient_id'] != patient_id_to_drop]

In [5]:
label_encoder = LabelEncoder()
train["labels"] = label_encoder.fit_transform(
    np.argmax(train[["AS", "AR", "MR", "MS", "N"]], axis=1)
)
train.drop(
    [
        "AS",
        "AR",
        "MR",
        "MS",
        "N",
        "recording_1",
        "recording_2",
        "recording_3",
        "recording_4",
        "recording_5",
        "recording_6",
        "recording_7",
        "recording_8",
    ],
    axis=1,
    inplace=True,
)
train.head()

Unnamed: 0,patient_id,labels
0,patient_052,1
1,patient_058,1
2,patient_068,1
3,patient_074,1
4,patient_054,0


In [6]:
train.labels.value_counts()

labels
0    19
4    12
2    11
1    10
3     6
Name: count, dtype: int64

In [7]:
BASE_URL = "/kaggle/input/bio-med-merged/"
train["path"] = train["patient_id"].apply(lambda x: BASE_URL + x + ".wav")
train["audio"] = train["patient_id"].apply(lambda x: BASE_URL + x + ".wav")

In [8]:
train.head()

Unnamed: 0,patient_id,labels,path,audio
0,patient_052,1,/kaggle/input/bio-med-merged/patient_052.wav,/kaggle/input/bio-med-merged/patient_052.wav
1,patient_058,1,/kaggle/input/bio-med-merged/patient_058.wav,/kaggle/input/bio-med-merged/patient_058.wav
2,patient_068,1,/kaggle/input/bio-med-merged/patient_068.wav,/kaggle/input/bio-med-merged/patient_068.wav
3,patient_074,1,/kaggle/input/bio-med-merged/patient_074.wav,/kaggle/input/bio-med-merged/patient_074.wav
4,patient_054,0,/kaggle/input/bio-med-merged/patient_054.wav,/kaggle/input/bio-med-merged/patient_054.wav


In [9]:
train.labels.value_counts()

labels
0    19
4    12
2    11
1    10
3     6
Name: count, dtype: int64

In [10]:
strat_split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=31)
for train_index, val_index in strat_split.split(train, train["labels"]):
    train_df = train.iloc[train_index]
    val_df = train.iloc[val_index]

unique_classes_in_test_set = val_df.labels.unique()
print("Classes present in the test set:", unique_classes_in_test_set)

Classes present in the test set: [1 4 0 3 2]


In [11]:
train_ds = Dataset.from_pandas(train_df)
val_ds = Dataset.from_pandas(val_df)

In [12]:
train_ds

Dataset({
    features: ['patient_id', 'labels', 'path', 'audio', '__index_level_0__'],
    num_rows: 46
})

In [13]:
train_ds = train_ds.cast_column("audio", Audio(sampling_rate=16_000))
val_ds = val_ds.cast_column("audio", Audio(sampling_rate=16_000))

In [14]:
len(train_ds), len(val_ds)

(46, 12)

In [17]:
labels = ["AS", "AR", "MR", "MS", "N"]

label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = str(i)
    id2label[str(i)] = label
label2id

{'AS': '0', 'AR': '1', 'MR': '2', 'MS': '3', 'N': '4'}

In [19]:
from transformers import AutoFeatureExtractor, ASTForAudioClassification

In [20]:
feature_extractor = AutoFeatureExtractor.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593"
)

preprocessor_config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

In [21]:
def preprocess_function(examples):
    audio_arrays = [x["array"] for x in examples["audio"]]
    inputs = feature_extractor(
        audio_arrays,
        sampling_rate=feature_extractor.sampling_rate,
        max_length=16000,
        truncation=True,
    )
    return inputs

In [22]:
np.object = object 

In [23]:
encoded_train = train_ds.map(preprocess_function, remove_columns="audio", batched=True)
encoded_val = val_ds.map(preprocess_function, remove_columns="audio", batched=True)

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [24]:
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)
    acc = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average="weighted")
    recall = recall_score(labels, predictions, average="weighted")
    precision = precision_score(labels, predictions, average="weighted")
    macro_f1 = f1_score(labels, predictions, average="macro")

    return {
        "accuracy": acc,
        "f1-score": f1,
        "macro-f1-score": macro_f1,
        "recall-score": recall,
        "precision-score": precision,
    }

In [25]:
from transformers import AdamW, get_linear_schedule_with_warmup

In [26]:
import math
from transformers import EarlyStoppingCallback

early_stopping = EarlyStoppingCallback(early_stopping_patience=5)

train_bs = 4
epochs = 25
lr = 1e-6
lrf = lr
output_dir = "./results"
total_steps = int((np.ceil(encoded_train.num_rows / train_bs) * epochs))

num_labels = len(id2label)

In [27]:
model = ASTForAudioClassification.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593"
)

config.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

In [28]:
training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=epochs,
    load_best_model_at_end=True,
    save_total_limit=1,
    metric_for_best_model="loss",
    per_device_train_batch_size=train_bs,
    per_device_eval_batch_size=8,
    logging_steps=1,
    report_to="none",
    greater_is_better=False,
)

optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer,
    warmup_cosine(
        epochs // 10, max_lr=2e-6, total_steps=total_steps, optimizer_lr=lr, min_lr=1e-7
    ),
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_train,
    eval_dataset=encoded_val,
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
    optimizers=(optimizer, scheduler),
)

In [29]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1-score,Macro-f1-score,Recall-score,Precision-score
1,8.9132,8.47705,0.0,0.0,0.0,0.0,0.0
2,4.6994,4.632671,0.083333,0.133333,0.057143,0.083333,0.333333
3,4.3178,3.04607,0.333333,0.166667,0.1,0.333333,0.111111
4,2.2821,2.2551,0.333333,0.166667,0.1,0.333333,0.111111
5,1.8305,1.887412,0.416667,0.277778,0.2,0.416667,0.208333
6,1.3724,1.72426,0.333333,0.277778,0.2,0.333333,0.366667
7,1.4629,1.566931,0.416667,0.290043,0.202597,0.416667,0.22381
8,1.1599,1.478375,0.5,0.367424,0.245455,0.5,0.290476
9,1.4594,1.437895,0.416667,0.288462,0.189744,0.416667,0.231481
10,1.3235,1.39064,0.5,0.365079,0.247619,0.5,0.291667


Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length': 1024}
Non-default generation parameters: {'max_length'

TrainOutput(global_step=150, training_loss=1.587062072729071, metrics={'train_runtime': 408.7805, 'train_samples_per_second': 2.813, 'train_steps_per_second': 0.367, 'total_flos': 7.83153934761984e+16, 'train_loss': 1.587062072729071, 'epoch': 25.0})

In [30]:
eval_results = trainer.evaluate()

print("Evaluation results:", eval_results)

Evaluation results: {'eval_loss': 1.2298098802566528, 'eval_accuracy': 0.5, 'eval_f1-score': 0.4761904761904761, 'eval_macro-f1-score': 0.38095238095238093, 'eval_recall-score': 0.5, 'eval_precision-score': 0.4916666666666667, 'eval_runtime': 1.8852, 'eval_samples_per_second': 6.366, 'eval_steps_per_second': 0.53, 'epoch': 25.0}


In [31]:
test = pd.read_csv('/kaggle/input/biomed-datathon-bmefest2/test_files.csv')
test.head()

Unnamed: 0,patient_id,recording_1,recording_2,recording_3,recording_4,recording_5,recording_6,recording_7,recording_8
0,patient_001,001_sit_Aor,001_sit_Mit,001_sit_Pul,001_sit_Tri,001_sup_Aor,001_sup_Mit,001_sup_Pul,001_sup_Tri
1,patient_003,003_sit_Aor,003_sit_Mit,003_sit_Pul,003_sit_Tri,003_sup_Aor,003_sup_Mit,003_sup_Pul,003_sup_Tri
2,patient_005,005_sit_Aor,005_sit_Mit,005_sit_Pul,005_sit_Tri,005_sup_Aor,005_sup_Mit,005_sup_Pul,005_sup_Tri
3,patient_007,007_sit_Aor,007_sit_Mit,007_sit_Pul,007_sit_Tri,007_sup_Aor,007_sup_Mit,007_sup_Pul,007_sup_Tri
4,patient_008,008_sit_Aor,008_sit_Mit,008_sit_Pul,008_sit_Tri,008_sup_Aor,008_sup_Mit,008_sup_Pul,008_sup_Tri


In [32]:
test.shape

(49, 9)

In [33]:
BASE_URL = '/kaggle/input/biomed-datathon-bmefest2/test/'

In [34]:
test["recording_1"] = test["recording_1"].apply(lambda x: BASE_URL + x + ".wav")
test["recording_2"] = test["recording_2"].apply(lambda x: BASE_URL + x + ".wav")
test["recording_3"] = test["recording_3"].apply(lambda x: BASE_URL + x + ".wav")
test["recording_4"] = test["recording_4"].apply(lambda x: BASE_URL + x + ".wav")
test["recording_5"] = test["recording_5"].apply(lambda x: BASE_URL + x + ".wav")
test["recording_6"] = test["recording_6"].apply(lambda x: BASE_URL + x + ".wav")
test["recording_7"] = test["recording_7"].apply(lambda x: BASE_URL + x + ".wav")
test["recording_8"] = test["recording_8"].apply(lambda x: BASE_URL + x + ".wav")

In [35]:
test.head()

Unnamed: 0,patient_id,recording_1,recording_2,recording_3,recording_4,recording_5,recording_6,recording_7,recording_8
0,patient_001,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...
1,patient_003,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...
2,patient_005,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...
3,patient_007,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...
4,patient_008,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...,/kaggle/input/biomed-datathon-bmefest2/test/00...


In [36]:
from pydub import AudioSegment
import numpy as np


def merge_audio(files):
    merged = AudioSegment.empty()

    for file in files:
        audio = AudioSegment.from_file(file)
        merged += audio

    return merged


def save_merged_audio(merged, output_file):
    merged.export(output_file, format="wav")

In [37]:
import os

output_dir = "test_files"
os.makedirs(output_dir, exist_ok=True)

In [38]:
for i in range(len(test)):
    files_to_merge = list(test.iloc[i][6:])
    merged_audio = merge_audio(files_to_merge)
    save_merged_audio(
        merged_audio,
        f"{'test_files/patient_' + list(test.iloc[i][6:])[0].split('/')[-1][:3]}.wav",
    )

In [39]:
BASE_URL = "/kaggle/working/test_files/"
test["path"] = test["patient_id"].apply(lambda x: BASE_URL + x + ".wav")
test["audio"] = test["patient_id"].apply(lambda x: BASE_URL + x + ".wav")

In [40]:
test.shape

(49, 11)

In [41]:
test.drop(
    [
        "recording_1",
        "recording_2",
        "recording_3",
        "recording_4",
        "recording_5",
        "recording_6",
        "recording_7",
        "recording_8",
    ],
    axis=1,
    inplace=True,
)

In [42]:
test.head()

Unnamed: 0,patient_id,path,audio
0,patient_001,/kaggle/working/test_files/patient_001.wav,/kaggle/working/test_files/patient_001.wav
1,patient_003,/kaggle/working/test_files/patient_003.wav,/kaggle/working/test_files/patient_003.wav
2,patient_005,/kaggle/working/test_files/patient_005.wav,/kaggle/working/test_files/patient_005.wav
3,patient_007,/kaggle/working/test_files/patient_007.wav,/kaggle/working/test_files/patient_007.wav
4,patient_008,/kaggle/working/test_files/patient_008.wav,/kaggle/working/test_files/patient_008.wav


In [43]:
test.shape

(49, 3)

In [44]:
test_ds = Dataset.from_pandas(test)
test_ds = test_ds.cast_column("audio", Audio(sampling_rate=16_000))

In [45]:
test_ds

Dataset({
    features: ['patient_id', 'path', 'audio'],
    num_rows: 49
})

In [46]:
encoded_test = test_ds.map(preprocess_function, remove_columns="audio", batched=True)

  0%|          | 0/1 [00:00<?, ?ba/s]

In [47]:
test_preds = trainer.predict(encoded_test)
logits = test_preds.predictions
class_predictions_logits = np.argmax(logits, axis=-1)

print("Class predictions from logits:", class_predictions_logits)

Class predictions from logits: [1 0 2 0 2 0 0 0 0 0 0 0 4 0 2 0 2 1 1 0 4 0 0 0 3 1 2 0 2 0 1 4 0 0 2 0 1
 0 0 0 4 4 4 3 0 4 3 0 4]


In [48]:
predicted_original_labels = label_encoder.inverse_transform(class_predictions_logits)
predicted_int_labels = predicted_original_labels.astype(int)
predicted_one_hot = np.eye(5)[predicted_original_labels]

print("One-hot encoded predictions:")
print(predicted_one_hot)

One-hot encoded predictions:
[[0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]]


In [49]:
submission_df = pd.DataFrame(predicted_one_hot, columns=["AS", "AR", "MR", "MS", "N"])

submission_df["patient_id"] = test.patient_id
submission_df = submission_df[["patient_id", "AS", "AR", "MR", "MS", "N"]]


submission_df.head()

Unnamed: 0,patient_id,AS,AR,MR,MS,N
0,patient_001,0.0,1.0,0.0,0.0,0.0
1,patient_003,1.0,0.0,0.0,0.0,0.0
2,patient_005,0.0,0.0,1.0,0.0,0.0
3,patient_007,1.0,0.0,0.0,0.0,0.0
4,patient_008,0.0,0.0,1.0,0.0,0.0


In [50]:
submission_df.to_csv('submission.csv', index=False)