In [None]:
! pip install deep-utils
! pip install datasets

! pip install transformers[torch]
! pip install accelerate -U
%pip install audiomentations

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import wavfile
import librosa

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from deep_utils import warmup_cosine

from datasets import load_dataset, Audio, Dataset
from transformers import AutoFeatureExtractor
from transformers import AutoModelForAudioClassification, TrainingArguments, Trainer

import warnings

warnings.filterwarnings("ignore")

In [None]:
# Load the training metadata table and preview its first rows.
train = pd.read_csv("/kaggle/input/biomed-datathon-bmefest2/train.csv")
train.head()

In [None]:
# Remove every row belonging to one specific patient from the training table.
patient_id_to_drop = "patient_085"
keep_mask = train["patient_id"].ne(patient_id_to_drop)
train = train[keep_mask]

In [None]:
# Reduce the five class columns to the index of their row-wise maximum and
# store it (passed through a LabelEncoder) as the integer "labels" column.
class_columns = ["AS", "AR", "MR", "MS", "N"]
recording_columns = [f"recording_{n}" for n in range(1, 9)]

label_encoder = LabelEncoder()
class_index = np.argmax(train[class_columns], axis=1)
train["labels"] = label_encoder.fit_transform(class_index)

# The class columns and the per-recording columns are not used after this point.
train.drop(class_columns + recording_columns, axis=1, inplace=True)
train.head()

In [None]:
# Each patient's audio lives at <BASE_URL><patient_id>.wav.
BASE_URL = "/kaggle/input/bio-med-merged/"
train["path"] = [BASE_URL + pid + ".wav" for pid in train["patient_id"]]

In [None]:
# Preview the table, including the "path" column.
train.head()

In [None]:
# Number of rows per encoded class, to check class balance.
train.labels.value_counts()

In [None]:
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift

# Augmentation pipeline assembled from four audiomentations transforms.
augmentation_steps = [
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1),
    TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    Shift(min_shift=-0.5, max_shift=0.5, p=0.5),
]
augment = Compose(augmentation_steps)

In [None]:
def load_audio(file_path):
    """Load an audio file with librosa, keeping the file's own sample rate.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` pair as produced by ``librosa.load(..., sr=None)``.
    """
    samples, sample_rate = librosa.load(file_path, sr=None)
    return samples, sample_rate


def save_audio(audio, file_path, sr):
    """Write a floating-point waveform to ``file_path`` as a 16-bit PCM WAV file.

    Args:
        audio: Array of samples, nominally in the range [-1.0, 1.0].
        file_path: Destination path of the WAV file.
        sr: Sample rate passed to ``wavfile.write``.

    Samples outside [-1.0, 1.0] (which can occur after additive noise) are
    clipped before scaling. Without the clip the cast to int16 overflows and
    wraps around, turning loud peaks into garbage of the opposite sign.
    """
    clipped = np.clip(np.asarray(audio), -1.0, 1.0)
    wavfile.write(file_path, sr, (clipped * 32767).astype(np.int16))


def augment_and_update_df(df, augmenter, pre_name="aug_", output_dir="/kaggle/working"):
    """Create one augmented copy of every clip in ``df`` and append it to the table.

    For each row the audio at ``row["path"]`` is loaded, passed through
    ``augmenter(samples=..., sample_rate=...)``, and written to
    ``<output_dir>/<pre_name><original file name>``.

    Args:
        df: Table with at least the columns ``"path"`` and ``"labels"``.
        augmenter: Callable accepting ``samples`` and ``sample_rate`` keyword
            arguments and returning the augmented samples.
        pre_name: Prefix added to the original file name for the augmented copy.
        output_dir: Directory the augmented WAV files are written to.

    Returns:
        A new DataFrame: the rows of ``df`` followed by one row per augmented
        clip with columns ``"patient_id"``, ``"path"`` and ``"labels"``.
        The index is reset.
    """
    out_root = output_dir.rstrip("/")
    augmented_rows = []
    for audio_file, label in zip(df["path"], df["labels"]):
        new_name = pre_name + audio_file.split("/")[-1]
        new_file_name = f"{out_root}/{new_name}"

        audio, sample_rate = load_audio(audio_file)
        augmented_audio = augmenter(samples=audio, sample_rate=sample_rate)
        save_audio(augmented_audio, new_file_name, sample_rate)

        # Use the file name without its extension as the id, so augmented ids
        # look like "<pre_name><id>" instead of carrying a ".wav" suffix.
        augmented_id = new_name.rsplit(".", 1)[0]
        augmented_rows.append((augmented_id, new_file_name, label))

    augmented_df = pd.DataFrame(
        augmented_rows, columns=["patient_id", "path", "labels"]
    )
    return pd.concat([df, augmented_df], ignore_index=True)

In [None]:
# Write one augmented copy of every training clip to disk and append its row to the table.
train = augment_and_update_df(train, augment)

In [None]:
# Preview the table after augmentation.
train.head()

In [None]:
# Row/column count after the augmented rows were appended.
train.shape

In [None]:
# Duplicate the file path into an "audio" column (a separate column from "path").
train["audio"] = train["path"]

In [None]:
# Stratified 80/20 split done per *source recording*, not per row.
# Augmented files are named "aug_<original file name>", so stripping that
# prefix from the file name identifies the recording a clip came from.
# Keeping an original and its augmented copy on the same side of the split
# stops the validation set from containing near-duplicates of training
# clips, which would inflate the validation metrics.
source_id = (
    train["path"].str.rsplit("/", n=1).str[-1].str.replace(r"^aug_", "", regex=True)
)
sources = train.assign(source_id=source_id).drop_duplicates("source_id")

strat_split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=31)
train_index, val_index = next(strat_split.split(sources, sources["labels"]))

val_sources = set(sources["source_id"].iloc[val_index])
is_val = source_id.isin(val_sources)
train_df = train[~is_val]
val_df = train[is_val]

unique_classes_in_test_set = val_df.labels.value_counts()
print("Classes present in the test set:", unique_classes_in_test_set)

In [None]:
# Convert both pandas splits with `Dataset.from_pandas` and display the training one.
train_ds = Dataset.from_pandas(train_df)
val_ds = Dataset.from_pandas(val_df)

train_ds

In [None]:
# Cast the "audio" column to the `Audio` feature with a 16 kHz sampling rate.
train_ds = train_ds.cast_column("audio", Audio(sampling_rate=16_000))
val_ds = val_ds.cast_column("audio", Audio(sampling_rate=16_000))

In [None]:
# Number of examples in the training and validation splits.
len(train_ds), len(val_ds)

In [None]:
# Class names in index order, plus lookup tables in both directions.
# Ids are stored as strings in both mappings.
labels = ["AS", "AR", "MR", "MS", "N"]

label2id = {name: str(idx) for idx, name in enumerate(labels)}
id2label = {str(idx): name for idx, name in enumerate(labels)}
label2id

In [None]:
from transformers import AutoFeatureExtractor, ASTForAudioClassification

In [None]:
# Load the feature extractor published with the pretrained AST checkpoint.
feature_extractor = AutoFeatureExtractor.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593"
)

In [None]:
def preprocess_function(examples):
    """Convert a batch of audio examples into feature-extractor outputs.

    Args:
        examples: Batch mapping whose ``"audio"`` entry is a sequence of
            items, each exposing its samples under the ``"array"`` key.

    Returns:
        Whatever the module-level ``feature_extractor`` returns for the batch.
    """
    waveforms = []
    for item in examples["audio"]:
        waveforms.append(item["array"])

    # NOTE(review): confirm that this feature extractor honours call-time
    # `max_length` / `truncation`; some extractors only read them at construction.
    return feature_extractor(
        waveforms,
        sampling_rate=feature_extractor.sampling_rate,
        max_length=16000,
        truncation=True,
    )

In [None]:
# Compatibility shim: define `np.object` as the builtin `object`.
# presumably needed because a dependency still references the alias — TODO confirm
np.object = object 

In [None]:
# Run `preprocess_function` over both splits in batches, dropping the "audio" column.
encoded_train = train_ds.map(preprocess_function, remove_columns="audio", batched=True)
encoded_val = val_ds.map(preprocess_function, remove_columns="audio", batched=True)

In [None]:
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    recall_score,
    precision_score,
    confusion_matrix,
)


def compute_metrics(p):
    """Compute classification metrics for a multi-class evaluation run.

    Args:
        p: Pair ``(predictions, labels)`` where ``predictions`` holds per-class
            scores of shape (n_samples, n_classes) and ``labels`` the true
            class ids.

    Returns:
        Dict with accuracy, weighted F1/recall/precision, macro F1, and
        macro-averaged one-vs-rest ``sensitivity`` and ``specificity`` plus
        their mean under the key ``"icbhi"``.

    Sensitivity and specificity are derived from the full confusion matrix,
    one class at a time (that class vs. all others), then averaged. Reading
    only the top-left 2x2 corner would describe classes 0 and 1 alone and
    would raise IndexError if fewer than two classes appear. A class with an
    empty denominator contributes 0 rather than NaN.
    """
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)
    acc = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average="weighted")
    recall = recall_score(labels, predictions, average="weighted")
    precision = precision_score(labels, predictions, average="weighted")
    macro_f1 = f1_score(labels, predictions, average="macro")

    cm = confusion_matrix(labels, predictions)
    # Per-class one-vs-rest counts: rows are true classes, columns predictions.
    true_pos = np.diag(cm).astype(float)
    false_neg = cm.sum(axis=1) - true_pos
    false_pos = cm.sum(axis=0) - true_pos
    true_neg = cm.sum() - true_pos - false_neg - false_pos

    def _safe_ratio(numerator, denominator):
        # Element-wise division that yields 0 where the denominator is 0.
        return np.divide(
            numerator,
            denominator,
            out=np.zeros_like(numerator),
            where=denominator > 0,
        )

    sensitivity = float(np.mean(_safe_ratio(true_pos, true_pos + false_neg)))
    specificity = float(np.mean(_safe_ratio(true_neg, true_neg + false_pos)))

    icbhi_score = (sensitivity + specificity) / 2

    return {
        "accuracy": acc,
        "f1": f1,
        "macro-f1": macro_f1,
        "recall": recall,
        "precision": precision,
        "sensitivity": sensitivity,
        "specificity": specificity,
        "icbhi": icbhi_score,
    }

In [None]:
from transformers import AdamW, get_linear_schedule_with_warmup

In [None]:
import math
from transformers import EarlyStoppingCallback

# Early-stopping callback with a patience of 5; it only takes effect once it
# is handed to a Trainer.
early_stopping = EarlyStoppingCallback(early_stopping_patience=5)

# Training hyper-parameters.
train_bs = 4
epochs = 25
lr = 1e-6
lrf = lr
output_dir = "./results"
# Mini-batches per epoch times epochs. NOTE(review): this counts batches, not
# optimizer updates, so it does not account for any gradient accumulation.
total_steps = int((np.ceil(encoded_train.num_rows / train_bs) * epochs))

num_labels = len(id2label)

In [None]:
# Load the pretrained AST backbone but replace its classification head with a
# fresh one sized for this task. Without `num_labels` the checkpoint's original
# head is kept, so the model could predict class ids that do not exist here.
# `ignore_mismatched_sizes=True` allows the head weights, whose shape no
# longer matches the checkpoint, to be re-initialised instead of raising.
model = ASTForAudioClassification.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593",
    num_labels=len(id2label),
    label2id={name: int(idx) for name, idx in label2id.items()},
    id2label={int(idx): name for idx, name in id2label.items()},
    ignore_mismatched_sizes=True,
)

In [None]:
# Number of mini-batches whose gradients are accumulated per optimizer update.
grad_accum_steps = 4

training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=epochs,
    gradient_accumulation_steps=grad_accum_steps,
    fp16=True,
    load_best_model_at_end=True,
    save_total_limit=1,
    metric_for_best_model="loss",
    per_device_train_batch_size=train_bs,
    per_device_eval_batch_size=8,
    logging_steps=1,
    report_to="none",
    greater_is_better=False,
)

weight_decay = 0.01
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

# The scheduler is stepped once per optimizer update, not once per mini-batch,
# so the cosine period must be counted in updates. Counting mini-batches would
# make T_max `grad_accum_steps` times too long and the learning rate would
# barely decay over the run.
batches_per_epoch = int(
    np.ceil(encoded_train.num_rows / training_args.train_batch_size)
)
updates_per_epoch = max(int(np.ceil(batches_per_epoch / grad_accum_steps)), 1)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=updates_per_epoch * epochs
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_train,
    eval_dataset=encoded_val,
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
    optimizers=(optimizer, scheduler),
    # The early-stopping callback was created earlier but never registered.
    callbacks=[early_stopping],
)

In [None]:
# Run fine-tuning.
trainer.train()

In [None]:
# Evaluate the trainer's current model on the evaluation dataset and print the metrics.
eval_results = trainer.evaluate()

print("Evaluation results:", eval_results)

In [None]:
# Load the test metadata table and preview its first rows.
test = pd.read_csv("/kaggle/input/biomed-datathon-bmefest2/test_files.csv")
test.head()

In [None]:
# Row/column count of the raw test table.
test.shape

In [None]:
# Rebind BASE_URL to the directory holding the individual test recordings.
BASE_URL = "/kaggle/input/biomed-datathon-bmefest2/test/"

In [None]:
# Turn every recording_1..recording_8 entry into a full path: <BASE_URL><name>.wav.
# Not idempotent: re-running this cell prefixes/suffixes the values again.
for column in [f"recording_{n}" for n in range(1, 9)]:
    test[column] = test[column].apply(lambda x: BASE_URL + x + ".wav")

In [None]:
# Preview the test table with the recording columns expanded to full paths.
test.head()

In [None]:
from pydub import AudioSegment


def merge_audio(files):
    """Concatenate several audio files into one ``AudioSegment``.

    Args:
        files: Iterable of audio file paths, joined in iteration order.

    Returns:
        The concatenation of all files; an empty segment if ``files`` is empty.
    """
    combined = AudioSegment.empty()
    for path in files:
        combined = combined + AudioSegment.from_file(path)
    return combined


def save_merged_audio(merged, output_file):
    """Export ``merged`` to ``output_file`` in WAV format.

    Args:
        merged: Object exposing ``export(path, format=...)``.
        output_file: Destination path of the exported file.
    """
    export_format = "wav"
    merged.export(output_file, format=export_format)

In [None]:
import os

# Directory for the merged test clips (relative to the current working directory).
# NOTE: this rebinds `output_dir`, which earlier held the training output path.
output_dir = "test_files"
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Merge each test patient's recordings into a single WAV file.
# The file is named after `patient_id` because the later path column is built
# from `patient_id`; deriving the name from the first characters of a
# recording file name only works while those two happen to agree.
for i in range(len(test)):
    row = test.iloc[i]
    # NOTE(review): positional slice kept from the original code; it assumes
    # the recording columns start at position 6. Verify against the CSV layout.
    files_to_merge = list(row.iloc[6:])
    merged_audio = merge_audio(files_to_merge)
    save_merged_audio(merged_audio, f"test_files/{row['patient_id']}.wav")

In [None]:
# Point both "path" and "audio" at <BASE_URL><patient_id>.wav.
# NOTE(review): this absolute directory matches the relative "test_files"
# output only if the working directory is /kaggle/working. Confirm.
BASE_URL = "/kaggle/working/test_files/"
merged_paths = test["patient_id"].apply(lambda x: BASE_URL + x + ".wav")
test["path"] = merged_paths
test["audio"] = merged_paths

In [None]:
# Shape after adding the "path" and "audio" columns.
test.shape

In [None]:
# Drop the recording_1..recording_8 columns in place.
recording_columns = [f"recording_{n}" for n in range(1, 9)]
test.drop(recording_columns, axis=1, inplace=True)

In [None]:
# Preview the test table after the recording columns were dropped.
test.head()

In [None]:
# Shape after dropping the recording columns.
test.shape

In [None]:
# Build the test dataset and cast its "audio" column to `Audio` at 16 kHz, as for the training splits.
test_ds = Dataset.from_pandas(test)
test_ds = test_ds.cast_column("audio", Audio(sampling_rate=16_000))

In [None]:
# Display the test dataset summary.
test_ds

In [None]:
# Apply the same preprocessing to the test set as to the training splits.
encoded_test = test_ds.map(preprocess_function, remove_columns="audio", batched=True)

In [None]:
# Run inference on the test set and take the index of the largest score per example.
test_preds = trainer.predict(encoded_test)
logits = test_preds.predictions
class_predictions_logits = np.argmax(logits, axis=-1)

print("Class predictions from logits:", class_predictions_logits)

In [None]:
# Map predicted class indices back through the label encoder, then one-hot
# encode them. The lookup into the identity matrix uses the explicitly
# int-cast labels, since row indexing requires integers; previously the cast
# result was computed but never used.
predicted_original_labels = label_encoder.inverse_transform(class_predictions_logits)
predicted_int_labels = predicted_original_labels.astype(int)
predicted_one_hot = np.eye(5)[predicted_int_labels]

print("One-hot encoded predictions:")
print(predicted_one_hot)

In [None]:
# Assemble the submission table: patient_id first, then one column per class.
class_columns = ["AS", "AR", "MR", "MS", "N"]
submission_df = pd.DataFrame(predicted_one_hot, columns=class_columns)
submission_df["patient_id"] = test.patient_id
submission_df = submission_df[["patient_id"] + class_columns]

submission_df.head()

In [None]:
# Write the submission file without the DataFrame index column.
submission_df.to_csv('submission-aug2.csv', index=False)