In [None]:
# Root folder of the validation split (one sub-folder of WAV clips per class).
validation_dir = "../train-val_szunyog_hangok_osztalyozáshoz_3cls_25_02_14/validation/"

# Fine-tuned checkpoint to evaluate (the best one).
checkpoint_dir = "./results_AST_multiclass_mosquito_25_01_28-full-3cls/checkpoint-2092"

# Class index -> species name. A later cell rebinds this name from the loaded
# model's config.
id2label = dict(enumerate(("Aedes_koreicus", "Ochlerotatus_geniculatus", "Aedes_albopictus")))



In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix
import pandas as pd
import numpy as np
from tqdm import tqdm
import librosa
import torch
import os
from datasets import Dataset, DatasetDict

# Sanity-check the checkpoint location before anything is loaded from it.
checkpoint_found = os.path.exists(checkpoint_dir)
print("Checkpoint folder exists:", checkpoint_found)
checkpoint_entries = os.listdir(checkpoint_dir)
print("Folder contents:", checkpoint_entries)


In [None]:
def load_audio_dataset_from_folders(validation_dir, everyNth=1):
    """
    Collect labelled WAV files from a folder tree and wrap them in a Dataset.

    Every sub-folder of ``validation_dir`` is treated as one class: the folder
    name becomes the label of each ``.wav`` file directly inside it. Entries
    that are not directories, and files without a ``.wav`` suffix, are skipped.

    Args:
        validation_dir (str): Path to the 'validation' folder.
        everyNth (int): Keep only the files whose position in the collected
            list is a multiple of this value (1 keeps all of them). Must be
            non-zero.

    Returns:
        Dataset: A dataset with the columns 'file_path' and 'label'.
    """
    def get_audio_files_with_labels(directory):
        """Return one {'file_path', 'label'} dict per WAV file, in sorted order."""
        data = []
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # row order (and therefore the everyNth subsample) reproducible.
        for class_name in sorted(os.listdir(directory)):  # One folder per class
            class_path = os.path.join(directory, class_name)
            if os.path.isdir(class_path):
                for file_name in sorted(os.listdir(class_path)):
                    if file_name.endswith(".wav"):  # Only WAV files
                        file_path = os.path.join(class_path, file_name)
                        data.append({"file_path": file_path, "label": class_name})
        return data

    # Load validation data
    validation_data = get_audio_files_with_labels(validation_dir)

    # Apply the optional subsampling once, then split into columns.
    selected = [d for idx, d in enumerate(validation_data) if idx % everyNth == 0]

    # Create Dataset
    return Dataset.from_dict({
        "file_path": [d["file_path"] for d in selected],
        "label": [d["label"] for d in selected],
    })



In [None]:
# Build the validation dataset from the class folders and print its summary.
validation_dataset = load_audio_dataset_from_folders(validation_dir=validation_dir)

print(validation_dataset)


In [None]:
from transformers import AutoProcessor, AutoModelForAudioClassification
# Restore the processor and the classifier from the checkpoint folder.
processor = AutoProcessor.from_pretrained(checkpoint_dir)
model = AutoModelForAudioClassification.from_pretrained(checkpoint_dir)

# Take both label mappings from the model config so they match the checkpoint.
model_config = model.config
id2label, label2id = model_config.id2label, model_config.label2id

id2label


In [None]:
# Run on the GPU when one is available, otherwise on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")
model.to(device)
print(device)



In [None]:

def predict(audio_filename, model):
    """
    Classify one audio file and return its highest-scoring class.

    Relies on the notebook-level ``processor`` and ``device``.

    Args:
        audio_filename (str): Path of the audio file to classify.
        model: Classifier whose output exposes ``.logits`` and whose
            ``config.id2label`` maps class indices to label names.

    Returns:
        tuple: ``(class_index, class_label)`` of the highest-scoring class.
    """
    target_sr = 16000

    # Decode the file, resampled to 16 kHz.
    waveform, _ = librosa.load(audio_filename, sr=target_sr)

    # Turn the waveform into model inputs and move them to the model's device.
    features = processor(waveform, sampling_rate=target_sr, return_tensors="pt", padding=True)
    features = {name: tensor.to(device) for name, tensor in features.items()}

    # Inference only, so no gradients are tracked.
    with torch.no_grad():
        scores = model(**features).logits

    probabilities = torch.nn.functional.softmax(scores, dim=-1)
    best_index = torch.argmax(probabilities, dim=-1).item()

    return best_index, model.config.id2label[best_index]




In [None]:
# Run the model over the whole validation set, remembering for every clip its
# file name, its folder label and the predicted class index.
audio_paths = []
label_names = []
predicted_labels = []

for sample in tqdm(validation_dataset, desc="Processing validation set"):
    clip_path = sample["file_path"]
    class_index, _ = predict(clip_path, model)

    audio_paths.append(os.path.basename(clip_path))
    label_names.append(sample["label"])
    predicted_labels.append(class_index)

# The metric cells below work on integer class ids, so map the folder names.
true_labels = [label2id[name] for name in label_names]

# Persist the per-clip results.
res_csv = pd.DataFrame({
    "file": audio_paths,
    "true label": label_names,
    "predicted class": predicted_labels,
    "true class": true_labels,
})
res_csv.to_csv("eval_results_AST.csv")

true_labels

In [None]:
from sklearn.metrics import (
    accuracy_score, balanced_accuracy_score, precision_score, recall_score,
    f1_score, confusion_matrix, roc_auc_score
)
import matplotlib.pyplot as plt
import seaborn as sns

# Overall metrics; precision, recall and F1 are averaged with class-support weights.
accuracy = accuracy_score(true_labels, predicted_labels)
balanced_accuracy = balanced_accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average="weighted", zero_division=0)
recall = recall_score(true_labels, predicted_labels, average="weighted", zero_division=0)
f1 = f1_score(true_labels, predicted_labels, average="weighted", zero_division=0)

# Print metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Balanced Accuracy: {balanced_accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

# Calculate confusion matrix.
# The class order is fixed explicitly so the matrix always has one row/column
# per known class (even one that does not occur in this run) and the tick
# labels below are guaranteed to line up with it.
class_ids = sorted(id2label)
class_names = [id2label[class_id] for class_id in class_ids]
conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=class_ids)
print("Confusion Matrix:")
print(conf_matrix)

# Visualize confusion matrix
plt.figure(figsize=(4, 3))  # The figure size can be increased as well so that larger fonts fit better
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names,
            annot_kws={"fontsize": 14})  # Font size of the cell annotations
plt.xlabel("Predicted Labels", fontsize=16)  # Font size of the x-axis label
plt.ylabel("True Labels", fontsize=16)       # Font size of the y-axis label
plt.title("Confusion Matrix", fontsize=18)   # Font size of the title
plt.show()


In [None]:
# Calculate metrics for individual classes.
# The ids are sorted and passed as `labels=` so that position i of every
# per-class array belongs to class_labels[i], even if a class never occurs.
class_labels = sorted(id2label.keys())  # Class indices
precision_per_class = precision_score(true_labels, predicted_labels, labels=class_labels, average=None, zero_division=0)
recall_per_class = recall_score(true_labels, predicted_labels, labels=class_labels, average=None, zero_division=0)
f1_per_class = f1_score(true_labels, predicted_labels, labels=class_labels, average=None, zero_division=0)

# Binarize labels for ROC-AUC calculation (one vs. all). Looking rows up in the
# identity matrix assumes the class ids are 0..n-1.
# NOTE: the "scores" are hard 0/1 predictions rather than probabilities.
true_labels_binarized = np.eye(len(class_labels))[true_labels]  # One-hot encoding
predicted_labels_binarized = np.eye(len(class_labels))[predicted_labels]

roc_auc_per_class = []
for i in range(len(class_labels)):
    try:
        roc_auc = roc_auc_score(true_labels_binarized[:, i], predicted_labels_binarized[:, i])
    except ValueError:  # If there is no positive sample
        roc_auc = np.nan
    roc_auc_per_class.append(roc_auc)

# Print class-wise metrics
print("\nClass-wise metrics:")
for i, label in enumerate(class_labels):
    print(f"Class {id2label[label]}:")
    print(f"  Precision: {precision_per_class[i]:.2f}")
    print(f"  Recall: {recall_per_class[i]:.2f}")
    print(f"  F1 Score: {f1_per_class[i]:.2f}")
    print(f"  ROC-AUC: {roc_auc_per_class[i]:.2f}" if not np.isnan(roc_auc_per_class[i]) else "  ROC-AUC: N/A")

# Metric name -> one value per class (this is what gets plotted).
metrics_by_name = {
    "Precision": precision_per_class,
    "Recall": recall_per_class,
    "F1 Score": f1_per_class,
    "ROC-AUC": roc_auc_per_class,
}
# Class name -> (precision, recall, F1, ROC-AUC); kept for inspection.
metrics_df = {id2label[label]: values for label, values in zip(class_labels, zip(*metrics_by_name.values()))}

# Visualization (class-wise metrics): one line per metric across the classes.
# Iterating the class-keyed dict here instead would pair four metric values
# with len(class_labels) x positions and label the lines with class names.
plt.figure(figsize=(12, 6))
for metric, values in metrics_by_name.items():
    plt.plot(class_labels, values, marker='o', label=metric)

plt.xticks(class_labels, [id2label[label] for label in class_labels], rotation=45)
plt.xlabel("Class")
plt.ylabel("Metric Value")
plt.title("Class-wise Metrics")
plt.legend()
plt.grid()
plt.tight_layout()
plt.show()



# Top-k accuracy (top-1 to top-4)

In [None]:
# top-k accuracy

def predict_top(audio_filename, model):
    """
    Rank the classes for one audio file from most to least probable.

    Relies on the notebook-level ``processor`` and ``device``.

    Args:
        audio_filename (str): Path of the audio file to classify.
        model: Classifier whose output exposes ``.logits``.

    Returns:
        list: Class indices ordered by descending probability.
    """
    target_sr = 16000

    # Decode the file, resampled to 16 kHz.
    waveform, _ = librosa.load(audio_filename, sr=target_sr)

    # Turn the waveform into model inputs and move them to the model's device.
    features = processor(waveform, sampling_rate=target_sr, return_tensors="pt", padding=True)
    features = {name: tensor.to(device) for name, tensor in features.items()}

    # Inference only, so no gradients are tracked.
    with torch.no_grad():
        scores = model(**features).logits

    # Drop the singleton dimensions so the ranking is a flat list of class ids.
    probabilities = torch.nn.functional.softmax(scores, dim=-1).squeeze()
    ranking = torch.argsort(probabilities, descending=True)

    return ranking.tolist()



In [None]:
from tqdm import tqdm
from sklearn.metrics import accuracy_score

# Tally, for k = 1..4, how often the true class id is among the model's k
# highest-ranked classes.
true_labels = []
hits_at_k = {1: 0, 2: 0, 3: 0, 4: 0}

idx = 0
for idx, example in enumerate(tqdm(validation_dataset, desc="Processing validation set"), start=1):
    audio_filepath = example["file_path"]
    true_label = label2id[example["label"]]

    # Class indices ranked from most to least probable.
    sorted_predictions = predict_top(audio_filepath, model)

    true_labels.append(true_label)
    for k in hits_at_k:
        if true_label in sorted_predictions[:k]:
            hits_at_k[k] += 1

total_samples = len(true_labels)
top1_correct, top2_correct, top3_correct, top4_correct = (hits_at_k[k] for k in (1, 2, 3, 4))

# Calculate Top-k Accuracy
top1_accuracy = top1_correct / total_samples
top2_accuracy = top2_correct / total_samples
top3_accuracy = top3_correct / total_samples
top4_accuracy = top4_correct / total_samples

print(f"Top-1 Accuracy: {top1_accuracy:.2f}")
print(f"Top-2 Accuracy: {top2_accuracy:.2f}")
print(f"Top-3 Accuracy: {top3_accuracy:.2f}")
print(f"Top-4 Accuracy: {top4_accuracy:.2f}")


# per-file evaluation

In [None]:
# Display the first record to check which fields a dataset row carries.
validation_dataset[0]

In [None]:
import pandas as pd
import os

# Collect one row per clip: its path, the test_id parsed from the file name,
# its true class id and the model's ranked predictions.
data = []

for example in validation_dataset:
    audio_filepath = example["file_path"]
    true_label = label2id[example["label"]]
    ranked = predict_top(audio_filepath, model)

    # test_id is the file-name prefix up to the second "_"; names that do not
    # split into more than two parts are marked "N/A".
    name_parts = os.path.basename(audio_filepath).split("_")
    test_id = "_".join(name_parts[:2]) if len(name_parts) > 2 else "N/A"

    row = {
        "file_path": audio_filepath,
        "test_id": test_id,
        "true_label": true_label,
        "top1_prediction": ranked[0],
    }
    # Ranks beyond the number of returned classes are stored as None.
    for rank in (2, 3, 4):
        row[f"top{rank}_prediction"] = ranked[rank - 1] if len(ranked) >= rank else None
    data.append(row)

# Convert data to DataFrame format
df_results = pd.DataFrame(data)

df_results.tail()


In [None]:
# Display the first rows of the per-clip results table.
df_results.head()

In [None]:
# Group by test_id to aggregate results: one row per test_id, where the true
# label comes from the group's first clip and the prediction is the most
# frequent top-1 prediction (mode) over the group's clips.
grouped_results = df_results.groupby("test_id").agg(
    true_label=("true_label", lambda x: x.iloc[0]),  # Take the first true label
    predicted_label=("top1_prediction", lambda x: x.mode()[0] if not x.mode().empty else "N/A")  # Take the most frequent top-1 prediction (mode)
).reset_index()

# Calculate metrics.
# The per-group hit flag is computed once and then averaged per class. This
# gives the same numbers as a groupby(...).apply(...) over the frame, but does
# not rely on apply() handing the grouping column to the function, which
# pandas has deprecated.
is_correct = grouped_results["true_label"] == grouped_results["predicted_label"]
accuracy = is_correct.mean()
# Hit rate per predicted class, averaged over the classes that were predicted.
precision = is_correct.groupby(grouped_results["predicted_label"]).mean().mean()
# Hit rate per true class, averaged over the classes that are present.
recall = is_correct.groupby(grouped_results["true_label"]).mean().mean()
# Harmonic mean of the two averaged values above (not a mean of per-class F1).
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0


# Print metrics
metrics = {
    "Accuracy": accuracy,
    "Precision": precision,
    "Recall": recall,
    "F1 Score": f1
}

metrics



In [None]:
# Display the first ten aggregated (per test_id) rows.
grouped_results.head(10)

In [None]:
from sklearn.metrics import f1_score

# Numeric class labels of the aggregated (per test_id) results
true_labels = grouped_results["true_label"].values
predicted_labels = grouped_results["predicted_label"].values

# Calculate metrics for individual classes.
# The ids are sorted and passed as `labels=` so that position i of every
# per-class array belongs to class_labels[i], even if a class never occurs
# among the aggregated results.
class_labels = sorted(id2label.keys())  # Class indices
class_names = [id2label[label] for label in class_labels]
precision_per_class = precision_score(true_labels, predicted_labels, labels=class_labels, average=None, zero_division=0)
recall_per_class = recall_score(true_labels, predicted_labels, labels=class_labels, average=None, zero_division=0)
f1_per_class = f1_score(true_labels, predicted_labels, labels=class_labels, average=None, zero_division=0)

# Binarize labels for ROC-AUC calculation (one vs. all). Looking rows up in the
# identity matrix assumes the class ids are 0..n-1.
# NOTE: the "scores" are hard 0/1 predictions rather than probabilities.
true_labels_binarized = np.eye(len(class_labels))[true_labels]  # One-hot encoding
predicted_labels_binarized = np.eye(len(class_labels))[predicted_labels]

roc_auc_per_class = []
for i in range(len(class_labels)):
    try:
        roc_auc = roc_auc_score(true_labels_binarized[:, i], predicted_labels_binarized[:, i])
    except ValueError:  # If there is no positive sample
        roc_auc = np.nan
    roc_auc_per_class.append(roc_auc)

# Print class-wise metrics
print("\nClass-wise metrics:")
for i, label in enumerate(class_labels):
    print(f"Class {id2label[label]}:")
    print(f"  Precision: {precision_per_class[i]:.2f}")
    print(f"  Recall: {recall_per_class[i]:.2f}")
    print(f"  F1 Score: {f1_per_class[i]:.2f}")
    print(f"  ROC-AUC: {roc_auc_per_class[i]:.2f}" if not np.isnan(roc_auc_per_class[i]) else "  ROC-AUC: N/A")

# Calculate confusion matrix with the same explicit class order, so it always
# has one row/column per known class and matches the tick labels below.
conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=class_labels)

# Visualize confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("Confusion Matrix")
plt.show()

# Class name -> (precision, recall, F1, ROC-AUC); kept for inspection only,
# the plot below uses the per-metric arrays directly.
metrics_df = {
    "Precision": precision_per_class,
    "Recall": recall_per_class,
    "F1 Score": f1_per_class,
    "ROC-AUC": roc_auc_per_class,
}
metrics_df = {id2label[label]: values for label, values in zip(class_labels, zip(*metrics_df.values()))}

# Visualization (class-wise metrics): one line per metric across the classes.
plt.figure(figsize=(12, 6))
metrics_labels = ["Precision", "Recall", "F1 Score", "ROC-AUC"]
metrics_values = [precision_per_class, recall_per_class, f1_per_class, roc_auc_per_class]

for metric, values in zip(metrics_labels, metrics_values):
    plt.plot(class_labels, values, marker='o', label=metric)

plt.xticks(class_labels, class_names, rotation=45)
plt.xlabel("Class")
plt.ylabel("Metric Value")
plt.title("Class-wise Metrics")
plt.legend(title="Metrics")
plt.grid()
plt.tight_layout()
plt.show()
