In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold

np.random.seed(42)

file_location = "../../data/cleaned/80_20_cleaned_train.parquet"
data_df = pd.read_parquet(file_location)

y = data_df['en']

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []
conf_matrices = []

for train_index, test_index in kf.split(data_df, y):
    train_df, test_df = data_df.iloc[train_index], data_df.iloc[test_index]

    bird_counts = train_df['en'].value_counts(normalize=True)

    y_true = test_df['en']
    y_pred_probabilities = test_df['en'].map(bird_counts).fillna(0)

    most_probable_bird = bird_counts.idxmax()
    predicted_classes = y_pred_probabilities.apply(lambda prob: most_probable_bird if prob == 0 else bird_counts.idxmax())

    y_true = y_true.astype(str)
    predicted_classes = predicted_classes.astype(str)

    accuracy = accuracy_score(y_true, predicted_classes)
    precision = precision_score(y_true, predicted_classes, average='weighted', zero_division=1)
    recall = recall_score(y_true, predicted_classes, average='weighted', zero_division=1)
    f1 = f1_score(y_true, predicted_classes, average='weighted', zero_division=1)

    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

    conf_matrices.append(confusion_matrix(y_true, predicted_classes))

print("Baseline Model Performance (10-Fold Cross-Validation):")
print(f"Average Accuracy: {np.mean(accuracy_scores):.4f}")
print(f"Average Precision: {np.mean(precision_scores):.4f}")
print(f"Average Recall: {np.mean(recall_scores):.4f}")
print(f"Average F1 Score: {np.mean(f1_scores):.4f}")

print("\nClassification Report (last fold):")
print(classification_report(y_true, predicted_classes, zero_division=1))

print("\nConfusion Matrix (last fold):")
print(conf_matrices[-1])


Baseline Model Performance (10-Fold Cross-Validation):
Average Accuracy: 0.0175
Average Precision: 0.9828
Average Recall: 0.0175
Average F1 Score: 0.0006

Classification Report (last fold):
                                precision    recall  f1-score   support

              African Blue Tit       1.00      0.00      0.00         2
               Alpine Accentor       1.00      0.00      0.00         8
                 Alpine Chough       1.00      0.00      0.00         5
                  Alpine Swift       1.00      0.00      0.00         5
                   Arctic Tern       1.00      0.00      0.00        10
                Arctic Warbler       1.00      0.00      0.00         3
               Atlantic Canary       1.00      0.00      0.00         5
                Audouin's Gull       1.00      0.00      0.00         2
               Baillon's Crake       1.00      0.00      0.00         7
             Bar-tailed Godwit       1.00      0.00      0.00         9
                 