In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

# --- Stratified random-guessing baseline -------------------------------------
# Predicts a label for every validation row by sampling from the class
# distribution observed in the training labels, then scores those guesses.

# Seed for NumPy's legacy global generator, so the sampled predictions are
# reproducible from a fresh kernel.
BASELINE_SEED = 42
# Value scikit-learn substitutes for undefined per-class metrics (e.g. the
# precision of a class that is never predicted). Using 0 keeps classes the
# baseline never predicts from being credited with a perfect score, which
# would raise the weighted/macro averages.
BASELINE_ZERO_DIVISION = 0

np.random.seed(BASELINE_SEED)

train_file_location = "../../data/cleaned/80_20_cleaned_train.parquet"
val_file_location = "../../data/cleaned/80_20_cleaned_test.parquet"
# NOTE(review): the "validation" frame is read from the *_test parquet file —
# confirm this is the intended evaluation split.
train_df = pd.read_parquet(train_file_location)
val_df = pd.read_parquet(val_file_location)

# The 'en' column is used as the class label for both splits.
y_train = train_df['en']
y_val = val_df['en']

# Relative frequency of each label in the training split (sums to 1).
bird_counts = y_train.value_counts(normalize=True)

# Draw one label per validation row, with probability equal to its training
# frequency.
y_pred_random = np.random.choice(bird_counts.index, size=len(y_val), p=bird_counts.values)

accuracy = accuracy_score(y_val, y_pred_random)
precision = precision_score(
    y_val, y_pred_random, average='weighted', zero_division=BASELINE_ZERO_DIVISION
)
recall = recall_score(
    y_val, y_pred_random, average='weighted', zero_division=BASELINE_ZERO_DIVISION
)
f1 = f1_score(
    y_val, y_pred_random, average='weighted', zero_division=BASELINE_ZERO_DIVISION
)

print("Stratified Random Guessing Baseline Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

print("\nClassification Report:")
print(classification_report(y_val, y_pred_random, zero_division=BASELINE_ZERO_DIVISION))

print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_pred_random))


Stratified Random Guessing Baseline Performance:
Accuracy: 0.0088
Precision: 0.0102
Recall: 0.0088
F1 Score: 0.0089

Classification Report:
                                precision    recall  f1-score   support

               Alpine Accentor       0.00      0.00      0.00         8
                 Alpine Chough       0.00      0.00      0.00         4
                   Arctic Tern       0.00      0.00      0.00         4
               Atlantic Canary       0.00      0.00      0.00         3
               Baillon's Crake       0.00      0.00      0.00        10
             Bar-tailed Godwit       0.00      0.00      0.00         8
                  Barn Swallow       0.00      0.00      0.00        16
                Barnacle Goose       0.00      0.00      0.00         4
                Barred Warbler       0.00      0.00      0.00         6
              Bearded Reedling       0.00      0.00      0.00         8
                    Black Kite       0.00      0.00      0.00      