In [1]:
import os
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Function to calculate metrics
def calculate_metrics(y_true, y_pred, average='weighted', zero_division='warn'):
    """Compute accuracy, precision, recall and F1 for a set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned element-wise with ``y_true``.
        average: Averaging mode forwarded to ``precision_score``,
            ``recall_score`` and ``f1_score``. Defaults to ``'weighted'``,
            the mode this function always used before the parameter existed.
        zero_division: Forwarded to the same three scorers; controls the
            value reported (and whether a warning is raised) when a score's
            denominator is zero, e.g. a class that is never predicted.
            ``'warn'`` is scikit-learn's own default, so calls that omit it
            behave as before. NOTE(review): requires a scikit-learn release
            that supports ``zero_division`` (0.22+) -- confirm the
            environment.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)``.
    """
    # Shared keyword arguments keep the three averaged scorers consistent.
    scorer_kwargs = {'average': average, 'zero_division': zero_division}

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, **scorer_kwargs)
    recall = recall_score(y_true, y_pred, **scorer_kwargs)
    f1 = f1_score(y_true, y_pred, **scorer_kwargs)
    return accuracy, precision, recall, f1

# Path to the folder containing the CSV files
folder_path = '/home/ben/reef-audio-representation-learning/code/notebooks/embedding_extractor/embeddings'

# Train and evaluate one Random Forest per CSV file in the folder.
# os.listdir() returns entries in arbitrary order, so the names are sorted
# to make the order of the printed report reproducible between runs/machines.
for filename in sorted(os.listdir(folder_path)):
    # Skip anything that is not a CSV file
    if not filename.endswith('.csv'):
        continue

    # Full path to the file
    filepath = os.path.join(folder_path, filename)

    # Read the CSV file into a DataFrame
    df = pd.read_csv(filepath)

    # The 'Label' column is the target; every other column is used as a feature
    X = df.drop(columns=['Label'])
    y = df['Label']

    # Split the data into training and testing sets (80:20 ratio),
    # stratified on the label with a fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Initialize and train the Random Forest Classifier (fixed seed)
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train, y_train)

    # Predict on both splits so train and test scores can be compared
    y_pred_test = clf.predict(X_test)
    y_pred_train = clf.predict(X_train)

    # Calculate metrics for test set
    accuracy_test, precision_test, recall_test, f1_test = calculate_metrics(y_test, y_pred_test)
    # Calculate metrics for training set
    accuracy_train, precision_train, recall_train, f1_train = calculate_metrics(y_train, y_pred_train)

    # Print metrics: same text as before, emitted section by section
    print(f"Results for {filename}:")
    for heading, scores in (
        ("--- Test Metrics ---",
         (accuracy_test, precision_test, recall_test, f1_test)),
        ("--- Training Metrics ---",
         (accuracy_train, precision_train, recall_train, f1_train)),
    ):
        print(heading)
        for metric_name, value in zip(("Accuracy", "Precision", "Recall", "F1 Score"), scores):
            print(f"{metric_name}: {value}")
    print("-" * 40)

Results for ReefCLR_australia_embeddings.csv:
--- Test Metrics ---
Accuracy: 0.6583333333333333
Precision: 0.6583773270352876
Recall: 0.6583333333333333
F1 Score: 0.658309604833669
--- Training Metrics ---
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
----------------------------------------
Results for ReefCLR_bermuda_embeddings.csv:
--- Test Metrics ---
Accuracy: 0.5454545454545454
Precision: 0.5175936961716269
Recall: 0.5454545454545454
F1 Score: 0.5293324709638116
--- Training Metrics ---
Accuracy: 0.9658848614072495
Precision: 0.9669208553986263
Recall: 0.9658848614072495
F1 Score: 0.9661208660585964
----------------------------------------
Results for ReefCLR__indonesia_embeddings.csv:
--- Test Metrics ---
Accuracy: 0.9675810473815462
Precision: 0.9698801068363995
Recall: 0.9675810473815462
F1 Score: 0.9683992734985941
--- Training Metrics ---
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
----------------------------------------
Results for ReefCLR_kenya_embeddi