In [2]:
import os
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from collections import Counter
from sklearn.model_selection import train_test_split

# ---- Step 1: Load Labels from Dataset ----
def load_labels(hamiltonian_dir, non_hamiltonian_dir):
    """Build the label vector from the number of ``.npy`` files in each directory.

    Only file names are inspected; no file is opened.

    Args:
        hamiltonian_dir: Directory whose ``.npy`` entries are labelled 1.
        non_hamiltonian_dir: Directory whose ``.npy`` entries are labelled 0.

    Returns:
        np.ndarray with one 1 per ``.npy`` entry in ``hamiltonian_dir``
        followed by one 0 per ``.npy`` entry in ``non_hamiltonian_dir``.
    """
    def _count_npy(directory):
        # Count directory entries whose name ends with the .npy suffix
        return sum(1 for entry in os.listdir(directory) if entry.endswith('.npy'))

    n_hamiltonian = _count_npy(hamiltonian_dir)          # Hamiltonian = 1
    n_non_hamiltonian = _count_npy(non_hamiltonian_dir)  # Non-Hamiltonian = 0

    return np.array([1] * n_hamiltonian + [0] * n_non_hamiltonian)

# ---- Step 2: Sample Validation and Testing Datasets ----
def sample_val_test(labels, test_size=500, val_ratio=0.2, seed=42):
    """Draw stratified validation and test label samples.

    The test labels are split off first. A validation sample of
    ``int(val_ratio * len(remainder))`` labels is then drawn from what is
    left; the rest of the remainder is discarded. Both splits are stratified
    and use ``seed`` as their ``random_state``. The class counts of both
    samples are printed.

    Args:
        labels: Array of class labels to sample from.
        test_size: Forwarded to ``train_test_split`` as ``test_size`` for the
            first split.
        val_ratio: Fraction of the post-test remainder kept for validation.
        seed: Seeds NumPy's global RNG and both splits.

    Returns:
        Tuple ``(y_val, y_test)``.
    """
    np.random.seed(seed)

    # First split: carve out the test labels, keep the rest as the validation pool
    remainder, y_test = train_test_split(
        labels, test_size=test_size, random_state=seed, stratify=labels
    )

    # Second split: only the train-side part is kept, as the validation sample
    n_val = int(val_ratio * len(remainder))
    y_val = train_test_split(
        remainder, train_size=n_val, random_state=seed, stratify=remainder
    )[0]

    print(f"Validation set class distribution: {Counter(y_val)}")
    print(f"Test set class distribution: {Counter(y_test)}")

    return y_val, y_test

# ---- Step 3: Random Classifier ----
def apply_random_classifier(y_val, y_test, seed=42):
    """Score a uniformly random binary classifier on validation and test labels.

    Predictions are drawn independently of the inputs: each one is 0 or 1
    from a generator seeded with ``seed``. Accuracy and F1 are computed for
    both label sets, printed, and returned so that callers running several
    seeds can aggregate the results.

    Args:
        y_val: Validation labels (binary, 0/1).
        y_test: Test labels (binary, 0/1).
        seed: Seed for the prediction generator.

    Returns:
        dict with keys ``"val_accuracy"``, ``"val_f1"``, ``"test_accuracy"``
        and ``"test_f1"``.
    """
    # A private RandomState(seed) produces the same draw stream as
    # np.random.seed(seed) followed by np.random.randint, but does not
    # clobber NumPy's global RNG state for the rest of the notebook.
    rng = np.random.RandomState(seed)

    # Validation predictions are drawn before test predictions; the order
    # determines which part of the stream each set receives.
    y_val_pred = rng.randint(2, size=len(y_val))
    y_test_pred = rng.randint(2, size=len(y_test))

    # Calculate accuracy and F1 score for validation set
    val_accuracy = accuracy_score(y_val, y_val_pred)
    val_f1 = f1_score(y_val, y_val_pred)

    # Calculate accuracy and F1 score for test set
    test_accuracy = accuracy_score(y_test, y_test_pred)
    test_f1 = f1_score(y_test, y_test_pred)

    print(f"Validation Accuracy (Random Classifier): {val_accuracy * 100:.2f}%")
    print(f"Validation F1 Score (Random Classifier): {val_f1:.4f}")
    print(f"Test Accuracy (Random Classifier): {test_accuracy * 100:.2f}%")
    print(f"Test F1 Score (Random Classifier): {test_f1:.4f}")

    return {
        "val_accuracy": val_accuracy,
        "val_f1": val_f1,
        "test_accuracy": test_accuracy,
        "test_f1": test_f1,
    }

# ---- Main Workflow ----
# Dataset locations: one directory of .npy files per class
hamiltonian_dir = './hamiltonian_small_mat'
non_hamiltonian_dir = './non_hamiltonian_small_mat'

# Build the label vector once; every seed below resamples from it
labels = load_labels(
    hamiltonian_dir=hamiltonian_dir,
    non_hamiltonian_dir=non_hamiltonian_dir,
)

# Seeds for the repeated random-baseline runs
seeds = [3, 7, 11, 13, 23, 29, 37, 43, 53, 61]

for seed in seeds:
    print(f"\nRunning random classifier with seed: {seed}")

    # Draw this seed's validation and test label samples
    y_val, y_test = sample_val_test(
        labels,
        test_size=1000,
        val_ratio=0.2,
        seed=seed,
    )

    # Score random 0/1 predictions against both samples
    apply_random_classifier(y_val, y_test, seed=seed)



Running random classifier with seed: 3
Validation set class distribution: Counter({np.int64(1): 345, np.int64(0): 278})
Test set class distribution: Counter({np.int64(1): 553, np.int64(0): 447})
Validation Accuracy (Random Classifier): 50.40%
Validation F1 Score (Random Classifier): 0.5149
Test Accuracy (Random Classifier): 48.00%
Test F1 Score (Random Classifier): 0.4951

Running random classifier with seed: 7
Validation set class distribution: Counter({np.int64(1): 345, np.int64(0): 278})
Test set class distribution: Counter({np.int64(1): 553, np.int64(0): 447})
Validation Accuracy (Random Classifier): 51.52%
Validation F1 Score (Random Classifier): 0.5368
Test Accuracy (Random Classifier): 51.80%
Test F1 Score (Random Classifier): 0.5401

Running random classifier with seed: 11
Validation set class distribution: Counter({np.int64(1): 345, np.int64(0): 278})
Test set class distribution: Counter({np.int64(1): 553, np.int64(0): 447})
Validation Accuracy (Random Classifier): 47.99%
Val