In [21]:
from hyper_label_model import HyperLabelModel
hlm = HyperLabelModel()

In [2]:
import numpy as np
import os
from sklearn.metrics import accuracy_score

def one_vs_all_transform(labels, current_class):
    return np.where((labels == current_class) & (labels != -1), 1, 0)

def process_dataset(file_path):
    # Load the dataset
    dataset = np.load(file_path, allow_pickle=True).item()
    data = dataset['data']
    original_labels = dataset['labels']
    num_classes = dataset['k']
    
    all_probs = []
    
    for class_idx in range(num_classes):
        # Create one-vs-all labels
        one_vs_all_labels = one_vs_all_transform(original_labels, class_idx)
        
        # Run inference
        probs = hlm.infer(data, return_probs=True)
        all_probs.append(probs)
    
    # Stack probabilities and get final predictions
    all_probs = np.stack(all_probs, axis=-1)
    final_predictions = np.argmax(all_probs, axis=-1)
    
    # Calculate accuracy
    accuracy = np.mean(final_predictions == original_labels)
    
    return accuracy


In [22]:
def process_dataset(file_path):
    # Load the dataset
    dataset = np.load(file_path, allow_pickle=True).item()
    data = dataset['data']
    original_labels = dataset['labels']
    
    preds = hlm.infer(data) 
    # Calculate accuracy
    accuracy = np.mean(preds == original_labels.squeeze())
    
    return accuracy

In [23]:
# Main execution
data_folder = 'data'
accuracies = []
for filename in os.listdir(data_folder):
    if filename.endswith('.npy'):
        file_path = os.path.join(data_folder, filename)
        dataset_name = os.path.splitext(filename)[0]
        
        accuracy = process_dataset(file_path)
        accuracies.append(accuracy)
        print(f"Dataset: {dataset_name}, Accuracy: {accuracy:.4f}")

print("\nMean Accuracy: ", np.mean(accuracies))

Dataset: artificial-characters_train_dataset, Accuracy: 0.7906
Dataset: csgo_train_dataset, Accuracy: 0.8708
Dataset: eye_movements_train_dataset, Accuracy: 0.7109
Dataset: GesturePhaseSegmentationProcessed_train_dataset, Accuracy: 0.6625
Dataset: hs3_train_dataset, Accuracy: 0.9917
Dataset: mboosting_train_dataset, Accuracy: 0.7817
Dataset: microaggregation2_train_dataset, Accuracy: 0.6293
Dataset: mniste_train_dataset, Accuracy: 0.8521
Dataset: pendigits_train_dataset, Accuracy: 0.9943
Dataset: petfinder_train_dataset, Accuracy: 0.7903
Dataset: tree3k_train_dataset, Accuracy: 0.9465
Dataset: volcanoes-b2_train_dataset, Accuracy: 0.9688

Mean Accuracy:  0.8324486492073054


-----

In [2]:
import numpy as np
import os
from sklearn.metrics import accuracy_score

def one_vs_all_transform(data, current_class):
    # Create a copy of the data to avoid modifying the original
    one_vs_all_data = np.copy(data)
    
    # Transform the data: 1 if element equals current_class, 0 otherwise
    one_vs_all_data = np.where(one_vs_all_data == current_class, 1, 0)
    
    # Don't modify -1 values
    one_vs_all_data[data == -1] = -1
    
    return one_vs_all_data


def process_dataset(file_path):
    # Load the dataset
    dataset = np.load(file_path, allow_pickle=True).item()
    data = dataset['data']
    original_labels = dataset['labels'].astype(np.int64)
    num_classes = dataset['k']
    
    all_probs = []
    
    for class_idx in range(num_classes):
        # Create one-vs-all data
        one_vs_all_data = one_vs_all_transform(data, class_idx)
        
        # Run inference
        probs = hlm.infer(one_vs_all_data, return_probs=True)
        all_probs.append(probs)
    
    # Stack probabilities and get final predictions
    all_probs = np.stack(all_probs, axis=-1)
    final_predictions = np.argmax(all_probs, axis=-1)
    
    # Calculate accuracy
    accuracy = np.mean(final_predictions == original_labels)
    
    return accuracy

In [None]:
'tree3k_train_dataset.npy'

In [3]:
# Main execution
data_folder = 'data'

for filename in os.listdir(data_folder):
    if filename.endswith('.npy'):
        file_path = os.path.join(data_folder, filename)
        dataset_name = os.path.splitext(filename)[0]
        if 'tree3k' in dataset_name:
            print(f"\nProcessing dataset: {dataset_name}")
            accuracy = process_dataset(file_path)
            
            print(f"\nDataset: {dataset_name}, Accuracy: {accuracy:.4f}")
            print("=" * 50)



Processing dataset: tree3k_train_dataset

Dataset: tree3k_train_dataset, Accuracy: 0.4777


In [None]:
hlm.infer(np.)

In [9]:
arr = np.random.randint(-1, 4, size=(20,5))

In [14]:
dataset = np.load('data/tree3k_train_dataset.npy', allow_pickle=True).item()
data = dataset['data']
original_labels = dataset['labels'].astype(np.int64)
num_classes = dataset['k']

In [15]:
data.shape

(14400, 12)

In [19]:
np.mean(hlm.infer(data) == original_labels.squeeze())

0.9465277777777777

In [3]:
import os
import numpy as np

# Main execution
data_folder = 'data'
accuracies = []
for filename in os.listdir(data_folder):
    if filename.endswith('.npy'):
        file_path = os.path.join(data_folder, filename)
        dataset_name = os.path.splitext(filename)[0]
        dataset = np.load(file_path, allow_pickle=True).item()
        data = dataset['data']
        original_labels = dataset['labels']
        print(f'{dataset_name} size: {data.shape[0]}')

artificial-characters_train_dataset size: 8276
csgo_train_dataset size: 689
eye_movements_train_dataset size: 8857
GesturePhaseSegmentationProcessed_train_dataset size: 7996
hs3_train_dataset size: 2885
mboosting_train_dataset size: 35280
microaggregation2_train_dataset size: 16200
mniste_train_dataset size: 35280
pendigits_train_dataset size: 8902
petfinder_train_dataset size: 10794
tree3k_train_dataset size: 14400
volcanoes-b2_train_dataset size: 8640
