In [None]:
# Load every configured dataset through H5DataProcessor and split each one
# into train/test portions; the per-dataset splits are merged afterwards.
processor = H5DataProcessor(verbose=args.verbose)
all_train_data = []  # per-dataset training splits, in config order
all_test_data = []   # per-dataset test splits, in config order
class_names = set()  # distinct labels seen across all datasets

for dataset_config in config['datasets']:
    if args.verbose:
        print(f"\nProcessing dataset: {dataset_config['dataset']}")

    # Build the DatasetConfig once and reuse it for both the path lookup and
    # the load, instead of constructing two identical objects per dataset.
    ds_config = DatasetConfig(**dataset_config)
    dataset = processor.process_h5_file(
        processor.get_embedding_path(ds_config),
        ds_config,
    )

    # The same test ratio and random seed are applied to every dataset.
    train_data, test_data = processor.get_train_test_split(
        dataset,
        test_ratio=args.test_ratio,
        random_seed=args.random_seed,
    )

    all_train_data.append(train_data)
    all_test_data.append(test_data)
    # Labels are taken from the unsplit dataset, so a class that ends up in
    # only one of the two splits is still recorded.
    class_names.update(dataset.labels)

def _combine_datasets(datasets, name="combined"):
    """Merge a list of per-dataset splits into a single ProcessedDataset.

    Args:
        datasets: Non-empty sequence of objects exposing ``embeddings``,
            ``labels``, ``filenames`` and ``num_samples``.
        name: Value stored in the ``name`` field of the merged dataset.

    Returns:
        A ProcessedDataset whose embeddings are the inputs' embeddings
        stacked row-wise (``np.vstack``), whose labels and filenames are the
        inputs' lists concatenated in order, and whose ``num_samples`` is the
        sum of the inputs' ``num_samples``.

    Raises:
        ValueError: If ``datasets`` is empty. ``np.vstack`` would raise the
            same exception type with a less helpful message.
    """
    if not datasets:
        raise ValueError("cannot combine an empty list of datasets")

    return ProcessedDataset(
        embeddings=np.vstack([d.embeddings for d in datasets]),
        labels=[label for d in datasets for label in d.labels],
        filenames=[filename for d in datasets for filename in d.filenames],
        name=name,
        num_samples=sum(d.num_samples for d in datasets),
    )


# Combine the per-dataset splits; both merged datasets keep the name
# "combined", as before.
combined_train = _combine_datasets(all_train_data)
combined_test = _combine_datasets(all_test_data)

# Instantiate the classifier with its fixed hyperparameters, then capture
# whatever get_model_config reports for that instance.
model = RatingsClassifier(
    max_iter=1000,
    l1_penalty=0.1,
)
model_config = get_model_config(model)

# Hand both combined splits and the model to train_evaluate_model; the call
# yields two results, bound here as conf_matrix and metrics.
conf_matrix, metrics = train_evaluate_model(
    combined_train,
    combined_test,
    model,
    args.verbose,
)