In [2]:
import random
from functools import partial

import torch
from num2words import num2words

# from transformer_lens import HookedTransformer
from sae_lens import SAE, ActivationsStore, HookedSAETransformer
from tqdm.autonotebook import tqdm
from transformers.utils.logging import disable_progress_bar

from sae_cooccurrence.utils.set_paths import get_git_root

disable_progress_bar()

  warn(


In [3]:
torch.set_grad_enabled(False)

git_root = get_git_root()

if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
# from transformer_lens import HookedTransformer

model = HookedSAETransformer.from_pretrained("gemma-2-2b", device=device)

# the cfg dict is returned alongside the SAE since it may contain useful information for analysing the SAE (eg: instantiating an activation store)
# Note that this is not the same as the SAEs config dict, rather it is whatever was in the HF repo, from which we can extract the SAE config dict
# We also return the feature sparsities which are stored in HF for convenience.
sae, cfg_dict, sparsity = SAE.from_pretrained(
    release="gemma-scope-2b-pt-res-canonical",  # <- Release name
    sae_id="layer_12/width_16k/canonical",  # <- SAE id (not always a hook point!)
    device=device,
)

activation_store = ActivationsStore.from_sae(
    model=model,
    sae=sae,
    streaming=True,
    # fairly conservative parameters here so can use same for larger
    # models without running out of memory.
    store_batch_size_prompts=8,
    train_batch_size_tokens=4096,
    n_batches_in_buffer=4,
    device=device,
)



Loaded pretrained model gemma-2-2b into HookedTransformer


Resolving data files:   0%|          | 0/30 [00:00<?, ?it/s]



In [43]:
class TokenQuestionGenerator:
    def __init__(self):
        self.colors = ["black", "white"]
        self.special_cases = {
            "some": "some",
            "all": "none",  # If all tokens are one color, none are the other
        }
        self.sections = ["basic", "numeric", "all", "some"]

    def _number_to_words(self, n: int) -> str:
        """Convert a number to words."""
        return num2words(n)

    def _questions_are_equivalent(self, q1: str, q2: str) -> bool:
        """Compare two questions to check if they are functionally equivalent."""
        # Extract key parts for comparison
        q1_parts = [
            p.lower()
            for p in q1.split()
            if p.lower()
            not in [
                "q:",
                "a:",
                "and",
                "are",
                "of",
                "them",
                "my",
                "tokens",
                "have",
                "how",
                "many",
            ]
        ]
        q2_parts = [
            p.lower()
            for p in q2.split()
            if p.lower()
            not in [
                "q:",
                "a:",
                "and",
                "are",
                "of",
                "them",
                "my",
                "tokens",
                "have",
                "how",
                "many",
            ]
        ]
        return q1_parts == q2_parts

    def generate_numeric_question(
        self, force_complementary: bool = False
    ) -> tuple[str, str]:
        """Generate a question with numeric values."""
        n_total = random.randint(3, 10)  # Minimum 3 tokens for more interesting cases
        if force_complementary:
            # Generate numbers that sum to interesting complements
            n_color_tokens = random.randint(
                1, n_total - 1
            )  # Ensure at least 1 token of each color
        else:
            n_color_tokens = random.randint(
                1, n_total - 1
            )  # Modified to never generate 0 cases

        colors = random.sample(self.colors, 2)
        held_color, test_color = colors

        question = (
            f"Q: I have {self._number_to_words(n_total)} tokens, and "
            f"{self._number_to_words(n_color_tokens)} of them are {held_color}. "
            f"How many of my tokens are {test_color}?"
        )
        answer = f"A: {self._number_to_words(n_total - n_color_tokens)} of them are {test_color}"
        return question, answer

    def generate_all_question(self) -> tuple[str, str]:
        """Generate a question using 'all of'."""
        colors = random.sample(self.colors, 2)
        held_color, test_color = colors
        n_total = random.randint(3, 10)

        question = (
            f"Q: I have {self._number_to_words(n_total)} tokens, and "
            f"all of them are {held_color}. "
            f"How many of my tokens are {test_color}?"
        )
        answer = f"A: none of them are {test_color}"
        return question, answer

    def generate_some_question(self) -> tuple[str, str]:
        """Generate a question using 'some of'."""
        colors = random.sample(self.colors, 2)
        held_color, test_color = colors
        n_total = random.randint(3, 10)

        question = (
            f"Q: I have {self._number_to_words(n_total)} tokens, and "
            f"some of them are {held_color}. "
            f"How many of my tokens are {test_color}?"
        )
        answer = f"A: some of them are {test_color}"
        return question, answer

    def generate_test_question(
        self, 
        force_numeric: bool | None = None, 
        force_special: bool | None = None,
        force_all: bool | None = None,  # New parameter
        force_some: bool | None = None,  # New parameter
    ) -> tuple[str, str]:
        """Generate a test question."""
        # Check for conflicting parameters
        if sum(bool(x) for x in [force_numeric, force_special, force_all, force_some]) > 1:
            raise ValueError("Can only force one type of question")

        if force_numeric:
            return self.generate_numeric_question(force_complementary=True)
        elif force_all:
            return self.generate_all_question()
        elif force_some:
            return self.generate_some_question()
        elif force_special:
            # Randomly choose between 'all' and 'some' for special cases
            return random.choice(
                [self.generate_all_question, self.generate_some_question]
            )()
        else:
            generators = [
                self.generate_numeric_question,
                self.generate_all_question,
                self.generate_some_question,
            ]
            return random.choice(generators)()

    def generate_training_set(self, section_counts: dict) -> list[tuple[str, str, str]]:
        """Generate a structured training set with sections."""
        training_set = []

        # Basic cases (all)
        for _ in range(section_counts.get("basic", 0)):
            q, a = self.generate_all_question()
            training_set.append(("# Basic cases - all/none", q, a))

        # Numeric cases with specific complementary numbers
        for _ in range(section_counts.get("numeric", 0)):
            q, a = self.generate_numeric_question(force_complementary=True)
            training_set.append(("# Cases with specific numbers", q, a))

        # All cases
        for _ in range(section_counts.get("all", 0)):
            q, a = self.generate_all_question()
            training_set.append(("# Cases with 'all of'", q, a))

        # Some cases
        for _ in range(section_counts.get("some", 0)):
            q, a = self.generate_some_question()
            training_set.append(("# Cases with 'some of'", q, a))

        return training_set


def generate_training_and_test(
    section_counts: dict = {
        "basic": 2,  # all/none cases
        "numeric": 3,  # specific number cases
        "all": 2,  # 'all of' cases
        "some": 2,  # 'some of' cases
    },
    force_test_type: str | None = None,
    max_attempts: int = 100,
) -> dict:
    """Generate a structured training set and test question."""
    generator = TokenQuestionGenerator()

    # Generate test question based on force_test_type
    force_numeric = force_test_type == "numeric"
    force_all = force_test_type == "all"
    force_some = force_test_type == "some"
    force_special = force_test_type == "special"
    
    test_question, test_answer = generator.generate_test_question(
        force_numeric=force_numeric,
        force_special=force_special,
        force_all=force_all,
        force_some=force_some,
    )

    # Generate training set with sections
    training_set = []
    attempts = 0

    while len(training_set) < sum(section_counts.values()) and attempts < max_attempts:
        current_set = generator.generate_training_set(section_counts)

        # Filter out any questions that match the test question
        filtered_set = [
            (section, q, a)
            for section, q, a in current_set
            if not generator._questions_are_equivalent(q, test_question)
        ]

        if len(filtered_set) == sum(section_counts.values()):
            training_set = filtered_set
            break

        attempts += 1

    if attempts >= max_attempts:
        raise RuntimeError(
            "Failed to generate unique training set after maximum attempts"
        )

    # Format the introduction with explicit instructions
    introduction = """
Tokens can be either black or white. Complete the following sentences using number words (one, two, three, etc.) never digits (1, 2, 3, etc.).
Remember:
- The total number of tokens equals the sum of black and white tokens
- Always write numbers as words (e.g., 'two' not '2')
- When all tokens are one color, none are the other color
- When some tokens are one color, some are also the other color

For example, this is correct: 
Q: I have ten tokens, and five of them are black. How many of my tokens are white?
A: five of them are white

Whereas this is incorrect:
Q: I have ten tokens, and five of them are black. How many of my tokens are white?
A: 5 of them are white
"""

    return {
        "introduction": introduction.strip(),
        "training_questions": training_set,
        "test_question": test_question,
        "test_answer": test_answer,
    }


# Example usage
if __name__ == "__main__":
    # Custom section counts
    section_counts = {
        "basic": 2,  # all/none cases
        "numeric": 3,  # specific number cases
        "all": 2,  # 'all of' cases
        "some": 2,  # 'some of' cases
    }

    result = generate_training_and_test(
        section_counts=section_counts,
        force_test_type="numeric",  # Can be 'numeric', 'all', 'some', or None
    )

    print(result["introduction"])
    print("\nTraining Questions:")
    for section, q, a in result["training_questions"]:
        if section:
            print(f"\n{section}")
        print(f"{q}\n{a}\n")
    print("\nTest Question:")
    print(result["test_question"])
    print("\nExpected Answer:")
    print(result["test_answer"])

Tokens can be either black or white. Complete the following sentences using number words (one, two, three, etc.) never digits (1, 2, 3, etc.).
Remember:
- The total number of tokens equals the sum of black and white tokens
- Always write numbers as words (e.g., 'two' not '2')
- When all tokens are one color, none are the other color
- When some tokens are one color, some are also the other color

For example, this is correct: 
Q: I have ten tokens, and five of them are black. How many of my tokens are white?
A: five of them are white

Whereas this is incorrect:
Q: I have ten tokens, and five of them are black. How many of my tokens are white?
A: 5 of them are white

Training Questions:

# Basic cases - all/none
Q: I have nine tokens, and all of them are black. How many of my tokens are white?
A: none of them are white


# Basic cases - all/none
Q: I have ten tokens, and all of them are black. How many of my tokens are white?
A: none of them are white


# Cases with specific numbers
Q: 

In [6]:
def find_max_activation(model, sae, activation_store, feature_idx, num_batches=100):
    """
    Find the maximum activation for a given feature index. This is useful for
    calibrating the right amount of the feature to add.
    """
    max_activation = 0.0

    pbar = tqdm(range(num_batches))
    for _ in pbar:
        tokens = activation_store.get_batch_tokens()

        _, cache = model.run_with_cache(
            tokens,
            stop_at_layer=sae.cfg.hook_layer + 1,
            names_filter=[sae.cfg.hook_name],
        )
        sae_in = cache[sae.cfg.hook_name]
        feature_acts = sae.encode(sae_in).squeeze()

        feature_acts = feature_acts.flatten(0, 1)
        batch_max_activation = feature_acts[:, feature_idx].max().item()
        max_activation = max(max_activation, batch_max_activation)

        pbar.set_description(f"Max activation: {max_activation:.4f}")

    return max_activation


def steering(
    activations, steering_strength=1.0, steering_vector=None, max_act=1.0, hook=None
):  # noqa: ARG001
    """
    Apply steering to activations.

    Args:
        activations: Input activations
        steering_strength: Strength of steering effect
        steering_vector: Vector to steer towards
        max_act: Maximum activation value
        hook: Hook object (ignored but needed for compatibility)

    Returns:
        Modified activations
    """
    # Note if the feature fires anyway, we'd be adding to that here.
    return activations + max_act * steering_strength * steering_vector


def generate_with_steering(
    model,
    sae,
    prompt,
    steering_feature,
    max_act,
    steering_strength=1.0,
    max_new_tokens=95,
):
    input_ids = model.to_tokens(prompt, prepend_bos=sae.cfg.prepend_bos)

    steering_vector = sae.W_dec[steering_feature].to(model.cfg.device)

    steering_hook = partial(
        steering,
        steering_vector=steering_vector,
        steering_strength=steering_strength,
        max_act=max_act,
    )

    # standard transformerlens syntax for a hook context for generation
    with model.hooks(fwd_hooks=[(sae.cfg.hook_name, steering_hook)]):
        output = model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            stop_at_eos=False if device == "mps" else True,
            prepend_bos=sae.cfg.prepend_bos,
        )

    return model.tokenizer.decode(output[0])

In [7]:
def evaluate_model_with_steering(
    model: HookedSAETransformer,
    sae: SAE,
    feature_to_steer: int | None = None,
    steering_strength: float = 1.0,
    max_act: float = 60.0,
    n_numeric: int = 10,
    n_special: int = 10,
    max_new_tokens: int = 5,
) -> dict:
    """
    Test model accuracy on token counting tasks with optional feature steering.

    Args:
        model: The transformer model
        sae: The sparse autoencoder
        feature_to_steer: Feature index to steer, or None for no steering
        steering_strength: Strength of steering (default 1.0)
        max_act: Maximum activation for the steered feature
        n_numeric: Number of numeric test questions
        n_special: Number of special test questions
        max_new_tokens: Maximum tokens to generate for each answer

    Returns:
        Dictionary containing accuracy metrics and test results
    """
    results = {
        "numeric_correct": 0,
        "special_correct": 0,
        "numeric_tests": [],
        "special_tests": [],
    }

    for test_type in ["numeric", "special"]:
        n_tests = n_numeric if test_type == "numeric" else n_special
        for _ in range(n_tests):
            test_data = generate_training_and_test(
                num_training_numeric=40,
                num_training_special=20,
                force_test_type=test_type,
            )

            prompt = (
                test_data["introduction"]
                + "\n\n"
                + "\n\n".join(
                    f"{q}\n{a}\n{label}"
                    for q, a, label in test_data["training_questions"]
                )
                + f"\n\n{test_data['test_question']}\nA: "
            )

            if feature_to_steer is not None:
                generated = generate_with_steering(
                    model,
                    sae,
                    prompt,
                    feature_to_steer,
                    max_act,
                    steering_strength=steering_strength,
                    max_new_tokens=max_new_tokens,
                )
            else:
                # Convert prompt to tokens first
                input_ids = model.to_tokens(prompt, prepend_bos=sae.cfg.prepend_bos)
                output = model.generate(
                    input_ids,
                    max_new_tokens=max_new_tokens,
                    # temperature=0.7,
                    # top_p=0.9,
                    stop_at_eos=False if device == "mps" else True,
                    prepend_bos=sae.cfg.prepend_bos,
                )
                generated = model.tokenizer.decode(output[0])

            test_result = {
                "prompt": prompt,
                "expected": test_data["test_answer"],
                "generated": generated,
                "correct": test_data["test_answer"] in generated,
            }

            if test_type == "numeric":
                results["numeric_tests"].append(test_result)
                if test_result["correct"]:
                    results["numeric_correct"] += 1
            else:
                results["special_tests"].append(test_result)
                if test_result["correct"]:
                    results["special_correct"] += 1

    # Calculate accuracies
    results["numeric_accuracy"] = results["numeric_correct"] / n_numeric
    results["special_accuracy"] = results["special_correct"] / n_special
    results["total_accuracy"] = (
        results["numeric_correct"] + results["special_correct"]
    ) / (n_numeric + n_special)

    return results


# # Example usage:
# if __name__ == "__main__":
#     # Test without steering
#     normal_results = evaluate_model_with_steering(model, sae)
#     print("\nResults without steering:")
#     print(f"Numeric accuracy: {normal_results['numeric_accuracy']:.2%}")
#     print(f"Special accuracy: {normal_results['special_accuracy']:.2%}")
#     print(f"Total accuracy: {normal_results['total_accuracy']:.2%}")

#     print("\nNumeric test results:")
#     for i, test in enumerate(normal_results["numeric_tests"], 1):
#         print(f"\nTest {i}:")
#         question = test["prompt"].split("A: ")[0].splitlines()[-1]
#         print(f"Question: {question}")
#         print(f"Expected: {test['expected']}")
#         print(f"Generated: {test['generated']}")
#         print(f"Correct: {test['correct']}")

#     print("\nSpecial test results:")
#     for i, test in enumerate(normal_results["special_tests"], 1):
#         print(f"\nTest {i}:")
#         question = test["prompt"].split("A: ")[0].splitlines()[-1]
#         print(f"Question: {question}")
#         print(f"Expected: {test['expected']}")
#         print(f"Generated: {test['generated']}")
#         print(f"Correct: {test['correct']}")

#     # Test with feature steering
#     feature_to_steer = 12257  # Replace with your feature of interest
#     steering_strength = 0.0
#     steered_results = evaluate_model_with_steering(
#         model,
#         sae,
#         feature_to_steer=feature_to_steer,
#         steering_strength=steering_strength,
#     )
#     print(
#         f"\nResults with feature {feature_to_steer} steered (strength {steering_strength}):"
#     )
#     print(f"Numeric accuracy: {steered_results['numeric_accuracy']:.2%}")
#     print(f"Special accuracy: {steered_results['special_accuracy']:.2%}")
#     print(f"Total accuracy: {steered_results['total_accuracy']:.2%}")

#     print("\nNumeric test results (with steering):")
#     for i, test in enumerate(steered_results["numeric_tests"], 1):
#         print(f"\nTest {i}:")
#         question = test["prompt"].split("A: ")[0].splitlines()[-1]
#         print(f"Question: {question}")
#         print(f"Expected: {test['expected']}")
#         print(f"Generated: {test['generated']}")
#         print(f"Correct: {test['correct']}")

#     print("\nSpecial test results (with steering):")
#     for i, test in enumerate(steered_results["special_tests"], 1):
#         print(f"\nTest {i}:")
#         question = test["prompt"].split("A: ")[0].splitlines()[-1]
#         print(f"Question: {question}")
#         print(f"Expected: {test['expected']}")
#         print(f"Generated: {test['generated']}")
#         print(f"Correct: {test['correct']}")

#     # Test with feature steering
#     feature_to_steer = 15441  # Replace with your feature of interest
#     steering_strength = 0.0
#     steered_results = evaluate_model_with_steering(
#         model,
#         sae,
#         feature_to_steer=feature_to_steer,
#         steering_strength=steering_strength,
#     )
#     print(
#         f"\nResults with feature {feature_to_steer} steered (strength {steering_strength}):"
#     )
#     print(f"Numeric accuracy: {steered_results['numeric_accuracy']:.2%}")
#     print(f"Special accuracy: {steered_results['special_accuracy']:.2%}")
#     print(f"Total accuracy: {steered_results['total_accuracy']:.2%}")

#     print("\nNumeric test results (with steering):")
#     for i, test in enumerate(steered_results["numeric_tests"], 1):
#         print(f"\nTest {i}:")
#         question = test["prompt"].split("A: ")[0].splitlines()[-1]
#         print(f"Question: {question}")
#         print(f"Expected: {test['expected']}")
#         print(f"Generated: {test['generated']}")
#         print(f"Correct: {test['correct']}")

#     print("\nSpecial test results (with steering):")
#     for i, test in enumerate(steered_results["special_tests"], 1):
#         print(f"\nTest {i}:")
#         question = test["prompt"].split("A: ")[0].splitlines()[-1]
#         print(f"Question: {question}")
#         print(f"Expected: {test['expected']}")
#         print(f"Generated: {test['generated']}")
#         print(f"Correct: {test['correct']}")

In [26]:
def evaluate_model_with_intervention(
    model: HookedSAETransformer,
    sae: SAE,
    feature_ids: int | list[int] | None = None,
    intervention_type: str = "none",  # "none", "steering", or "ablation"
    steering_strength: float = 1.0,
    max_act: float = 60.0,
    n_numeric: int = 10,
    n_all: int = 5,  # Number of 'all of' test questions
    n_some: int = 5,  # Number of 'some of' test questions
    max_new_tokens: int = 6,
    temperature: float = 0.0,
    top_p: float = 0.9,
    section_counts: dict | None = None,
) -> dict:
    """
    Test model accuracy on token counting tasks with optional feature steering or ablation.
    Allows separate control over 'all of' and 'some of' test questions.
    """
    if section_counts is None:
        section_counts = {
            "basic": 2,  # all/none cases
            "numeric": 3,  # specific number cases
            "all": 2,  # 'all of' cases
            "some": 2,  # 'some of' cases
        }

    results = {
        "numeric_correct_strict": 0,
        "numeric_correct_lenient": 0,
        "all_correct_strict": 0,
        "all_correct_lenient": 0,
        "some_correct_strict": 0,
        "some_correct_lenient": 0,
        "numeric_tests": [],
        "all_tests": [],
        "some_tests": [],
    }

    def convert_words_to_digits(text: str) -> str:
        """Convert number words to digits in the text."""
        word_to_digit = {
            "zero": "0",
            "none": "0",
            "one": "1",
            "two": "2",
            "three": "3",
            "four": "4",
            "five": "5",
            "six": "6",
            "seven": "7",
            "eight": "8",
            "nine": "9",
            "ten": "10",
        }
        for word, digit in word_to_digit.items():
            text = text.replace(word, digit)
        return text

    def check_answer_lenient(generated: str, expected: str, question: str) -> bool:
        """Check if answer is correct under lenient scoring rules."""
        generated = generated.lower()
        expected = expected.lower()

        generated_digits = convert_words_to_digits(generated)
        expected_digits = convert_words_to_digits(expected)

        if expected_digits in generated_digits:
            return True

        if "all of them are" in expected:
            question_words = question.lower().split()
            try:
                have_idx = question_words.index("have")
                total_tokens = convert_words_to_digits(question_words[have_idx + 1])
                return (
                    total_tokens in generated_digits
                    or f"{total_tokens} of them" in generated_digits
                )
            except (ValueError, IndexError):
                return False

        if "none of them are" in expected:
            return (
                "0 of them are" in generated_digits
                or "zero of them are" in generated_digits
            )

        return False

    if isinstance(feature_ids, int):
        feature_ids = [feature_ids]

    def ablate_feature_hook(feature_activations, hook=None, feature_ids=None):  # noqa: ARG001
        feature_activations[:, :, feature_ids] = 0
        return feature_activations

    # Define test types and their counts
    test_types = {"numeric": n_numeric, "all": n_all, "some": n_some}

    for test_type, n_tests in tqdm(test_types.items(), desc="Testing types"):
        for _ in tqdm(range(n_tests), desc=f"Testing {test_type}"):
            test_data = generate_training_and_test(
                section_counts=section_counts,
                force_test_type=test_type,
            )

            prompt_parts = [test_data["introduction"]]

            sections = {}
            for section, q, a in test_data["training_questions"]:
                if section not in sections:
                    sections[section] = []
                sections[section].append((q, a))

            for section, questions in sections.items():
                prompt_parts.append(f"\n\n{section}")
                for q, a in questions:
                    prompt_parts.append(f"{q}\n{a}")

            prompt_parts.append(f"\n\n{test_data['test_question']}\nA: ")

            prompt = "\n\n".join(prompt_parts)
            input_ids = model.to_tokens(prompt, prepend_bos=sae.cfg.prepend_bos)

            # Handle different intervention types
            if intervention_type == "none" or feature_ids is None:
                output = model.generate(
                    input_ids,
                    max_new_tokens=max_new_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    stop_at_eos=False if device == "mps" else True,
                    prepend_bos=sae.cfg.prepend_bos,
                )
            elif intervention_type == "steering":
                steering_vector = sae.W_dec[feature_ids[0]].to(model.cfg.device)
                steering_hook = partial(
                    steering,
                    steering_vector=steering_vector,
                    steering_strength=steering_strength,
                    max_act=max_act,
                )

                with model.hooks(fwd_hooks=[(sae.cfg.hook_name, steering_hook)]):
                    output = model.generate(
                        input_ids,
                        max_new_tokens=max_new_tokens,
                        temperature=temperature,
                        top_p=top_p,
                        stop_at_eos=False if device == "mps" else True,
                        prepend_bos=sae.cfg.prepend_bos,
                    )
            elif intervention_type == "ablation":
                ablation_hook = partial(ablate_feature_hook, feature_ids=feature_ids)
                model.add_sae(sae)
                hook_point = sae.cfg.hook_name + ".hook_sae_acts_post"

                with model.hooks(fwd_hooks=[(hook_point, ablation_hook)]):
                    output = model.generate(
                        input_ids,
                        max_new_tokens=max_new_tokens,
                        top_p=top_p,
                        temperature=temperature,
                        stop_at_eos=False if device == "mps" else True,
                        prepend_bos=sae.cfg.prepend_bos,
                    )

                model.reset_hooks()
                model.reset_saes()

            generated = model.tokenizer.decode(output[0])
            generated_answer = generated.split("A: ")[-1].strip()
            expected_answer = test_data["test_answer"].replace("A: ", "").strip()
            test_question = test_data["test_question"].split("\n")[0].strip()

            # Create test result
            test_result = {
                "prompt": prompt,
                "expected": expected_answer,
                "generated": generated_answer,
                "correct_strict": expected_answer in generated_answer,
                "correct_lenient": check_answer_lenient(
                    generated_answer, expected_answer, test_question
                ),
                "section_counts": section_counts,
            }

            # Track type-specific results
            results[f"{test_type}_tests"].append(test_result)
            if test_result["correct_strict"]:
                results[f"{test_type}_correct_strict"] += 1
            if test_result["correct_lenient"]:
                results[f"{test_type}_correct_lenient"] += 1

    # Calculate accuracies for each type
    for test_type, n_tests in test_types.items():
        if n_tests > 0:
            results[f"{test_type}_accuracy_strict"] = (
                results[f"{test_type}_correct_strict"] / n_tests
            )
            results[f"{test_type}_accuracy_lenient"] = (
                results[f"{test_type}_correct_lenient"] / n_tests
            )
        else:
            results[f"{test_type}_accuracy_strict"] = None
            results[f"{test_type}_accuracy_lenient"] = None

    # Calculate total accuracies
    total_tests = n_numeric + n_all + n_some
    results["total_accuracy_strict"] = (
        results["numeric_correct_strict"]
        + results["all_correct_strict"]
        + results["some_correct_strict"]
    ) / total_tests
    results["total_accuracy_lenient"] = (
        results["numeric_correct_lenient"]
        + results["all_correct_lenient"]
        + results["some_correct_lenient"]
    ) / total_tests

    # Add configuration to results
    results["section_counts"] = section_counts
    results["test_counts"] = {"numeric": n_numeric, "all": n_all, "some": n_some}

    return results

In [44]:
normal_results = evaluate_model_with_intervention(
    model,
    sae,
    intervention_type="none",
    n_numeric=0,
    n_all=10,
    n_some=0,
    section_counts={"basic": 0, "numeric": 10, "some": 10, "all": 10},
)

Testing types:   0%|          | 0/3 [00:00<?, ?it/s]

Testing numeric: 0it [00:00, ?it/s]

Testing all:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing some: 0it [00:00, ?it/s]

In [45]:
normal_results["all_tests"]

[{'prompt': "Tokens can be either black or white. Complete the following sentences using number words (one, two, three, etc.) never digits (1, 2, 3, etc.).\nRemember:\n- The total number of tokens equals the sum of black and white tokens\n- Always write numbers as words (e.g., 'two' not '2')\n- When all tokens are one color, none are the other color\n- When some tokens are one color, some are also the other color\n\nFor example, this is correct: \nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: five of them are white\n\nWhereas this is incorrect:\nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: 5 of them are white\n\n\n\n# Cases with specific numbers\n\nQ: I have four tokens, and two of them are white. How many of my tokens are black?\nA: two of them are black\n\nQ: I have seven tokens, and five of them are black. How many of my tokens are white?\nA: two of them are white\n\nQ: I have nine tokens, and seven o

In [11]:
ablated_results_hub = evaluate_model_with_intervention(
    model,
    sae,
    feature_ids=[2283, 8084, 13772],  # Can ablate multiple features
    intervention_type="ablation",
    n_numeric=1,
    n_special=1,
)
ablated_results_hub["special_tests"]

Testing types:   0%|          | 0/2 [00:00<?, ?it/s]

Testing numeric:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing special:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

[{'prompt': "Tokens can be either black or white. Complete the following sentences using number words (one, two, three, etc.) never digits (1, 2, 3, etc.).\nRemember:\n- For 0, use 'zero' not '0'\n- The total number of tokens equals the sum of black and white tokens\n- Always write numbers as words (e.g., 'two' not '2')\n\nFor example, this is correct: \nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: five of them are white\n\nWhereas this is incorrect:\nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: 5 of them are white\n\n\n\n# Basic cases - all/none\n\nQ: I have six tokens, and all of them are black. How many of my tokens are white?\nA: none of them are white\n\nQ: I have six tokens, and all of them are white. How many of my tokens are black?\nA: none of them are black\n\n\n\n# Cases with specific numbers\n\nQ: I have five tokens, and four of them are white. How many of my tokens are black?\nA: one of them

In [12]:
steering_results_hub = evaluate_model_with_intervention(
    model,
    sae,
    feature_ids=[12257, 12649],  # Can ablate multiple features
    intervention_type="steering",
    steering_strength=-1.5,
    n_numeric=1,
    n_special=1,
)

Testing types:   0%|          | 0/2 [00:00<?, ?it/s]

Testing numeric:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing special:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

In [13]:
steering_results_hub["special_tests"]


[{'prompt': "Tokens can be either black or white. Complete the following sentences using number words (one, two, three, etc.) never digits (1, 2, 3, etc.).\nRemember:\n- For 0, use 'zero' not '0'\n- The total number of tokens equals the sum of black and white tokens\n- Always write numbers as words (e.g., 'two' not '2')\n\nFor example, this is correct: \nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: five of them are white\n\nWhereas this is incorrect:\nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: 5 of them are white\n\n\n\n# Basic cases - all/none\n\nQ: I have ten tokens, and none of them are white. How many of my tokens are black?\nA: all of them are black\n\nQ: I have eight tokens, and none of them are white. How many of my tokens are black?\nA: all of them are black\n\n\n\n# Cases with specific numbers\n\nQ: I have eight tokens, and one of them are white. How many of my tokens are black?\nA: seven of 

In [47]:
# Test without intervention
normal_results = evaluate_model_with_intervention(
    model,
    sae,
    intervention_type="none",
    section_counts={"basic": 20, "numeric": 20, "some": 20, "all": 20},
    n_numeric=50,
    n_all=50,
    n_some=50,
)

# # Test with steering
# steered_results = evaluate_model_with_intervention(
#     model,
#     sae,
#     feature_ids=12257,
#     intervention_type="steering",
#     steering_strength=1.0,
# )

Testing types:   0%|          | 0/3 [00:00<?, ?it/s]

Testing numeric:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing all:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing some:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

In [49]:
print(normal_results["numeric_accuracy_strict"])
print(normal_results["numeric_accuracy_lenient"])
# print(normal_results["special_accuracy_strict"])
# print(normal_results["special_accuracy_lenient"])
print(normal_results["all_accuracy_strict"])
print(normal_results["all_accuracy_lenient"])
print(normal_results["some_accuracy_strict"])
print(normal_results["some_accuracy_lenient"])
# Test with ablation
normal_results["all_tests"]

0.0
0.74
0.0
0.16
0.04
0.04


[{'prompt': "Tokens can be either black or white. Complete the following sentences using number words (one, two, three, etc.) never digits (1, 2, 3, etc.).\nRemember:\n- The total number of tokens equals the sum of black and white tokens\n- Always write numbers as words (e.g., 'two' not '2')\n- When all tokens are one color, none are the other color\n- When some tokens are one color, some are also the other color\n\nFor example, this is correct: \nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: five of them are white\n\nWhereas this is incorrect:\nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: 5 of them are white\n\n\n\n# Basic cases - all/none\n\nQ: I have ten tokens, and all of them are white. How many of my tokens are black?\nA: none of them are black\n\nQ: I have five tokens, and all of them are white. How many of my tokens are black?\nA: none of them are black\n\nQ: I have ten tokens, and all of them ar

In [21]:
normal_results["numeric_tests"]

[{'prompt': "Tokens can be either black or white. Complete the following sentences using number words (one, two, three, etc.) never digits (1, 2, 3, etc.).\nRemember:\n- For 0, use 'zero' not '0'\n- The total number of tokens equals the sum of black and white tokens\n- Always write numbers as words (e.g., 'two' not '2')\n\nFor example, this is correct: \nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: five of them are white\n\nWhereas this is incorrect:\nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: 5 of them are white\n\n\n\n# Basic cases - all/none\n\nQ: I have nine tokens, and some of them are black. How many of my tokens are white?\nA: some of them are white\n\nQ: I have nine tokens, and all of them are white. How many of my tokens are black?\nA: none of them are black\n\nQ: I have eight tokens, and none of them are white. How many of my tokens are black?\nA: all of them are black\n\nQ: I have five toke

In [17]:
normal_results["special_tests"]

[{'prompt': "Tokens can be either black or white. Complete the following sentences using number words (one, two, three, etc.) never digits (1, 2, 3, etc.).\nRemember:\n- For 0, use 'zero' not '0'\n- The total number of tokens equals the sum of black and white tokens\n- Always write numbers as words (e.g., 'two' not '2')\n\nFor example, this is correct: \nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: five of them are white\n\nWhereas this is incorrect:\nQ: I have ten tokens, and five of them are black. How many of my tokens are white?\nA: 5 of them are white\n\n\n\n# Basic cases - all/none\n\nQ: I have nine tokens, and some of them are white. How many of my tokens are black?\nA: some of them are black\n\nQ: I have eight tokens, and none of them are white. How many of my tokens are black?\nA: all of them are black\n\nQ: I have seven tokens, and none of them are white. How many of my tokens are black?\nA: all of them are black\n\nQ: I have three to

In [52]:
# Test with ablation
ablated_results_hub = evaluate_model_with_intervention(
    model,
    sae,
    feature_ids=[12257],  # Can ablate multiple features
    intervention_type="ablation",
    section_counts={"basic": 20, "numeric": 20, "some": 20, "all": 20},
    n_numeric=50,
    n_all=50,
    n_some=50,
)

ablated_results_some = evaluate_model_with_intervention(
    model,
    sae,
    feature_ids=[15441],  # Can ablate multiple features
    intervention_type="ablation",
    section_counts={"basic": 20, "numeric": 20, "some": 20, "all": 20},
    n_numeric=50,
    n_all=50,
    n_some=50,
)

ablated_results_all = evaluate_model_with_intervention(
    model,
    sae,
    feature_ids=[12649],  # Can ablate multiple features
    intervention_type="ablation",
    section_counts={"basic": 20, "numeric": 20, "some": 20, "all": 20},
    n_numeric=50,
    n_all=50,
    n_some=50,
)

ablation_results_spokes = evaluate_model_with_intervention(
    model,
    sae,
    feature_ids=[12649, 15441],  # Can ablate multiple features
    intervention_type="ablation",
    section_counts={"basic": 20, "numeric": 20, "some": 20, "all": 20},
    n_numeric=50,
    n_all=50,
    n_some=50,
)

ablation_results_hub_spoke_some = evaluate_model_with_intervention(
    model,
    sae,
    feature_ids=[12257, 15441],  # Can ablate multiple features
    intervention_type="ablation",
    section_counts={"basic": 20, "numeric": 20, "some": 20, "all": 20},
    n_numeric=50,
    n_all=50,
    n_some=50,
)

ablated_results_hub_spoke_all = evaluate_model_with_intervention(
    model,
    sae,
    feature_ids=[12257, 12649],  # Can ablate multiple features
    intervention_type="ablation",
    section_counts={"basic": 20, "numeric": 20, "some": 20, "all": 20},
    n_numeric=50,
    n_all=50,
    n_some=50,
)

Testing types:   0%|          | 0/3 [00:00<?, ?it/s]

Testing numeric:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing all:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing some:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing types:   0%|          | 0/3 [00:00<?, ?it/s]

Testing numeric:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing all:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing some:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing types:   0%|          | 0/3 [00:00<?, ?it/s]

Testing numeric:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing all:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing some:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing types:   0%|          | 0/3 [00:00<?, ?it/s]

Testing numeric:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing all:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing some:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing types:   0%|          | 0/3 [00:00<?, ?it/s]

Testing numeric:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing all:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing some:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing types:   0%|          | 0/3 [00:00<?, ?it/s]

Testing numeric:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing all:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Testing some:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Compare the effect of ablation of the hub alone both spokes alone hub and both spokes together and both spokes together I hope hub and spokes will be more effective than any of these 

In [53]:
print(normal_results["numeric_accuracy_strict"])
print(normal_results["numeric_accuracy_lenient"])
# print(normal_results["special_accuracy_strict"])
# print(normal_results["special_accuracy_lenient"])
print(normal_results["all_accuracy_strict"])
print(normal_results["all_accuracy_lenient"])
print(normal_results["some_accuracy_strict"])
print(normal_results["some_accuracy_lenient"])


0.0
0.74
0.0
0.16
0.04
0.04


In [54]:
print(ablated_results_hub["numeric_accuracy_strict"])
print(ablated_results_hub["numeric_accuracy_lenient"])
print(ablated_results_hub["all_accuracy_strict"])
print(ablated_results_hub["all_accuracy_lenient"])
print(ablated_results_hub["some_accuracy_strict"])
print(ablated_results_hub["some_accuracy_lenient"])

0.0
0.88
0.0
0.24
0.02
0.02


In [55]:
print(ablated_results_some["numeric_accuracy_strict"])
print(ablated_results_some["numeric_accuracy_lenient"])
print(ablated_results_some["all_accuracy_strict"])
print(ablated_results_some["all_accuracy_lenient"])
print(ablated_results_some["some_accuracy_strict"])
print(ablated_results_some["some_accuracy_lenient"])

0.0
0.82
0.0
0.32
0.02
0.02


In [56]:
print(ablated_results_all["numeric_accuracy_strict"])
print(ablated_results_all["numeric_accuracy_lenient"])
print(ablated_results_all["all_accuracy_strict"])
print(ablated_results_all["all_accuracy_lenient"])
print(ablated_results_all["some_accuracy_strict"])
print(ablated_results_all["some_accuracy_lenient"])


0.0
0.82
0.0
0.22
0.0
0.0


In [57]:
print(ablation_results_spokes["numeric_accuracy_strict"])
print(ablation_results_spokes["numeric_accuracy_lenient"])
print(ablation_results_spokes["all_accuracy_strict"])
print(ablation_results_spokes["all_accuracy_lenient"])
print(ablation_results_spokes["some_accuracy_strict"])
print(ablation_results_spokes["some_accuracy_lenient"])


0.0
0.82
0.0
0.24
0.02
0.02


In [58]:
print(ablation_results_hub_spoke_some["numeric_accuracy_strict"])
print(ablation_results_hub_spoke_some["numeric_accuracy_lenient"])
print(ablation_results_hub_spoke_some["all_accuracy_strict"])
print(ablation_results_hub_spoke_some["all_accuracy_lenient"])
print(ablation_results_hub_spoke_some["some_accuracy_strict"])
print(ablation_results_hub_spoke_some["some_accuracy_lenient"])


0.0
0.9
0.0
0.34
0.02
0.02


In [59]:
print(ablated_results_hub_spoke_all["numeric_accuracy_strict"])
print(ablated_results_hub_spoke_all["numeric_accuracy_lenient"])
print(ablated_results_hub_spoke_all["all_accuracy_strict"])
print(ablated_results_hub_spoke_all["all_accuracy_lenient"])
print(ablated_results_hub_spoke_all["some_accuracy_strict"])
print(ablated_results_hub_spoke_all["some_accuracy_lenient"])


0.0
0.92
0.0
0.38
0.02
0.02
