In [2]:
import mldaikon
import os
import polars as pl

trace_files = [file_name for file_name in os.listdir() if "_trace_" in file_name]
print(trace_files)

['_ml_daikon_pretrain_gpt_mldaikon_trace_VAR_2024-05-05_20-26-54_2642302.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_VAR_2024-05-05_20-26-54_2642300.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_API_2024-05-05_20-26-54_2643076.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_VAR_2024-05-05_20-26-54_2642301.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_API_2024-05-05_20-26-54_2643075.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_API_2024-05-05_20-26-54_2643395.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_API_2024-05-05_20-26-54_2642298.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_VAR_2024-05-05_20-26-54_2642297.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_VAR_2024-05-05_20-26-54_2642303.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_VAR_2024-05-05_20-26-54_2642299.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_API_2024-05-05_20-26-54_2643094.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_API_2024-05-05_20-26-54_2642302.log', '_ml_daikon_pretrain_gpt_mldaikon_trace_API_2024-05-05_20-26-54

In [3]:
import mldaikon.ml_daikon_trace

trace = mldaikon.ml_daikon_trace.read_trace_file(trace_files)

In [None]:
trace

In [4]:
from typing import NamedTuple
import json

class VarId(NamedTuple):
    process_id: int
    var_name: str
    var_type: str

class PropState(NamedTuple):
    value: type
    timestamp: int


var_ids = trace.find_variable_identifiers()
# for each of such variables, let's construct their prop states
var_prop_values = {}


prop_prefix = "change.properties.new."
value_prefix = "change.value.new"

for var_id in var_ids.rows(named=True):
    var_values = trace.events.filter(
        pl.col('process_id') == var_id['process_id'],
        pl.col('var_name') == var_id['var_name'],
        pl.col('var_type') == var_id['var_type']
    )

    state_init = var_values.filter(pl.col('type') == 'state_init')
    assert len(state_init) == 1, f"Expected 1 state_init event, got {len(state_init)}"
    state_init = state_init.row(0, named=True)

    state_changes = var_values.filter(pl.col('type') == 'state_change')
    
    # init prop values for this variable
    prop_values = {}
    for column in state_init:
        if column.startswith(prop_prefix):
            prop_values[column.split(prop_prefix)[-1]] = [PropState(state_init[column], state_init['time'])]
        if column.startswith(value_prefix):
            prop_values['param_value'] = [PropState(state_init[column], state_init['time'])]
    
    for state_change in state_changes.rows(named=True):
        for prop_value in state_change:
            if prop_value.startswith(prop_prefix) and state_change[prop_value] != prop_values[prop_value.split(prop_prefix)[-1]][-1].value and state_change[prop_value] is not None:
                prop_values[prop_value.split(prop_prefix)[-1]].append(PropState(state_change[prop_value], state_change['time']))
            if (prop_value.startswith(value_prefix) 
                and state_change[prop_value] != prop_values['param_value'][-1].value) and state_change[prop_value] is not None:
                prop_values['param_value'].append(PropState(state_change[prop_value], state_change['time']))

    var_prop_values[VarId(var_id['process_id'], var_id['var_name'], var_id['var_type'])] = prop_values

In [5]:
from tqdm import tqdm

class Liveness(NamedTuple):
    start: float
    end: float

def compare(value1, value2):
    if type(value1) != type(value2):
        return False
    if isinstance(value1, list):
        if len(value1) != len(value2):
            return False
        for idx, val in enumerate(value1):
            if not compare(val, value2[idx]):
                return False
        return True
    if isinstance(value1, dict):
        if len(value1) != len(value2):
            return False
        for key in value1:
            if key not in value2:
                return False
            if not compare(value1[key], value2[key]):
                return False
        return True
    if isinstance(value1, float):
        return abs(value1 - value2) < 1e-6
    return value1 == value2

    # if numerical, compare with tolerance
    # if collections, compare and aggregate the results

hypothesis = {}  # key: (var1, prop1, var2, prop2), value: list of liveness intervals
# Liveness based Hypothesis generation
for var in tqdm(var_prop_values):
    prop_values = var_prop_values[var]
    for prop in prop_values:
        for other_var in var_prop_values:
            for other_prop in var_prop_values[other_var]:
                if other_var == var and other_prop == prop:
                    continue

                # if the type of the property is not the same, we can't compare them
                if type(prop_values[prop][0].value) != type(var_prop_values[other_var][other_prop][0].value):
                    continue
                
                for idx, value in enumerate(prop_values[prop]):
                    start = value.timestamp
                    end = float('inf')
                    if idx + 1 < len(prop_values[prop]):
                        end = prop_values[prop][idx + 1].timestamp
                    liveness = Liveness(start, end)
                    for other_idx, other_value in enumerate(var_prop_values[other_var][other_prop]):
                        other_start = other_value.timestamp
                        other_end = float('inf')
                        if other_idx + 1 < len(var_prop_values[other_var][other_prop]):
                            other_end = var_prop_values[other_var][other_prop][other_idx + 1].timestamp
                        other_liveness = Liveness(other_start, other_end)
                        if (liveness.start > other_liveness.start or liveness.end < other_liveness.end):
                            # no overlap between the liveness of the two properties, skipping
                            continue
                        
                        # in the hypothesis, we don't want to include the process_id as the final invariant should be independent of the specific process id but focus on behavior of the type
                        var_id_new = VarId(-1, var.var_name, var.var_type)
                        other_var_id_new = VarId(-1, other_var.var_name, other_var.var_type)
                        if compare(value.value, other_value.value):
                            if (var_id_new, prop, other_var_id_new, other_prop) not in hypothesis and (other_var_id_new, other_prop, var_id_new, prop) not in hypothesis:
                                hypothesis[(var_id_new, prop, other_var_id_new, other_prop)] = []
                            if (var_id_new, prop, other_var_id_new, other_prop) in hypothesis:
                                hypothesis[(var_id_new, prop, other_var_id_new, other_prop)].append((liveness, other_liveness))
                            else:
                                hypothesis[(other_var_id_new, other_prop, var_id_new, prop)].append((other_liveness, liveness))

100%|██████████| 416/416 [11:44<00:00,  1.69s/it]


In [6]:
# filtered hypothesis based on number of states seen
filtered_hypothesis = {}
num_states = []
for key in hypothesis:
    num_states.append(len(hypothesis[key]))
    if len(hypothesis[key]) > 10 * 8:
        filtered_hypothesis[key] = hypothesis[key]

In [6]:
len(filtered_hypothesis)

19

In [7]:
# now let's create dummy hypothesis and try to infer it's preconditionfrom
import mldaikon.invariant.base_cls as base_cls
base_cls.Hypothesis 

# the first key will be the variable, the second key will be the property, the third key will be the liveness interval, the value will be the list of traces for that liveness interval
variable_states = {}

all_var_insts = trace.find_variable_identifiers()
for fh in filtered_hypothesis:
    # collect positive examples & negative examples
    var1 = fh[0]
    prop1 = fh[1]
    var2 = fh[2]
    prop2 = fh[3]
    if var1 not in variable_states:
        variable_states[var1] = {}
    if prop1 not in variable_states[var1]:
        variable_states[var1][prop1] = []
    if var2 not in variable_states:
        variable_states[var2] = {}
    if prop2 not in variable_states[var2]:
        variable_states[var2][prop2] = []
    
    ## collect the traces for each var
    # 1. filter out variable instances
    var1_insts = all_var_insts.filter(pl.col('var_name') == var1.var_name, pl.col('var_type') == var1.var_type)
    var2_insts = all_var_insts.filter(pl.col('var_name') == var2.var_name, pl.col('var_type') == var2.var_type)
    for var1_inst in var1_insts.rows(named=True):
        var1_inst_traces = trace.events.filter(
            pl.col('process_id') == var1_inst['process_id'],
            pl.col('var_name') == var1.var_name,
            pl.col('var_type') == var1.var_type
        )

        # for the specific property, let's construct the states, liveness, and the traces corresponding to that
        init_state = var1_inst_traces.filter(pl.col('type') == 'state_init').row(0, named=True)
        prop_states = var1_inst_traces.filter(pl.col('type') == 'state_change')
        if prop1 == "param_value":
            prop1_key = value_prefix
        else:
            prop1_key = prop_prefix + prop1
        
        current_value = init_state[prop1_key]
        current_states = [init_state]
        current_time = init_state['time']
        for prop_state in prop_states.rows(named=True):
            if prop_state[prop1_key] != current_value:
                variable_states[var1][prop1].append((current_value, current_states, Liveness(current_time, prop_state['time'])))
                current_states = [prop_state]
                current_value = prop_state[prop1_key]
                current_time = prop_state['time']
            current_states.append(prop_state)
        variable_states[var1][prop1].append((current_value, current_states, Liveness(current_time, float('inf'))))

    for var2_inst in var2_insts.rows(named=True):
        var2_inst_traces = trace.events.filter(
            pl.col('process_id') == var2_inst['process_id'],
            pl.col('var_name') == var2.var_name,
            pl.col('var_type') == var2.var_type
        )

        # for the specific property, let's construct the states, liveness, and the traces corresponding to that
        init_state = var2_inst_traces.filter(pl.col('type') == 'state_init').row(0, named=True)
        prop_states = var2_inst_traces.filter(pl.col('type') == 'state_change')
        if prop2 == "param_value":
            prop2_key = value_prefix
        else:
            prop2_key = prop_prefix + prop2
        
        current_value = init_state[prop2_key]
        current_states = [init_state]
        current_time = init_state['time']
        for prop_state in prop_states.rows(named=True):
            if prop_state[prop2_key] != current_value:
                variable_states[var2][prop2].append((current_value, current_states, Liveness(current_time, prop_state['time'])))
                current_states = [prop_state]
                current_value = prop_state[prop2_key]
                current_time = prop_state['time']
            current_states.append(prop_state)
        variable_states[var2][prop2].append((current_value, current_states, Liveness(current_time, float('inf'))))

In [83]:
filtered_hypothesis

{(VarId(process_id=-1, var_name='module.6.mlp.dense_4h_to_h.bias', var_type='torch.cuda.BFloat16Tensor'),
  'param_value',
  VarId(process_id=-1, var_name='module.6.mlp.dense_4h_to_h.bias', var_type='torch.cuda.BFloat16Tensor'),
  'param_value'): [(Liveness(start=1714889278.087709, end=1714889288.517196),
   Liveness(start=1714889278.087901, end=1714889288.501111)),
  (Liveness(start=1714889292.737439, end=1714889296.946227),
   Liveness(start=1714889292.751838, end=1714889296.939809)),
  (Liveness(start=1714889335.25664, end=1714889339.469597),
   Liveness(start=1714889335.266765, end=1714889339.457893)),
  (Liveness(start=1714889352.027646, end=1714889356.277226),
   Liveness(start=1714889352.039857, end=1714889356.272593)),
  (Liveness(start=1714889407.929678, end=1714889412.169213),
   Liveness(start=1714889407.942938, end=1714889412.152295)),
  (Liveness(start=1714889428.842348, end=1714889432.941582),
   Liveness(start=1714889428.857435, end=1714889432.924473)),
  (Liveness(start

In [21]:
import mldaikon.invariant.base_cls as base_cls

def calc_liveness_overlap(liveness1, liveness2):
    if liveness1.start > liveness2.end or liveness1.end < liveness2.start:
        return 0
    return (min(liveness1.end, liveness2.end) - max(liveness1.start, liveness2.start)) / (max(liveness1.end, liveness2.end) - min(liveness1.start, liveness2.start))

# now, start to verify the hypothesis
filtered_hypothesis_with_exps = {}
for fh in filtered_hypothesis:
    # let's collect postivie and negative examples
    var1 = fh[0]
    prop1 = fh[1]
    var2 = fh[2]
    prop2 = fh[3]
    positive_examples = []
    negative_examples = []

    for var1_state in variable_states[var1][prop1]:
        for var2_state in variable_states[var2][prop2]:
            # if prop1 == prop2, var1 and var2 has to have different process_id
            if prop1 == prop2 and var1_state[1][0]['process_id'] == var2_state[1][0]['process_id']:
                continue

            overlap = calc_liveness_overlap(var1_state[2], var2_state[2])
            if overlap < 0.01:
                continue
            if compare(var1_state[0], var2_state[0]):
                positive_examples.append((var1_state[1] + var2_state[1]))
            else:
                negative_examples.append((var1_state[1] + var2_state[1]))

    filtered_hypothesis_with_exps[fh] = base_cls.Hypothesis(None, positive_examples, negative_examples)

In [35]:
import logging

class Precondition():
    def __init__(self, prop_name: str, _type: str, values: list|type):
        self.prop_name = prop_name
        if _type not in ["constant", "consistent"]:
            raise ValueError(f"Invalid type {_type}")
        self.type = _type # either "constant" or "consistent"
        self.values = values if isinstance(values, list) else [values]
    def verify(self, example) -> bool:
        if isinstance(example, list):
            example = pl.DataFrame(example)
        assert isinstance(example, pl.DataFrame), f"Expected example to be a DataFrame, got {type(example)}"
        
        # prop_key = prop_prefix + self.prop_name if self.prop_name != "param_value" else value_prefix
        prop_key = self.prop_name
        if prop_key not in example.columns:
            return False
        prop_values = example[prop_key].drop_nulls().unique().to_list()
        if self.type == "constant":
            return len(prop_values) == 1 and prop_values[0] in self.values
        if self.type == "consistent":
            return len(prop_values) == 1
    
    def try_relax(self) -> bool:
        if self.type == "consistent":
            self.type = "constant"
            return True
        return False # cannot relax further


def find_precondition(hypothesis: base_cls.Hypothesis) -> list | None:
    """Given a hypothesis, should return a list of preconditions
    that should be satisfied for the invariant to hold.

    The preconditions should be certain properties of the relevant events that
    should be satisfied for the invariant to hold.

    args:
        hypothesis: Hypothesis
            A hypothesis to find preconditions for.
    """

    logger = logging.getLogger(__name__)

    ## 1. Find consistent properties of the positive examples & negative examples
    positive_properties = []

    def find_conditions(example: list, key_to_skip: str = "value"):
        """A list of traces to find common properties from. The property should hold locally within the example."""
        try:
            example_df = pl.DataFrame(example)
        except:
            import pprint
            pprint.pprint(example)
            raise
        const_conds = {}
        # find properties that have only one value in the example
        for col in example_df.columns:
            if key_to_skip is not None and key_to_skip in col:
                continue

            # let's also skip anything with .old
            if ".old" in col:
                continue

            try:
                values = example_df.get_column(col).drop_nulls().unique().to_list()
            except:
                # .unique() might fail due to column having dtype 'list[null]' or something similar, let's just continue
                continue
            if len(values) == 1:
                # get the value of the property
                value = values[0]
                const_conds[col] = value
        return const_conds
    
    for example in hypothesis.positive_examples:
        conds = find_conditions(example)
        # print(f"found #conds: {len(conds)}")

        found = False
        for cond_name in conds:
            if "tensor_model_parallel" in cond_name:
                found = True
                break
        if not found:
            import pprint
            print("example no tensor_model_parallel:")
            pprint.pprint(example)
            return []
        if len(conds) == 0:
            print("example: ", example)
            # stop
            return []
        
        positive_properties.append(conds)

    # exclude those also hold in the negative examples
    # for each negative example, we verify the conds in the positive examples 

    # find the common properties 
    precondition_targets = set(positive_properties[0].keys())
    precondition_target_values = {
        key: []
        for key in precondition_targets
    }

    for pos_props in positive_properties:
        precondition_targets = precondition_targets.intersection(pos_props.keys())
        for key in pos_props:
            if key in precondition_targets:
                # precondition_target_values[key].append(pos_props[key])
                if pos_props[key] not in precondition_target_values[key]:
                    precondition_target_values[key].append(pos_props[key])

    preconditions = {key: Precondition(key, "constant", precondition_target_values[key]) if len(precondition_target_values[key]) == 1 else Precondition(key, "consistent", precondition_target_values[key])
        for key in precondition_targets
    }

    print(f"# Initial Precondition: {len(preconditions)}")

    """
    1. Only one value (assumes to be the prop == constant precondition)
    2. Multiple Values (first assumes to be prop == prop precondition, if it do not hold, relax to prop in [const1, const2, ...] but constant in one example precondition)
    """

    # values needed for checking the precondition
    for neg_exp in hypothesis.negative_examples:
        precond_targets_to_del = set()
        for precond_target in preconditions:
            precond = preconditions[precond_target]
            if precond.verify(neg_exp):
                if not precond.try_relax():
                    if 'tensor_model_parallel' in precond_target:
                        print("tensor_model_parallel failed")
                        import pprint
                        pprint.pprint(neg_exp)
                    # logger.error(f"Failed to relax the precondition {precond.prop_name}")
                    # delete the precondition
                    precond_targets_to_del.add(precond_target)
                # print("relaxed: ", precond.prop_name)
        for precond_target in precond_targets_to_del:
            del preconditions[precond_target]

    return preconditions

def find_precondition_1(hypothesis: base_cls.Hypothesis) -> list | None:
    """Given a hypothesis, should return a list of preconditions
    that should be satisfied for the invariant to hold.

    The preconditions should be certain properties of the relevant events that
    should be satisfied for the invariant to hold.

    args:
        hypothesis: Hypothesis
            A hypothesis to find preconditions for.
    """

    logger = logging.getLogger(__name__)

    ## 1. Find consistent properties of the positive examples & negative examples
    positive_properties = []

    def find_conditions(example: list, key_to_skip: str = "value"):
        """A list of traces to find common properties from. The property should hold locally within the example."""
        try:
            example_df = pl.DataFrame(example)
        except:
            import pprint
            pprint.pprint(example)
            raise
        const_conds = {}
        # find properties that have only one value in the example
        for col in example_df.columns:
            if key_to_skip is not None and key_to_skip in col:
                continue

            # let's also skip anything with .old
            if ".old" in col:
                continue

            try:
                values = example_df.get_column(col).drop_nulls().unique().to_list()
            except:
                # .unique() might fail due to column having dtype 'list[null]' or something similar, let's just continue
                continue
            if len(values) == 1:
                # get the value of the property
                value = values[0]
                const_conds[col] = value
        return const_conds
    
    for example in hypothesis.positive_examples:
        conds = find_conditions(example)
        # print(f"found #conds: {len(conds)}")

        found = False
        for cond_name in conds:
            if "tensor_model_parallel" in cond_name:
                found = True
                break
        if not found:
            import pprint
            print("example no tensor_model_parallel:")
            pprint.pprint(example)
            return []
        if len(conds) == 0:
            print("example: ", example)
            # stop
            return []
        
        positive_properties.append(conds)

    # exclude those also hold in the negative examples
    # for each negative example, we verify the conds in the positive examples 

    # find the common properties 
    precondition_targets = set(positive_properties[0].keys())
    precondition_target_values = {
        key: []
        for key in precondition_targets
    }

    for pos_props in positive_properties:
        precondition_targets = precondition_targets.intersection(pos_props.keys())
        for key in pos_props:
            if key in precondition_targets:
                # precondition_target_values[key].append(pos_props[key])
                if pos_props[key] not in precondition_target_values[key]:
                    precondition_target_values[key].append(pos_props[key])

    preconditions = {key: Precondition(key, "constant", precondition_target_values[key]) if len(precondition_target_values[key]) == 1 else Precondition(key, "consistent", precondition_target_values[key])
        for key in precondition_targets
    }

    print(f"# Initial Precondition: {len(preconditions)}")

    """
    1. Only one value (assumes to be the prop == constant precondition)
    2. Multiple Values (first assumes to be prop == prop precondition, if it do not hold, relax to prop in [const1, const2, ...] but constant in one example precondition)
    """
    # perform the same for the negative examples
    negative_properties = []
    for example in hypothesis.negative_examples:
        conds = find_conditions(example)
        negative_properties.append(conds)

    neg_precondition_targets = set(negative_properties[0].keys())
    neg_precondition_target_values = {
        key: []
        for key in neg_precondition_targets
    }

    for neg_props in negative_properties:
        neg_precondition_targets = neg_precondition_targets.intersection(neg_props.keys())
        for key in neg_props:
            if key in neg_precondition_targets:
                if neg_props[key] not in neg_precondition_target_values[key]:
                    neg_precondition_target_values[key].append(neg_props[key])
    
    neg_preconditions = {key: Precondition(key, "constant", neg_precondition_target_values[key]) if len(neg_precondition_target_values[key]) == 1 else Precondition(key, "consistent", neg_precondition_target_values[key])
        for key in neg_precondition_targets
    }

    set_targets_to_remove = set()
    # now let's remove the preconditions that are also satisfied in the negative examples
    for neg_precond_target in neg_preconditions:
        if neg_precond_target in preconditions:
            # check if the values have overlap, if yes, remove the precondition
            overlap = False
            for value in preconditions[neg_precond_target].values:
                if value in neg_preconditions[neg_precond_target].values:
                    overlap = True
                    break
            if overlap:
                print(f"Removing precondition {neg_precond_target}")
                set_targets_to_remove.add(neg_precond_target)
        
    for target in set_targets_to_remove:
        del preconditions[target]

    return preconditions



In [12]:
list(filtered_hypothesis_with_exps.keys())[2]

(VarId(process_id=-1, var_name='module.3.input_layernorm.bias', var_type='torch.cuda.BFloat16Tensor'),
 'param_value',
 VarId(process_id=-1, var_name='module.3.input_layernorm.bias', var_type='torch.cuda.BFloat16Tensor'),
 'param_value')

In [14]:
pre_conds

NameError: name 'pre_conds' is not defined

In [22]:
hypo = filtered_hypothesis_with_exps[list(filtered_hypothesis_with_exps.keys())[1]]
print(list(filtered_hypothesis_with_exps.keys())[5])
print(len(hypo.positive_examples))
print(len(hypo.negative_examples))
# hypo.negative_examples[1000]

(VarId(process_id=-1, var_name='module.6.input_layernorm.bias', var_type='torch.cuda.HalfTensor'), 'param_value', VarId(process_id=-1, var_name='module.6.input_layernorm.bias', var_type='torch.cuda.HalfTensor'), 'param_value')
8736
0


IndexError: list index out of range

In [23]:
pre_conds = find_precondition(hypo)

# Initial Precondition: 37


In [26]:
pre_conds['change.properties.new.tensor_model_parallel'].values

[False]

In [30]:
# let's manually generate a hypothesis, ignore the specific name of the variable but only about the variable type and the property

# the hypothesis is that torch.cuda.HalfTensor's param_value is consistent # this is an issue, this variable type is too much of a specific one, we should infer on torch.nn.Parameter level 

expected_type = "torch.cuda.HalfTensor"
expected_prop = "param_value"
hypo_on_type = base_cls.Hypothesis(None, [], [])

def calc_liveness_overlap(liveness1, liveness2):
    if liveness1.start > liveness2.end or liveness1.end < liveness2.start:
        return 0
    return (min(liveness1.end, liveness2.end) - max(liveness1.start, liveness2.start)) / (max(liveness1.end, liveness2.end) - min(liveness1.start, liveness2.start))


positive_examples = []
negative_examples = []
for var1 in variable_states:
    for var2 in variable_states:
        if var1 == var2:
            continue
        if var1.var_type != expected_type or var2.var_type != expected_type:
            continue
        for var1_state in variable_states[var1][expected_prop]:
            for var2_state in variable_states[var2][expected_prop]:
                # if prop1 == prop2, var1 and var2 has to have different process_id
                if prop1 == prop2 and var1_state[1][0] == var2_state[1][0]:
                    continue

                overlap = calc_liveness_overlap(var1_state[2], var2_state[2])
                if overlap < 0.01:
                    continue
                if compare(var1_state[0], var2_state[0]):
                    positive_examples.append((var1_state[1] + var2_state[1]))
                else:
                    negative_examples.append((var1_state[1] + var2_state[1]))

hypo_on_type.positive_examples = positive_examples
hypo_on_type.negative_examples = negative_examples
print(len(positive_examples))
print(len(negative_examples))

157056
3242496


In [31]:
pre_conds_on_type = find_precondition(hypo_on_type)

# Initial Precondition: 36
tensor_model_parallel failed
[{'change.properties.new._backward_hooks': None,
  'change.properties.new._base': None,
  'change.properties.new._cdata': 171888480,
  'change.properties.new._grad': None,
  'change.properties.new._grad_fn': None,
  'change.properties.new._has_symbolic_sizes_strides': False,
  'change.properties.new._post_accumulate_grad_hooks': None,
  'change.properties.new._python_dispatch': False,
  'change.properties.new._version': 1,
  'change.properties.new.ds_pipe_replicated': False,
  'change.properties.new.grad': None,
  'change.properties.new.grad_fn': None,
  'change.properties.new.is_cpu': False,
  'change.properties.new.is_cuda': True,
  'change.properties.new.is_ipu': False,
  'change.properties.new.is_leaf': True,
  'change.properties.new.is_meta': False,
  'change.properties.new.is_mkldnn': False,
  'change.properties.new.is_mps': False,
  'change.properties.new.is_mtia': False,
  'change.properties.new.is_nested': False,
  'chang

In [36]:
pre_conds_on_type_1 = find_precondition_1(hypo_on_type)

# Initial Precondition: 36


In [1]:
pre_conds_on_type_1

NameError: name 'pre_conds_on_type_1' is not defined

In [32]:
pre_conds_on_type

{}

In [29]:
variable_states[list(variable_states.keys())[0]].keys()

dict_keys(['param_value'])

In [16]:
count_step_issue = 0
for exp in hypo.negative_examples:
    if exp[0]['meta_vars.step'] == exp[1]['meta_vars.step'] and exp[2]['meta_vars.step'] == exp[3]['meta_vars.step']:
        if exp[2]['meta_vars.step'] != exp[1]['meta_vars.step']:
            count_step_issue += 1

In [17]:
len(hypo.negative_examples)

7648

In [12]:
count_step_issue

17208

In [103]:
val1 = filtered_hypothesis_with_exps[list(filtered_hypothesis_with_exps.keys())[0]].negative_examples[0][0]['change.value.new']
val2 = filtered_hypothesis_with_exps[list(filtered_hypothesis_with_exps.keys())[0]].negative_examples[0][1]['change.value.new']
compare(val1, val2)

True

In [96]:
# filtered_hypothesis[list(filtered_hypothesis_with_exps.keys())[0]]
list(filtered_hypothesis_with_exps.keys())[3]

(VarId(process_id=-1, var_name='module.3.mlp.dense_4h_to_h.bias', var_type='torch.cuda.BFloat16Tensor'),
 'param_value',
 VarId(process_id=-1, var_name='module.3.mlp.dense_4h_to_h.bias', var_type='torch.cuda.BFloat16Tensor'),
 'param_value')

In [None]:
print(pre_conds.keys())

In [None]:
count_type_constant = 0
count_type_consistent = 0
for pre_cond in pre_conds:
    if pre_conds[pre_cond].type == "constant":
        count_type_constant += 1
    else:
        count_type_consistent += 1


In [None]:
count_type_constant
# count_type_consistent

In [75]:
pre_cond_is_cuda = Precondition('change.properties.new.is_cuda', "constant", [True])

In [76]:
pre_cond_is_cuda.verify(filtered_hypothesis_with_exps[list(filtered_hypothesis_with_exps.keys())[0]].positive_examples[0])

True

In [None]:
pre_conds['change.properties.new.is_cuda'].verify(filtered_hypothesis_with_exps[list(filtered_hypothesis_with_exps.keys())[0]].positive_examples[0])

In [None]:
pre_conds['change.properties.new.is_cuda'].prop_name

In [None]:
pl.DataFrame(filtered_hypothesis_with_exps[list(filtered_hypothesis_with_exps.keys())[0]].positive_examples[0])

In [None]:
pre_conds['change.properties.new.is_cuda'].values

In [None]:
filtered_hypothesis_with_exps[list(filtered_hypothesis_with_exps.keys())[0]].positive_examples[0][3]['change.properties.new.is_cuda']