# Classification Rules For COVID-19 dataset Using PRISM Algorithm

<h2>PRISM Algorithm Implementation</h2>

In [72]:
class Rule:
    def __init__(self, conditions=None, class_label=None, accuracy=0.0, coverage=0):
        
        # conditions refer to attribute and its value
        # eg. {'diabetes': 'no', 'hypertension': 'no'}
        self.conditions = conditions
        
        # eg. 'alive' or 'dead' in COVID dataset
        # 'Yes' or 'No' in weather dataset
        self.class_label = class_label
        
        # accuracy = number of rows that satisfy both condition and class /(divided by) number of rows that
        # satisfy only the condition
        self.accuracy = accuracy
        
        
        # coverage refers to number of rows that satisfy only the condition
        self.coverage = coverage
        
    def __str__(self):
        
        # printing out the rule in a nice format
        # eg. 
        # if diabetes = no, then alive.
        # Accuracy = 0.7. Coverage = 21917
        
        str_rep = "If "
        
        counter = 0
        for condition in self.conditions:
            if counter == (len(self.conditions) - 1):
                if type(self.conditions[condition]) == int or type(self.conditions[condition]) == float:
                    str_rep += (str(condition) + " >= " + str(self.conditions[condition]))
                else:
                    str_rep += (str(condition) + " = " + str(self.conditions[condition]))
            else:
                if type(self.conditions[condition]) == int or type(self.conditions[condition]) == float:
                    str_rep += (str(condition) + " >= " + str(self.conditions[condition]) + " and ")
                else:
                    str_rep += (str(condition) + " = " + str(self.conditions[condition]) + " and ")
            
            counter += 1
        
        str_rep += (", then " + str(self.class_label)) 
        str_rep += (".\n Accuracy = " + str(self.accuracy))
        str_rep += (". Coverage = " + str(self.coverage))
        str_rep += "\n"
        
        return str_rep

In [50]:
def get_unique_class_label(rows):
    
    # from getting repetitive class labels, just get the unique class label
    # eg of output: ['alive', 'dead']

    # use set to hold the unique class label
    class_set = set()
    
    for row in rows:
        
        # get the last column since last column is where we have a class label
        class_label = row[-1]
        
        if class_label not in class_set:
            class_set.add(class_label)
            
    
    return list(class_set)
        

In [51]:
def copy_rows(rows):
    
    # copy the data to another nested list
    # there is another way to copy: using deepcopy that requires to import copy
    # the below way can also be used to copy data from one nested list to another nested list
    
    nested_lst = []
    for row in rows:
        inner_lst = []
        for each_data in row:
            inner_lst.append(each_data)
            
        nested_lst.append(inner_lst)
    return nested_lst

In [52]:
# get attributes that have not been used by the rule
def get_remaining_attributes(attributes, rule_node):
    
    # eg. if attribute called 'age' has been used, get remaining attributes other than 'age'
    
    # if the node is None, just return the attributes because none of them haven't been used yet.
    if rule_node is None:
        return attributes
     
    rule_dic = rule_node.conditions
    
    return [attribute for attribute in attributes if attribute not in rule_dic]

In [53]:
def get_unique_attribute_vals(rows, indx):
    
    # There are repetitive attribute values and we want to get the attribute values that are distinct.
    # eg of outcome:  ['overcast','sunny','rainy']
    # There are two lists: one for data and another for columns/attributes
    # Eg. each row = ['overcast',...], columns = ['Outlook',...]
    # with the help of indx, we can actually get the data associated with a particular column
    
    # a set to hold the unique attribute values
    attribute_val_set = set()
    
    lst = [row[indx] for row in rows]
    return list(set(lst))

In [54]:
def get_attribute_indx(attribute, attribute_lst):
    
    # we want to find the index of a particular attribute in the attribute list.
    # eg. attribute = 'Outlook', attribute_lst = ['Outlook',...]
    # so in the above example, indx will be 0
    
    
    for indx, attr in enumerate(attribute_lst):
        if attribute == attr:
            return indx
    

In [107]:
def get_condition_based_rows(rows, rule_node, attr_lst):
    
    # bring back rows that satisfy the condition(s)
    
    if rule_node is not None:
        rule_dic = rule_node.conditions
        
        # copy the data so that the original data will not be affected when we change the list
        covered_rows = copy_rows(rows)

        for key in rule_dic:
            indx = get_attribute_indx(key, attr_lst)
            
             # for numerical. eg. 32, 56...
            if isinstance(rule_dic[key], int) or isinstance(rule_dic[key], float):
                covered_rows = [row for row in covered_rows if row[indx] >= rule_dic[key]]
          
            else:
                # for classification groups. eg: True/False, Hot/Mild/Cool 
                covered_rows = [row for row in covered_rows if row[indx] == rule_dic[key]]

                
       
        return covered_rows

In [100]:
def calculate_accuracy(correct_rows, covered_rows):
    
    # accuracy = number of correct rows(satisfy both condition and class label) / number of covered_rows based on condition
    
    no_of_correct_class = len(correct_rows)
    no_of_covered_rows = len(covered_rows)
    
    # accuracy is between 0.0 and 1.0
    accuracy = no_of_correct_class / float(no_of_covered_rows)
    return accuracy

In [101]:
def copy_rule_node(rule_node):
    cloned_condition = {}
    for key in rule_node.conditions:
        cloned_condition[key] = rule_node.conditions[key]

    prev_high = Rule(conditions=cloned_condition,class_label=rule_node.class_label,accuracy=rule_node.accuracy,coverage=rule_node.coverage)

    return prev_high

In [118]:
def get_rule_with_highest_accuracy(possible_rules):
    
    # possible rules contains rule nodes and we want to find the highest accuracy rule among all the rules
    # if there is a tie in accuracy, we choose the rule with larger coverage.
    # if there is still tie, we can choose either one. In the code below, if there is still tie, I just choose the one that comes first
    
    max_acc_node = None
    
    highest_acc = 0
    highest_cov = 0
    
    
    for possible_rule_node in possible_rules:
        if possible_rule_node is None:
            continue
        
        acc = possible_rule_node.accuracy
        cov = possible_rule_node.coverage
        
        # if current node accuracy is higher than the max accuracy so far, choose the current rule as best rule
        if acc > highest_acc:
            highest_acc = acc
            highest_cov = cov
            
            max_acc_node = possible_rule_node
            
        elif acc == highest_acc:
            
            # if there is accuracy tie, select using the coverage 
            # if current node coverage is higher than max coverage so far, choose the current rule as best rule
            
            if cov > highest_cov:
                highest_cov = cov
                
                max_acc_node = possible_rule_node
                            
    return max_acc_node        

In [119]:
# remove rows which are covered by the rules
def get_remaining_rows(rows, covered_rows):
    
    # We choose the rows that are not covered by the rules. 
    
    return [row for row in rows if row not in covered_rows]



In [120]:
def learn_one_rule(rows, class_labels, attributes, accuracy_thresh, coverage_thresh):
    
    # mapping attribute to index
    attribute_indx_dic = {}
    counter = 0
    for attribute in attributes:
        attribute_indx_dic[attribute] = counter
        counter += 1
        
    
    # these are data that are covered by the rules and we copy the data because we don't want to have effect on original list
    rows_covered_by_rule = copy_rows(rows)
    
    # this keeps track of best rule so far. First, we initialize it to None because we haven't got any rule yet
    highest_node = None
    
    # this keeps track of rule before we add more conditions
    prev_highest_node = None
    
    while True:
        print("highest node")
        print(highest_node)
    
        # this keeps track of rules that have different class label.
        # eg. if we have two class labels-'alive' or 'dead', this list can contain rules for both labels and later choose the best rule
        best_classes_rule_lst = []

        for class_label in class_labels:
            
            # if we already have the highest node and its class label is not the same as the current class label,
            # we go on to next class label
            # let's say we have best rule so far: if diabetes = no, then alive.
            # Then, when we want to refine this rule, we don't need to consider class label 'dead'.
            
            if highest_node is not None and highest_node.class_label != class_label:
                continue

            # remaining attributes that haven't been used in the rule
            remaining_attributes = get_remaining_attributes(attributes, highest_node)

            # this stores the nodes generated from considering each attribute for each class label
            # eg. if class_label is 'alive', this will keep track of nodes produced from considering each attribute for the 'alive' class label
            possible_rules = []


            # go over all the remaining attributes
            for attr in remaining_attributes:

                indx = attribute_indx_dic[attr]

                # get unique attribute values such as 'Sunny', Rainy, ...
                remaining_attr_vals = get_unique_attribute_vals(rows_covered_by_rule, indx)

                # go over the remaining attribute values
                for attr_val in remaining_attr_vals:
                    
                    # note: both attribute and values of attribute results in condition
                    # eg. If 'Outlook' = 'Sunny' 

                    # create a rule node for each attribute values
                    potential_rule_node = Rule(conditions={attr: attr_val}, class_label=class_label)


                    # Simply, give me rows which satisfy the condition
                    # eg. give me rows that has Outlook = Sunny
                    only_condition_rows = get_condition_based_rows(rows_covered_by_rule, potential_rule_node, attributes)


                    # Simply, give me rows which satisfy both the condition and the class label
                    # Eg. give me rows that has 'Outlook' = 'Sunny' and class label = 'Yes'
                    # these are rows that satisfy both condition and class label
                    condition_class_label_rows = [row for row in only_condition_rows if row[-1] == class_label]


                 
                    # coverage refers to number of rows that satisfy the condition. In other words, it covers the rows that meet the conditions
                    cov = len(only_condition_rows)
                    
                    # calculate accuracy
                    calculated_acc = calculate_accuracy(condition_class_label_rows, only_condition_rows)
                    
                    # if the current coverage exceeds the coverage threshold, then we add the potential rule to the list
                    # let's say coverage threshold is 3. Even if the accuracy is 1.0(2/2), the coverage of that rule(ie. 2) is less than the coverage threshold(ie. 3)
                    # In the example above, we will not consider that rule because the rule covers less rows than the specified coverage threshold. 
                    if cov >= coverage_thresh:
                        potential_rule_node.accuracy = calculated_acc
                        potential_rule_node.coverage = cov
                    
                        possible_rules.append(potential_rule_node)
        
        
            # if there are no rules in possible_rules list for that class label, continue to the next class label 
            if len(possible_rules) == 0:
                continue
            
            
            # get the rule with highest accuracy(ie. best rule) among the same class label
            highest_accuracy_rule = get_rule_with_highest_accuracy(possible_rules)

            # add that rule to the best_classes_rule_lst so that we can compare this rule to the another rule for another class
            best_classes_rule_lst.append(highest_accuracy_rule)
            
        
            # if there is highest node, that means that it already has first condition and it is here for refining the rule
            # So just update the highest_node with more conditions, accuracy and coverage
            if highest_node is not None:

                for key in highest_accuracy_rule.conditions:
                    attri_value = highest_accuracy_rule.conditions[key]

                    highest_node.conditions[key] = attri_value

                highest_node.accuracy = highest_accuracy_rule.accuracy
                highest_node.coverage = highest_accuracy_rule.coverage
         
        '''
        # if there is no rule in the best_classes_rule_lst, that means that no best rule is found so just return None
        if highest_node is  None:
            if len(best_classes_rule_lst) == 0:
                break
        else:
            return None,None
        '''
        
        # if there is no best rule so far yet, find the best rule among the rules in best_classes_rule_lst
        # and assign the best rule to highest_node.
        if highest_node is None:
            # this node is the best among other nodes including nodes from other class labels
            
                
            if len(best_classes_rule_lst) == 0:
                return None, None
            
            highest_node = get_rule_with_highest_accuracy(best_classes_rule_lst)
           
            
            # there is no prev highest node because this is the first condition for this rule
            #prev_highest_node = copy_rule_node(highest_node)
       
        # if we cannot have better accuracy than before by adding more conditions or if the rule covers less rows than the coverage threshold,
        # just stop refining the rule
        if (prev_highest_node is not None and highest_node.accuracy == prev_highest_node.accuracy) or highest_node.coverage < coverage_thresh:
            if prev_highest_node is not None and highest_node.accuracy == prev_highest_node.accuracy:
                # the current highest node accuracy is no better than the prev accuracy
                # revert back to the rule with better accuracy
                highest_node = copy_rule_node(prev_highest_node)
            break
    
        # get the rows that meet the conditions of the rule and assign to rows_covered_by_rule
        # So this list gets smaller and smaller as the rule gets refined.
        rows_covered_by_rule = get_condition_based_rows(rows_covered_by_rule, highest_node, attributes)
        
    
                
        # if there is prev_highest_node or 
        # if the current best rule has better accuracy than prev best rule, just update the prev_highest_node
        if (prev_highest_node is None) or (prev_highest_node is not None and highest_node.accuracy > prev_highest_node.accuracy):
            prev_highest_node = copy_rule_node(highest_node)
            
    # By this step, we know number of rows that the best rule covers and we want to find the remaining rows.
    remaining_rows = get_remaining_rows(rows, rows_covered_by_rule)
        
    return remaining_rows, highest_node
    
    
   
        
        
    
    
    

In [121]:
def PRISM_algorithm(rows, col_lst, accuracy_thresh=1.0, coverage_thresh=3):
    
    # copy all the data to another list 
    # so that when we remove rows, it will not affect original lst
    # here, remaining_rows will first be equal to original rows
    remaining_rows = copy_rows(rows)
    
    # eg. ['Yes', 'No'] or ['alive', 'dead']
    class_labels = get_unique_class_label(rows)
    print(class_labels)
    class_labels.sort(reverse=True)
    
    # list of rule nodes 
    rules_lst = []
    
    # exclude the class label
    attributes = col_lst[:-1]
    
    # remaining_rows is not empty yet so learn a rule
    while len(remaining_rows) != 0:
        
        
        remaining_rows, rule_node = learn_one_rule(remaining_rows, class_labels, attributes, accuracy_thresh, coverage_thresh)
        
        
          
        # this means that all the rows that remain have coverage that is below the threshold so we do not consider them
        if rule_node is None:
            break
            
        
        
        
        # add node to rules
        rules_lst.append(rule_node)
    
    print('rules list')
    print(rules_lst)
    
    # Every rules in this list have accuracy >= accuracy_thresh
    # in other words, we remove rules with accuracy that is below the accuracy threshold
    better_rules = []
    for each_rule in rules_lst:
        if each_rule.accuracy >= accuracy_thresh:
            better_rules.append(each_rule)
    
    return better_rules

<h2>Testing PRISM Algorithm using small dataset: weather dataset</h2>

In [122]:
col_lst = ['Outlook', 'Temp', 'Humidity', 'Windy', 'Play']

data_rows = [
    ['Sunny', 'Hot', 'High', 'False', 'No'],
    ['Sunny', 'Hot', 'High', 'True', 'No'],
    ['Overcast', 'Hot', 'High', 'False', 'Yes'],
    ['Rainy', 'Mild', 'High', 'False', 'Yes'],
    ['Rainy', 'Cool', 'Normal', 'False', 'Yes'],
    ['Rainy', 'Cool', 'Normal', 'True', 'No'],
    ['Overcast', 'Cool', 'Normal', 'True', 'Yes'],
    ['Sunny', 'Mild', 'High', 'False', 'No'],
    ['Sunny', 'Cool', 'Normal', 'False', 'Yes'],
    ['Rainy', 'Mild', 'Normal', 'False', 'Yes'],
    ['Sunny', 'Mild', 'Normal', 'True', 'Yes'],
    ['Overcast', 'Mild', 'High', 'True', 'Yes'],
    ['Overcast', 'Hot', 'Normal', 'False', 'Yes'],
    ['Rainy', 'Mild', 'High', 'True', 'No']
]

In [123]:
# eg of rule
conditions_demo = {'Outlook': 'Overcast'}

rule_demo = Rule(conditions=conditions_demo, class_label='Yes',accuracy=1.0,coverage=5)
print("Eg of rule that is printed in nice format: ")
print(rule_demo)
    

Eg of rule that is printed in nice format: 
If Outlook = Overcast, then Yes.
 Accuracy = 1.0. Coverage = 5



In [124]:
print("getting attributes that are not used in the rule")

test_rule_node = Rule({'Humidity': 'normal'}, 'Yes', 1.0, 5)
print(get_remaining_attributes(col_lst[:-1], test_rule_node))
print("\n")

getting attributes that are not used in the rule
['Outlook', 'Temp', 'Windy']




In [125]:
conditions = {'outlook':'sunny'}
highest_rule = Rule(conditions=conditions,class_label='Yes',accuracy=0.6,coverage=20)
print("--------Before changes-------------")
print("highest_rule")
print(highest_rule)
print("\n")

prev_highest_rule = copy_rule_node(highest_rule)
print('Prev highest rule')
print(prev_highest_rule)
print("\n")

print("---------After changes---------")
highest_rule.conditions['temp'] = 'hot'
highest_rule.accuracy = 0.9
highest_rule.coverage = 10
print("highest_rule")
print(highest_rule)
print("\n")

print('Prev highest rule')
print(prev_highest_rule)
print("\n")

--------Before changes-------------
highest_rule
If outlook = sunny, then Yes.
 Accuracy = 0.6. Coverage = 20



Prev highest rule
If outlook = sunny, then Yes.
 Accuracy = 0.6. Coverage = 20



---------After changes---------
highest_rule
If outlook = sunny and temp = hot, then Yes.
 Accuracy = 0.9. Coverage = 10



Prev highest rule
If outlook = sunny, then Yes.
 Accuracy = 0.6. Coverage = 20





In [85]:
print("Getting remaining rows after first rule: if outlook = overcast, then yes")
test_rule_node = Rule({'Outlook': 'Overcast'}, 'Yes', 1.0, 4)
test_covered_rows = get_condition_based_rows(data_rows, test_rule_node, col_lst[:-1])


rem_data_testing = get_remaining_rows(data_rows, test_covered_rows)
print(rem_data_testing)
print("\n")

print("Getting remaining rows after second rule: if humidity = normal and windy = false, then yes")
test_rule_node = Rule({'Humidity': 'Normal', 'Windy': 'False'}, 'Yes', 1.0, 3)
test_covered_rows = get_condition_based_rows(rem_data_testing, test_rule_node, col_lst[:-1])
rem_data_testing = get_remaining_rows(rem_data_testing, test_covered_rows)
print(rem_data_testing)
print("\n")

print("Getting remaining rows after third rule: if humidity = high and outlook = sunny, then no")
test_rule_node = Rule({'Humidity': 'High', 'Outlook': 'Sunny'}, 'No', 1.0, 3)
test_covered_rows = get_condition_based_rows(rem_data_testing, test_rule_node, col_lst[:-1])
rem_data_testing = get_remaining_rows(rem_data_testing, test_covered_rows)
print(rem_data_testing)
print("\n")


Getting remaining rows after first rule: if outlook = overcast, then yes
[['Sunny', 'Hot', 'High', 'False', 'No'], ['Sunny', 'Hot', 'High', 'True', 'No'], ['Rainy', 'Mild', 'High', 'False', 'Yes'], ['Rainy', 'Cool', 'Normal', 'False', 'Yes'], ['Rainy', 'Cool', 'Normal', 'True', 'No'], ['Sunny', 'Mild', 'High', 'False', 'No'], ['Sunny', 'Cool', 'Normal', 'False', 'Yes'], ['Rainy', 'Mild', 'Normal', 'False', 'Yes'], ['Sunny', 'Mild', 'Normal', 'True', 'Yes'], ['Rainy', 'Mild', 'High', 'True', 'No']]


Getting remaining rows after second rule: if humidity = normal and windy = false, then yes
[['Sunny', 'Hot', 'High', 'False', 'No'], ['Sunny', 'Hot', 'High', 'True', 'No'], ['Rainy', 'Mild', 'High', 'False', 'Yes'], ['Rainy', 'Cool', 'Normal', 'True', 'No'], ['Sunny', 'Mild', 'High', 'False', 'No'], ['Sunny', 'Mild', 'Normal', 'True', 'Yes'], ['Rainy', 'Mild', 'High', 'True', 'No']]


Getting remaining rows after third rule: if humidity = high and outlook = sunny, then no
[['Rainy', 'Mild',

In [128]:
print("----------Testing PRISM Algorithm on weather dataset----------")

test_weather_rules = PRISM_algorithm(rows=data_rows, col_lst=col_lst, accuracy_thresh=1.0, coverage_thresh=3)
    
print("\n--------------- *Rules found* ---------------\n")

if (len(test_weather_rules)) == 0:
    print("No rules found!")
else:
    for rule in test_weather_rules:
        print(rule)

----------Testing PRISM Algorithm on weather dataset----------
['Yes', 'No']
highest node
None
highest node
If Outlook = Overcast, then Yes.
 Accuracy = 1.0. Coverage = 4

highest node
None
highest node
If Humidity = Normal, then Yes.
 Accuracy = 0.8. Coverage = 5

highest node
If Humidity = Normal and Windy = False, then Yes.
 Accuracy = 1.0. Coverage = 3

highest node
None
highest node
If Humidity = High, then No.
 Accuracy = 0.8. Coverage = 5

highest node
If Humidity = High and Outlook = Sunny, then No.
 Accuracy = 1.0. Coverage = 3

highest node
None
highest node
If Temp = Mild, then Yes.
 Accuracy = 0.6666666666666666. Coverage = 3

highest node
None
rules list
[<__main__.Rule object at 0x7fdb319a0c10>, <__main__.Rule object at 0x7fdb48bc20a0>, <__main__.Rule object at 0x7fdb57ea0730>, <__main__.Rule object at 0x7fdb43806be0>]

--------------- *Rules found* ---------------

If Outlook = Overcast, then Yes.
 Accuracy = 1.0. Coverage = 4

If Humidity = Normal and Windy = False, the

<b>Rules for weather dataset</b>

This is to test whether the algorith works fine for the dataset

Using accuracy_threshold = 1.0 and coverage_threshold = 3

If Outlook = Overcast, then Yes.
 Accuracy = 1.0. Coverage = 4

If Humidity = Normal and Windy = False, then Yes.
 Accuracy = 1.0. Coverage = 3

If Humidity = High and Outlook = Sunny, then No.
 Accuracy = 1.0. Coverage = 3
 
These rules are the same as the rules in step by step example given in slide.

<h2>Applying the PRISM Algorithm on Contact Lenses dataset</h2>

In [87]:
c_lenses_data_file = "../../data_ml_2020/contact_lenses.csv"

In [88]:
import pandas as pd
c_data = pd.read_csv(c_lenses_data_file)
c_data = c_data.dropna(how="any")
c_data.columns

Index(['id', 'age', 'spectacles', 'astigmatism', 'tear production rate',
       'lenses type'],
      dtype='object')

In [89]:
c_data_rows = c_data.to_numpy().tolist()
print("number of rows: ",len(c_data_rows))
print("\n")
print(c_data_rows[:5])

number of rows:  24


[[1, 1, 1, 1, 1, 3], [2, 1, 1, 1, 2, 2], [3, 1, 1, 2, 1, 3], [4, 1, 1, 2, 2, 1], [5, 1, 2, 1, 1, 3]]


In [90]:
c_col_lst = c_data.columns.to_numpy().tolist()

#take out id
c_col_lst = c_col_lst[1:]

print("number of columns: ", len(c_col_lst))
print("\n")
print(c_col_lst)

number of columns:  5


['age', 'spectacles', 'astigmatism', 'tear production rate', 'lenses type']


In [91]:
# converting from numeric to values that can be classified
# eg. from [1,1,1,1,3] to ['young', 'myope', 'no', 'reduced', 'none']
new_c_rows_lst = []
for c_list in c_data_rows:
    each_c_row_lst = []
    for i in range(len(c_list)):
        if i == 0: #the value at this index belongs to id
            continue
            
        new_str = ""
        if i == 1: # belongs to age
            
            if c_list[i] == 1:
                new_str = 'young'
            elif c_list[i] == 2:
                new_str = 'pre-presbyopic'
            elif c_list[i] == 3:
                new_str = 'presbyopic'
        elif i == 2: # belongs to spectacle prescription
            if c_list[i] == 1:
                new_str = 'myope'
            elif c_list[i] == 2:
                new_str = 'hypermetrope'
        elif i == 3: # belongs to Astigmatism
            if c_list[i] == 1:
                new_str = 'no'
            elif c_list[i] == 2:
                new_str = 'yes'
        elif i == 4: # belongs to Tear Production rate
            if c_list[i] == 1:
                new_str = 'reduced'
            elif c_list[i] == 2:
                new_str = 'normal'
        elif i == 5: # belongs to Recommended lenses
            if c_list[i] == 3:
                new_str = 'none'
            elif c_list[i] == 2:
                new_str = 'soft'
            elif c_list[i] == 1:
                new_str = 'hard'
                
        each_c_row_lst.append(new_str)
        
    new_c_rows_lst.append(each_c_row_lst)

print(new_c_rows_lst)

[['young', 'myope', 'no', 'reduced', 'none'], ['young', 'myope', 'no', 'normal', 'soft'], ['young', 'myope', 'yes', 'reduced', 'none'], ['young', 'myope', 'yes', 'normal', 'hard'], ['young', 'hypermetrope', 'no', 'reduced', 'none'], ['young', 'hypermetrope', 'no', 'normal', 'soft'], ['young', 'hypermetrope', 'yes', 'reduced', 'none'], ['young', 'hypermetrope', 'yes', 'normal', 'hard'], ['pre-presbyopic', 'myope', 'no', 'reduced', 'none'], ['pre-presbyopic', 'myope', 'no', 'normal', 'soft'], ['pre-presbyopic', 'myope', 'yes', 'reduced', 'none'], ['pre-presbyopic', 'myope', 'yes', 'normal', 'hard'], ['pre-presbyopic', 'hypermetrope', 'no', 'reduced', 'none'], ['pre-presbyopic', 'hypermetrope', 'no', 'normal', 'soft'], ['pre-presbyopic', 'hypermetrope', 'yes', 'reduced', 'none'], ['pre-presbyopic', 'hypermetrope', 'yes', 'normal', 'none'], ['presbyopic', 'myope', 'no', 'reduced', 'none'], ['presbyopic', 'myope', 'no', 'normal', 'none'], ['presbyopic', 'myope', 'yes', 'reduced', 'none'], [

In [129]:
# testing on small dataset
print("---------- PRISM Algorithm on Contact Lenses dataset----------")



c_lenses_rules = PRISM_algorithm(rows=new_c_rows_lst, 
                              col_lst=c_col_lst, accuracy_thresh=1.0, coverage_thresh=2)

print("\n--------------- *Rules found* ---------------\n")

if (len(c_lenses_rules)) == 0:
    print("No rules found!")
else:
    for rule in c_lenses_rules:
        print(rule)

---------- PRISM Algorithm on Contact Lenses dataset----------
['hard', 'none', 'soft']
highest node
None
highest node
If tear production rate = reduced, then none.
 Accuracy = 1.0. Coverage = 12

highest node
None
highest node
If astigmatism = no, then soft.
 Accuracy = 0.8333333333333334. Coverage = 6

highest node
If astigmatism = no and spectacles = hypermetrope, then soft.
 Accuracy = 1.0. Coverage = 3

highest node
None
highest node
If astigmatism = yes, then hard.
 Accuracy = 0.6666666666666666. Coverage = 6

highest node
If astigmatism = yes and spectacles = myope, then hard.
 Accuracy = 1.0. Coverage = 3

highest node
None
highest node
If age = presbyopic, then none.
 Accuracy = 1.0. Coverage = 2

highest node
None
highest node
If spectacles = myope, then soft.
 Accuracy = 1.0. Coverage = 2

highest node
None
highest node
If spectacles = hypermetrope, then none.
 Accuracy = 0.5. Coverage = 2

rules list
[<__main__.Rule object at 0x7fdb319a0820>, <__main__.Rule object at 0x7fdb

<b>Rules for Contact lens dataset</b>

I also find the rules manually for this contact lens dataset(dataset from the slide) and walk through it step by step on word document which I share with you on google classroom.

I see that rules found through step by step and rules found through running a program are the same.

I use accuracy threshold = 1.0 and coverage threshold = 2 and I get the following rules from doing manually.

Rule 1: If tear_prod_rate = reduced, then none

Accuracy = 1.0. Coverage = 12


Rule 2: If astigmatism=no and spectacles=hypermetrope, then soft

Accuracy = 1.0. Coverage = 3


Rule 3: If astigmatism=yes and spectacles=myope, then hard

Accuracy = 1.0. Coverage = 3


Rule 4: If age = presbyopic, then none

Accuracy = 1.0. Coverage = 2


Rule 5: If spectacles=myope, then soft	

Accuracy = 1.0. Coverage = 2


As you can see, the above rules produced by a program is the same as the one I do manually on word document.



<h2>Applying the PRISM Algorithm on COVID-19 dataset</h2>

In this dataset we have the following attributes:
1. sex: 1 -woman, 2-man
2. age: numeric
3. diabetes: yes/no
4. copd (chronic obstructive pulmonary disease): yes/no
5. asthma: yes/no
6. imm_supr (suppressed immune system): yes/no
7. hypertension: yes/no
8. cardiovascular: yes/no
9. renal_chronic: yes/no
10. tobacco: yes/no	
11. outcome: alive/dead

In [35]:
covid_data_file = "../../data_ml_2020/covid_categorical_good.csv"

In [36]:
import pandas as pd
data = pd.read_csv(covid_data_file)
data = data.dropna(how="any")
data.columns

Index(['sex', 'age', 'diabetes', 'copd', 'asthma', 'imm_supr', 'hypertension',
       'cardiovascular', 'obesity', 'renal_chronic', 'tobacco', 'outcome'],
      dtype='object')

In [42]:
covid_data_rows = data.to_numpy().tolist()
print("number of rows: ",len(covid_data_rows))
print("\n")
print(covid_data_rows[:5])

number of rows:  219179


[['male', 27, 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'alive'], ['male', 24, 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'alive'], ['female', 54, 'no', 'no', 'no', 'no', 'no', 'no', 'yes', 'no', 'no', 'alive'], ['male', 30, 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'alive'], ['female', 60, 'yes', 'no', 'no', 'no', 'yes', 'yes', 'no', 'no', 'no', 'dead']]


In [38]:
covid_col_lst = data.columns.to_numpy().tolist()
print("number of columns: ", len(covid_col_lst))
print("\n")
print(covid_col_lst)

number of columns:  12


['sex', 'age', 'diabetes', 'copd', 'asthma', 'imm_supr', 'hypertension', 'cardiovascular', 'obesity', 'renal_chronic', 'tobacco', 'outcome']


In [39]:

rule_node_testing = Rule(conditions={'age': 30},class_label='alive',accuracy=1.0,coverage=3)

get_remaining_attributes(attributes=covid_col_lst[:-1], rule_node=rule_node_testing)


['sex',
 'diabetes',
 'copd',
 'asthma',
 'imm_supr',
 'hypertension',
 'cardiovascular',
 'obesity',
 'renal_chronic',
 'tobacco']

In [40]:
# testing on small dataset
print("---------- PRISM Algorithm on small COVID dataset----------")



covid_rules = PRISM_algorithm(rows=covid_data_rows[:1000], 
                              col_lst=covid_col_lst, accuracy_thresh=0.6, coverage_thresh=30)

for rule in covid_rules:
    print(rule)



---------- PRISM Algorithm on small COVID dataset----------
['dead', 'alive']
highest node
None
highest node
If sex = female, then alive.
 Accuracy = 0.8698347107438017. Coverage = 484

highest node
If sex = female and hypertension = no, then alive.
 Accuracy = 0.9285714285714286. Coverage = 364

highest node
If sex = female and hypertension = no and diabetes = no, then alive.
 Accuracy = 0.9473684210526315. Coverage = 323

highest node
If sex = female and hypertension = no and diabetes = no and obesity = no, then alive.
 Accuracy = 0.9615384615384616. Coverage = 260

highest node
If sex = female and hypertension = no and diabetes = no and obesity = no and cardiovascular = no, then alive.
 Accuracy = 0.9649805447470817. Coverage = 257

highest node
If sex = female and hypertension = no and diabetes = no and obesity = no and cardiovascular = no and imm_supr = no, then alive.
 Accuracy = 0.9682539682539683. Coverage = 252

highest node
If sex = female and hypertension = no and diabetes =

highest node
If age >= 66, then dead.
 Accuracy = 0.6176470588235294. Coverage = 34

highest node
If age >= 66 and imm_supr = no, then dead.
 Accuracy = 0.6363636363636364. Coverage = 33

highest node
None
highest node
If diabetes = yes, then alive.
 Accuracy = 0.6578947368421053. Coverage = 76

highest node
If diabetes = yes and cardiovascular = no, then alive.
 Accuracy = 0.6811594202898551. Coverage = 69

highest node
If diabetes = yes and cardiovascular = no and sex = male, then alive.
 Accuracy = 0.7045454545454546. Coverage = 44

highest node
If diabetes = yes and cardiovascular = no and sex = male and imm_supr = no, then alive.
 Accuracy = 0.7380952380952381. Coverage = 42

highest node
If diabetes = yes and cardiovascular = no and sex = male and imm_supr = no and renal_chronic = no, then alive.
 Accuracy = 0.7435897435897436. Coverage = 39

highest node
If diabetes = yes and cardiovascular = no and sex = male and imm_supr = no and renal_chronic = no and tobacco = no, then alive

In [41]:

print("---------- PRISM Algorithm on COVID dataset----------")

#coverage_threshold_in_percent = 3
#coverage_thresh_val = int((coverage_threshold_in_percent/100) * len(covid_data_rows))
#print("Coverage Threshold in terms of number of rows: ",coverage_thresh_val)

# coverage_thresh is represented in terms of number of records covered by the rule.

covid_rules = PRISM_algorithm(rows=covid_data_rows, col_lst=covid_col_lst, 
                              accuracy_thresh=0.9, coverage_thresh=20)

if (len(covid_rules)) == 0:
    print("No rules found!")
else:
    for rule in covid_rules:
        print(rule)


---------- PRISM Algorithm on COVID dataset----------
['dead', 'alive']
highest node
None
highest node
If hypertension = no, then alive.
 Accuracy = 0.9118543984283984. Coverage = 175108

highest node
If hypertension = no and sex = female, then alive.
 Accuracy = 0.9427718841276063. Coverage = 79087

highest node
If hypertension = no and sex = female and diabetes = no, then alive.
 Accuracy = 0.9552886682291595. Coverage = 72107

highest node
If hypertension = no and sex = female and diabetes = no and tobacco = yes, then alive.
 Accuracy = 0.9643374891272832. Coverage = 3449

highest node
If hypertension = no and sex = female and diabetes = no and tobacco = yes and obesity = no, then alive.
 Accuracy = 0.9736316420295645. Coverage = 2503

highest node
If hypertension = no and sex = female and diabetes = no and tobacco = yes and obesity = no and asthma = yes, then alive.
 Accuracy = 0.9775280898876404. Coverage = 89

highest node
If hypertension = no and sex = female and diabetes = no a

highest node
None
highest node
If hypertension = no, then alive.
 Accuracy = 0.8860354235158442. Coverage = 116194

highest node
If hypertension = no and asthma = yes, then alive.
 Accuracy = 0.9117132867132867. Coverage = 2288

highest node
If hypertension = no and asthma = yes and diabetes = no, then alive.
 Accuracy = 0.9271558650241029. Coverage = 1867

highest node
If hypertension = no and asthma = yes and diabetes = no and obesity = no, then alive.
 Accuracy = 0.935832732516222. Coverage = 1387

highest node
If hypertension = no and asthma = yes and diabetes = no and obesity = no and cardiovascular = yes, then alive.
 Accuracy = 0.9565217391304348. Coverage = 23

highest node
None
highest node
If hypertension = no, then alive.
 Accuracy = 0.886021468352687. Coverage = 116171

highest node
If hypertension = no and asthma = yes, then alive.
 Accuracy = 0.9112582781456954. Coverage = 2265

highest node
If hypertension = no and asthma = yes and diabetes = no, then alive.
 Accuracy = 

highest node
If hypertension = no and diabetes = no and obesity = no and tobacco = yes, then alive.
 Accuracy = 0.9093071190740447. Coverage = 6307

highest node
If hypertension = no and diabetes = no and obesity = no and tobacco = yes and copd = no, then alive.
 Accuracy = 0.9174296926095488. Coverage = 6116

highest node
If hypertension = no and diabetes = no and obesity = no and tobacco = yes and copd = no and renal_chronic = no, then alive.
 Accuracy = 0.9192536327608983. Coverage = 6056

highest node
If hypertension = no and diabetes = no and obesity = no and tobacco = yes and copd = no and renal_chronic = no and imm_supr = no, then alive.
 Accuracy = 0.9202607823470411. Coverage = 5982

highest node
If hypertension = no and diabetes = no and obesity = no and tobacco = yes and copd = no and renal_chronic = no and imm_supr = no and cardiovascular = no, then alive.
 Accuracy = 0.9211281878061138. Coverage = 5921

highest node
None
highest node
If hypertension = no, then alive.
 Accu

highest node
If asthma = yes and sex = female and tobacco = yes, then alive.
 Accuracy = 0.9292929292929293. Coverage = 99

highest node
If asthma = yes and sex = female and tobacco = yes and imm_supr = yes, then alive.
 Accuracy = 1.0. Coverage = 31

highest node
None
highest node
If diabetes = no, then alive.
 Accuracy = 0.8119810201660735. Coverage = 40464

highest node
If diabetes = no and hypertension = no, then alive.
 Accuracy = 0.8467364097297991. Coverage = 15581

highest node
If diabetes = no and hypertension = no and obesity = yes, then alive.
 Accuracy = 0.8636009353078722. Coverage = 11547

highest node
If diabetes = no and hypertension = no and obesity = yes and copd = no, then alive.
 Accuracy = 0.8669447896325226. Coverage = 11266

highest node
If diabetes = no and hypertension = no and obesity = yes and copd = no and cardiovascular = no, then alive.
 Accuracy = 0.8690334945696815. Coverage = 10957

highest node
If diabetes = no and hypertension = no and obesity = yes a

highest node
None
highest node
If diabetes = no, then alive.
 Accuracy = 0.7885397791377483. Coverage = 29249

highest node
If diabetes = no and sex = female, then alive.
 Accuracy = 0.8201999527670629. Coverage = 12703

highest node
If diabetes = no and sex = female and copd = no, then alive.
 Accuracy = 0.8300162296062185. Coverage = 11707

highest node
If diabetes = no and sex = female and copd = no and hypertension = no, then alive.
 Accuracy = 0.8510808646917534. Coverage = 1249

highest node
If diabetes = no and sex = female and copd = no and hypertension = no and renal_chronic = no, then alive.
 Accuracy = 0.860774818401937. Coverage = 826

highest node
If diabetes = no and sex = female and copd = no and hypertension = no and renal_chronic = no and asthma = no, then alive.
 Accuracy = 0.8686868686868687. Coverage = 792

highest node
If diabetes = no and sex = female and copd = no and hypertension = no and renal_chronic = no and asthma = no and cardiovascular = no, then alive.
 A

highest node
None
highest node
If asthma = yes, then alive.
 Accuracy = 0.7824074074074074. Coverage = 1512

highest node
If asthma = yes and sex = female, then alive.
 Accuracy = 0.7991913746630728. Coverage = 742

highest node
If asthma = yes and sex = female and diabetes = no, then alive.
 Accuracy = 0.8201219512195121. Coverage = 328

highest node
If asthma = yes and sex = female and diabetes = no and imm_supr = no, then alive.
 Accuracy = 0.84. Coverage = 275

highest node
If asthma = yes and sex = female and diabetes = no and imm_supr = no and age >= 75, then alive.
 Accuracy = 0.8636363636363636. Coverage = 22

highest node
None
highest node
If asthma = yes, then alive.
 Accuracy = 0.7812080536912752. Coverage = 1490

highest node
If asthma = yes and copd = no, then alive.
 Accuracy = 0.7976. Coverage = 1250

highest node
If asthma = yes and copd = no and renal_chronic = no, then alive.
 Accuracy = 0.8083832335329342. Coverage = 1169

highest node
If asthma = yes and copd = no a

highest node
None
highest node
If asthma = yes, then alive.
 Accuracy = 0.7684691546077684. Coverage = 1313

highest node
If asthma = yes and copd = no, then alive.
 Accuracy = 0.7847157502329916. Coverage = 1073

highest node
If asthma = yes and copd = no and renal_chronic = no, then alive.
 Accuracy = 0.7963709677419355. Coverage = 992

highest node
If asthma = yes and copd = no and renal_chronic = no and cardiovascular = no, then alive.
 Accuracy = 0.8053993250843644. Coverage = 889

highest node
If asthma = yes and copd = no and renal_chronic = no and cardiovascular = no and diabetes = yes, then alive.
 Accuracy = 0.8140350877192982. Coverage = 570

highest node
If asthma = yes and copd = no and renal_chronic = no and cardiovascular = no and diabetes = yes and tobacco = yes, then alive.
 Accuracy = 0.8620689655172413. Coverage = 29

highest node
If asthma = yes and copd = no and renal_chronic = no and cardiovascular = no and diabetes = yes and tobacco = yes and hypertension = yes, 

highest node
None
highest node
If diabetes = no, then alive.
 Accuracy = 0.7596270988929499. Coverage = 18879

highest node
If diabetes = no and copd = no, then alive.
 Accuracy = 0.7702865398507104. Coverage = 16612

highest node
If diabetes = no and copd = no and hypertension = no, then alive.
 Accuracy = 0.7815668202764977. Coverage = 2170

highest node
If diabetes = no and copd = no and hypertension = no and cardiovascular = yes, then alive.
 Accuracy = 0.7959949937421777. Coverage = 799

highest node
If diabetes = no and copd = no and hypertension = no and cardiovascular = yes and obesity = no, then alive.
 Accuracy = 0.8094435075885329. Coverage = 593

highest node
If diabetes = no and copd = no and hypertension = no and cardiovascular = yes and obesity = no and imm_supr = no, then alive.
 Accuracy = 0.8110918544194108. Coverage = 577

highest node
If diabetes = no and copd = no and hypertension = no and cardiovascular = yes and obesity = no and imm_supr = no and renal_chronic = 

highest node
If diabetes = no and copd = no and renal_chronic = no and cardiovascular = no and tobacco = no, then alive.
 Accuracy = 0.7777777777777778. Coverage = 8469

highest node
If diabetes = no and copd = no and renal_chronic = no and cardiovascular = no and tobacco = no and asthma = yes, then alive.
 Accuracy = 0.7840909090909091. Coverage = 176

highest node
If diabetes = no and copd = no and renal_chronic = no and cardiovascular = no and tobacco = no and asthma = yes and imm_supr = no, then alive.
 Accuracy = 0.8110236220472441. Coverage = 127

highest node
None
highest node
If diabetes = no, then alive.
 Accuracy = 0.7473074851911685. Coverage = 14856

highest node
If diabetes = no and copd = no, then alive.
 Accuracy = 0.7591548176979903. Coverage = 12589

highest node
If diabetes = no and copd = no and renal_chronic = no, then alive.
 Accuracy = 0.7688497523390203. Coverage = 10902

highest node
If diabetes = no and copd = no and renal_chronic = no and cardiovascular = no, 

highest node
If hypertension = no and obesity = yes and copd = no and tobacco = no, then alive.
 Accuracy = 0.7657869934024505. Coverage = 2122

highest node
If hypertension = no and obesity = yes and copd = no and tobacco = no and imm_supr = no, then alive.
 Accuracy = 0.7723029045643154. Coverage = 1928

highest node
If hypertension = no and obesity = yes and copd = no and tobacco = no and imm_supr = no and cardiovascular = no, then alive.
 Accuracy = 0.7762611275964392. Coverage = 1685

highest node
If hypertension = no and obesity = yes and copd = no and tobacco = no and imm_supr = no and cardiovascular = no and diabetes = no, then alive.
 Accuracy = 0.7794117647058824. Coverage = 68

highest node
If hypertension = no and obesity = yes and copd = no and tobacco = no and imm_supr = no and cardiovascular = no and diabetes = no and age >= 32, then alive.
 Accuracy = 0.803921568627451. Coverage = 51

highest node
If hypertension = no and obesity = yes and copd = no and tobacco = no and

highest node
None
highest node
If obesity = yes, then alive.
 Accuracy = 0.7068223028105167. Coverage = 8824

highest node
If obesity = yes and diabetes = no, then alive.
 Accuracy = 0.7230619210141395. Coverage = 2051

highest node
If obesity = yes and diabetes = no and copd = no, then alive.
 Accuracy = 0.738014854827819. Coverage = 1481

highest node
If obesity = yes and diabetes = no and copd = no and tobacco = yes, then alive.
 Accuracy = 0.7629233511586453. Coverage = 561

highest node
If obesity = yes and diabetes = no and copd = no and tobacco = yes and renal_chronic = no, then alive.
 Accuracy = 0.7782026768642447. Coverage = 523

highest node
If obesity = yes and diabetes = no and copd = no and tobacco = yes and renal_chronic = no and cardiovascular = no, then alive.
 Accuracy = 0.7880184331797235. Coverage = 434

highest node
If obesity = yes and diabetes = no and copd = no and tobacco = yes and renal_chronic = no and cardiovascular = no and hypertension = yes, then alive.
 

highest node
If renal_chronic = no, then alive.
 Accuracy = 0.692210628488231. Coverage = 20605

highest node
If renal_chronic = no and sex = female, then alive.
 Accuracy = 0.7123812225216282. Coverage = 7051

highest node
If renal_chronic = no and sex = female and copd = no, then alive.
 Accuracy = 0.7217771303714494. Coverage = 5492

highest node
If renal_chronic = no and sex = female and copd = no and diabetes = no, then alive.
 Accuracy = 0.7262247838616714. Coverage = 347

highest node
If renal_chronic = no and sex = female and copd = no and diabetes = no and imm_supr = no, then alive.
 Accuracy = 0.7401960784313726. Coverage = 204

highest node
If renal_chronic = no and sex = female and copd = no and diabetes = no and imm_supr = no and age >= 23, then alive.
 Accuracy = 0.75. Coverage = 196

highest node
If renal_chronic = no and sex = female and copd = no and diabetes = no and imm_supr = no and age >= 23 and tobacco = no, then alive.
 Accuracy = 0.7528089887640449. Coverage = 1

highest node
If hypertension = no and diabetes = no, then alive.
 Accuracy = 0.7059538274605103. Coverage = 1646

highest node
If hypertension = no and diabetes = no and cardiovascular = yes, then alive.
 Accuracy = 0.7211895910780669. Coverage = 269

highest node
If hypertension = no and diabetes = no and cardiovascular = yes and copd = no, then alive.
 Accuracy = 0.7435897435897436. Coverage = 195

highest node
If hypertension = no and diabetes = no and cardiovascular = yes and copd = no and tobacco = yes, then alive.
 Accuracy = 0.7647058823529411. Coverage = 34

highest node
If hypertension = no and diabetes = no and cardiovascular = yes and copd = no and tobacco = yes and sex = male, then alive.
 Accuracy = 0.7931034482758621. Coverage = 29

highest node
If hypertension = no and diabetes = no and cardiovascular = yes and copd = no and tobacco = yes and sex = male and obesity = yes, then alive.
 Accuracy = 0.8076923076923077. Coverage = 26

highest node
None
highest node
If hyperte

highest node
None
highest node
If renal_chronic = no, then alive.
 Accuracy = 0.6750992397228016. Coverage = 14863

highest node
If renal_chronic = no and obesity = yes, then alive.
 Accuracy = 0.6894220283533261. Coverage = 4585

highest node
If renal_chronic = no and obesity = yes and copd = no, then alive.
 Accuracy = 0.7010647010647011. Coverage = 3663

highest node
If renal_chronic = no and obesity = yes and copd = no and hypertension = no, then alive.
 Accuracy = 0.7152658662092625. Coverage = 583

highest node
If renal_chronic = no and obesity = yes and copd = no and hypertension = no and cardiovascular = yes, then alive.
 Accuracy = 0.7289719626168224. Coverage = 107

highest node
If renal_chronic = no and obesity = yes and copd = no and hypertension = no and cardiovascular = yes and sex = male, then alive.
 Accuracy = 0.75. Coverage = 48

highest node
If renal_chronic = no and obesity = yes and copd = no and hypertension = no and cardiovascular = yes and sex = male and imm_sup

highest node
If renal_chronic = no and obesity = yes and copd = no and sex = male and cardiovascular = no and imm_supr = no and tobacco = yes, then alive.
 Accuracy = 0.7086092715231788. Coverage = 302

highest node
If renal_chronic = no and obesity = yes and copd = no and sex = male and cardiovascular = no and imm_supr = no and tobacco = yes and age >= 25, then alive.
 Accuracy = 0.71. Coverage = 300

highest node
None
highest node
If diabetes = no, then alive.
 Accuracy = 0.6722370766488414. Coverage = 4488

highest node
If diabetes = no and tobacco = yes, then alive.
 Accuracy = 0.6879323597232898. Coverage = 1301

highest node
If diabetes = no and tobacco = yes and copd = no, then alive.
 Accuracy = 0.7218468468468469. Coverage = 888

highest node
If diabetes = no and tobacco = yes and copd = no and obesity = no, then alive.
 Accuracy = 0.7355889724310777. Coverage = 798

highest node
If diabetes = no and tobacco = yes and copd = no and obesity = no and renal_chronic = no, then ali

highest node
None
highest node
If renal_chronic = no, then alive.
 Accuracy = 0.6581570856237772. Coverage = 10733

highest node
If renal_chronic = no and copd = no, then alive.
 Accuracy = 0.6650760897601238. Coverage = 7754

highest node
If renal_chronic = no and copd = no and tobacco = no, then alive.
 Accuracy = 0.6690616467883317. Coverage = 6959

highest node
If renal_chronic = no and copd = no and tobacco = no and hypertension = no, then alive.
 Accuracy = 0.678743961352657. Coverage = 414

highest node
If renal_chronic = no and copd = no and tobacco = no and hypertension = no and sex = female, then alive.
 Accuracy = 0.6870229007633588. Coverage = 131

highest node
If renal_chronic = no and copd = no and tobacco = no and hypertension = no and sex = female and obesity = no, then alive.
 Accuracy = 0.7087378640776699. Coverage = 103

highest node
If renal_chronic = no and copd = no and tobacco = no and hypertension = no and sex = female and obesity = no and asthma = no, then aliv

highest node
If renal_chronic = no and copd = no and cardiovascular = no, then alive.
 Accuracy = 0.6636136552872606. Coverage = 6005

highest node
If renal_chronic = no and copd = no and cardiovascular = no and tobacco = no, then alive.
 Accuracy = 0.6659881569207994. Coverage = 5404

highest node
If renal_chronic = no and copd = no and cardiovascular = no and tobacco = no and imm_supr = no, then alive.
 Accuracy = 0.6675424413904608. Coverage = 4948

highest node
If renal_chronic = no and copd = no and cardiovascular = no and tobacco = no and imm_supr = no and sex = male, then alive.
 Accuracy = 0.6678151601134982. Coverage = 4934

highest node
If renal_chronic = no and copd = no and cardiovascular = no and tobacco = no and imm_supr = no and sex = male and age >= 1, then alive.
 Accuracy = 0.6678832116788321. Coverage = 4932

highest node
If renal_chronic = no and copd = no and cardiovascular = no and tobacco = no and imm_supr = no and sex = male and age >= 1 and asthma = no, then al

highest node
If diabetes = no, then alive.
 Accuracy = 0.6338083927157562. Coverage = 2526

highest node
If diabetes = no and copd = no, then alive.
 Accuracy = 0.6410891089108911. Coverage = 1212

highest node
If diabetes = no and copd = no and sex = female, then alive.
 Accuracy = 0.6486486486486487. Coverage = 444

highest node
If diabetes = no and copd = no and sex = female and cardiovascular = no, then alive.
 Accuracy = 0.6630434782608695. Coverage = 368

highest node
If diabetes = no and copd = no and sex = female and cardiovascular = no and imm_supr = yes, then alive.
 Accuracy = 0.676923076923077. Coverage = 65

highest node
If diabetes = no and copd = no and sex = female and cardiovascular = no and imm_supr = yes and obesity = no, then alive.
 Accuracy = 0.6944444444444444. Coverage = 36

highest node
If diabetes = no and copd = no and sex = female and cardiovascular = no and imm_supr = yes and obesity = no and age >= 34, then alive.
 Accuracy = 0.72. Coverage = 25

highest n

highest node
If renal_chronic = no and sex = female, then alive.
 Accuracy = 0.6337926033357505. Coverage = 1379

highest node
If renal_chronic = no and sex = female and diabetes = yes, then alive.
 Accuracy = 0.6463963963963963. Coverage = 888

highest node
If renal_chronic = no and sex = female and diabetes = yes and age >= 87, then alive.
 Accuracy = 0.68. Coverage = 25

highest node
If renal_chronic = no and sex = female and diabetes = yes and age >= 87 and hypertension = yes, then alive.
 Accuracy = 0.7142857142857143. Coverage = 21

highest node
If renal_chronic = no and sex = female and diabetes = yes and age >= 87 and hypertension = yes and asthma = no, then alive.
 Accuracy = 0.7. Coverage = 20

highest node
None
highest node
If renal_chronic = no, then alive.
 Accuracy = 0.6222281592305637. Coverage = 3743

highest node
If renal_chronic = no and sex = female, then alive.
 Accuracy = 0.6328182487122884. Coverage = 1359

highest node
If renal_chronic = no and sex = female and d

highest node
If diabetes = no, then alive.
 Accuracy = 0.615765989092712. Coverage = 2017

highest node
If diabetes = no and tobacco = no, then alive.
 Accuracy = 0.6233766233766234. Coverage = 1540

highest node
If diabetes = no and tobacco = no and sex = male, then alive.
 Accuracy = 0.6276150627615062. Coverage = 956

highest node
If diabetes = no and tobacco = no and sex = male and obesity = yes, then alive.
 Accuracy = 0.6408450704225352. Coverage = 142

highest node
If diabetes = no and tobacco = no and sex = male and obesity = yes and copd = yes, then alive.
 Accuracy = 0.6865671641791045. Coverage = 67

highest node
If diabetes = no and tobacco = no and sex = male and obesity = yes and copd = yes and cardiovascular = no, then alive.
 Accuracy = 0.7222222222222222. Coverage = 54

highest node
None
highest node
If renal_chronic = no, then alive.
 Accuracy = 0.6136692730958587. Coverage = 3453

highest node
If renal_chronic = no and copd = no, then alive.
 Accuracy = 0.62442040185

highest node
If diabetes = no, then alive.
 Accuracy = 0.6060606060606061. Coverage = 1782

highest node
If diabetes = no and obesity = no, then alive.
 Accuracy = 0.6146304675716441. Coverage = 1326

highest node
If diabetes = no and obesity = no and renal_chronic = no, then alive.
 Accuracy = 0.62109375. Coverage = 768

highest node
If diabetes = no and obesity = no and renal_chronic = no and sex = male, then alive.
 Accuracy = 0.6270833333333333. Coverage = 480

highest node
If diabetes = no and obesity = no and renal_chronic = no and sex = male and imm_supr = no, then alive.
 Accuracy = 0.6304347826086957. Coverage = 460

highest node
If diabetes = no and obesity = no and renal_chronic = no and sex = male and imm_supr = no and tobacco = no, then alive.
 Accuracy = 0.636734693877551. Coverage = 245

highest node
If diabetes = no and obesity = no and renal_chronic = no and sex = male and imm_supr = no and tobacco = no and cardiovascular = no, then alive.
 Accuracy = 0.642533936651583

highest node
If renal_chronic = no, then alive.
 Accuracy = 0.5936468646864687. Coverage = 2424

highest node
If renal_chronic = no and hypertension = no, then alive.
 Accuracy = 0.6090425531914894. Coverage = 752

highest node
If renal_chronic = no and hypertension = no and cardiovascular = no, then alive.
 Accuracy = 0.6273584905660378. Coverage = 636

highest node
If renal_chronic = no and hypertension = no and cardiovascular = no and obesity = no, then alive.
 Accuracy = 0.639821029082774. Coverage = 447

highest node
If renal_chronic = no and hypertension = no and cardiovascular = no and obesity = no and diabetes = yes, then alive.
 Accuracy = 0.6444444444444445. Coverage = 315

highest node
If renal_chronic = no and hypertension = no and cardiovascular = no and obesity = no and diabetes = yes and tobacco = yes, then alive.
 Accuracy = 0.6582278481012658. Coverage = 79

highest node
If renal_chronic = no and hypertension = no and cardiovascular = no and obesity = no and diabetes =

highest node
If diabetes = no, then alive.
 Accuracy = 0.5827402135231317. Coverage = 1124

highest node
If diabetes = no and sex = female, then alive.
 Accuracy = 0.595903165735568. Coverage = 537

highest node
If diabetes = no and sex = female and tobacco = no, then alive.
 Accuracy = 0.608. Coverage = 500

highest node
If diabetes = no and sex = female and tobacco = no and obesity = no, then alive.
 Accuracy = 0.6163522012578616. Coverage = 318

highest node
If diabetes = no and sex = female and tobacco = no and obesity = no and age >= 40, then alive.
 Accuracy = 0.6232394366197183. Coverage = 284

highest node
If diabetes = no and sex = female and tobacco = no and obesity = no and age >= 40 and cardiovascular = yes, then alive.
 Accuracy = 0.6333333333333333. Coverage = 30

highest node
If diabetes = no and sex = female and tobacco = no and obesity = no and age >= 40 and cardiovascular = yes and asthma = no, then alive.
 Accuracy = 0.6551724137931034. Coverage = 29

highest node
If

highest node
If diabetes = no, then alive.
 Accuracy = 0.5712515489467163. Coverage = 807

highest node
If diabetes = no and cardiovascular = no, then alive.
 Accuracy = 0.5819935691318328. Coverage = 622

highest node
If diabetes = no and cardiovascular = no and hypertension = no, then alive.
 Accuracy = 0.5951219512195122. Coverage = 205

highest node
If diabetes = no and cardiovascular = no and hypertension = no and renal_chronic = no, then alive.
 Accuracy = 0.6190476190476191. Coverage = 168

highest node
If diabetes = no and cardiovascular = no and hypertension = no and renal_chronic = no and imm_supr = no, then alive.
 Accuracy = 0.6363636363636364. Coverage = 154

highest node
If diabetes = no and cardiovascular = no and hypertension = no and renal_chronic = no and imm_supr = no and sex = male, then alive.
 Accuracy = 0.6418918918918919. Coverage = 148

highest node
If diabetes = no and cardiovascular = no and hypertension = no and renal_chronic = no and imm_supr = no and sex =

highest node
If obesity = yes, then alive.
 Accuracy = 0.5534962089300758. Coverage = 1187

highest node
If obesity = yes and imm_supr = no, then alive.
 Accuracy = 0.5619918699186992. Coverage = 984

highest node
If obesity = yes and imm_supr = no and tobacco = yes, then alive.
 Accuracy = 0.5807692307692308. Coverage = 260

highest node
If obesity = yes and imm_supr = no and tobacco = yes and diabetes = yes, then alive.
 Accuracy = 0.6149068322981367. Coverage = 161

highest node
If obesity = yes and imm_supr = no and tobacco = yes and diabetes = yes and age >= 76, then alive.
 Accuracy = 0.7. Coverage = 20

highest node
None
highest node
If age >= 76, then dead.
 Accuracy = 0.5538922155688623. Coverage = 334

highest node
If age >= 76 and diabetes = no, then dead.
 Accuracy = 0.6065573770491803. Coverage = 61

highest node
If age >= 76 and diabetes = no and cardiovascular = no, then dead.
 Accuracy = 0.6571428571428571. Coverage = 35

highest node
If age >= 76 and diabetes = no and 

highest node
If obesity = yes, then alive.
 Accuracy = 0.5559599636032757. Coverage = 1099

highest node
If obesity = yes and renal_chronic = no, then alive.
 Accuracy = 0.5654205607476636. Coverage = 428

highest node
If obesity = yes and renal_chronic = no and diabetes = yes, then alive.
 Accuracy = 0.5784615384615385. Coverage = 325

highest node
If obesity = yes and renal_chronic = no and diabetes = yes and asthma = no, then alive.
 Accuracy = 0.5884353741496599. Coverage = 294

highest node
If obesity = yes and renal_chronic = no and diabetes = yes and asthma = no and sex = male, then alive.
 Accuracy = 0.609375. Coverage = 192

highest node
If obesity = yes and renal_chronic = no and diabetes = yes and asthma = no and sex = male and cardiovascular = yes, then alive.
 Accuracy = 0.6363636363636364. Coverage = 55

highest node
If obesity = yes and renal_chronic = no and diabetes = yes and asthma = no and sex = male and cardiovascular = yes and imm_supr = no, then alive.
 Accuracy =

highest node
If renal_chronic = no, then alive.
 Accuracy = 0.5460277427490542. Coverage = 793

highest node
If renal_chronic = no and age >= 73, then alive.
 Accuracy = 0.625. Coverage = 24

highest node
If renal_chronic = no and age >= 73 and imm_supr = no, then alive.
 Accuracy = 0.6818181818181818. Coverage = 22

highest node
If renal_chronic = no and age >= 73 and imm_supr = no and hypertension = yes, then alive.
 Accuracy = 0.65. Coverage = 20

highest node
None
highest node
If age >= 73, then dead.
 Accuracy = 0.5514018691588785. Coverage = 214

highest node
If age >= 73 and hypertension = no, then dead.
 Accuracy = 0.6296296296296297. Coverage = 27

highest node
If age >= 73 and hypertension = no and obesity = no, then dead.
 Accuracy = 0.68. Coverage = 25

highest node
None
highest node
If sex = female, then alive.
 Accuracy = 0.5463494667760459. Coverage = 1219

highest node
If sex = female and asthma = yes, then alive.
 Accuracy = 0.5535714285714286. Coverage = 56

highest n

highest node
If age >= 73, then dead.
 Accuracy = 0.543010752688172. Coverage = 186

highest node
If age >= 73 and obesity = yes, then dead.
 Accuracy = 0.5897435897435898. Coverage = 39

highest node
If age >= 73 and obesity = yes and copd = no, then dead.
 Accuracy = 0.6333333333333333. Coverage = 30

highest node
None
highest node
If sex = female, then alive.
 Accuracy = 0.5452127659574468. Coverage = 1128

highest node
If sex = female and cardiovascular = no, then alive.
 Accuracy = 0.5520504731861199. Coverage = 951

highest node
If sex = female and cardiovascular = no and diabetes = no, then alive.
 Accuracy = 0.5689655172413793. Coverage = 58

highest node
If sex = female and cardiovascular = no and diabetes = no and tobacco = no, then alive.
 Accuracy = 0.6122448979591837. Coverage = 49

highest node
If sex = female and cardiovascular = no and diabetes = no and tobacco = no and hypertension = yes, then alive.
 Accuracy = 0.6666666666666666. Coverage = 27

highest node
None
high

highest node
If age >= 68, then dead.
 Accuracy = 0.5457875457875457. Coverage = 273

highest node
If age >= 68 and obesity = yes, then dead.
 Accuracy = 0.6. Coverage = 40

highest node
If age >= 68 and obesity = yes and renal_chronic = no, then dead.
 Accuracy = 0.65. Coverage = 20

highest node
None
highest node
If cardiovascular = no, then alive.
 Accuracy = 0.5384615384615384. Coverage = 1859

highest node
If cardiovascular = no and obesity = yes, then alive.
 Accuracy = 0.5495750708215298. Coverage = 353

highest node
If cardiovascular = no and obesity = yes and age >= 62, then alive.
 Accuracy = 0.5806451612903226. Coverage = 62

highest node
If cardiovascular = no and obesity = yes and age >= 62 and tobacco = no, then alive.
 Accuracy = 0.6140350877192983. Coverage = 57

highest node
None
highest node
If cardiovascular = no, then alive.
 Accuracy = 0.5360710321864595. Coverage = 1802

highest node
If cardiovascular = no and sex = female, then alive.
 Accuracy = 0.54532163742690

highest node
If age >= 64, then dead.
 Accuracy = 0.6. Coverage = 40

highest node
If age >= 64 and obesity = no, then dead.
 Accuracy = 0.6666666666666666. Coverage = 30

highest node
None
highest node
If age >= 61, then dead.
 Accuracy = 0.5423728813559322. Coverage = 177

highest node
If age >= 61 and copd = yes, then dead.
 Accuracy = 0.6888888888888889. Coverage = 45

highest node
If age >= 61 and copd = yes and renal_chronic = no, then dead.
 Accuracy = 0.72. Coverage = 25

highest node
If age >= 61 and copd = yes and renal_chronic = no and cardiovascular = no, then dead.
 Accuracy = 0.75. Coverage = 20

highest node
None
highest node
If renal_chronic = no, then alive.
 Accuracy = 0.5451895043731778. Coverage = 343

highest node
If renal_chronic = no and copd = yes, then alive.
 Accuracy = 0.5625. Coverage = 192

highest node
If renal_chronic = no and copd = yes and diabetes = no, then alive.
 Accuracy = 0.5873015873015873. Coverage = 63

highest node
If renal_chronic = no and co

highest node
If sex = male, then alive.
 Accuracy = 0.5341726618705036. Coverage = 556

highest node
If sex = male and age >= 60, then alive.
 Accuracy = 0.55. Coverage = 20

highest node
None
highest node
If tobacco = yes, then alive.
 Accuracy = 0.5384615384615384. Coverage = 117

highest node
If tobacco = yes and age >= 51, then alive.
 Accuracy = 0.6226415094339622. Coverage = 53

highest node
If tobacco = yes and age >= 51 and sex = male, then alive.
 Accuracy = 0.6818181818181818. Coverage = 44

highest node
If tobacco = yes and age >= 51 and sex = male and renal_chronic = yes, then alive.
 Accuracy = 0.75. Coverage = 28

highest node
If tobacco = yes and age >= 51 and sex = male and renal_chronic = yes and copd = no, then alive.
 Accuracy = 0.7619047619047619. Coverage = 21

highest node
None
highest node
If age >= 57, then dead.
 Accuracy = 0.5324675324675324. Coverage = 154

highest node
If age >= 57 and sex = female, then dead.
 Accuracy = 0.631578947368421. Coverage = 57

hi

highest node
If age >= 53, then dead.
 Accuracy = 0.5490196078431373. Coverage = 51

highest node
If age >= 53 and sex = male, then dead.
 Accuracy = 0.5862068965517241. Coverage = 29

highest node
None
highest node
If sex = male, then alive.
 Accuracy = 0.5664335664335665. Coverage = 143

highest node
If sex = male and age >= 51, then alive.
 Accuracy = 0.5833333333333334. Coverage = 24

highest node
None
highest node
If sex = male, then alive.
 Accuracy = 0.5630252100840336. Coverage = 119

highest node
If sex = male and age >= 43, then alive.
 Accuracy = 0.5694444444444444. Coverage = 72

highest node
None
highest node
If obesity = no, then alive.
 Accuracy = 0.5555555555555556. Coverage = 198

highest node
If obesity = no and asthma = no, then alive.
 Accuracy = 0.5647668393782384. Coverage = 193

highest node
If obesity = no and asthma = no and sex = male, then alive.
 Accuracy = 0.5714285714285714. Coverage = 42

highest node
If obesity = no and asthma = no and sex = male and cop

<h3>Summary of the rules discovered from the dataset</h3>

<b>Using Modified version</b>

<b>For accuracy_threshold = 0.9, coverage_thresh = 20:</b>

    If hypertension = no and sex = female and diabetes = no and tobacco = yes and obesity = no and asthma = yes and copd = no, then alive.
     Accuracy = 0.9886363636363636. Coverage = 88

    If hypertension = no and sex = female and diabetes = no and tobacco = yes and obesity = no and copd = no and cardiovascular = yes, then alive.
     Accuracy = 1.0. Coverage = 35

    If hypertension = no and sex = female and diabetes = no and tobacco = yes and obesity = no and copd = no and imm_supr = no and renal_chronic = no, then alive.
     Accuracy = 0.9762624082865775. Coverage = 2317

    If hypertension = no and sex = female and diabetes = no and asthma = yes and obesity = no and imm_supr = no and copd = no and cardiovascular = no, then alive.
     Accuracy = 0.9673784104389087. Coverage = 1686

    If hypertension = no and sex = female and diabetes = no and obesity = no and copd = no and imm_supr = no and renal_chronic = no and cardiovascular = no and age >= 2, then alive.
     Accuracy = 0.9620346479911537. Coverage = 54260

    If hypertension = no and asthma = yes and diabetes = no and copd = no and imm_supr = no and sex = female and tobacco = no and cardiovascular = yes, then alive.
     Accuracy = 0.9629629629629629. Coverage = 27

    If hypertension = no and asthma = yes and diabetes = no and copd = no and imm_supr = no and sex = female and tobacco = no and age >= 24, then alive.
     Accuracy = 0.9580838323353293. Coverage = 501

    If hypertension = no and asthma = yes and diabetes = no and obesity = no and cardiovascular = yes, then alive.
     Accuracy = 0.9565217391304348. Coverage = 23

    If hypertension = no and asthma = yes and diabetes = no and obesity = no and copd = no and imm_supr = no and renal_chronic = no and tobacco = no, then alive.
     Accuracy = 0.9509043927648578. Coverage = 1161

    If hypertension = no and diabetes = no and sex = female and tobacco = yes and obesity = yes and cardiovascular = no and asthma = yes and renal_chronic = no and age >= 37, then alive.
     Accuracy = 1.0. Coverage = 28

    If hypertension = no and diabetes = no and sex = female and obesity = yes and tobacco = yes and cardiovascular = no and copd = no and asthma = no and imm_supr = no, then alive.
     Accuracy = 0.9456264775413712. Coverage = 846

    If hypertension = no and diabetes = no and sex = female and obesity = yes and copd = no and cardiovascular = no and imm_supr = no and renal_chronic = no and age >= 1 and asthma = no, then alive.
     Accuracy = 0.9367217799550929. Coverage = 9798

    If hypertension = no and diabetes = no and obesity = no and tobacco = yes and copd = no and asthma = yes and imm_supr = no, then alive.
     Accuracy = 0.9306930693069307. Coverage = 101

    If hypertension = no and diabetes = no and obesity = no and tobacco = yes and copd = no and renal_chronic = no and imm_supr = no and cardiovascular = no, then alive.
     Accuracy = 0.9211281878061138. Coverage = 5921

    If hypertension = no and diabetes = no and obesity = no and copd = no and renal_chronic = no and imm_supr = no and sex = female and cardiovascular = no, then alive.
     Accuracy = 0.9603960396039604. Coverage = 303

    If hypertension = no and diabetes = no and obesity = no and copd = no and renal_chronic = no and imm_supr = no and cardiovascular = no, then alive.
     Accuracy = 0.9117814456459875. Coverage = 63252

    If asthma = yes and hypertension = no and obesity = yes and tobacco = yes and renal_chronic = no and sex = male and copd = no, then alive.
     Accuracy = 0.9642857142857143. Coverage = 56

    If asthma = yes and hypertension = no and obesity = yes and diabetes = no and sex = male and renal_chronic = no and copd = no and imm_supr = no, then alive.
     Accuracy = 0.9177215189873418. Coverage = 316

    If diabetes = no and hypertension = no and obesity = yes and tobacco = yes and copd = no and cardiovascular = no and sex = female and renal_chronic = no, then alive.
     Accuracy = 0.90625. Coverage = 32

    If asthma = yes and sex = female and tobacco = yes and imm_supr = yes, then alive.
     Accuracy = 1.0. Coverage = 31

    If diabetes = no and hypertension = no and obesity = yes and copd = no and cardiovascular = no and imm_supr = no and renal_chronic = no and asthma = yes, then alive.
     Accuracy = 0.92. Coverage = 25

    If asthma = yes and sex = female and tobacco = yes and hypertension = yes and renal_chronic = no and obesity = yes, then alive.
     Accuracy = 1.0. Coverage = 20

    If asthma = yes and sex = female and diabetes = no and obesity = no and imm_supr = no and cardiovascular = no and copd = no and renal_chronic = no, then alive.
     Accuracy = 0.9029535864978903. Coverage = 237

    If diabetes = no and sex = female and copd = no and hypertension = no and renal_chronic = no and asthma = no and cardiovascular = no and tobacco = yes and age >= 28, then alive.
     Accuracy = 0.9090909090909091. Coverage = 22

    If diabetes = no and sex = female and copd = no and hypertension = no and renal_chronic = no and asthma = no and cardiovascular = no and obesity = yes and age >= 23, then alive.
     Accuracy = 0.900990099009901. Coverage = 101

    If asthma = yes and copd = no and renal_chronic = no and cardiovascular = no and diabetes = yes and tobacco = yes and hypertension = yes, then alive.
     Accuracy = 0.9. Coverage = 20

    If asthma = yes and imm_supr = yes and hypertension = yes and renal_chronic = no and diabetes = yes, then alive.
     Accuracy = 0.95. Coverage = 20

    If asthma = yes and obesity = no and imm_supr = yes and sex = male and tobacco = no and cardiovascular = no, then alive.
     Accuracy = 0.9523809523809523. Coverage = 21

    If hypertension = no and age >= 93 and cardiovascular = no and asthma = no and tobacco = no and imm_supr = no and obesity = no, then alive.
     Accuracy = 0.9. Coverage = 20

    If asthma = yes and sex = female and obesity = no and diabetes = no and copd = yes and hypertension = no and tobacco = no, then alive.
     Accuracy = 0.9047619047619048. Coverage = 21

    If hypertension = no and diabetes = no and sex = female and copd = yes and imm_supr = yes and obesity = no, then alive.
     Accuracy = 0.9090909090909091. Coverage = 22


To find the rules that have class label 'dead', the accuracy_threshold has to be lower. Accroding to experiment, I find that rules with class label 'dead' are more likely to be appeared below accuracy threshold of 0.6.

If hypertension = no and sex = female and diabetes = no and tobacco = yes and obesity = no and asthma = yes and copd = no, then alive.
 Accuracy = 0.9886363636363636. Coverage = 88

If hypertension = no and sex = female and diabetes = no and tobacco = yes and obesity = no and copd = no and cardiovascular = yes, then alive.
 Accuracy = 1.0. Coverage = 35
 

I am not very surprised by the rules since I know that if there is no hypertension, it is more likely to be identified as 'alive' with high accuracy.

However, I am surprised to see rules with tobaco = yes and find that the accuracy of those rules are much much higher than accuracy of other rules. I am also suprised that people who have asthma can survive COVID.