# Classification Rules For COVID-19 dataset Using PRISM Algorithm

<h2>PRISM Algorithm Implementation</h2>

In [1]:
class Rule:
    def __init__(self, conditions=None, class_label=None, accuracy=0.0, coverage=0):
        
        # conditions refer to attribute and its value
        # eg. {'diabetes': 'no', 'hypertension': 'no'}
        self.conditions = conditions
        
        # eg. 'alive' or 'dead' in COVID dataset
        # 'Yes' or 'No' in weather dataset
        self.class_label = class_label
        
        # accuracy = number of rows that satisfy both condition and class /(divided by) number of rows that
        # satisfy only the condition
        self.accuracy = accuracy
        
        
        # coverage refers to number of rows that satisfy only the condition
        self.coverage = coverage
        
    def __str__(self):
        
        # printing out the rule in a nice format
        # eg. 
        # if diabetes = no, then alive.
        # Accuracy = 0.7. Coverage = 21917
        
        str_rep = "If "
        
        counter = 0
        for condition in self.conditions:
            if counter == (len(self.conditions) - 1):
                if type(self.conditions[condition]) == int or type(self.conditions[condition]) == float:
                    str_rep += (str(condition) + " >= " + str(self.conditions[condition]))
                else:
                    str_rep += (str(condition) + " = " + str(self.conditions[condition]))
            else:
                if type(self.conditions[condition]) == int or type(self.conditions[condition]) == float:
                    str_rep += (str(condition) + " >= " + str(self.conditions[condition]) + " and ")
                else:
                    str_rep += (str(condition) + " = " + str(self.conditions[condition]) + " and ")
            
            counter += 1
        
        str_rep += (", then " + self.class_label) 
        str_rep += (".\n Accuracy = " + str(self.accuracy))
        str_rep += (". Coverage = " + str(self.coverage))
        str_rep += "\n"
        
        return str_rep

In [2]:
def get_unique_class_label(rows):
    
    # from getting repetitive class labels, just get the unique class label
    # eg of output: ['alive', 'dead']

    # use set to hold the unique class label
    class_set = set()
    
    for row in rows:
        
        # get the last column since last column is where we have a class label
        class_label = row[-1]
        
        if class_label not in class_set:
            class_set.add(class_label)
            
    return list(class_set)
        

In [3]:
def copy_rows(rows):
    
    # copy the data to another nested list
    # there is another way to copy: using deepcopy that requires to import copy
    # the below way can also be used to copy data from one nested list to another nested list
    
    nested_lst = []
    for row in rows:
        inner_lst = []
        for each_data in row:
            inner_lst.append(each_data)
            
        nested_lst.append(inner_lst)
    return nested_lst

In [4]:
# get attributes that have not been used by the rule
def get_remaining_attributes(attributes, rule_node):
    
    # eg. if attribute called 'age' has been used, get remaining attributes other than 'age'
    
    # if the node is None, just return the attributes because none of them haven't been used yet.
    if rule_node is None:
        return attributes
     
    rule_dic = rule_node.conditions
    
    return [attribute for attribute in attributes if attribute not in rule_dic]

In [5]:
def get_unique_attribute_vals(rows, indx):
    
    # There are repetitive attribute values and we want to get the attribute values that are distinct.
    # eg of outcome:  ['overcast','sunny','rainy']
    # There are two lists: one for data and another for columns/attributes
    # Eg. each row = ['overcast',...], columns = ['Outlook',...]
    # with the help of indx, we can actually get the data associated with a particular column
    
    # a set to hold the unique attribute values
    attribute_val_set = set()
    
    lst = [row[indx] for row in rows]
    return list(set(lst))

In [6]:
def get_attribute_indx(attribute, attribute_lst):
    
    # we want to find the index of a particular attribute in the attribute list.
    # eg. attribute = 'Outlook', attribute_lst = ['Outlook',...]
    # so in the above example, indx will be 0
    
    
    for indx, attr in enumerate(attribute_lst):
        if attribute == attr:
            return indx
    

In [7]:
def get_condition_based_rows(rows, rule_node, attr_lst):
    
    # bring back rows that satisfy the condition(s)
    
    if rule_node is not None:
        rule_dic = rule_node.conditions
        
        # copy the data so that the original data will not be affected when we change the list
        covered_rows = copy_rows(rows)

        for key in rule_dic:
            indx = get_attribute_indx(key, attr_lst)
            
             # for numerical. eg. 32, 56...
            if isinstance(rule_dic[key], int) or isinstance(rule_dic[key], float):
                covered_rows = [row for row in covered_rows if row[indx] >= rule_dic[key]]
          
            else:
                # for classification groups. eg: True/False, Hot/Mild/Cool 
                covered_rows = [row for row in covered_rows if row[indx] == rule_dic[key]]

        return covered_rows

In [8]:
def calculate_accuracy(correct_rows, covered_rows):
    
    # accuracy = number of correct rows(satisfy both condition and class label) / number of covered_rows based on condition
    
    no_of_correct_class = len(correct_rows)
    no_of_covered_rows = len(covered_rows)
    
    # accuracy is between 0.0 and 1.0
    accuracy = no_of_correct_class / float(no_of_covered_rows)
    return accuracy

In [9]:
def get_rule_with_highest_accuracy(possible_rules):
    
    # possible rules contains rule nodes and we want to find the highest accuracy rule among all the rules
    # if there is a tie in accuracy, we choose the rule with larger coverage.
    # if there is still tie, we can choose either one. In the code below, if there is still tie, I just choose the one that comes first
    
    max_acc_node = None
    
    highest_acc = 0
    highest_cov = 0
    
    
    for possible_rule_node in possible_rules:
        if possible_rule_node is None:
            continue
        
        acc = possible_rule_node.accuracy
        cov = possible_rule_node.coverage
        
        # if current node accuracy is higher than the max accuracy so far, choose the current rule as best rule
        if acc > highest_acc:
            highest_acc = acc
            highest_cov = cov
            
            max_acc_node = possible_rule_node
            
        elif acc == highest_acc:
            
            # if there is accuracy tie, select using the coverage 
            # if current node coverage is higher than max coverage so far, choose the current rule as best rule
            
            if cov > highest_cov:
                highest_cov = cov
                
                max_acc_node = possible_rule_node
                
    return max_acc_node        

In [10]:
# remove rows which are covered by the rules
def get_remaining_rows(rows, covered_rows):
    
    # We choose the rows that are not covered by the rules. 
    
    return [row for row in rows if row not in covered_rows]



In [35]:
def learn_one_rule(rows, class_labels, attributes, accuracy_thresh, coverage_thresh):
    
    # mapping attribute to index
    attribute_indx_dic = {}
    counter = 0
    for attribute in attributes:
        attribute_indx_dic[attribute] = counter
        counter += 1
        
    
    # these are data that are covered by the rules and we copy the data because we don't want to have effect on original list
    rows_covered_by_rule = copy_rows(rows)
    
    # this keeps track of best rule so far. First, we initialize it to None because we haven't got any rule yet
    highest_node = None
    
    while True:
        print("highest_node: ")
        print(highest_node)
    
        # this keeps track of rules that have different class label.
        # eg. if we have two class labels-'alive' or 'dead', this list can contain rules for both labels and later choose the best rule
        best_classes_rule_lst = []

        for class_label in class_labels:
            
            # if we already have the highest node and its class label is not the same as the current class label,
            # we go on to next class label
            # let's say we have best rule so far: if diabetes = no, then alive.
            # Then, when we want to refine this rule, we don't need to consider class label 'dead'.
            
            if highest_node is not None and highest_node.class_label != class_label:
                continue

            # remaining attributes that haven't been used in the rule
            remaining_attributes = get_remaining_attributes(attributes, highest_node)

            # this stores the nodes generated from considering each attribute for each class label
            # eg. if class_label is 'alive', this will keep track of nodes produced from considering each attribute for the 'alive' class label
            possible_rules = []


            # go over all the remaining attributes
            for attr in remaining_attributes:

                indx = attribute_indx_dic[attr]

                # get unique attribute values such as 'Sunny', Rainy, ...
                remaining_attr_vals = get_unique_attribute_vals(rows_covered_by_rule, indx)

                # go over the remaining attribute values
                for attr_val in remaining_attr_vals:
                    
                    # note: both attribute and values of attribute results in condition
                    # eg. If 'Outlook' = 'Sunny' 

                    # create a rule node for each attribute values
                    potential_rule_node = Rule(conditions={attr: attr_val}, class_label=class_label)


                    # Simply, give me rows which satisfy the condition
                    # eg. give me rows that has Outlook = Sunny
                    only_condition_rows = get_condition_based_rows(rows_covered_by_rule, potential_rule_node, attributes)


                    # Simply, give me rows which satisfy both the condition and the class label
                    # Eg. give me rows that has 'Outlook' = 'Sunny' and class label = 'Yes'
                    # these are rows that satisfy both condition and class label
                    condition_class_label_rows = [row for row in only_condition_rows if row[-1] == class_label]


                 
                    # coverage refers to number of rows that satisfy the condition. In other words, it covers the rows that meet the conditions
                    cov = len(only_condition_rows)
                    
                    # calculate accuracy
                    calculated_acc = calculate_accuracy(condition_class_label_rows, only_condition_rows)
                    
                    # if the current coverage exceeds the coverage threshold, then we add the potential rule to the list
                    # let's say coverage threshold is 3. Even if the accuracy is 1.0(2/2), the coverage of that rule(ie. 2) is less than the coverage threshold(ie. 3)
                    # In the example above, we will not consider that rule because the rule covers less rows than the specified coverage threshold. 
                    if cov >= coverage_thresh:
                        potential_rule_node.accuracy = calculated_acc
                        potential_rule_node.coverage = cov
                    
                        possible_rules.append(potential_rule_node)
        
        
            # if there are no rules in possible_rules list for that class label, continue to the next class label 
            if len(possible_rules) == 0:
                continue
            
            
            # get the rule with highest accuracy(ie. best rule) among the same class label
            highest_accuracy_rule = get_rule_with_highest_accuracy(possible_rules)

            # add that rule to the best_classes_rule_lst so that we can compare this rule to the another rule for another class
            best_classes_rule_lst.append(highest_accuracy_rule)
            
        
            # if there is highest node, that means that it already has first condition and it is here for refining the rule
            # So just update the highest_node with more conditions, accuracy and coverage
            if highest_node is not None:

                for key in highest_accuracy_rule.conditions:
                    attri_value = highest_accuracy_rule.conditions[key]

                    highest_node.conditions[key] = attri_value

                highest_node.accuracy = highest_accuracy_rule.accuracy
                highest_node.coverage = highest_accuracy_rule.coverage
         
        print("Best classes rule lst")
        print(best_classes_rule_lst)
        
        # if there is no rule in the best_classes_rule_lst, that means that no best rule is found so just return None
        if len(best_classes_rule_lst) == 0:
            return None, None
        
        # if there is no best rule so far yet, find the best rule among the rules in best_classes_rule_lst
        # and assign the best rule to highest_node.
        if highest_node is None:
            # this node is the best among other nodes including nodes from other class labels
            highest_node = get_rule_with_highest_accuracy(best_classes_rule_lst)
        
        # get the rows that meet the conditions of the rule and assign to rows_covered_by_rule
        # So this list gets smaller and smaller as the rule gets refined.
        rows_covered_by_rule = get_condition_based_rows(rows_covered_by_rule, highest_node, attributes)

        # if the accuracy threshold has been reached or if the rule covers less rows than the coverage threshold,
        # just stop refining the rule
        if highest_node.accuracy >= accuracy_thresh or highest_node.coverage < coverage_thresh:
            break
            
    # By this step, we know number of rows that the best rule covers and we want to find the remaining rows.
    remaining_rows = get_remaining_rows(rows, rows_covered_by_rule)
        
    return remaining_rows, highest_node
    
    
   
        
        
    
    
    

In [36]:
def PRISM_algorithm(rows, col_lst, accuracy_thresh=1.0, coverage_thresh=3):
    
    # copy all the data to another list 
    # so that when we remove rows, it will not affect original lst
    # here, remaining_rows will first be equal to original rows
    remaining_rows = copy_rows(rows)
    
    # eg. ['Yes', 'No'] or ['alive', 'dead']
    class_labels = get_unique_class_label(rows)
    
    # list of rule nodes 
    rules_lst = []
    
    # exclude the class label
    attributes = col_lst[:-1]
    
    # remaining_rows is not empty yet so learn a rule
    while len(remaining_rows) != 0:
        
        
        remaining_rows, rule_node = learn_one_rule(remaining_rows, class_labels, attributes, accuracy_thresh, coverage_thresh)
        
          
        # this means that all the rows that remain have coverage that is below the threshold so we do not consider them
        if rule_node is None:
            break
            
        print(rules_lst)
        
        
        
        # add node to rules
        rules_lst.append(rule_node)
        
    return rules_lst

<h2>Testing PRISM Algorithm using small dataset: weather dataset</h2>

In [13]:
col_lst = ['Outlook', 'Temp', 'Humidity', 'Windy', 'Play']

data_rows = [
    ['Sunny', 'Hot', 'High', 'False', 'No'],
    ['Sunny', 'Hot', 'High', 'True', 'No'],
    ['Overcast', 'Hot', 'High', 'False', 'Yes'],
    ['Rainy', 'Mild', 'High', 'False', 'Yes'],
    ['Rainy', 'Cool', 'Normal', 'False', 'Yes'],
    ['Rainy', 'Cool', 'Normal', 'True', 'No'],
    ['Overcast', 'Cool', 'Normal', 'True', 'Yes'],
    ['Sunny', 'Mild', 'High', 'False', 'No'],
    ['Sunny', 'Cool', 'Normal', 'False', 'Yes'],
    ['Rainy', 'Mild', 'Normal', 'False', 'Yes'],
    ['Sunny', 'Mild', 'Normal', 'True', 'Yes'],
    ['Overcast', 'Mild', 'High', 'True', 'Yes'],
    ['Overcast', 'Hot', 'Normal', 'False', 'Yes'],
    ['Rainy', 'Mild', 'High', 'True', 'No']
]

In [14]:
# eg of rule
conditions_demo = {'Outlook': 'Overcast'}

rule_demo = Rule(conditions=conditions_demo, class_label='Yes',accuracy=1.0,coverage=5)
print("Eg of rule that is printed in nice format: ")
print(rule_demo)
    

Eg of rule that is printed in nice format: 
If Outlook = Overcast, then Yes.
 Accuracy = 1.0. Coverage = 5



In [15]:
print("getting attributes that are not used in the rule")

test_rule_node = Rule({'Humidity': 'normal'}, 'Yes', 1.0, 5)
print(get_remaining_attributes(col_lst[:-1], test_rule_node))
print("\n")

getting attributes that are not used in the rule
['Outlook', 'Temp', 'Windy']




In [16]:
print("Getting remaining rows after first rule: if outlook = overcast, then yes")
test_rule_node = Rule({'Outlook': 'Overcast'}, 'Yes', 1.0, 4)
test_covered_rows = get_condition_based_rows(data_rows, test_rule_node, col_lst[:-1])


rem_data_testing = get_remaining_rows(data_rows, test_covered_rows)
print(rem_data_testing)
print("\n")

print("Getting remaining rows after second rule: if humidity = normal and windy = false, then yes")
test_rule_node = Rule({'Humidity': 'Normal', 'Windy': 'False'}, 'Yes', 1.0, 3)
test_covered_rows = get_condition_based_rows(rem_data_testing, test_rule_node, col_lst[:-1])
rem_data_testing = get_remaining_rows(rem_data_testing, test_covered_rows)
print(rem_data_testing)
print("\n")

print("Getting remaining rows after third rule: if humidity = high and outlook = sunny, then no")
test_rule_node = Rule({'Humidity': 'High', 'Outlook': 'Sunny'}, 'No', 1.0, 3)
test_covered_rows = get_condition_based_rows(rem_data_testing, test_rule_node, col_lst[:-1])
rem_data_testing = get_remaining_rows(rem_data_testing, test_covered_rows)
print(rem_data_testing)
print("\n")


Getting remaining rows after first rule: if outlook = overcast, then yes
[['Sunny', 'Hot', 'High', 'False', 'No'], ['Sunny', 'Hot', 'High', 'True', 'No'], ['Rainy', 'Mild', 'High', 'False', 'Yes'], ['Rainy', 'Cool', 'Normal', 'False', 'Yes'], ['Rainy', 'Cool', 'Normal', 'True', 'No'], ['Sunny', 'Mild', 'High', 'False', 'No'], ['Sunny', 'Cool', 'Normal', 'False', 'Yes'], ['Rainy', 'Mild', 'Normal', 'False', 'Yes'], ['Sunny', 'Mild', 'Normal', 'True', 'Yes'], ['Rainy', 'Mild', 'High', 'True', 'No']]


Getting remaining rows after second rule: if humidity = normal and windy = false, then yes
[['Sunny', 'Hot', 'High', 'False', 'No'], ['Sunny', 'Hot', 'High', 'True', 'No'], ['Rainy', 'Mild', 'High', 'False', 'Yes'], ['Rainy', 'Cool', 'Normal', 'True', 'No'], ['Sunny', 'Mild', 'High', 'False', 'No'], ['Sunny', 'Mild', 'Normal', 'True', 'Yes'], ['Rainy', 'Mild', 'High', 'True', 'No']]


Getting remaining rows after third rule: if humidity = high and outlook = sunny, then no
[['Rainy', 'Mild',

In [37]:
print("----------Testing PRISM Algorithm on weather dataset----------")

test_weather_rules = PRISM_algorithm(rows=data_rows, col_lst=col_lst, accuracy_thresh=1.0, coverage_thresh=3)

for rule in test_weather_rules:
    print(rule)

----------Testing PRISM Algorithm on weather dataset----------
highest_node: 
None
Best classes rule lst
[<__main__.Rule object at 0x7fd2632b7910>, <__main__.Rule object at 0x7fd263430520>]
[]
highest_node: 
None
Best classes rule lst
[<__main__.Rule object at 0x7fd2632b76d0>, <__main__.Rule object at 0x7fd2632b7bb0>]
highest_node: 
If Humidity = Normal, then Yes.
 Accuracy = 0.8. Coverage = 5

Best classes rule lst
[<__main__.Rule object at 0x7fd2632b7c10>]
[<__main__.Rule object at 0x7fd2632b7910>]
highest_node: 
None
Best classes rule lst
[<__main__.Rule object at 0x7fd2632b7d60>, <__main__.Rule object at 0x7fd267050250>]
highest_node: 
If Humidity = High, then No.
 Accuracy = 0.8. Coverage = 5

Best classes rule lst
[<__main__.Rule object at 0x7fd267050a60>]
[<__main__.Rule object at 0x7fd2632b7910>, <__main__.Rule object at 0x7fd2632b76d0>]
highest_node: 
None
Best classes rule lst
[<__main__.Rule object at 0x7fd267050f10>, <__main__.Rule object at 0x7fd267050730>]
highest_node: 


<h2>Applying the PRISM Algorithm on COVID-19 dataset</h2>

In this dataset we have the following attributes:
1. sex: 1 -woman, 2-man
2. age: numeric
3. diabetes: yes/no
4. copd (chronic obstructive pulmonary disease): yes/no
5. asthma: yes/no
6. imm_supr (suppressed immune system): yes/no
7. hypertension: yes/no
8. cardiovascular: yes/no
9. renal_chronic: yes/no
10. tobacco: yes/no	
11. outcome: alive/dead

In [19]:
covid_data_file = "../../data_ml_2020/covid_categorical_good.csv"

In [20]:
import pandas as pd
data = pd.read_csv(covid_data_file)
data = data.dropna(how="any")
data.columns

Index(['sex', 'age', 'diabetes', 'copd', 'asthma', 'imm_supr', 'hypertension',
       'cardiovascular', 'obesity', 'renal_chronic', 'tobacco', 'outcome'],
      dtype='object')

In [21]:
covid_data_rows = data.to_numpy().tolist()
print("number of rows: ",len(covid_data_rows))
print("\n")
print(data_rows[:5])

number of rows:  219179


[['Sunny', 'Hot', 'High', 'False', 'No'], ['Sunny', 'Hot', 'High', 'True', 'No'], ['Overcast', 'Hot', 'High', 'False', 'Yes'], ['Rainy', 'Mild', 'High', 'False', 'Yes'], ['Rainy', 'Cool', 'Normal', 'False', 'Yes']]


In [22]:
covid_col_lst = data.columns.to_numpy().tolist()
print("number of columns: ", len(covid_col_lst))
print("\n")
print(covid_col_lst)

number of columns:  12


['sex', 'age', 'diabetes', 'copd', 'asthma', 'imm_supr', 'hypertension', 'cardiovascular', 'obesity', 'renal_chronic', 'tobacco', 'outcome']


In [23]:

rule_node_testing = Rule(conditions={'age': 30},class_label='alive',accuracy=1.0,coverage=3)

get_remaining_attributes(attributes=covid_col_lst[:-1], rule_node=rule_node_testing)


['sex',
 'diabetes',
 'copd',
 'asthma',
 'imm_supr',
 'hypertension',
 'cardiovascular',
 'obesity',
 'renal_chronic',
 'tobacco']

In [38]:
# testing on small dataset
print("---------- PRISM Algorithm on small COVID dataset----------")



covid_rules = PRISM_algorithm(rows=covid_data_rows[:1000], 
                              col_lst=covid_col_lst, accuracy_thresh=0.4, coverage_thresh=30)

for rule in covid_rules:
    print(rule)



---------- PRISM Algorithm on small COVID dataset----------
highest_node: 
None
Best classes rule lst
[<__main__.Rule object at 0x7fd2632b7580>, <__main__.Rule object at 0x7fd26efe3040>]
[]
highest_node: 
None
Best classes rule lst
[<__main__.Rule object at 0x7fd2632b74f0>, <__main__.Rule object at 0x7fd2634301c0>]
[<__main__.Rule object at 0x7fd2632b7580>]
highest_node: 
None
Best classes rule lst
[<__main__.Rule object at 0x7fd2634304c0>, <__main__.Rule object at 0x7fd263430580>]
[<__main__.Rule object at 0x7fd2632b7580>, <__main__.Rule object at 0x7fd2632b74f0>]
highest_node: 
None
Best classes rule lst
[<__main__.Rule object at 0x7fd263430df0>, <__main__.Rule object at 0x7fd2634305e0>]
[<__main__.Rule object at 0x7fd2632b7580>, <__main__.Rule object at 0x7fd2632b74f0>, <__main__.Rule object at 0x7fd263430580>]
highest_node: 
None
Best classes rule lst
[<__main__.Rule object at 0x7fd2634309a0>, <__main__.Rule object at 0x7fd2632b7a60>]
[<__main__.Rule object at 0x7fd2632b7580>, <__m

In [None]:

print("---------- PRISM Algorithm on COVID dataset----------")

#coverage_threshold_in_percent = 3
#coverage_thresh_val = int((coverage_threshold_in_percent/100) * len(covid_data_rows))
#print("Coverage Threshold in terms of number of rows: ",coverage_thresh_val)

covid_rules = PRISM_algorithm(rows=covid_data_rows, col_lst=covid_col_lst, 
                              accuracy_thresh=0.6, coverage_thresh=30)

if (len(covid_rules)) == 0:
    print("No rules found!")
else:
    for rule in covid_rules:
        print(rule)


---------- PRISM Algorithm on COVID dataset----------
highest_node: 
None
Best classes rule lst
[<__main__.Rule object at 0x7fd267050850>, <__main__.Rule object at 0x7fd26a5c0730>]


<h3>Summary of the rules discovered from the dataset</h3>

For accuracy_threshold = 0.8, coverage_thresh is 21917 (around 9.999%):

    If hypertension = no, then alive.
        Accuracy = 0.9118543984283984. Coverage = 175108

    If diabetes = no and renal_chronic = no and copd = no, then alive.
        Accuracy = 0.8030269987351158. Coverage = 22927


For accuracy_threshold = 0.8, coverage_thresh = 43836 (around 20%):
    
    If hypertension = no, then alive.
        Accuracy = 0.9118543984283984. Coverage = 175108
        
        
        
For accuracy_threshold = 0.8, coverage_thresh = 17534 (around 8%):
    
    If hypertension = no, then alive.
        Accuracy = 0.9118543984283984. Coverage = 175108

    If diabetes = no and renal_chronic = no and copd = no, then alive.
        Accuracy = 0.8030269987351158. Coverage = 22927
        
        
For accuracy_threshold = 0.6, coverage_thresh = 21917 (around 10%):

    If hypertension = no, then alive.
        Accuracy = 0.9118543984283984. Coverage = 175108

    If diabetes = no, then alive.
        Accuracy = 0.7902519387632098. Coverage = 24887

For accuracy_threshold = 0.4, coverage_thresh = 6575 (around 3%):
    
    If hypertension = no, then alive.
        Accuracy = 0.9118543984283984. Coverage = 175108

    If diabetes = no, then alive.
        Accuracy = 0.7902519387632098. Coverage = 24887

    If sex = female, then alive.
        Accuracy = 0.7126284388465363. Coverage = 9051

    If renal_chronic = no, then alive.
        Accuracy = 0.6717284502009826. Coverage = 8956

