In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
import copy
import random

In [2]:
def make_discrete(column):
    max = np.max(column)
    min = np.min(column)
    inv = (max - min)/5
    for i in range(len(column)):
        if(column[i] <= inv):
            column[i] = 1
        elif(inv < column[i] <= (2*inv)):
            column[i] = 2
        elif((2*inv) < column[i] <= (3*inv)):
            column[i] = 3
        elif((3*inv) < column[i] <= (4*inv)):
            column[i] = 4
        elif(column[i] > (4*inv)):
            column[i] = 5

In [3]:
def discretization(data):
    for column in data.T:
        if(type(column[0]) != int):
           continue
        elif(len(np.unique(column)) > 5):
            make_discrete(column)

In [4]:
def calc_total_entropy(train_data, label, class_list):
    total_row = train_data.shape[0] #the total size of the dataset
    total_entr = 0

    for c in class_list: #for each class in the label
        total_class_count = train_data[train_data[label] == c].shape[0] #number of the class
        total_class_entr = - (total_class_count/total_row)*np.log2(total_class_count/total_row) #entropy of the class
        total_entr += total_class_entr #adding the class entropy to the total entropy of the dataset

    return total_entr

In [5]:
def calc_entropy(feature_value_data, label, class_list):
    class_count = feature_value_data.shape[0]
    entropy = 0

    for c in class_list:
        label_class_count = feature_value_data[feature_value_data[label] == c].shape[0] #row count of class c
        entropy_class = 0
        if label_class_count != 0:
            probability_class = label_class_count/class_count #probability of the class
            entropy_class = - probability_class * np.log2(probability_class)  #entropy
        entropy += entropy_class
    return entropy

In [6]:
def calc_info_gain(feature_name, train_data, label, class_list):
    feature_value_list = train_data[feature_name].unique() #unqiue values of the feature
    total_row = train_data.shape[0]
    feature_info = 0.0

    for feature_value in feature_value_list:
        feature_value_data = train_data[train_data[feature_name] == feature_value] #filtering rows with that feature_value
        feature_value_count = feature_value_data.shape[0]
        feature_value_entropy = calc_entropy(feature_value_data, label, class_list) #calculcating entropy for the feature value
        feature_value_probability = feature_value_count/total_row
        feature_info += feature_value_probability * feature_value_entropy #calculating information of the feature value

    return calc_total_entropy(train_data, label, class_list) - feature_info #calculating information gain by subtracting

In [7]:
def find_most_informative_feature(train_data, label, class_list):
    feature_list = train_data.columns.drop(label) #finding the feature names in the dataset
                                            #N.B. label is not a feature, so dropping it
    max_info_gain = -1
    max_info_feature = None

    for feature in feature_list:  #for each feature in the dataset
        feature_info_gain = calc_info_gain(feature, train_data, label, class_list)
        if max_info_gain < feature_info_gain: #selecting feature name with highest information gain
            max_info_gain = feature_info_gain
            max_info_feature = feature

    return max_info_feature

In [8]:
def generate_sub_tree(feature_name, train_data, label, class_list):
    feature_value_count_dict = train_data[feature_name].value_counts(sort=False) #dictionary of the count of unqiue feature value
    tree = {} #sub tree or node

    for feature_value, count in feature_value_count_dict.iteritems():
        feature_value_data = train_data[train_data[feature_name] == feature_value] #dataset with only feature_name = feature_value

        assigned_to_node = False #flag for tracking feature_value is pure class or not
        for c in class_list: #for each class
            class_count = feature_value_data[feature_value_data[label] == c].shape[0] #count of class c

            if class_count == count: #count of (feature_value = count) of class (pure class)
                tree[feature_value] = c #adding node to the tree
                train_data = train_data[train_data[feature_name] != feature_value] #removing rows with feature_value
                assigned_to_node = True
        if not assigned_to_node: #not pure class
            tree[feature_value] = "?" #as feature_value is not a pure class, it should be expanded further,
                                      #so the branch is marking with ?

    return tree, train_data

In [9]:
def make_tree(root, prev_feature_value, train_data, label, class_list):
    if train_data.shape[0] != 0: #if dataset becomes enpty after updating
        max_info_feature = find_most_informative_feature(train_data, label, class_list) #most informative feature
        tree, train_data = generate_sub_tree(max_info_feature, train_data, label, class_list) #getting tree node and updated dataset
        next_root = None

        if prev_feature_value != None: #add to intermediate node of the tree
            root[prev_feature_value] = dict()
            root[prev_feature_value][max_info_feature] = tree
            next_root = root[prev_feature_value][max_info_feature]
        else: #add to root of the tree
            root[max_info_feature] = tree
            next_root = root[max_info_feature]

        for node, branch in list(next_root.items()): #iterating the tree node
            if branch == "?": #if it is expandable
                feature_value_data = train_data[train_data[max_info_feature] == node] #using the updated dataset
                make_tree(next_root, node, feature_value_data, label, class_list) #recursive call with updated dataset

In [10]:
def id3(train_data_m, label):
    train_data = train_data_m.copy() #getting a copy of the dataset
    tree = {} #tree which will be updated
    class_list = train_data[label].unique() #getting unqiue classes of the label
    make_tree(tree, None, train_data, label, class_list) #start calling recursion
    return tree

In [11]:
def predict(tree, instance):
    if not isinstance(tree, dict): #if it is leaf node
        return tree #return the value
    else:
        root_node = next(iter(tree)) #getting first key/feature name of the dictionary
        feature_value = instance[root_node] #value of the feature
        if feature_value in tree[root_node]: #checking the feature value in current tree node
            return predict(tree[root_node][feature_value], instance) #goto next feature
        else:
            return None

In [12]:
def evaluate(tree, test_data_m):
    res=[]
    flag = 1
    for index, row in test_data_m.iterrows(): #for each row in the dataset
        result = predict(tree, test_data_m.iloc[index]) #predict the row
        if(result is not None):
            res.append(result)
        else:
            res.append("None")
    return res

In [13]:
def dict_generator(indict, pre=None):
    pre = pre[:] if pre else []
    if isinstance(indict, dict):
        for key, value in indict.items():
            if isinstance(value, dict):
                for d in dict_generator(value, pre + [key]):
                    yield d
            elif isinstance(value, list) or isinstance(value, tuple):
                for v in value:
                    for d in dict_generator(v, pre + [key]):
                        yield d
            else:
                yield pre + [key, value]
    else:
        yield pre + [indict]

In [14]:
def sublist(sub_list, test_list):
    if(set(sub_list[:-2]).issubset(set(test_list[:-2]))):
        return True
    return False

In [15]:
def get_twigs(l):
    for i in range(len(l)):
        for j in range(len(l)):
            if(i != j and sublist(l[i], l[j])):
                l[i].clear()
    return list(filter(None, l))

In [16]:
def majority(l):
    yes = l.count("Yes")
    no = l.count("No")
    if(yes > no):
        return "Yes"
    elif(no > yes):
        return "No"
    else:
        return random.choice(["Yes", "No"])

In [17]:
def isLeaf(d):
    for i in list(d.values()):
        if(isinstance(i, dict)):
            return False
    return True

In [18]:
def cut(tree, twig_name):
    if(isinstance(tree, dict)):
        for key, value in tree.items():
            if(twig_name in value and isLeaf(tree[key][twig_name])):
                choice = majority(list(tree[key][twig_name].values()))
                tree[key] = choice
            else:
                cut(value, twig_name)
        return

In [19]:
def prune_tree(tree, last_acc, df, df_test, test):
    current_acc = last_acc
    while(current_acc >= last_acc):
        paths = []
        for i in dict_generator(tree):
            paths.append(i)
        gains = []
        twig_paths = get_twigs(paths)
        twigs = [i[-3] for i in twig_paths]
        for i in twigs:
            gains.append(calc_info_gain(i, df, "Attrition", df["Attrition"].unique()))
        min_index = gains.index(min(gains))
        old_tree = copy.deepcopy(tree)
        cut(tree, twigs[min_index])

        result2 = evaluate(tree, df_test)
        nones2 = []
        truth2 = test[:, 1].copy()

        for i in range(len(result2)):
            if(result2[i] == "None"):
                nones2.append(i)
        nones2.reverse()
        for j in nones2:
            result2.pop(j)
            truth2 = np.delete(truth2, j)
        current_acc = accuracy_score(truth2, result2)
        if(current_acc < last_acc):
            return old_tree
        last_acc = current_acc

In [20]:
def calculate_scores(key, tree, df_test, test):
    dict_res2 = {key: []}
    result2 = evaluate(tree, df_test)
    nones2 = []
    truth2 = test[:, 1].copy()
    for i in range(len(result2)):
        if(result2[i] == "None"):
            nones2.append(i)
    nones2.reverse()
    for j in nones2:
        result2.pop(j)
        truth2 = np.delete(truth2, j)
    dict_res2[list(dict_res2)[0]].append(accuracy_score(truth2, result2))
    dict_res2[list(dict_res2)[0]].append(np.average(precision_score(truth2, result2, average=None)))
    dict_res2[list(dict_res2)[0]].append(np.average(recall_score(truth2, result2, average=None)))
    dict_res2[list(dict_res2)[0]].append(np.average(f1_score(truth2, result2, average=None)))
    return dict_res2

# Part 1

## Overview

We are required to implement a decision tree model to predict whether an employee has a potential attrition or not.
In the first part, we will apply 5-fold CV and find the best model.
In the second part, we will split our data as train-test-validation. We will form a model based of validation data and then prune it. After the pruning, we will compare pre-pruned and post-pruned trees according to the test data.

In [39]:
# read the csv file
df = pd.read_csv("WA_Fn-UseC_-HR-Employee-Attrition.csv", encoding='cp1254')
data = df.to_numpy()  # convert it to numpy array
column_names = list(df.columns)
column_names[0] = column_names[0][3:]

In [22]:
# shuffle the data
np.random.shuffle(data)

# discretization of the data
discretization(data)

# create dictionary for results
dict_res = {"fold0": [], "fold1": [], "fold2": [], "fold3": [], "fold4": []}

In [23]:
# k-fold and train-test split
X = np.delete(data,1,1)
kf = KFold(n_splits=5, random_state=None)
fold_num = 0
prev_acc = 0
for train_index , test_index in kf.split(X):
    data_train, data_test = data[train_index,:], data[test_index,:]
    df_temp = pd.DataFrame(data_train, columns = column_names)
    df_test = pd.DataFrame(data_test, columns = column_names)
    tree = id3(df_temp, 'Attrition')
    result = evaluate(tree, df_test) #evaluating the test dataset
    nones = []
    truth = data_test[:, 1].copy()

    for i in range(len(result)):
        if(result[i] == "None"):
             nones.append(i)
    nones.reverse()
    for j in nones:
        result.pop(j)
        truth = np.delete(truth, j)
    if accuracy_score(truth,result) > prev_acc:
        tree1 = tree
    prev_acc = accuracy_score(truth,result)
    dict_res[list(dict_res)[fold_num]].append(accuracy_score(truth, result))
    dict_res[list(dict_res)[fold_num]].append(np.average(precision_score(truth, result, average=None)))
    dict_res[list(dict_res)[fold_num]].append(np.average(recall_score(truth, result, average=None)))
    dict_res[list(dict_res)[fold_num]].append(np.average(f1_score(truth, result, average=None)))
    fold_num +=1

In [24]:
mux = pd.MultiIndex.from_product([['Accuracy', 'Precision', 'Recall', 'F1 Score']])

df_res = pd.DataFrame.from_dict(dict_res, orient='index', columns = mux)
df_res.head()

Unnamed: 0,Accuracy,Precision,Recall,F1 Score
fold0,0.794964,0.579348,0.608349,0.588148
fold1,0.789286,0.589713,0.578014,0.582733
fold2,0.771429,0.584241,0.603429,0.590905
fold3,0.748252,0.572892,0.568548,0.570463
fold4,0.80427,0.635685,0.618931,0.626013


In [25]:
for i in dict_generator(tree1): 
    print(i)

['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 4, 'Yes']
['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 2, 'EnvironmentSatisfaction', 4, 'Age', 4, 'No']
['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 2, 'EnvironmentSatisfaction', 4, 'Age', 5, 'Yes']
['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 2, 'EnvironmentSatisfaction', 1, 'No']
['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 2, 'EnvironmentSatisfaction', 2, 'Yes']
['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 1, 'Age', 4, 'Yes']
['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 1, 'Age', 5, 'No']
['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 2, 'EnvironmentSatisfaction', 1, 'Yes']
['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolv

Below, you can see the paths for the best model out of 5-fold cross validation. The paths are from root to leaves and they are from left to right:

    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 2, 'EnvironmentSatisfaction', 4, 'Age', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 2, 'EnvironmentSatisfaction', 4, 'Age', 5, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 2, 'EnvironmentSatisfaction', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 2, 'EnvironmentSatisfaction', 2, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 1, 'Age', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 1, 'Age', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 2, 'EnvironmentSatisfaction', 1, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 2, 'EnvironmentSatisfaction', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 2, 'EnvironmentSatisfaction', 3, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 2, 'EnvironmentSatisfaction', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 3, 'MonthlyRate', 1, 'Age', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 3, 'MonthlyRate', 1, 'Age', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 3, 'MonthlyRate', 1, 'Age', 3, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 3, 'MonthlyRate', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 3, 'MonthlyRate', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 3, 'MonthlyRate', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 3, 'MonthlyRate', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Representative', 'WorkLifeBalance', 3, 'JobInvolvement', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Divorced', 'HourlyRate', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Divorced', 'HourlyRate', 5, 'DistanceFromHome', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Divorced', 'HourlyRate', 5, 'DistanceFromHome', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Divorced', 'HourlyRate', 5, 'DistanceFromHome', 3, 'DailyRate', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Divorced', 'HourlyRate', 5, 'DistanceFromHome', 3, 'DailyRate', 3, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Divorced', 'HourlyRate', 5, 'DistanceFromHome', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Divorced', 'HourlyRate', 5, 'DistanceFromHome', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Divorced', 'HourlyRate', 3, 'DistanceFromHome', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Divorced', 'HourlyRate', 3, 'DistanceFromHome', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Divorced', 'HourlyRate', 3, 'DistanceFromHome', 2, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Married', 'WorkLifeBalance', 1, 'DailyRate', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Married', 'WorkLifeBalance', 1, 'DailyRate', 2, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Married', 'WorkLifeBalance', 1, 'DailyRate', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Married', 'WorkLifeBalance', 1, 'DailyRate', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Married', 'WorkLifeBalance', 2, 'DistanceFromHome', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Married', 'WorkLifeBalance', 2, 'DistanceFromHome', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Married', 'WorkLifeBalance', 2, 'DistanceFromHome', 3, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Married', 'WorkLifeBalance', 2, 'DistanceFromHome', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Married', 'WorkLifeBalance', 2, 'DistanceFromHome', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Married', 'WorkLifeBalance', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Married', 'WorkLifeBalance', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 1, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 2, 'DailyRate', 1, 'Age', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 2, 'DailyRate', 1, 'Age', 3, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 2, 'DailyRate', 2, 'Age', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 2, 'DailyRate', 2, 'Age', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 2, 'DailyRate', 2, 'Age', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 2, 'DailyRate', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 2, 'DailyRate', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 2, 'DailyRate', 5, 'BusinessTravel', 'Travel_Rarely', 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 2, 'DailyRate', 5, 'BusinessTravel', 'Non-Travel', 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 2, 'DailyRate', 5, 'BusinessTravel', 'Travel_Frequently', 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 3, 'NumCompaniesWorked', 1, 'Gender', 'Female', 'Age', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 3, 'NumCompaniesWorked', 1, 'Gender', 'Female', 'Age', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 3, 'NumCompaniesWorked', 1, 'Gender', 'Female', 'Age', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 3, 'NumCompaniesWorked', 1, 'Gender', 'Male', 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 3, 'NumCompaniesWorked', 2, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 3, 'NumCompaniesWorked', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 3, 'NumCompaniesWorked', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 5, 'WorkLifeBalance', 1, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 5, 'WorkLifeBalance', 2, 'Age', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 5, 'WorkLifeBalance', 2, 'Age', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 5, 'WorkLifeBalance', 2, 'Age', 3, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 5, 'WorkLifeBalance', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Laboratory Technician', 'MaritalStatus', 'Single', 'TrainingTimesLastYear', 5, 'WorkLifeBalance', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 1, 'EducationField', 'Technical Degree', 'DailyRate', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 1, 'EducationField', 'Technical Degree', 'DailyRate', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 1, 'EducationField', 'Technical Degree', 'DailyRate', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 1, 'EducationField', 'Life Sciences', 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 1, 'EducationField', 'Other', 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 1, 'EducationField', 'Medical', 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 2, 'EducationField', 'Technical Degree', 'Yes']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 2, 'EducationField', 'Life Sciences', 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 2, 'EducationField', 'Other', 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 2, 'EducationField', 'Medical', 'NumCompaniesWorked', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 2, 'EducationField', 'Medical', 'NumCompaniesWorked', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 2, 'EducationField', 'Medical', 'NumCompaniesWorked', 2, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 1, 'DistanceFromHome', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 1, 'DistanceFromHome', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 1, 'DistanceFromHome', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 1, 'DistanceFromHome', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 1, 'DistanceFromHome', 5, 'Age', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 1, 'DistanceFromHome', 5, 'Age', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 3, 'DailyRate', 2, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 3, 'DailyRate', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 3, 'DailyRate', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 3, 'DailyRate', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 4, 'Age', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 4, 'Age', 5, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 4, 'Age', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 3, 'NumCompaniesWorked', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 4, 'HourlyRate', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 4, 'HourlyRate', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 4, 'HourlyRate', 3, 'Education', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 4, 'HourlyRate', 3, 'Education', 3, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Research Scientist', 'EmployeeNumber', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Director', 'TrainingTimesLastYear', 1, 'DailyRate', 5, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Research Director', 'TrainingTimesLastYear', 1, 'DailyRate', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Director', 'TrainingTimesLastYear', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Director', 'TrainingTimesLastYear', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Director', 'TrainingTimesLastYear', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Research Director', 'TrainingTimesLastYear', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 3, 'NumCompaniesWorked', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 3, 'NumCompaniesWorked', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 3, 'NumCompaniesWorked', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 3, 'NumCompaniesWorked', 4, 'BusinessTravel', 'Travel_Rarely', 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 3, 'NumCompaniesWorked', 4, 'BusinessTravel', 'Travel_Frequently', 'Yes']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 3, 'NumCompaniesWorked', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 4, 'TrainingTimesLastYear', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 4, 'TrainingTimesLastYear', 2, 'TotalWorkingYears', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 4, 'TrainingTimesLastYear', 2, 'TotalWorkingYears', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 4, 'TrainingTimesLastYear', 2, 'TotalWorkingYears', 3, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 4, 'TrainingTimesLastYear', 2, 'TotalWorkingYears', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 4, 'TrainingTimesLastYear', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 4, 'TrainingTimesLastYear', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 4, 'TrainingTimesLastYear', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 5, 'DistanceFromHome', 1, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 5, 'DistanceFromHome', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Manufacturing Director', 'Education', 5, 'DistanceFromHome', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 1, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 2, 'JobInvolvement', 1, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 2, 'JobInvolvement', 2, 'JobSatisfaction', 1, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 2, 'JobInvolvement', 2, 'JobSatisfaction', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 2, 'JobInvolvement', 2, 'JobSatisfaction', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 2, 'JobInvolvement', 2, 'JobSatisfaction', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 2, 'JobInvolvement', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 2, 'JobInvolvement', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 1, 'JobSatisfaction', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 1, 'JobSatisfaction', 2, 'BusinessTravel', 'Travel_Frequently', 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 1, 'JobSatisfaction', 2, 'BusinessTravel', 'Non-Travel', 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 1, 'JobSatisfaction', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 1, 'JobSatisfaction', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 5, 'DailyRate', 1, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 5, 'DailyRate', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 5, 'DailyRate', 3, 'EducationField', 'Marketing', 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 5, 'DailyRate', 3, 'EducationField', 'Life Sciences', 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 5, 'DailyRate', 3, 'EducationField', 'Medical', 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 5, 'DailyRate', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 5, 'DailyRate', 5, 'Age', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 3, 'NumCompaniesWorked', 5, 'DailyRate', 5, 'Age', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 0, 'WorkLifeBalance', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 1, 'YearsInCurrentRole', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 1, 'YearsInCurrentRole', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 1, 'YearsInCurrentRole', 3, 'JobSatisfaction', 1, 'EducationField', 'Medical', 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 1, 'YearsInCurrentRole', 3, 'JobSatisfaction', 1, 'EducationField', 'Life Sciences', 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 1, 'YearsInCurrentRole', 3, 'JobSatisfaction', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 1, 'YearsInCurrentRole', 3, 'JobSatisfaction', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 1, 'YearsInCurrentRole', 3, 'JobSatisfaction', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 1, 'YearsInCurrentRole', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 1, 'YearsInCurrentRole', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 2, 'TrainingTimesLastYear', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 2, 'TrainingTimesLastYear', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 2, 'TrainingTimesLastYear', 3, 'EnvironmentSatisfaction', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 2, 'TrainingTimesLastYear', 3, 'EnvironmentSatisfaction', 2, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 2, 'TrainingTimesLastYear', 3, 'EnvironmentSatisfaction', 3, 'DistanceFromHome', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 2, 'TrainingTimesLastYear', 3, 'EnvironmentSatisfaction', 3, 'DistanceFromHome', 2, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 2, 'TrainingTimesLastYear', 3, 'EnvironmentSatisfaction', 3, 'DistanceFromHome', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 2, 'TrainingTimesLastYear', 3, 'EnvironmentSatisfaction', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 2, 'TrainingTimesLastYear', 4, 'Age', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 2, 'TrainingTimesLastYear', 4, 'Age', 5, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 2, 'TrainingTimesLastYear', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 3, 'EmployeeNumber', 1, 'DailyRate', 1, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 3, 'EmployeeNumber', 1, 'DailyRate', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 3, 'EmployeeNumber', 1, 'DailyRate', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 3, 'EmployeeNumber', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 3, 'EmployeeNumber', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 3, 'EmployeeNumber', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Sales Executive', 'StockOptionLevel', 3, 'EmployeeNumber', 5, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 3, 'TrainingTimesLastYear', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 3, 'TrainingTimesLastYear', 2, 'RelationshipSatisfaction', 1, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 3, 'TrainingTimesLastYear', 2, 'RelationshipSatisfaction', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 3, 'TrainingTimesLastYear', 2, 'RelationshipSatisfaction', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 3, 'TrainingTimesLastYear', 2, 'RelationshipSatisfaction', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 3, 'TrainingTimesLastYear', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 3, 'TrainingTimesLastYear', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 3, 'TrainingTimesLastYear', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 5, 'DistanceFromHome', 1, 'No']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 5, 'DistanceFromHome', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 5, 'DistanceFromHome', 4, 'Age', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 5, 'DistanceFromHome', 4, 'Age', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Healthcare Representative', 'MonthlyRate', 5, 'DistanceFromHome', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Manager', 'JobInvolvement', 1, 'DailyRate', 1, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Manager', 'JobInvolvement', 1, 'DailyRate', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Manager', 'JobInvolvement', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Manager', 'JobInvolvement', 3, 'No']
    ['OverTime', 'No', 'JobRole', 'Manager', 'JobInvolvement', 4, 'No']
    ['OverTime', 'No', 'JobRole', 'Human Resources', 'DistanceFromHome', 1, 'EducationField', 'Technical Degree', 'Yes']
    ['OverTime', 'No', 'JobRole', 'Human Resources', 'DistanceFromHome', 1, 'EducationField', 'Life Sciences', 'No']
    ['OverTime', 'No', 'JobRole', 'Human Resources', 'DistanceFromHome', 1, 'EducationField', 'Human Resources', 'No']
    ['OverTime', 'No', 'JobRole', 'Human Resources', 'DistanceFromHome', 1, 'EducationField', 'Medical', 'Yes']
    ['OverTime', 'No', 'JobRole', 'Human Resources', 'DistanceFromHome', 2, 'No']
    ['OverTime', 'No', 'JobRole', 'Human Resources', 'DistanceFromHome', 4, 'Age', 4, 'Yes']
    ['OverTime', 'No', 'JobRole', 'Human Resources', 'DistanceFromHome', 4, 'Age', 5, 'No']
    ['OverTime', 'No', 'JobRole', 'Human Resources', 'DistanceFromHome', 5, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 1, 'JobSatisfaction', 1, 'EducationField', 'Life Sciences', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 1, 'JobSatisfaction', 1, 'EducationField', 'Medical', 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 1, 'JobSatisfaction', 2, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 1, 'JobSatisfaction', 3, 'Age', 4, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 1, 'JobSatisfaction', 3, 'Age', 5, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 1, 'JobSatisfaction', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 2, 'TrainingTimesLastYear', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 2, 'TrainingTimesLastYear', 2, 'EducationField', 'Marketing', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 2, 'TrainingTimesLastYear', 2, 'EducationField', 'Life Sciences', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 2, 'TrainingTimesLastYear', 2, 'EducationField', 'Other', 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 2, 'TrainingTimesLastYear', 2, 'EducationField', 'Medical', 'BusinessTravel', 'Travel_Rarely', 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 2, 'TrainingTimesLastYear', 2, 'EducationField', 'Medical', 'BusinessTravel', 'Non-Travel', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 2, 'TrainingTimesLastYear', 2, 'EducationField', 'Medical', 'BusinessTravel', 'Travel_Frequently', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 2, 'TrainingTimesLastYear', 3, 'Age', 4, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 2, 'TrainingTimesLastYear', 3, 'Age', 5, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 2, 'TrainingTimesLastYear', 4, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 2, 'TrainingTimesLastYear', 5, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 3, 'NumCompaniesWorked', 1, 'JobInvolvement', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 3, 'NumCompaniesWorked', 1, 'JobInvolvement', 2, 'BusinessTravel', 'Travel_Rarely', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 3, 'NumCompaniesWorked', 1, 'JobInvolvement', 2, 'BusinessTravel', 'Travel_Frequently', 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 3, 'NumCompaniesWorked', 1, 'JobInvolvement', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 3, 'NumCompaniesWorked', 1, 'JobInvolvement', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 3, 'NumCompaniesWorked', 2, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 3, 'NumCompaniesWorked', 3, 'Age', 4, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 3, 'NumCompaniesWorked', 3, 'Age', 5, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 3, 'NumCompaniesWorked', 4, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 3, 'NumCompaniesWorked', 5, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 2, 'MonthlyRate', 2, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 2, 'MonthlyRate', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 2, 'MonthlyRate', 4, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 2, 'MonthlyRate', 5, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 3, 'EmployeeNumber', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 3, 'EmployeeNumber', 2, 'Age', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 3, 'EmployeeNumber', 2, 'Age', 5, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 3, 'EmployeeNumber', 3, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 3, 'EmployeeNumber', 5, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 4, 'RelationshipSatisfaction', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 4, 'RelationshipSatisfaction', 2, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 4, 'RelationshipSatisfaction', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 4, 'WorkLifeBalance', 4, 'RelationshipSatisfaction', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 4, 'EnvironmentSatisfaction', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 4, 'EnvironmentSatisfaction', 2, 'BusinessTravel', 'Travel_Rarely', 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 4, 'EnvironmentSatisfaction', 2, 'BusinessTravel', 'Travel_Frequently', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 4, 'EnvironmentSatisfaction', 3, 'JobRole', 'Laboratory Technician', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 4, 'EnvironmentSatisfaction', 3, 'JobRole', 'Research Scientist', 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 4, 'EnvironmentSatisfaction', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 5, 'MonthlyRate', 1, 'NumCompaniesWorked', 4, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 5, 'MonthlyRate', 1, 'NumCompaniesWorked', 1, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 5, 'MonthlyRate', 1, 'NumCompaniesWorked', 2, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 5, 'MonthlyRate', 2, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 5, 'MonthlyRate', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 5, 'MonthlyRate', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 5, 'MonthlyRate', 5, 'Department', 'Research & Development', 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 5, 'MonthlyRate', 5, 'Department', 'Sales', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 3, 'EmployeeNumber', 1, 'No']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 3, 'EmployeeNumber', 2, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 3, 'EmployeeNumber', 3, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 1, 'DailyRate', 5, 'Age', 3, 'EmployeeNumber', 5, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Healthcare Representative', 'DistanceFromHome', 1, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Healthcare Representative', 'DistanceFromHome', 2, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Healthcare Representative', 'DistanceFromHome', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Healthcare Representative', 'DistanceFromHome', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Healthcare Representative', 'DistanceFromHome', 5, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Research Scientist', 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Human Resources', 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Laboratory Technician', 'JobInvolvement', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Laboratory Technician', 'JobInvolvement', 2, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Laboratory Technician', 'JobInvolvement', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Laboratory Technician', 'JobInvolvement', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 1, 'JobSatisfaction', 1, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 1, 'JobSatisfaction', 2, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 1, 'JobSatisfaction', 3, 'Age', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 1, 'JobSatisfaction', 3, 'Age', 5, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 1, 'JobSatisfaction', 3, 'Age', 3, 'DailyRate', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 1, 'JobSatisfaction', 3, 'Age', 3, 'DailyRate', 5, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 1, 'JobSatisfaction', 4, 'WorkLifeBalance', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 1, 'JobSatisfaction', 4, 'WorkLifeBalance', 2, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 1, 'JobSatisfaction', 4, 'WorkLifeBalance', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 2, 'JobInvolvement', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 2, 'JobInvolvement', 2, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 2, 'JobInvolvement', 3, 'DailyRate', 1, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 2, 'JobInvolvement', 3, 'DailyRate', 3, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 2, 'JobInvolvement', 3, 'DailyRate', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 2, 'JobInvolvement', 3, 'DailyRate', 5, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 2, 'JobInvolvement', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 3, 'RelationshipSatisfaction', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 3, 'RelationshipSatisfaction', 2, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 3, 'RelationshipSatisfaction', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 3, 'RelationshipSatisfaction', 4, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 4, 'RelationshipSatisfaction', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 4, 'RelationshipSatisfaction', 2, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 4, 'RelationshipSatisfaction', 3, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 4, 'RelationshipSatisfaction', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Executive', 'DistanceFromHome', 5, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Sales Representative', 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Manufacturing Director', 'RelationshipSatisfaction', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Manufacturing Director', 'RelationshipSatisfaction', 2, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Manufacturing Director', 'RelationshipSatisfaction', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Manufacturing Director', 'RelationshipSatisfaction', 4, 'Age', 4, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 2, 'JobRole', 'Manufacturing Director', 'RelationshipSatisfaction', 4, 'Age', 5, 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Healthcare Representative', 'Age', 4, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Healthcare Representative', 'Age', 5, 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Human Resources', 'BusinessTravel', 'Travel_Rarely', 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Human Resources', 'BusinessTravel', 'Travel_Frequently', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Research Director', 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Laboratory Technician', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Manager', 'DailyRate', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Manager', 'DailyRate', 5, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Manager', 'DailyRate', 3, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Sales Executive', 'JobSatisfaction', 1, 'DailyRate', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Sales Executive', 'JobSatisfaction', 1, 'DailyRate', 2, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Sales Executive', 'JobSatisfaction', 1, 'DailyRate', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Sales Executive', 'JobSatisfaction', 1, 'DailyRate', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Sales Executive', 'JobSatisfaction', 1, 'DailyRate', 5, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Sales Executive', 'JobSatisfaction', 2, 'BusinessTravel', 'Travel_Rarely', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Sales Executive', 'JobSatisfaction', 2, 'BusinessTravel', 'Non-Travel', 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Sales Executive', 'JobSatisfaction', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Sales Executive', 'JobSatisfaction', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Manufacturing Director', 'TrainingTimesLastYear', 1, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Manufacturing Director', 'TrainingTimesLastYear', 2, 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Manufacturing Director', 'TrainingTimesLastYear', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 3, 'JobRole', 'Manufacturing Director', 'TrainingTimesLastYear', 5, 'No']
    ['OverTime', 'Yes', 'JobLevel', 4, 'JobRole', 'Manager', 'No']
    ['OverTime', 'Yes', 'JobLevel', 4, 'JobRole', 'Healthcare Representative', 'No']
    ['OverTime', 'Yes', 'JobLevel', 4, 'JobRole', 'Sales Executive', 'EducationField', 'Marketing', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 4, 'JobRole', 'Sales Executive', 'EducationField', 'Life Sciences', 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 4, 'JobRole', 'Sales Executive', 'EducationField', 'Medical', 'No']
    ['OverTime', 'Yes', 'JobLevel', 4, 'JobRole', 'Research Director', 'No']
    ['OverTime', 'Yes', 'JobLevel', 4, 'JobRole', 'Manufacturing Director', 'No']
    ['OverTime', 'Yes', 'JobLevel', 5, 'TrainingTimesLastYear', 1, 'No']
    ['OverTime', 'Yes', 'JobLevel', 5, 'TrainingTimesLastYear', 2, 'JobSatisfaction', 1, 'DailyRate', 2, 'No']
    ['OverTime', 'Yes', 'JobLevel', 5, 'TrainingTimesLastYear', 2, 'JobSatisfaction', 1, 'DailyRate', 3, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 5, 'TrainingTimesLastYear', 2, 'JobSatisfaction', 2, 'Yes']
    ['OverTime', 'Yes', 'JobLevel', 5, 'TrainingTimesLastYear', 2, 'JobSatisfaction', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 5, 'TrainingTimesLastYear', 2, 'JobSatisfaction', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 5, 'TrainingTimesLastYear', 3, 'No']
    ['OverTime', 'Yes', 'JobLevel', 5, 'TrainingTimesLastYear', 4, 'No']
    ['OverTime', 'Yes', 'JobLevel', 5, 'TrainingTimesLastYear', 5, 'No']

In [36]:
for i in dict_generator(tree1): 
    if(i[-1] == "Yes"):
        rule = ""
        for j in range(len(i)-1):
            if(j % 2 == 0):
                rule += str(i[j]) + " = "
            else:
                rule += str(i[j]) + " & "
        rule = rule[:-2] + "|"
        print(rule)

OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 4 |
OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 2 & EnvironmentSatisfaction = 4 & Age = 5 |
OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 2 & EnvironmentSatisfaction = 2 |
OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 3 & JobInvolvement = 1 & Age = 4 |
OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 3 & JobInvolvement = 2 & EnvironmentSatisfaction = 1 |
OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 3 & JobInvolvement = 2 & EnvironmentSatisfaction = 3 |
OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 3 & JobInvolvement = 3 & MonthlyRate = 1 & Age = 3 |
OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Divorced & HourlyRate = 5 & DistanceFromHome = 3 & DailyRate = 3 |
OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Divorced & HourlyRate = 3 & DistanceFromHome = 4 |
O

Below is the rules of this tree:

    OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 4 |
    OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 2 & EnvironmentSatisfaction = 4 & Age = 5 |
    OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 2 & EnvironmentSatisfaction = 2 |
    OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 3 & JobInvolvement = 1 & Age = 4 |
    OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 3 & JobInvolvement = 2 & EnvironmentSatisfaction = 1 |
    OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 3 & JobInvolvement = 2 & EnvironmentSatisfaction = 3 |
    OverTime = No & JobRole = Sales Representative & WorkLifeBalance = 3 & JobInvolvement = 3 & MonthlyRate = 1 & Age = 3 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Divorced & HourlyRate = 5 & DistanceFromHome = 3 & DailyRate = 3 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Divorced & HourlyRate = 3 & DistanceFromHome = 4 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Divorced & HourlyRate = 3 & DistanceFromHome = 2 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Married & WorkLifeBalance = 1 & DailyRate = 2 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Married & WorkLifeBalance = 1 & DailyRate = 4 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Married & WorkLifeBalance = 2 & DistanceFromHome = 3 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Single & TrainingTimesLastYear = 1 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Single & TrainingTimesLastYear = 2 & DailyRate = 1 & Age = 3 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Single & TrainingTimesLastYear = 2 & DailyRate = 2 & Age = 4 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Single & TrainingTimesLastYear = 2 & DailyRate = 4 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Single & TrainingTimesLastYear = 2 & DailyRate = 5 & BusinessTravel = Travel_Rarely |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Single & TrainingTimesLastYear = 2 & DailyRate = 5 & BusinessTravel = Travel_Frequently |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Single & TrainingTimesLastYear = 3 & NumCompaniesWorked = 1 & Gender = Female & Age = 4 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Single & TrainingTimesLastYear = 3 & NumCompaniesWorked = 1 & Gender = Male |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Single & TrainingTimesLastYear = 3 & NumCompaniesWorked = 2 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Single & TrainingTimesLastYear = 5 & WorkLifeBalance = 1 |
    OverTime = No & JobRole = Laboratory Technician & MaritalStatus = Single & TrainingTimesLastYear = 5 & WorkLifeBalance = 2 & Age = 3 |
    OverTime = No & JobRole = Research Scientist & EmployeeNumber = 1 & EducationField = Technical Degree & DailyRate = 4 |
    OverTime = No & JobRole = Research Scientist & EmployeeNumber = 2 & EducationField = Technical Degree |
    OverTime = No & JobRole = Research Scientist & EmployeeNumber = 2 & EducationField = Medical & NumCompaniesWorked = 2 |
    OverTime = No & JobRole = Research Scientist & EmployeeNumber = 3 & NumCompaniesWorked = 1 & DistanceFromHome = 4 |
    OverTime = No & JobRole = Research Scientist & EmployeeNumber = 3 & NumCompaniesWorked = 1 & DistanceFromHome = 5 & Age = 4 |
    OverTime = No & JobRole = Research Scientist & EmployeeNumber = 3 & NumCompaniesWorked = 3 & DailyRate = 2 |
    OverTime = No & JobRole = Research Scientist & EmployeeNumber = 3 & NumCompaniesWorked = 4 & Age = 4 |
    OverTime = No & JobRole = Research Scientist & EmployeeNumber = 3 & NumCompaniesWorked = 4 & Age = 5 |
    OverTime = No & JobRole = Research Scientist & EmployeeNumber = 4 & HourlyRate = 3 & Education = 3 |
    OverTime = No & JobRole = Research Director & TrainingTimesLastYear = 1 & DailyRate = 5 |
    OverTime = No & JobRole = Manufacturing Director & Education = 3 & NumCompaniesWorked = 4 & BusinessTravel = Travel_Frequently |
    OverTime = No & JobRole = Manufacturing Director & Education = 4 & TrainingTimesLastYear = 2 & TotalWorkingYears = 3 |
    OverTime = No & JobRole = Manufacturing Director & Education = 4 & TrainingTimesLastYear = 4 |
    OverTime = No & JobRole = Manufacturing Director & Education = 5 & DistanceFromHome = 1 |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 0 & WorkLifeBalance = 1 |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 0 & WorkLifeBalance = 2 & JobInvolvement = 1 |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 0 & WorkLifeBalance = 2 & JobInvolvement = 2 & JobSatisfaction = 1 |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 0 & WorkLifeBalance = 3 & NumCompaniesWorked = 1 & JobSatisfaction = 2 & BusinessTravel = Travel_Frequently |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 0 & WorkLifeBalance = 3 & NumCompaniesWorked = 5 & DailyRate = 1 |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 0 & WorkLifeBalance = 3 & NumCompaniesWorked = 5 & DailyRate = 3 & EducationField = Life Sciences |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 0 & WorkLifeBalance = 3 & NumCompaniesWorked = 5 & DailyRate = 3 & EducationField = Medical |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 0 & WorkLifeBalance = 3 & NumCompaniesWorked = 5 & DailyRate = 5 & Age = 4 |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 1 & YearsInCurrentRole = 3 & JobSatisfaction = 1 & EducationField = Life Sciences |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 2 & TrainingTimesLastYear = 3 & EnvironmentSatisfaction = 2 |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 2 & TrainingTimesLastYear = 3 & EnvironmentSatisfaction = 3 & DistanceFromHome = 2 |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 2 & TrainingTimesLastYear = 4 & Age = 5 |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 3 & EmployeeNumber = 1 & DailyRate = 1 |
    OverTime = No & JobRole = Sales Executive & StockOptionLevel = 3 & EmployeeNumber = 5 |
    OverTime = No & JobRole = Healthcare Representative & MonthlyRate = 3 & TrainingTimesLastYear = 2 & RelationshipSatisfaction = 1 |
    OverTime = No & JobRole = Healthcare Representative & MonthlyRate = 3 & TrainingTimesLastYear = 2 & RelationshipSatisfaction = 4 |
    OverTime = No & JobRole = Healthcare Representative & MonthlyRate = 3 & TrainingTimesLastYear = 4 |
    OverTime = No & JobRole = Healthcare Representative & MonthlyRate = 5 & DistanceFromHome = 4 & Age = 4 |
    OverTime = No & JobRole = Manager & JobInvolvement = 1 & DailyRate = 1 |
    OverTime = No & JobRole = Human Resources & DistanceFromHome = 1 & EducationField = Technical Degree |
    OverTime = No & JobRole = Human Resources & DistanceFromHome = 1 & EducationField = Medical |
    OverTime = No & JobRole = Human Resources & DistanceFromHome = 4 & Age = 4 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 1 & JobSatisfaction = 1 & EducationField = Life Sciences |
    OverTime = Yes & JobLevel = 1 & DailyRate = 1 & JobSatisfaction = 2 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 1 & JobSatisfaction = 3 & Age = 4 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 2 & TrainingTimesLastYear = 1 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 2 & TrainingTimesLastYear = 2 & EducationField = Marketing |
    OverTime = Yes & JobLevel = 1 & DailyRate = 2 & TrainingTimesLastYear = 2 & EducationField = Life Sciences |
    OverTime = Yes & JobLevel = 1 & DailyRate = 2 & TrainingTimesLastYear = 2 & EducationField = Medical & BusinessTravel = Non-Travel |
    OverTime = Yes & JobLevel = 1 & DailyRate = 2 & TrainingTimesLastYear = 2 & EducationField = Medical & BusinessTravel = Travel_Frequently |
    OverTime = Yes & JobLevel = 1 & DailyRate = 2 & TrainingTimesLastYear = 3 & Age = 4 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 2 & TrainingTimesLastYear = 4 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 3 & NumCompaniesWorked = 1 & JobInvolvement = 1 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 3 & NumCompaniesWorked = 1 & JobInvolvement = 2 & BusinessTravel = Travel_Rarely |
    OverTime = Yes & JobLevel = 1 & DailyRate = 3 & NumCompaniesWorked = 2 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 3 & NumCompaniesWorked = 3 & Age = 4 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 3 & NumCompaniesWorked = 4 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 3 & NumCompaniesWorked = 5 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 4 & WorkLifeBalance = 1 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 4 & WorkLifeBalance = 2 & MonthlyRate = 4 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 4 & WorkLifeBalance = 3 & EmployeeNumber = 1 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 4 & WorkLifeBalance = 3 & EmployeeNumber = 2 & Age = 5 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 4 & WorkLifeBalance = 3 & EmployeeNumber = 3 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 4 & WorkLifeBalance = 3 & EmployeeNumber = 5 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 4 & WorkLifeBalance = 4 & RelationshipSatisfaction = 1 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 4 & WorkLifeBalance = 4 & RelationshipSatisfaction = 2 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 5 & Age = 4 & EnvironmentSatisfaction = 1 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 5 & Age = 4 & EnvironmentSatisfaction = 2 & BusinessTravel = Travel_Frequently |
    OverTime = Yes & JobLevel = 1 & DailyRate = 5 & Age = 4 & EnvironmentSatisfaction = 3 & JobRole = Laboratory Technician |
    OverTime = Yes & JobLevel = 1 & DailyRate = 5 & Age = 5 & MonthlyRate = 1 & NumCompaniesWorked = 4 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 5 & Age = 5 & MonthlyRate = 1 & NumCompaniesWorked = 2 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 5 & Age = 5 & MonthlyRate = 5 & Department = Sales |
    OverTime = Yes & JobLevel = 1 & DailyRate = 5 & Age = 3 & EmployeeNumber = 2 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 5 & Age = 3 & EmployeeNumber = 3 |
    OverTime = Yes & JobLevel = 1 & DailyRate = 5 & Age = 3 & EmployeeNumber = 5 |
    OverTime = Yes & JobLevel = 2 & JobRole = Healthcare Representative & DistanceFromHome = 5 |
    OverTime = Yes & JobLevel = 2 & JobRole = Laboratory Technician & JobInvolvement = 1 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 1 & JobSatisfaction = 3 & Age = 5 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 1 & JobSatisfaction = 3 & Age = 3 & DailyRate = 1 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 1 & JobSatisfaction = 4 & WorkLifeBalance = 1 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 2 & JobInvolvement = 1 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 2 & JobInvolvement = 3 & DailyRate = 3 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 3 & RelationshipSatisfaction = 1 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 3 & RelationshipSatisfaction = 2 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 3 & RelationshipSatisfaction = 4 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 4 & RelationshipSatisfaction = 1 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 4 & RelationshipSatisfaction = 2 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 4 & RelationshipSatisfaction = 3 |
    OverTime = Yes & JobLevel = 2 & JobRole = Sales Executive & DistanceFromHome = 5 |
    OverTime = Yes & JobLevel = 2 & JobRole = Manufacturing Director & RelationshipSatisfaction = 1 |
    OverTime = Yes & JobLevel = 2 & JobRole = Manufacturing Director & RelationshipSatisfaction = 4 & Age = 4 |
    OverTime = Yes & JobLevel = 3 & JobRole = Healthcare Representative & Age = 4 |
    OverTime = Yes & JobLevel = 3 & JobRole = Human Resources & BusinessTravel = Travel_Frequently |
    OverTime = Yes & JobLevel = 3 & JobRole = Laboratory Technician |
    OverTime = Yes & JobLevel = 3 & JobRole = Manager & DailyRate = 5 |
    OverTime = Yes & JobLevel = 3 & JobRole = Manager & DailyRate = 3 |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & JobSatisfaction = 1 & DailyRate = 1 |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & JobSatisfaction = 1 & DailyRate = 2 |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & JobSatisfaction = 1 & DailyRate = 5 |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & JobSatisfaction = 2 & BusinessTravel = Travel_Rarely |
    OverTime = Yes & JobLevel = 3 & JobRole = Manufacturing Director & TrainingTimesLastYear = 1 |
    OverTime = Yes & JobLevel = 4 & JobRole = Sales Executive & EducationField = Marketing |
    OverTime = Yes & JobLevel = 4 & JobRole = Sales Executive & EducationField = Life Sciences |
    OverTime = Yes & JobLevel = 5 & TrainingTimesLastYear = 2 & JobSatisfaction = 1 & DailyRate = 3 |
    OverTime = Yes & JobLevel = 5 & TrainingTimesLastYear = 2 & JobSatisfaction = 2

## Error Analysis for Classification

Below is confusion matrix:

    [[209  33]
     [ 32  11]]

As it can be seen from the confusion matrix above, some of our predictions do not match the actual results. This mismatches can occur due to different reasons. One of them might be because of our data size. As our data grows in size, both in sample size and attribute size, it becomes harder to build a tree based on the entropy and information gain of attributes of our data. Because the differences in information gain between our attributes becomes smaller as our data size grows, it becomes harder to choose "the best" attribute for classifying and as a result mismatches occur. Another result might be due to lack of restrictions. Compared to other tree classifier methods, ID3 algorithm do not use restrictions when building a tree for the first time. Instead, after building a tree, it prunes the first tree in order to get less mismatches and get generally higher accuracy values.

As you can observe from the result table above, our best fold in terms of performance was the fold4. At first, we randomized our data and applied the 5 fold cross validation method. When we split our data for 5 folds, the most decisive data was in fold4. This situation may change when we re-run our code. In the end, fold4 has the least amount of redundant features when it formed a tree and this resulted in better scores.

# Part 2

In [26]:
train2 = data[:882,:]
validate2 = data[882:1176,:]
test2 = data[1176:,:]

In [27]:
df_train2 = pd.DataFrame(train2, columns = column_names)
df_validate2 = pd.DataFrame(validate2, columns = column_names)
tree2 = id3(df_train2, 'Attrition')
result2 = evaluate(tree2, df_validate2) #evaluating the test dataset
nones2 = []
truth2 = validate2[:, 1].copy()

dict_res2 = calculate_scores("validate", tree2, df_validate2, validate2)

In [28]:
df_res2 = pd.DataFrame.from_dict(dict_res2, orient='index', columns = mux)
df_res2.head()

Unnamed: 0,Accuracy,Precision,Recall,F1 Score
validate,0.755245,0.591149,0.599492,0.594704


In [29]:
tree_prune = copy.deepcopy(tree2)
last_accuracy = dict_res2["validate"][0]

df_test2 = pd.DataFrame(test2, columns = column_names)
pruned_tree = prune_tree(tree_prune, last_accuracy, df_train2, df_validate2, validate2)

dict_res3_1 = calculate_scores("test preprune", tree2, df_test2, test2)
dict_res3_2 = calculate_scores("test postprune", pruned_tree, df_test2, test2)

In [30]:
df_res3_1 = pd.DataFrame.from_dict(dict_res3_1, orient='index', columns = mux)
df_res3_1.head()

Unnamed: 0,Accuracy,Precision,Recall,F1 Score
test preprune,0.774648,0.588258,0.582274,0.584947


In [31]:
df_res3_2 = pd.DataFrame.from_dict(dict_res3_2, orient='index', columns = mux)
df_res3_2.head()

Unnamed: 0,Accuracy,Precision,Recall,F1 Score
test postprune,0.792254,0.610099,0.592867,0.599589


Rules for pre-pruned tree:

In [37]:
for i in dict_generator(tree2): 
    if(i[-1] == "Yes"):
        rule = ""
        for j in range(len(i)-1):
            if(j % 2 == 0):
                rule += str(i[j]) + " = "
            else:
                rule += str(i[j]) + " & "
        rule = rule[:-2] + "|"
        print(rule)

OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 1 & JobSatisfaction = 3 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 2 & HourlyRate = 4 & DistanceFromHome = 1 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 2 & HourlyRate = 3 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 5 & EducationField = Other |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 1 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 4 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 5 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 2 & JobRole = Laboratory Technician & BusinessTravel = Travel_Frequently |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & En

    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 1 & JobSatisfaction = 3 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 2 & HourlyRate = 4 & DistanceFromHome = 1 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 2 & HourlyRate = 3 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 5 & EducationField = Other |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 1 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 4 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 5 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 2 & JobRole = Laboratory Technician & BusinessTravel = Travel_Frequently |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 2 & JobRole = Sales Representative |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 3 & Education = 1 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 3 & Education = 2 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 3 & Education = 5 & DailyRate = 2 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 4 & Education = 4 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 5 & YearsWithCurrManager = 1 & WorkLifeBalance = 1 & DailyRate = 3 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 5 & YearsWithCurrManager = 1 & WorkLifeBalance = 3 & TrainingTimesLastYear = 3 & EnvironmentSatisfaction = 1 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 5 & YearsWithCurrManager = 1 & WorkLifeBalance = 3 & TrainingTimesLastYear = 4 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 5 & YearsWithCurrManager = 5 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 1 & WorkLifeBalance = 1 & DailyRate = 2 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 1 & WorkLifeBalance = 1 & DailyRate = 4 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 1 & WorkLifeBalance = 2 & DistanceFromHome = 3 & EducationField = Medical |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 1 & WorkLifeBalance = 2 & DistanceFromHome = 4 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 1 & WorkLifeBalance = 4 & BusinessTravel = Travel_Frequently |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 3 & DailyRate = 3 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 1 & TrainingTimesLastYear = 2 & JobRole = Laboratory Technician |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 1 & TrainingTimesLastYear = 2 & JobRole = Sales Executive & MaritalStatus = Divorced |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 1 & TrainingTimesLastYear = 4 & DailyRate = 2 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 1 & TrainingTimesLastYear = 4 & DailyRate = 3 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 1 & TrainingTimesLastYear = 5 & DistanceFromHome = 5 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 2 & JobRole = Research Scientist & EmployeeNumber = 2 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 2 & JobRole = Healthcare Representative & DailyRate = 1 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 2 & JobRole = Healthcare Representative & DailyRate = 5 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 3 & NumCompaniesWorked = 1 & TrainingTimesLastYear = 4 & Age = 4 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 3 & NumCompaniesWorked = 4 & MaritalStatus = Divorced |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 3 & NumCompaniesWorked = 5 & EnvironmentSatisfaction = 1 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 3 & NumCompaniesWorked = 5 & EnvironmentSatisfaction = 4 & BusinessTravel = Non-Travel |
    OverTime = No & JobLevel = 3 & Education = 1 & HourlyRate = 4 |
    OverTime = No & JobLevel = 3 & Education = 3 & DistanceFromHome = 2 & StockOptionLevel = 2 |
    OverTime = No & JobLevel = 3 & Education = 3 & DistanceFromHome = 2 & StockOptionLevel = 3 & Department = Sales |
    OverTime = No & JobLevel = 3 & Education = 3 & DistanceFromHome = 3 & DailyRate = 3 |
    OverTime = No & JobLevel = 3 & Education = 3 & DistanceFromHome = 4 |
    OverTime = No & JobLevel = 3 & Education = 3 & DistanceFromHome = 5 & Age = 4 |
    OverTime = No & JobLevel = 3 & Education = 4 & JobInvolvement = 1 & BusinessTravel = Travel_Frequently |
    OverTime = No & JobLevel = 3 & Education = 5 & Department = Research & Development |
    OverTime = No & JobLevel = 5 & TrainingTimesLastYear = 1 |
    OverTime = No & JobLevel = 5 & TrainingTimesLastYear = 3 & EducationField = Marketing |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 1 & DailyRate = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 1 & DailyRate = 5 & EmployeeNumber = 2 |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 1 & DailyRate = 5 & EmployeeNumber = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 2 |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 5 & BusinessTravel = Travel_Frequently |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 1 & MonthlyRate = 2 & DailyRate = 2 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 1 & MonthlyRate = 2 & DailyRate = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 1 & MonthlyRate = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 1 & MonthlyRate = 5 & DailyRate = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 2 & EducationField = Other |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 3 & BusinessTravel = Travel_Frequently |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 5 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 2 & DistanceFromHome = 2 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 2 & DistanceFromHome = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 1 & YearsWithCurrManager = 1 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 3 & EmployeeNumber = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 3 & EmployeeNumber = 5 & DailyRate = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 3 & EmployeeNumber = 5 & DailyRate = 2 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 5 & DailyRate = 1 & Education = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 5 & DailyRate = 4 |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 1 & JobSatisfaction = 3 & EducationField = Marketing |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 1 & JobSatisfaction = 3 & EducationField = Medical & Age = 5 |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 3 & Gender = Male |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 4 & JobSatisfaction = 3 |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 4 & JobSatisfaction = 4 & BusinessTravel = Non-Travel |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 5 & BusinessTravel = Travel_Rarely |
    OverTime = Yes & JobLevel = 3 & JobRole = Healthcare Representative & Age = 4 |
    OverTime = Yes & JobLevel = 3 & JobRole = Human Resources |
    OverTime = Yes & JobLevel = 3 & JobRole = Laboratory Technician |
    OverTime = Yes & JobLevel = 3 & JobRole = Manager & BusinessTravel = Non-Travel |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & RelationshipSatisfaction = 1 |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & RelationshipSatisfaction = 2 |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & RelationshipSatisfaction = 3 & DailyRate = 1 |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & RelationshipSatisfaction = 3 & DailyRate = 2 |
    OverTime = Yes & JobLevel = 4 & EnvironmentSatisfaction = 1 & BusinessTravel = Travel_Rarely |
    OverTime = Yes & JobLevel = 4 & EnvironmentSatisfaction = 2 |
    OverTime = Yes & JobLevel = 5 & EmployeeNumber = 2 |
    OverTime = Yes & JobLevel = 5 & EmployeeNumber = 4 & DailyRate = 5

Rules for post-pruned tree:

In [38]:
for i in dict_generator(pruned_tree): 
    if(i[-1] == "Yes"):
        rule = ""
        for j in range(len(i)-1):
            if(j % 2 == 0):
                rule += str(i[j]) + " = "
            else:
                rule += str(i[j]) + " & "
        rule = rule[:-2] + "|"
        print(rule)

OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 1 & JobSatisfaction = 3 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 2 & HourlyRate = 4 & DistanceFromHome = 1 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 2 & HourlyRate = 3 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 5 & EducationField = Other |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 1 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 4 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 5 |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 2 & JobRole = Laboratory Technician & BusinessTravel = Travel_Frequently |
OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & En

    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 1 & JobSatisfaction = 3 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 2 & HourlyRate = 4 & DistanceFromHome = 1 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 2 & HourlyRate = 3 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 3 & DailyRate = 5 & EducationField = Other |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 1 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 4 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 1 & EmployeeNumber = 5 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 2 & JobRole = Laboratory Technician & BusinessTravel = Travel_Frequently |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 2 & JobRole = Sales Representative |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 3 & Education = 1 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 3 & Education = 2 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 3 & Education = 5 & DailyRate = 2 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 5 & YearsWithCurrManager = 1 & WorkLifeBalance = 1 & DailyRate = 3 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 5 & YearsWithCurrManager = 1 & WorkLifeBalance = 3 & TrainingTimesLastYear = 3 & EnvironmentSatisfaction = 1 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 5 & YearsWithCurrManager = 1 & WorkLifeBalance = 3 & TrainingTimesLastYear = 4 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 5 & YearsWithCurrManager = 5 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 1 & WorkLifeBalance = 1 & DailyRate = 2 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 1 & WorkLifeBalance = 1 & DailyRate = 4 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 1 & WorkLifeBalance = 2 & DistanceFromHome = 3 & EducationField = Medical |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 1 & WorkLifeBalance = 2 & DistanceFromHome = 4 |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 1 & WorkLifeBalance = 4 & BusinessTravel = Travel_Frequently |
    OverTime = No & JobLevel = 1 & StockOptionLevel = 3 & DailyRate = 3 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 1 & TrainingTimesLastYear = 2 & JobRole = Laboratory Technician |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 1 & TrainingTimesLastYear = 2 & JobRole = Sales Executive & MaritalStatus = Divorced |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 1 & TrainingTimesLastYear = 4 & DailyRate = 2 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 1 & TrainingTimesLastYear = 4 & DailyRate = 3 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 1 & TrainingTimesLastYear = 5 & DistanceFromHome = 5 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 2 & JobRole = Research Scientist & EmployeeNumber = 2 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 2 & JobRole = Healthcare Representative & DailyRate = 1 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 2 & JobRole = Healthcare Representative & DailyRate = 5 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 3 & NumCompaniesWorked = 1 & TrainingTimesLastYear = 4 & Age = 4 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 3 & NumCompaniesWorked = 4 & MaritalStatus = Divorced |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 3 & NumCompaniesWorked = 5 & EnvironmentSatisfaction = 1 |
    OverTime = No & JobLevel = 2 & JobSatisfaction = 3 & NumCompaniesWorked = 5 & EnvironmentSatisfaction = 4 & BusinessTravel = Non-Travel |
    OverTime = No & JobLevel = 3 & Education = 3 & DistanceFromHome = 2 & StockOptionLevel = 2 |
    OverTime = No & JobLevel = 3 & Education = 3 & DistanceFromHome = 2 & StockOptionLevel = 3 & Department = Sales |
    OverTime = No & JobLevel = 3 & Education = 3 & DistanceFromHome = 3 & DailyRate = 3 |
    OverTime = No & JobLevel = 3 & Education = 3 & DistanceFromHome = 4 |
    OverTime = No & JobLevel = 3 & Education = 3 & DistanceFromHome = 5 & Age = 4 |
    OverTime = No & JobLevel = 3 & Education = 4 & JobInvolvement = 1 & BusinessTravel = Travel_Frequently |
    OverTime = No & JobLevel = 3 & Education = 5 & Department = Research & Development |
    OverTime = No & JobLevel = 5 & TrainingTimesLastYear = 1 |
    OverTime = No & JobLevel = 5 & TrainingTimesLastYear = 3 & EducationField = Marketing |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 1 & DailyRate = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 1 & DailyRate = 5 & EmployeeNumber = 2 |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 1 & DailyRate = 5 & EmployeeNumber = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 2 |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 3 & DistanceFromHome = 5 & BusinessTravel = Travel_Frequently |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 1 & MonthlyRate = 2 & DailyRate = 2 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 1 & MonthlyRate = 2 & DailyRate = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 1 & MonthlyRate = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 1 & MonthlyRate = 5 & DailyRate = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 2 & EducationField = Other |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 3 & BusinessTravel = Travel_Frequently |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 1 & DistanceFromHome = 5 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 2 & DistanceFromHome = 2 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 2 & DistanceFromHome = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 4 & NumCompaniesWorked = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 1 & YearsWithCurrManager = 1 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 3 & EmployeeNumber = 3 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 3 & EmployeeNumber = 5 & DailyRate = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 3 & EmployeeNumber = 5 & DailyRate = 2 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 5 & DailyRate = 4 |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 1 & JobSatisfaction = 3 & EducationField = Marketing |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 1 & JobSatisfaction = 3 & EducationField = Medical & Age = 5 |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 4 & JobSatisfaction = 3 |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 4 & JobSatisfaction = 4 & BusinessTravel = Non-Travel |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 5 & BusinessTravel = Travel_Rarely |
    OverTime = Yes & JobLevel = 3 & JobRole = Healthcare Representative & Age = 4 |
    OverTime = Yes & JobLevel = 3 & JobRole = Human Resources |
    OverTime = Yes & JobLevel = 3 & JobRole = Laboratory Technician |
    OverTime = Yes & JobLevel = 3 & JobRole = Manager & BusinessTravel = Non-Travel |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & RelationshipSatisfaction = 1 |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & RelationshipSatisfaction = 2 |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & RelationshipSatisfaction = 3 & DailyRate = 1 |
    OverTime = Yes & JobLevel = 3 & JobRole = Sales Executive & RelationshipSatisfaction = 3 & DailyRate = 2 |
    OverTime = Yes & JobLevel = 4 & EnvironmentSatisfaction = 1 & BusinessTravel = Travel_Rarely |
    OverTime = Yes & JobLevel = 4 & EnvironmentSatisfaction = 2 |
    OverTime = Yes & JobLevel = 5 & EmployeeNumber = 2 |
    OverTime = Yes & JobLevel = 5 & EmployeeNumber = 4 & DailyRate = 5

    From the tables, you can spot the differences between pre-prune and post-prune models.
    Pre-prune accuracy: 0.774648
    Post-prune accuracy: 0.792254
    You can observe that our post-prune model gives better results in terms of accuracy and also in other metrics (precision, recall, f1 score). This increase in performance is the result of our pruning proccess. When we first form the tree, our model used every attribute even though they are redundant. This resulted in some missclassification issues. However, after the pruning proccess, our model got rid of there redundant features.

    Below, you can see some rules of the pre-pruned tree:
    OverTime = No & JobLevel = 1 & StockOptionLevel = 0 & Age = 4 & EnvironmentSatisfaction = 4 & Education = 4 |
    OverTime = No & JobLevel = 3 & Education = 1 & HourlyRate = 4 |
    OverTime = Yes & JobLevel = 1 & Age = 5 & MonthlyRate = 5 & DailyRate = 1 & Education = 3 |
    OverTime = Yes & JobLevel = 2 & DistanceFromHome = 3 & Gender = Male

    After the pruning proccess, these rules are altered and their result is determined according to the majority of "Yes" or "No".
    To give an example, the first rule's twig got removed from the tree, and then the remaining part yields the result "No".
    All of the twigs of these rules got removed from the in the same manner, and the resulting tree gave us better results in terms of all the metrics.
    Before they were pruned, these features had the least information gain out of all the twigs in the tree. After removing one of them, we test the tree and got a better accuracy score. That's why they got removed from our tree. Because when we first formed the tree, we had no restrictions for our model. It used every attribute in the tree even though they are not good for classification. In the pruning proccess, we found those twigs and removed them. In the end, we had a better result for every metric.

In terms of performance, the post-pruning tree gave better results. However, this might not be the case for everytime we run the code. In some iterations, the post pruning tree gave the same results as pre-pruned tree, even sometimes it gave worse results. This situation happened because we formed and pruned our tree for validation data. If we test our code for validation data, the post-pruned tree always gives better or equal results comöpared to our pre-pruned tree. However, this might not be the case for the test data. If our post-pruned tree gave worse results than the pre-pruned tree, this means the redundant data in our validation data was not redundant in test data. We should have kept them for better results in test data.