In [5]:
# Unit tests for kNN and decision trees (the random forest tests live in random_forest.ipynb):

import numpy as np
import scipy.stats as stats
import random
import math

from mysklearn.myclassifiers import MyKNeighborsClassifier, MyDecisionTreeClassifier
import mysklearn.myutils as myutils

def test_kneighbors_classifier_kneighbors():
    """Check MyKNeighborsClassifier.kneighbors() against hand-computed neighbors.

    Three training sets are exercised, each with a single test instance:
    the 4-instance and 8-instance lecture examples (passed through
    myutils.scale first) and the Bramer Figure 3.5 dataset (left unscaled).
    For every dataset the returned distances and indices must match the
    expected values, listed from nearest to farthest.
    """

    def check_neighbors(distances, indices, distances_desired, indices_desired):
        # Compare the shapes first so that an empty or truncated result cannot
        # pass vacuously by skipping the element-wise checks below.
        assert len(distances) == len(distances_desired)
        assert len(indices) == len(indices_desired)
        for dist_row, index_row, dist_want, index_want in zip(
                distances, indices, distances_desired, indices_desired):
            assert len(dist_row) == len(dist_want)
            assert len(index_row) == len(index_want)
            # Distances are floats, so allow for rounding error.
            for got, want in zip(dist_row, dist_want):
                assert math.isclose(got, want)
            # Indices are exact integers, so compare them exactly.
            for got, want in zip(index_row, index_want):
                assert got == want

    # Let's start by using the 4-instance training set example from lecture.
    k = 3
    four_X_train = [[7, 7],
                    [7, 4],
                    [3, 4],
                    [1, 4]]
    four_y_train = ["bad", "bad", "good", "good"]
    four_test = [[3, 7]]

    # The second call's return value is discarded, so this presumably relies
    # on scale() modifying its argument in place -- TODO confirm in myutils.
    four_mins, four_maxs = myutils.scale(four_X_train)
    myutils.scale(four_test, zero_vals=four_mins, one_vals=four_maxs)

    our_k_neighbors = MyKNeighborsClassifier(n_neighbors=k)
    our_k_neighbors.fit(four_X_train, four_y_train)
    four_distances, four_indices = our_k_neighbors.kneighbors(four_test)

    # From class, sorted least to most. These values assume each feature is
    # scaled to [0, 1] using the training minimum and maximum.
    four_distances_desired = [[2/3, 1, (10/9) ** 0.5]]
    four_indices_desired = [[0, 2, 3]]
    check_neighbors(four_distances, four_indices,
                    four_distances_desired, four_indices_desired)

    # Okay, now we can use the 8-instance sample
    k = 3
    eight_X_train = [[3, 2],
                     [6, 6],
                     [4, 1],
                     [4, 4],
                     [1, 2],
                     [2, 0],
                     [0, 3],
                     [1, 6]]
    eight_y_train = ["no", "yes", "no", "no", "yes", "no", "yes", "yes"]
    eight_test = [[2, 3]]

    eight_mins, eight_maxs = myutils.scale(eight_X_train)
    myutils.scale(eight_test, zero_vals=eight_mins, one_vals=eight_maxs)

    our_k_neighbors = MyKNeighborsClassifier(n_neighbors=k)
    our_k_neighbors.fit(eight_X_train, eight_y_train)
    eight_distances, eight_indices = our_k_neighbors.kneighbors(eight_test)

    # Unscaled Euclidean distances to the test instance, by training index:
    # sqrt(2), 5, 2*sqrt(2), sqrt(5), sqrt(2), 3, 2, sqrt(10)
    # Both features span 0..6, so scaling divides each distance by 6.
    # Indices 0 and 4 tie for nearest; the intended sort breaks ties in
    # favour of the lower index, so index 0 must come before index 4.
    eight_distances_desired = [[18 ** -0.5, 18 ** -0.5, 1/3]]
    eight_indices_desired = [[0, 4, 6]]
    check_neighbors(eight_distances, eight_indices,
                    eight_distances_desired, eight_indices_desired)

    # Let's check using the Bramer Figure 3.5 dataset for our training set...
    k = 5
    bramer_X_train = [[0.8, 6.3],
                      [1.4, 8.1],
                      [2.1, 7.4],
                      [2.6, 14.3],
                      [6.8, 12.6],
                      [8.8, 9.8],
                      [9.2, 11.6],
                      [10.8, 9.6],
                      [11.8, 9.9],
                      [12.4, 6.5],
                      [12.8, 1.1],
                      [14.0, 19.9],
                      [14.2, 18.5],
                      [15.6, 17.4],
                      [15.8, 12.2],
                      [16.6, 6.7],
                      [17.4, 4.5],
                      [18.2, 6.9],
                      [19.0, 3.4],
                      [19.6, 11.1]]
    bramer_y_train = ['-', '-', '-', '+', '-',
                      '+', '-', '+', '+', '+',
                      '-', '-', '-', '-', '-',
                      '+', '+', '+', '-', '+']
    bramer_test = [[9.1, 11.0]]

    # We won't scale this test.
    our_k_neighbors = MyKNeighborsClassifier(n_neighbors=k)
    our_k_neighbors.fit(bramer_X_train, bramer_y_train)
    bramer_distances, bramer_indices = our_k_neighbors.kneighbors(bramer_test)

    # Each expected distance is the square root of the summed squared
    # differences between the test instance and that training instance.
    bramer_distances_desired = [[.37 ** 0.5, 1.53 ** 0.5, 4.85 ** 0.5, 7.85 ** 0.5, 8.5 ** 0.5]]
    bramer_indices_desired = [[6, 5, 7, 4, 8]]
    check_neighbors(bramer_distances, bramer_indices,
                    bramer_distances_desired, bramer_indices_desired)

def test_kneighbors_classifier_predict():
    """Check MyKNeighborsClassifier.predict() on three hand-worked examples.

    Uses the same data as the kneighbors() test: the 4-instance and
    8-instance lecture examples (passed through myutils.scale first) and the
    unscaled Bramer Figure 3.5 dataset. Only the predicted class label is
    checked here; distances and indices are covered by the kneighbors() test.
    """
    # Let's start by using the 4-instance training set example from lecture.
    k = 3
    four_X_train = [[7, 7],
                    [7, 4],
                    [3, 4],
                    [1, 4]]
    four_y_train = ["bad", "bad", "good", "good"]
    four_test = [[3, 7]]

    # The second call's return value is discarded, so this presumably relies
    # on scale() modifying its argument in place -- TODO confirm in myutils.
    four_mins, four_maxs = myutils.scale(four_X_train)
    myutils.scale(four_test, zero_vals=four_mins, one_vals=four_maxs)

    our_k_neighbors = MyKNeighborsClassifier(n_neighbors=k)
    our_k_neighbors.fit(four_X_train, four_y_train)

    # The three nearest neighbors are indices 0, 2 and 3 (bad, good, good),
    # so we expect a "good" classification with k=3.
    assert our_k_neighbors.predict(four_test) == ["good"]

    # Okay, now we can use the 8-instance sample
    k = 3
    eight_X_train = [[3, 2],
                     [6, 6],
                     [4, 1],
                     [4, 4],
                     [1, 2],
                     [2, 0],
                     [0, 3],
                     [1, 6]]
    eight_y_train = ["no", "yes", "no", "no", "yes", "no", "yes", "yes"]
    eight_test = [[2, 3]]

    eight_mins, eight_maxs = myutils.scale(eight_X_train)
    myutils.scale(eight_test, zero_vals=eight_mins, one_vals=eight_maxs)

    our_k_neighbors = MyKNeighborsClassifier(n_neighbors=k)
    our_k_neighbors.fit(eight_X_train, eight_y_train)

    # The three nearest neighbors are indices 0, 4 and 6 (no, yes, yes),
    # so we expect a "yes" classification with k=3.
    assert our_k_neighbors.predict(eight_test) == ["yes"]

    # Let's check using the Bramer Figure 3.5 dataset for our training set...
    k = 5
    bramer_X_train = [[0.8, 6.3],
                      [1.4, 8.1],
                      [2.1, 7.4],
                      [2.6, 14.3],
                      [6.8, 12.6],
                      [8.8, 9.8],
                      [9.2, 11.6],
                      [10.8, 9.6],
                      [11.8, 9.9],
                      [12.4, 6.5],
                      [12.8, 1.1],
                      [14.0, 19.9],
                      [14.2, 18.5],
                      [15.6, 17.4],
                      [15.8, 12.2],
                      [16.6, 6.7],
                      [17.4, 4.5],
                      [18.2, 6.9],
                      [19.0, 3.4],
                      [19.6, 11.1]]
    bramer_y_train = ['-', '-', '-', '+', '-',
                      '+', '-', '+', '+', '+',
                      '-', '-', '-', '-', '-',
                      '+', '+', '+', '-', '+']
    bramer_test = [[9.1, 11.0]]

    # This dataset is used unscaled.
    our_k_neighbors = MyKNeighborsClassifier(n_neighbors=k)
    our_k_neighbors.fit(bramer_X_train, bramer_y_train)

    # The five nearest neighbors are indices 6, 5, 7, 4 and 8
    # (-, +, +, -, +), so we expect a '+' classification with k=5.
    assert our_k_neighbors.predict(bramer_test) == ['+']
    
def test_decision_tree_classifier_fit():
    """Fit MyDecisionTreeClassifier on two datasets and compare the trees.

    The interview dataset and the Bramer degrees dataset are each split into
    features and class labels with the myutils column helpers, fitted, and the
    resulting ``tree`` attribute is compared with a hand-built expected tree
    via myutils.equivalent.

    Expected trees are nested lists of "Attribute", "Value" and "Leaf" nodes.
    In these fixtures the two numbers on a leaf match the count of training
    instances reaching the leaf and the size of the partition it was split
    from.
    """
    iv_header = ["level", "lang", "tweets", "phd", "interviewed_well"]
    iv_rows = [
        ["Senior", "Java", "no", "no", "False"],
        ["Senior", "Java", "no", "yes", "False"],
        ["Mid", "Python", "no", "no", "True"],
        ["Junior", "Python", "no", "no", "True"],
        ["Junior", "R", "yes", "no", "True"],
        ["Junior", "R", "yes", "yes", "False"],
        ["Mid", "R", "yes", "yes", "True"],
        ["Senior", "Python", "no", "no", "False"],
        ["Senior", "R", "yes", "no", "True"],
        ["Junior", "Python", "yes", "no", "True"],
        ["Senior", "Python", "yes", "yes", "True"],
        ["Mid", "Python", "no", "yes", "True"],
        ["Mid", "Java", "yes", "no", "True"],
        ["Junior", "Python", "no", "yes", "False"],
    ]
    iv_expected = [
        "Attribute", "att0",
        ["Value", "Junior",
         ["Attribute", "att3",
          ["Value", "no", ["Leaf", "True", 3, 5]],
          ["Value", "yes", ["Leaf", "False", 2, 5]]]],
        ["Value", "Mid", ["Leaf", "True", 4, 14]],
        ["Value", "Senior",
         ["Attribute", "att2",
          ["Value", "no", ["Leaf", "False", 3, 5]],
          ["Value", "yes", ["Leaf", "True", 2, 5]]]],
    ]

    # Bramer degrees dataset; the expected tree was worked out by hand using
    # entropy-based attribute selection.
    dg_header = ["SoftEng", "ARIN", "HCI", "CSA", "Project", "Class"]
    dg_rows = [
        ["A", "B", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "A", "FIRST"],
        ["A", "A", "A", "B", "B", "SECOND"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "A", "B", "B", "A", "FIRST"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
        ["A", "A", "A", "A", "A", "FIRST"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "A", "B", "SECOND"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["A", "A", "B", "A", "B", "FIRST"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["A", "A", "B", "B", "B", "SECOND"],
        ["B", "B", "B", "B", "B", "SECOND"],
        ["A", "A", "B", "A", "A", "FIRST"],
        ["B", "B", "B", "A", "A", "SECOND"],
        ["B", "B", "A", "A", "B", "SECOND"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["B", "A", "B", "A", "B", "SECOND"],
        ["A", "B", "B", "B", "A", "FIRST"],
        ["A", "B", "A", "B", "B", "SECOND"],
        ["B", "A", "B", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
    ]
    dg_expected = [
        "Attribute", "att0",
        ["Value", "A",
         ["Attribute", "att4",
          ["Value", "A", ["Leaf", "FIRST", 5, 14]],
          ["Value", "B",
           ["Attribute", "att3",
            ["Value", "A",
             ["Attribute", "att1",
              ["Value", "A", ["Leaf", "FIRST", 1, 2]],
              ["Value", "B", ["Leaf", "SECOND", 1, 2]]]],
            ["Value", "B", ["Leaf", "SECOND", 7, 9]]]]]],
        ["Value", "B", ["Leaf", "SECOND", 12, 26]],
    ]

    # Same procedure for both datasets: peel off the class column, fit a
    # fresh classifier, then compare its tree with the expected one.
    cases = (
        (iv_rows, iv_header, "interviewed_well", iv_expected),
        (dg_rows, dg_header, "Class", dg_expected),
    )
    for rows, header, class_name, expected_tree in cases:
        classifier = MyDecisionTreeClassifier()
        labels = myutils.get_column(rows, header, class_name)
        features = myutils.drop_column(rows, header, class_name)
        classifier.fit(features, labels)

        assert myutils.equivalent(classifier.tree, expected_tree)

def test_decision_tree_classifier_predict():
    """Check MyDecisionTreeClassifier.predict() on two fitted trees.

    A classifier is fitted on the interview dataset and then on the Bramer
    degrees dataset; for each, the predictions for a few unseen instances
    are compared with the expected labels via myutils.equivalent.
    """
    iv_header = ["level", "lang", "tweets", "phd", "interviewed_well"]
    iv_rows = [
        ["Senior", "Java", "no", "no", "False"],
        ["Senior", "Java", "no", "yes", "False"],
        ["Mid", "Python", "no", "no", "True"],
        ["Junior", "Python", "no", "no", "True"],
        ["Junior", "R", "yes", "no", "True"],
        ["Junior", "R", "yes", "yes", "False"],
        ["Mid", "R", "yes", "yes", "True"],
        ["Senior", "Python", "no", "no", "False"],
        ["Senior", "R", "yes", "no", "True"],
        ["Junior", "Python", "yes", "no", "True"],
        ["Senior", "Python", "yes", "yes", "True"],
        ["Mid", "Python", "no", "yes", "True"],
        ["Mid", "Java", "yes", "no", "True"],
        ["Junior", "Python", "no", "yes", "False"],
    ]
    iv_unseen = [
        ["Junior", "Java", "yes", "no"],
        ["Junior", "Java", "yes", "yes"],
    ]
    iv_labels = ["True", "False"]

    # Bramer degrees dataset
    dg_header = ["SoftEng", "ARIN", "HCI", "CSA", "Project", "Class"]
    dg_rows = [
        ["A", "B", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "A", "FIRST"],
        ["A", "A", "A", "B", "B", "SECOND"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "A", "B", "B", "A", "FIRST"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
        ["A", "A", "A", "A", "A", "FIRST"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["B", "A", "A", "B", "B", "SECOND"],
        ["A", "B", "B", "A", "B", "SECOND"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["A", "A", "B", "A", "B", "FIRST"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["A", "A", "B", "B", "B", "SECOND"],
        ["B", "B", "B", "B", "B", "SECOND"],
        ["A", "A", "B", "A", "A", "FIRST"],
        ["B", "B", "B", "A", "A", "SECOND"],
        ["B", "B", "A", "A", "B", "SECOND"],
        ["B", "B", "B", "B", "A", "SECOND"],
        ["B", "A", "B", "A", "B", "SECOND"],
        ["A", "B", "B", "B", "A", "FIRST"],
        ["A", "B", "A", "B", "B", "SECOND"],
        ["B", "A", "B", "B", "B", "SECOND"],
        ["A", "B", "B", "B", "B", "SECOND"],
    ]
    dg_unseen = [
        ["B", "B", "B", "B", "B"],
        ["A", "A", "A", "A", "A"],
        ["A", "A", "A", "A", "B"],
    ]
    dg_labels = ["SECOND", "FIRST", "FIRST"]

    # Same procedure for both datasets: peel off the class column, fit a
    # fresh classifier, then compare its predictions with the expected labels.
    cases = (
        (iv_rows, iv_header, "interviewed_well", iv_unseen, iv_labels),
        (dg_rows, dg_header, "Class", dg_unseen, dg_labels),
    )
    for rows, header, class_name, unseen, expected_labels in cases:
        classifier = MyDecisionTreeClassifier()
        labels = myutils.get_column(rows, header, class_name)
        features = myutils.drop_column(rows, header, class_name)
        classifier.fit(features, labels)

        assert myutils.equivalent(classifier.predict(unseen), expected_labels)

    # After this we can feel pretty darn good about our implementation.
    # Because it was tricky I'm going to run a good few more tests on the back end.
    
def test_decision_tree_classifier_print_rules():
    """Print the decision rules learned from the interview and degrees data.

    Four rule listings are printed, in this order: the interview tree and
    the degrees tree using the real attribute and class names, then both
    again using the classifier's default generic names. There are no
    assertions; the printed rules are meant to be checked by eye.

    Every listing is produced from a freshly built dataset and a freshly
    fitted classifier, so no listing depends on state left by an earlier one.
    """

    def interview_dataset():
        # Return a new (header, table) pair for the interview dataset.
        header = ["level", "lang", "tweets", "phd", "interviewed_well"]
        table = [
            ["Senior", "Java", "no", "no", "False"],
            ["Senior", "Java", "no", "yes", "False"],
            ["Mid", "Python", "no", "no", "True"],
            ["Junior", "Python", "no", "no", "True"],
            ["Junior", "R", "yes", "no", "True"],
            ["Junior", "R", "yes", "yes", "False"],
            ["Mid", "R", "yes", "yes", "True"],
            ["Senior", "Python", "no", "no", "False"],
            ["Senior", "R", "yes", "no", "True"],
            ["Junior", "Python", "yes", "no", "True"],
            ["Senior", "Python", "yes", "yes", "True"],
            ["Mid", "Python", "no", "yes", "True"],
            ["Mid", "Java", "yes", "no", "True"],
            ["Junior", "Python", "no", "yes", "False"],
        ]
        return header, table

    def degrees_dataset():
        # Return a new (header, table) pair for the Bramer degrees dataset.
        header = ["SoftEng", "ARIN", "HCI", "CSA", "Project", "Class"]
        table = [
            ["A", "B", "A", "B", "B", "SECOND"],
            ["A", "B", "B", "B", "A", "FIRST"],
            ["A", "A", "A", "B", "B", "SECOND"],
            ["B", "A", "A", "B", "B", "SECOND"],
            ["A", "A", "B", "B", "A", "FIRST"],
            ["B", "A", "A", "B", "B", "SECOND"],
            ["A", "B", "B", "B", "B", "SECOND"],
            ["A", "B", "B", "B", "B", "SECOND"],
            ["A", "A", "A", "A", "A", "FIRST"],
            ["B", "A", "A", "B", "B", "SECOND"],
            ["B", "A", "A", "B", "B", "SECOND"],
            ["A", "B", "B", "A", "B", "SECOND"],
            ["B", "B", "B", "B", "A", "SECOND"],
            ["A", "A", "B", "A", "B", "FIRST"],
            ["B", "B", "B", "B", "A", "SECOND"],
            ["A", "A", "B", "B", "B", "SECOND"],
            ["B", "B", "B", "B", "B", "SECOND"],
            ["A", "A", "B", "A", "A", "FIRST"],
            ["B", "B", "B", "A", "A", "SECOND"],
            ["B", "B", "A", "A", "B", "SECOND"],
            ["B", "B", "B", "B", "A", "SECOND"],
            ["B", "A", "B", "A", "B", "SECOND"],
            ["A", "B", "B", "B", "A", "FIRST"],
            ["A", "B", "A", "B", "B", "SECOND"],
            ["B", "A", "B", "B", "B", "SECOND"],
            ["A", "B", "B", "B", "B", "SECOND"],
        ]
        return header, table

    def fit_tree(header, table):
        # Split off the class column (always the last header entry in these
        # datasets) and fit a new classifier on the remaining columns.
        class_name = header[-1]
        my_dt = MyDecisionTreeClassifier()
        y_train = myutils.get_column(table, header, class_name)
        X_train = myutils.drop_column(table, header, class_name)
        my_dt.fit(X_train, y_train)
        return my_dt

    # First pass: rules written with the real attribute and class names.
    named_listings = (
        ("Interview Tree Rules:", interview_dataset),
        ("Degrees Tree Rules:", degrees_dataset),
    )
    for title, build_dataset in named_listings:
        header, table = build_dataset()
        my_dt = fit_tree(header, table)
        print(title)
        my_dt.print_decision_rules(header[:-1], header[-1])
        print()

    # Second pass: same trees, but with the classifier's default names.
    generic_listings = (
        ("Interview Tree Rules with generic names:", interview_dataset),
        ("Degrees Tree Rules with generic names:", degrees_dataset),
    )
    for title, build_dataset in generic_listings:
        header, table = build_dataset()
        my_dt = fit_tree(header, table)
        print(title)
        my_dt.print_decision_rules()
        print()
    
# The two kNN tests stay disabled for now: implementation differences are
# still affecting test_kneighbors_classifier_kneighbors().
#test_kneighbors_classifier_kneighbors()
#test_kneighbors_classifier_predict()

# Run the enabled decision tree tests in order.
for run_test in (
    test_decision_tree_classifier_fit,
    test_decision_tree_classifier_predict,
    test_decision_tree_classifier_print_rules,
):
    run_test()

Interview Tree Rules:
IF level == Junior AND phd == no THEN interviewed_well = True
IF level == Junior AND phd == yes THEN interviewed_well = False
IF level == Mid THEN interviewed_well = True
IF level == Senior AND tweets == no THEN interviewed_well = False
IF level == Senior AND tweets == yes THEN interviewed_well = True

Degrees Tree Rules:
IF SoftEng == A AND Project == A THEN Class = FIRST
IF SoftEng == A AND Project == B AND CSA == A AND ARIN == A THEN Class = FIRST
IF SoftEng == A AND Project == B AND CSA == A AND ARIN == B THEN Class = SECOND
IF SoftEng == A AND Project == B AND CSA == B THEN Class = SECOND
IF SoftEng == B THEN Class = SECOND

Interview Tree Rules with generic names:
IF att0 == Junior AND att3 == no THEN class = True
IF att0 == Junior AND att3 == yes THEN class = False
IF att0 == Mid THEN class = True
IF att0 == Senior AND att2 == no THEN class = False
IF att0 == Senior AND att2 == yes THEN class = True

Degrees Tree Rules with generic names:
IF att0 == A AND a