In [224]:
class Tree:
    '''Create a binary tree; keyword-only arguments `data`, `left`, `right`.

  Examples:
    l1 = Tree.leaf("leaf1")
    l2 = Tree.leaf("leaf2")
    tree = Tree(data="root", left=l1, right=Tree(right=l2))
  '''

    def leaf(data):
        '''Create a leaf tree
    '''
        return Tree(data=data)

    # pretty-print trees
    def __repr__(self):
        if self.is_leaf():
            return "Leaf(%r)" % self.data
        else:
            return "Tree(%r) { left = %r, right = %r }" % (self.data, self.left, self.right)

    # all arguments after `*` are *keyword-only*!
    def __init__(self, *, data=None, left=None, right=None):
        self.data = data
        self.left = left
        self.right = right

    def is_leaf(self):
        '''Check if this tree is a leaf tree
    '''
        return self.left == None and self.right == None

    def children(self):
        '''List of child subtrees
    '''
        return [x for x in [self.left, self.right] if x]

    def depth(self):
        '''Compute the depth of a tree
    A leaf is depth-1, and a child is one deeper than the parent.
    '''
        return max([x.depth() for x in self.children()], default=0) + 1

In [225]:
    root = Tree(data="isSystems?")
    takenOtherSys = Tree(data="takenOtherSys")
    morning = Tree(data="morning?")
    likedOtherSys = Tree(data="likedOtherSys?")
    l1 = Tree.leaf(data="like")
    l2 = Tree.leaf(data="like")
    l3 = Tree.leaf(data="nah")
    l4 = Tree.leaf(data="nah")
    l5 = Tree.leaf(data="like")
    root.left = l1
    root.right = takenOtherSys
    takenOtherSys.left = morning
    takenOtherSys.right = likedOtherSys
    morning.left = l2
    morning.right = l3
    likedOtherSys.left = l4
    likedOtherSys.right = l5
    print(root)

Tree('isSystems?') { left = Leaf('like'), right = Tree('takenOtherSys') { left = Tree('morning?') { left = Leaf('like'), right = Leaf('nah') }, right = Tree('likedOtherSys?') { left = Leaf('nah'), right = Leaf('like') } } }


## TASK 2

In [226]:
import pandas as pd
import numpy as np

csv_thing = """rating,easy,ai,systems,theory,morning
 2,True,True,False,True,False
 2,True,True,False,True,False
 2,False,True,False,False,False
 2,False,False,False,True,False
 2,False,True,True,False,True
 1,True,True,False,False,False
 1,True,True,False,True,False
 1,False,True,False,True,False
 0,False,False,False,False,True
 0,True,False,False,True,True
 0,False,True,False,True,False
 0,True,True,True,True,True
-1,True,True,True,False,True
-1,False,False,True,True,False
-1,False,False,True,False,True
-1,True,False,True,False,True
-2,False,False,True,True,False
-2,False,True,True,False,True
-2,True,False,True,False,False
-2,True,False,True,False,True
"""
file = open("csv_thing.csv", 'w')
file.write(csv_thing)
file.close()

In [227]:
res = pd.read_csv("csv_thing.csv")
res['ok'] = np.where(res['rating'] >= 0, True, False)
print(res)

    rating   easy     ai  systems  theory  morning     ok
0        2   True   True    False    True    False   True
1        2   True   True    False    True    False   True
2        2  False   True    False   False    False   True
3        2  False  False    False    True    False   True
4        2  False   True     True   False     True   True
5        1   True   True    False   False    False   True
6        1   True   True    False    True    False   True
7        1  False   True    False    True    False   True
8        0  False  False    False   False     True   True
9        0   True  False    False    True     True   True
10       0  False   True    False    True    False   True
11       0   True   True     True    True     True   True
12      -1   True   True     True   False     True  False
13      -1  False  False     True    True    False  False
14      -1  False  False     True   False     True  False
15      -1   True  False     True   False     True  False
16      -2  Fa

In [228]:
res.to_csv("task2_result", index=False)

## TASK 3

 Write a function which takes a feature and computes the performance
   of the corresponding single-feature classifier:

In [229]:
import pandas as pd

In [230]:
data = pd.read_csv('result')

In [231]:
data

Unnamed: 0,rating,easy,ai,systems,theory,morning,ok
0,2,True,True,False,True,False,True
1,2,True,True,False,True,False,True
2,2,False,True,False,False,False,True
3,2,False,False,False,True,False,True
4,2,False,True,True,False,True,True
5,1,True,True,False,False,False,True
6,1,True,True,False,True,False,True
7,1,False,True,False,True,False,True
8,0,False,False,False,False,True,True
9,0,True,False,False,True,True,True


In [232]:
def single_feature_score(data, goal, feature):
    truefalse = (data[goal]==data[feature])
    matches = truefalse.sum()
    total = truefalse.shape[0]
    return matches/total

In [233]:
systems = single_feature_score(data, 'ok', 'systems')
ai = single_feature_score(data, 'ok', 'ai')

In [234]:
print(systems)
print(ai)

0.1
0.75


## Task 4

In [311]:
class MyDecisionTree(Tree):
    
    def leaf(data):
        '''Create a leaf tree'''
        return MyDecisionTree(data=data)

    
    def single_feature_score(data, goal, feature):
        truefalse = (data[goal]==data[feature])
        matches = truefalse.sum()
        total = truefalse.shape[0]
        return matches/total
    
    #shorter alias
    def sfs(data, goal, feature):
        return single_feature_score(data, goal, feature)
    
    def decision_tree_train(data, features):
        score = dict()
        guess = data['ok'].mode()[0]
        if len(data['ok'].unique()) <= 1:
            return MyDecisionTree.leaf(guess)
        elif len(features)==1:
            return MyDecisionTree.leaf(guess)
        else:
            for f in features:
                NO = data.loc[data[f] == False]
                YES = data.loc[data[f] == True]
                score[f] = MyDecisionTree.sfs(NO, f, 'ok') + MyDecisionTree.sfs(YES, f, 'ok')
                
        top_f = max(score, key=score.get) # gets the key with the highest value in dict
        NO =  data.loc[data[top_f] == False]
        YES = data.loc[data[top_f] == True]
        left =  MyDecisionTree.decision_tree_train(NO,  set(features) ^ {top_f})
        right = MyDecisionTree.decision_tree_train(YES, set(features) ^ {top_f})
        return MyDecisionTree(data=top_f, left=left, right=right)

    # I assume that a test case is a row of a pandas dataframe
    def decision_tree_test(self, test_point):
        if self.is_leaf():
            return self.data
        else:
            if test_point[self.data] == False:
                return self.left.decision_tree_test(test_point)
            else:
                return self.right.decision_tree_test(test_point)

In [312]:
data_features_only = data.drop(columns=["rating"])
my_tree = MyDecisionTree.decision_tree_train(data_features_only, set(data_features_only.columns)^{"ok"})
print(my_tree)

Tree('ai') { left = Tree('theory') { left = Tree('morning') { left = Leaf(False), right = Tree('easy') { left = Leaf(False), right = Leaf(False) } }, right = Tree('easy') { left = Tree('systems') { left = Leaf(True), right = Leaf(False) }, right = Leaf(True) } }, right = Tree('theory') { left = Tree('easy') { left = Tree('systems') { left = Leaf(True), right = Leaf(False) }, right = Tree('systems') { left = Leaf(True), right = Leaf(False) } }, right = Leaf(True) } }


  return matches/total


In [313]:
data_features_only.iloc[0]

easy        True
ai          True
systems    False
theory      True
morning    False
ok          True
Name: 0, dtype: bool

In [320]:
successes = 0
for idx in range(data_features_only.shape[0]):
    result = my_tree.decision_tree_test(data_features_only.iloc[idx])
    label = data_features_only.iloc[idx]['ok']
    # print(f"""FOR TEST CASE {idx}\n result is {result} \n label is {label}\n""")
    if result == label:
        successes += 1
print(successes / 20, "success rate")

0.9 success rate


In [321]:
for f in data_features_only.columns:
    print(f, MyDecisionTree.sfs(data, "ok", f))

easy 0.5
ai 0.75
systems 0.1
theory 0.7
morning 0.35
ok 1.0


### TASK 5