# In [None]:
# Toy dataset. Each row is [color, diameter, label]: the last element is the
# class label (read via row[-1] by class_count), the others are features.
training_data = [
    ['Green', 3, 'Mango'],
    ['Yellow', 3, 'Mango'],
    ['Red', 1, 'Grape'],
    ['Red', 1, 'Grape'],
    ['Yellow', 3, 'Lemon'],
]

# Column names, index-aligned with the rows of training_data. Question.__repr__
# looks a column's name up here when printing itself.
header = ['color', 'diameter', 'label']

def unique_values(rows, columns):
    """Return the set of distinct values found in one column of ``rows``.

    Args:
        rows: Iterable of indexable rows.
        columns: Index of the column to inspect in each row.

    Returns:
        A set containing every distinct ``row[columns]`` value.
    """
    # Collect the values themselves; wrapping each one in a list would make
    # the elements unhashable and the set construction would fail.
    return {row[columns] for row in rows}

def class_count(rows):
    """Tally how many rows carry each label.

    The label of a row is its last element.

    Args:
        rows: Iterable of indexable rows.

    Returns:
        A dict mapping each label to the number of rows that end with it.
    """
    tally = {}
    for record in rows:
        # Unseen labels start from zero before being incremented.
        tally[record[-1]] = tally.get(record[-1], 0) + 1
    return tally

def is_numeric(value):
    """Return True when ``value`` is an ``int`` or a ``float`` instance."""
    return isinstance(value, (int, float))

class Question:
    """A yes/no test applied to one column of a row.

    Attributes are stored exactly as given:
        columns: Index of the column the question inspects.
        value: The value the column is compared against.
    """

    def __init__(self, columns, value):
        self.columns, self.value = columns, value

    def match(self, example):
        """Return whether ``example`` satisfies this question.

        A numeric value in the inspected column is tested with ``>=``
        against ``self.value``; any other value is tested with ``==``.
        """
        candidate = example[self.columns]
        if not is_numeric(candidate):
            return candidate == self.value
        return candidate >= self.value

    def __repr__(self):
        """Render the question as text, naming the column via ``header``."""
        # The displayed operator depends on the stored value's type.
        operator_text = ">=" if is_numeric(self.value) else "=="
        column_name = header[self.columns]
        return "Is %s %s %s?" % (column_name, operator_text, str(self.value))

def partition(rows, Question):
    """Split ``rows`` into those that satisfy a question and those that do not.

    Args:
        rows: Iterable of rows to split.
        Question: Object exposing ``match(row)``; a truthy result places the
            row in the first list. (The parameter name shadows the
            ``Question`` class inside this function; it is kept so existing
            keyword callers continue to work.)

    Returns:
        A ``(true_rows, false_rows)`` tuple of lists, each preserving the
        original row order. Both lists are empty when ``rows`` is empty.
    """
    true_rows, false_rows = [], []
    for row in rows:
        if Question.match(row):
            true_rows.append(row)
        else:
            false_rows.append(row)
    # Return only after every row has been examined, not after the first one.
    return true_rows, false_rows
    
def gini(rows):
    """Compute the Gini impurity of ``rows``.

    Starts from 1 and subtracts the squared proportion of each label, where
    the label counts come from ``class_count``.

    Args:
        rows: Sized collection of rows whose last element is the label.

    Returns:
        The impurity value; 1 when ``rows`` is empty, since no label
        contributes a term.
    """
    impurity = 1
    for occurrences in class_count(rows).values():
        share = occurrences / float(len(rows))
        impurity -= share ** 2
    return impurity

def info_gain(left, right, current_uncertainity):
    """Return the drop in uncertainty achieved by a split.

    The result is ``current_uncertainity`` minus the Gini impurity of each
    child, weighted by the child's fraction of the combined rows.

    Args:
        left: Rows on one side of the split.
        right: Rows on the other side of the split.
        current_uncertainity: Uncertainty of the node before splitting.

    Raises:
        ZeroDivisionError: If both ``left`` and ``right`` are empty.
    """
    left_size = len(left)
    left_weight = float(left_size) / (left_size + len(right))
    right_weight = 1 - left_weight
    after_left = current_uncertainity - left_weight * gini(left)
    return after_left - right_weight * gini(right)
