In [1]:
from copy import deepcopy
import random
from collections import namedtuple
import math
import pprint

In [2]:
def parse_data(file_name: str) -> list[list]:
    data = []
    file = open(file_name, "r")
    for line in file:
        datum = line.rstrip().split(",")
        data.append(datum)
    random.shuffle(data)
    return data

In [3]:
def create_folds(xs: list, n: int) -> list[list[list]]:
    k, m = divmod(len(xs), n)
    # be careful of generators...
    return list(xs[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))

## count_labels

### Args:
* **data** : List of lists, where each inner list represents a row of data. The first element of each row is the label ('e' or 'p').

### Returns: 
* **returns (count_e, count_p)** A tuple where `count_e` is the number of rows labeled 'e' and `count_p` is the number of rows labeled 'p'.


In [4]:
def count_labels(data):
    count_e = 0 
    count_p = 0
    for row in data: 
        label = row[0]
        if label == 'e':
            count_e += 1
        elif label == 'p':
            count_p += 1
    return count_e, count_p

In [5]:
assert count_labels([['e'], ['p'], ['e'], ['p']]) == (2, 2)
assert count_labels([['e'], ['e'], ['e']]) == (3, 0)
assert count_labels([]) == (0, 0)


## calculate_entropy_fullset

### Args:
* **data** : List of lists, where each inner list represents a row of data. The first element of each row is the label ('e' or 'p'). This data is used to calculate the entropy of the full set.

### Returns: 
* **returns entropy** The entropy value of the full dataset, calculated using the proportion of positive ('e') and negative ('p') labels. If the dataset is empty, returns 0.


In [6]:
def calculate_entropy_fullset(data):
    pos, neg = count_labels(data)
    total = pos + neg
    if total == 0: 
        return 0
    prob_pos = pos/total
    prob_neg = neg/total
    entropy = 0
    if prob_pos > 0: 
        entropy-= prob_pos * math.log2(prob_pos)
    if prob_neg > 0:
        entropy -= prob_neg * math.log2(prob_neg)
    return entropy


In [7]:
assert round(calculate_entropy_fullset([['e'], ['p']]), 4) == 1.0
assert calculate_entropy_fullset([['e'], ['e'], ['e']]) == 0.0
assert calculate_entropy_fullset([]) == 0.0

## get_subset

### Args:
* **data** : List of lists, where each inner list represents a row of data.
* **attribute** : Integer representing the index of the attribute/column to filter by.
* **value** : The value to filter the rows by, based on the specified attribute.

### Returns: 
* **returns subset** A list of rows (subsets) where the value of the specified attribute matches the provided value.


In [8]:
def get_subset(data, attribute, value):
    subset = []
    for row in data: 
        if row[attribute] == value:
            subset.append(row)
    return subset

In [9]:
assert get_subset([['e', 1], ['p', 0], ['e', 1]], 1, 1) == [['e', 1], ['e', 1]]
assert get_subset([['e', 1], ['p', 0], ['e', 1]], 1, 0) == [['p', 0]]
assert get_subset([['e', 1], ['p', 0], ['e', 1]], 1, 2) == []


## calculate_entropy_subset

### Args:
* **data** : List of lists, where each inner list represents a row of data.
* **attribute** : Integer representing the index of the attribute/column to filter by.
* **value** : The value to filter the rows by, based on the specified attribute.

### Returns: 
* **returns (entropy, subset_size)** A tuple where:
  * `entropy` is the entropy of the subset filtered by the given attribute and value.
  * `subset_size` is the number of rows in the subset.


In [10]:
def calculate_entropy_subset(data, attribute, value):
    subset = get_subset(data, attribute, value )
    subset_size = len(subset)
    pos, neg = count_labels(subset)
    total = pos + neg
    if total == 0: 
        return 0
    prob_pos = pos/total
    prob_neg = neg/total
    entropy = 0
    if prob_pos > 0: 
        entropy-= prob_pos * math.log2(prob_pos)
    if prob_neg > 0:
        entropy -= prob_neg * math.log2(prob_neg)
    return entropy, subset_size


In [11]:
assert round(calculate_entropy_subset([['e', 1], ['p', 1]], 1, 1)[0], 4) == 1.0
assert calculate_entropy_subset([['e', 1], ['e', 1]], 1, 1)[0] == 0.0
assert calculate_entropy_subset([], 1, 1) == 0


## get_unique_values

### Args:
* **data** : List of lists, where each inner list represents a row of data.
* **attribute** : Integer representing the index of the attribute/column to extract unique values from.

### Returns: 
* **returns unique** A set containing all unique values for the specified attribute in the dataset.


In [12]:
def get_unique_values(data,attribute):
    unique = set()
    for row in data:
        unique.add(row[attribute])
    return unique

In [13]:
assert get_unique_values([['e', 1], ['p', 2], ['e', 3]], 1) == {1, 2, 3}
assert get_unique_values([['e', 1], ['p', 1], ['e', 1]], 1) == {1}
assert get_unique_values([], 1) == set()

## calculate_information_gain

### Args:
* **data** : List of lists, where each inner list represents a row of data.
* **attribute** : Integer representing the index of the attribute/column for which the information gain is calculated.

### Returns: 
* **returns information_gain** The information gain obtained by splitting the data on the specified attribute. It is calculated as the difference between the base entropy of the full dataset and the weighted entropy of the subsets.


In [14]:
def calculate_information_gain(data,attribute):
    values = get_unique_values(data,attribute)
    base_entropy = calculate_entropy_fullset(data)
    weighted_entropy = 0
    total_instances = len(data) 
    for value in values: 
        sub_entropy, subset_size = calculate_entropy_subset(data,attribute,value)
        prob = subset_size / total_instances
        weighted_entropy += prob * sub_entropy
    information_gain = base_entropy - weighted_entropy
    return information_gain


In [15]:
assert math.isclose(calculate_information_gain([['e', 1], ['p', 1], ['e', 0], ['p', 0]], 1), 4 == 1.0)
assert calculate_information_gain([['e', 1], ['e', 0]], 1) == 0.0
assert calculate_information_gain([], 1) == 0

## pick_best_attribute

### Args:
* **data** : List of lists, where each inner list represents a row of data.
* **attributes** : List of integers, each representing an index of an attribute/column to evaluate.

### Returns: 
* **returns best_attribute** The attribute (represented as an index) that provides the highest information gain when used to split the data. If no attribute provides a positive gain, `None` is returned.


In [16]:
def pick_best_attribute(data, attributes):
    best_attribute = None
    max_gain = float("-inf")
    for attribute in attributes:
        gain = calculate_information_gain(data,attribute)
        if gain > max_gain:
            max_gain = gain
            best_attribute = attribute
    return best_attribute
    

In [17]:
assert pick_best_attribute([['e', 1, 0], ['p', 0, 1], ['e', 1, 0], ['p', 0, 1]], [1, 2]) == 1
assert pick_best_attribute([['e', 1, 1], ['p', 0, 0]], [1, 2]) in [1, 2]
assert pick_best_attribute([], [1, 2]) == 1


## is_homogeneous

### Args:
* **data** : List of lists, where each inner list represents a row of data. The first element of each row is the label to check.

### Returns: 
* **returns True/False** Returns `True` if all rows in the dataset have the same label, otherwise returns `False`.


In [18]:
def is_homogeneous(data):
    label = data[0][0]
    for row in data:
        if row[0] != label:
            return False 
    return True

In [19]:
assert is_homogeneous([['e'], ['e'], ['e']]) == True
assert is_homogeneous([['e'], ['p'], ['e']]) == False
assert is_homogeneous([['p'], ['p'], ['p']]) == True


## majority_class

### Args:
* **data** : List of lists, where each inner list represents a row of data. The first element of each row is the label to be counted.

### Returns: 
* **returns majority_class** The label that appears most frequently in the dataset. In case of a tie, it returns the first label that reaches the maximum count.


In [20]:
def majority_class(data):
    classes = {}
    for row in data: 
        label = row[0]
        classes[label] =  classes.get(label,0) + 1
        return max(classes, key=classes.get)


In [21]:
assert majority_class([['e'], ['p'], ['e']]) == 'e'
assert majority_class([['p'], ['p'], ['e']]) == 'p'
assert majority_class([['e'], ['p']]) == 'e'

## create_leaf

### Args:
* **label** : The label assigned to the leaf node, representing the class or category for that leaf.

### Returns: 
* **returns leaf** A dictionary representing a leaf node with the structure `{'type': 'leaf', 'label': label}`.


In [22]:
def create_leaf(label):
    return {'type': 'leaf', 'label': label}

In [23]:
assert create_leaf('e') == {'type': 'leaf', 'label': 'e'}
assert create_leaf('p') == {'type': 'leaf', 'label': 'p'}
assert create_leaf('unknown') == {'type': 'leaf', 'label': 'unknown'}

## create_node

### Args:
* **attribute** : The attribute/index used to split the data at this node.
* **children** : A dictionary or list representing the child nodes that result from splitting on the attribute.

### Returns: 
* **returns node** A dictionary representing a decision tree node with the structure `{'type': 'node', 'attribute': attribute, 'children': children}`.


In [24]:
def create_node(attribute, children):
    return {'type': 'node', 'attribute': attribute, 'children': children}

In [25]:
assert create_node(1, {'value1': 'child1', 'value2': 'child2'}) == {'type': 'node', 'attribute': 1, 'children': {'value1': 'child1', 'value2': 'child2'}}
assert create_node(2, {}) == {'type': 'node', 'attribute': 2, 'children': {}}
assert create_node(3, {'value1': 'child1'}) == {'type': 'node', 'attribute': 3, 'children': {'value1': 'child1'}}

## get_remaining_attributes

### Args:
* **attributes** : List of integers representing the indices of all attributes available for selection.
* **used_attribute** : An integer representing the index of the attribute that has already been used and should be excluded.

### Returns: 
* **returns remaining_attributes** A list of attributes that excludes the `used_attribute`, containing only the remaining attributes that can still be used.


In [26]:
def get_remaining_attributes(attributes, used_attribute):
    remaining_attributes = []
    for attribute in attributes:
        if attribute != used_attribute:
            remaining_attributes.append(attribute)
    return remaining_attributes

## id3
This function implements the ID3 algorithm for decision tree generation. It recursively selects the best attribute to split the data based on information gain, creating a tree where each node represents a decision, and each leaf represents a class label. The function stops either when the data is homogeneous, there are no remaining attributes, or there is no more data to split, in which case a default or majority class is returned.
### Args:
* **data** : List of lists, where each inner list represents a row of data. The first element of each row is the label ('e' or 'p').
* **attributes** : List of integers representing the indices of all available attributes to split the data.
* **default** : The default label to use if the dataset is empty.

### Returns: 
* **returns tree** A decision tree represented as a dictionary. The tree is built using the ID3 algorithm, where each node contains either a 'leaf' with a label or a 'node' with an attribute and children representing the possible splits.


In [27]:
def id3(data, attributes, default): 
    if not data: 
        return create_leaf(default)
    if is_homogeneous(data):
        return create_leaf(data[0][0])
    if not attributes:
        return create_leaf(majority_class(data))
    best_attribute = pick_best_attribute(data, attributes)
    values = get_unique_values(data, best_attribute)
    children = {}
    for value in values:
        subset = get_subset(data,best_attribute,value)
        remaining_attributes = get_remaining_attributes(attributes, best_attribute)
        child = id3(subset, remaining_attributes, majority_class(data))
        children[value] = child
    return create_node(best_attribute, children)
        

## remove_rows_with_missing_values

This function filters the input dataset and removes any rows that contain missing values, represented by a '?' in the row. It ensures that the resulting dataset only includes complete rows, which can be important for models or algorithms that don't handle missing values.

### Args:
* **data** : List of lists, where each inner list represents a row of data. A row may contain missing values represented by a '?'.

### Returns: 
* **returns cleaned_data** A list of rows from the dataset where no missing values ('?') are present.


In [28]:
def remove_rows_with_missing_values(data):
    cleaned_data = []
    for row in data: 
        if "?" not in row: 
            cleaned_data.append(row)
    return cleaned_data

## train

This function trains a decision tree using the ID3 algorithm. It first cleans the dataset by removing rows with missing values, then selects the attributes for splitting the data, and finally uses the majority class as the default label for empty datasets. The ID3 algorithm is applied to generate the decision tree.

### Args:
* **training_data** : List of lists, where each inner list represents a row of training data. The first element of each row is the label, and the remaining elements are the attributes.

### Returns: 
* **returns tree** A decision tree generated from the training data using the ID3 algorithm. The tree is represented as a dictionary of nodes and leaves.


In [29]:
def train(training_data): 
    cleaned_data = remove_rows_with_missing_values(training_data)
    attributes = range(1,len(cleaned_data[0]))
    default = majority_class(cleaned_data)
    return id3(cleaned_data,attributes, default)

In [30]:
data = parse_data('agaricus-lepiota.data')

In [31]:
decision_tree = train(data) 

## classify_single_observation

This function classifies a single observation using a decision tree. It traverses the tree based on the values of the attributes in the observation, following the appropriate branches until it reaches a leaf node, which provides the class label. If the value of the attribute is not found in the tree's children, it returns "unknown".

### Args:
* **tree** : A decision tree represented as a dictionary, where each node contains either a 'leaf' with a label or a 'node' with an attribute and children.
* **observation** : A list representing a single observation. Each element corresponds to a value for an attribute.
* **labeled** : A boolean indicating whether the observation includes the label as the first element (`True` if labeled, `False` otherwise). Defaults to `True`.

### Returns: 
* **returns label or "unknown"** The predicted class label for the observation if the tree can classify it, or "unknown" if the observation has an unrecognized value for the attribute.


In [32]:
def classify_single_observation(tree, observation,labeled=True):
    if tree['type'] == 'leaf':
        return tree['label']
    
    attribute = tree['attribute']
    if labeled:
        value = observation[attribute]
    else: 
        value = observation[attribute - 1]
    if value in tree['children']:
        return classify_single_observation(tree['children'][value], observation, labeled)
    return "unknown"

In [33]:
tree = {'type': 'leaf', 'label': 'e'}
assert classify_single_observation(tree, [0, 1]) == 'e'
tree = {'type': 'node', 'attribute': 1, 'children': {1: {'type': 'leaf', 'label': 'p'}}}
assert classify_single_observation(tree, [1], labeled=False) == 'p'
assert classify_single_observation(tree, [2], labeled=False) == 'unknown'

## classify

This function classifies multiple observations using a decision tree. For each observation, it calls the `classify_single_observation` function to traverse the tree and determine the predicted label. The results are stored in a list of predictions.

### Args:
* **tree** : A decision tree represented as a dictionary, where each node contains either a 'leaf' with a label or a 'node' with an attribute and children.
* **observations** : A list of observations, where each observation is a list of attribute values.
* **labeled** : A boolean indicating whether the observations include labels as the first element (`True` if labeled, `False` otherwise). Defaults to `True`.

### Returns: 
* **returns predictions** A list of predicted class labels for the given observations.


In [34]:
def classify(tree,observations,labeled=True):
    predictions = []
    for observation in observations:
        predictions.append(classify_single_observation(tree, observation,labeled))
    return predictions

In [35]:
tree = {'type': 'leaf', 'label': 'e'}
assert classify(tree, [[0], [1]]) == ['e', 'e']
tree = {'type': 'node', 'attribute': 1, 'children': {1: {'type': 'leaf', 'label': 'p'}}}
assert classify(tree, [[1], [0]], labeled=False) == ['p', 'unknown']
assert classify(tree, [], labeled=True) == []


In [36]:
cleaned_data = remove_rows_with_missing_values(data)
predictions = classify(decision_tree, cleaned_data)


In [37]:
print(predictions)

['p', 'e', 'e', 'e', 'e', 'e', 'p', 'p', 'e', 'e', 'e', 'p', 'e', 'e', 'p', 'e', 'e', 'e', 'e', 'p', 'p', 'p', 'e', 'e', 'p', 'e', 'e', 'e', 'e', 'p', 'e', 'e', 'p', 'e', 'e', 'e', 'p', 'p', 'e', 'e', 'p', 'e', 'e', 'p', 'e', 'p', 'e', 'p', 'p', 'e', 'e', 'p', 'e', 'e', 'p', 'p', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'p', 'e', 'e', 'e', 'e', 'p', 'e', 'e', 'e', 'e', 'p', 'p', 'p', 'e', 'e', 'e', 'e', 'e', 'e', 'p', 'e', 'e', 'p', 'e', 'e', 'e', 'e', 'p', 'p', 'p', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'p', 'e', 'e', 'e', 'e', 'e', 'e', 'p', 'e', 'p', 'p', 'p', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'p', 'e', 'e', 'p', 'e', 'e', 'e', 'p', 'p', 'p', 'p', 'p', 'e', 'e', 'p', 'e', 'p', 'p', 'e', 'e', 'e', 'p', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'p', 'p', 'p', 'e', 'p', 'e', 'e', 'p', 'e', 'e', 'e', 'e', 'p', 'e', 'e', 'p', 'e', 'e', 'e', 'e', 'e', 'e', 'p', 'p', 'e', 'e', 'e', 'e', 'p', 'p', 'e', 'p', 'e', 'p', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'e', 'p', 'e',

## evaluate

This function evaluates the performance of a classifier by comparing the predicted labels to the actual labels in the labeled data. It calculates the error rate by determining the proportion of incorrect predictions.

### Args:
* **predicted_labels** : A list of predicted class labels.
* **labeled_data** : A list of lists, where each inner list represents an observation. The first element of each observation is the actual label.

### Returns: 
* **returns error_rate** The error rate, calculated as the number of incorrect predictions divided by the total number of observations.


In [38]:
def evaluate(predicted_labels, labeled_data):
    errors = 0
    n = len(labeled_data)
    for i in range(n):
        actual_label = labeled_data[i][0]
        predicted_label = predicted_labels[i]
        if predicted_label != actual_label:
            errors += 1
    error_rate = errors / n
    return error_rate

In [39]:
error_rate = evaluate(predictions,cleaned_data)
print(error_rate)

0.0


## train_and_evaluate

This function trains a model using the provided training function, then classifies the test data using the provided classification function, and finally evaluates the model's performance using the provided evaluation function. It returns the error rate on the test data.

### Args:
* **train_fn** : A function used to train the model, taking the training data as input.
* **classify_fn** : A function used to classify observations, taking the model and the test data as inputs.
* **evaluate_fn** : A function used to evaluate the model's performance, taking the predicted labels and labeled test data as inputs.
* **train_fold** : A list of lists representing the training data.
* **test_fold** : A list of lists representing the test data. The first element of each observation is the actual label.

### Returns: 
* **returns test_error** The error rate of the model on the test data.


In [40]:
def train_and_evaluate(train_fn, classify_fn, evaluate_fn, train_fold, test_fold):
    model = train_fn(train_fold)
    predicted_test_labels = classify_fn(model, test_fold, labeled=True)
    test_error = evaluate_fn(predicted_test_labels, test_fold)
    return test_error

## calculate_and_print_mean

This function calculates the mean test error rate across multiple folds and prints the result with four decimal places of precision.

### Args:
* **total_test_error_rate** : The sum of test error rates from all folds.
* **num_folds** : The number of folds used in the evaluation.

### Returns:
* **None** : This function does not return any value. It prints the mean test error rate to the console.


In [41]:
def calculate_and_print_mean(total_test_error_rate, num_folds):
    mean_test_error_rate = total_test_error_rate / num_folds
    print(f"Mean = {mean_test_error_rate:.4f}")

## cross_validate

This function performs cross-validation on the provided dataset by splitting the data into 10 folds. It trains and evaluates a model for each pair of consecutive folds, printing the error rate for each pair. It also calculates and prints the mean test error rate across all folds.

### Args:
* **data** : List of lists, where each inner list represents an observation in the dataset. The first element of each observation is the label.
* **train_fn** : A function used to train the model, taking the training data as input.
* **classify_fn** : A function used to classify observations, taking the model and the test data as inputs.
* **evaluate_fn** : A function used to evaluate the model's performance, taking the predicted labels and labeled test data as inputs.

### Returns:
* **None** : This function does not return any value. It prints the error rates for each pair of folds and the mean test error rate across all folds.


In [42]:
def cross_validate(data, train_fn, classify_fn, evaluate_fn):    
    total_test_error_rate = 0
    folds = create_folds(data, 10)
    print("Train   Test")
    for i in range(0, 10, 2):
        fold_train = folds[i]
        fold_test = folds[i + 1]
        test_error1 = train_and_evaluate(train_fn, classify_fn, evaluate_fn, fold_train, fold_test)
        print(f"Fold {i + 1} -> Fold {i + 2} error rate: {test_error1:.4f}")
        test_error2 = train_and_evaluate(train_fn, classify_fn, evaluate_fn, fold_test, fold_train)
        print(f"Fold {i + 2} -> Fold {i + 1} error rate: {test_error2:.4f}")
        total_test_error_rate += test_error1 + test_error2
    calculate_and_print_mean(total_test_error_rate, 10)

In [43]:
cross_validate(cleaned_data,train,classify,evaluate)

Train   Test
Fold 1 -> Fold 2 error rate: 0.0035
Fold 2 -> Fold 1 error rate: 0.0088
Fold 3 -> Fold 4 error rate: 0.0088
Fold 4 -> Fold 3 error rate: 0.0018
Fold 5 -> Fold 6 error rate: 0.0035
Fold 6 -> Fold 5 error rate: 0.0000
Fold 7 -> Fold 8 error rate: 0.0035
Fold 8 -> Fold 7 error rate: 0.0053
Fold 9 -> Fold 10 error rate: 0.0035
Fold 10 -> Fold 9 error rate: 0.0000
Mean = 0.0039


## pretty_print_tree

This function prints a decision tree in a readable, indented format. It recursively traverses the tree, printing decision nodes and their corresponding attribute splits, as well as leaf nodes and their classification results. Each level of the tree is indented to visually represent the structure.

### Args:
* **tree** : A decision tree represented as a dictionary. Each node contains either a 'leaf' with a label or a 'node' with an attribute and children representing further splits.
* **indent** : A string used to control the indentation for the current node level. Defaults to an empty string.
* **parent_indent** : A string used to control the indentation for the parent node. It helps in formatting the child nodes properly. Defaults to an empty string.

### Returns:
* **None** : This function does not return any value. It prints the tree in a structured and readable format.


In [44]:
def pretty_print_tree(tree,indent="", parent_indent=""):
    if tree['type'] == 'leaf':
        print(f"{indent}[Leaf Node] Results: {tree['label']}")
    else:
        print(f"{indent}[Decision Node] Attribute: {tree['attribute']}")
        for value, subtree in tree['children'].items():
            print(f"{parent_indent}|-- Value: {value} -->", end=" ") 
            pretty_print_tree(subtree, indent="", parent_indent=parent_indent + "     ")

In [45]:
pretty_print_tree(decision_tree)

[Decision Node] Attribute: 5
|-- Value: a --> [Leaf Node] Results: e
|-- Value: f --> [Leaf Node] Results: p
|-- Value: m --> [Leaf Node] Results: p
|-- Value: n --> [Decision Node] Attribute: 20
     |-- Value: r --> [Leaf Node] Results: p
     |-- Value: k --> [Leaf Node] Results: e
     |-- Value: n --> [Leaf Node] Results: e
     |-- Value: w --> [Decision Node] Attribute: 3
          |-- Value: g --> [Leaf Node] Results: e
          |-- Value: y --> [Leaf Node] Results: p
          |-- Value: n --> [Leaf Node] Results: e
          |-- Value: c --> [Leaf Node] Results: e
          |-- Value: p --> [Leaf Node] Results: e
          |-- Value: w --> [Leaf Node] Results: p
|-- Value: c --> [Leaf Node] Results: p
|-- Value: p --> [Leaf Node] Results: p
|-- Value: l --> [Leaf Node] Results: e


In [46]:
pprint.pprint(decision_tree)

{'attribute': 5,
 'children': {'a': {'label': 'e', 'type': 'leaf'},
              'c': {'label': 'p', 'type': 'leaf'},
              'f': {'label': 'p', 'type': 'leaf'},
              'l': {'label': 'e', 'type': 'leaf'},
              'm': {'label': 'p', 'type': 'leaf'},
              'n': {'attribute': 20,
                    'children': {'k': {'label': 'e', 'type': 'leaf'},
                                 'n': {'label': 'e', 'type': 'leaf'},
                                 'r': {'label': 'p', 'type': 'leaf'},
                                 'w': {'attribute': 3,
                                       'children': {'c': {'label': 'e',
                                                          'type': 'leaf'},
                                                    'g': {'label': 'e',
                                                          'type': 'leaf'},
                                                    'n': {'label': 'e',
                                                          't