## Advanced Data Analytics - Algorithms and Machine Learning
## 31005
### Harrison Cole
### 12962712

### Section 1 - Imports
Imports libraries and type-definitions for use throughout the program.

In [57]:
import abc
import numpy as np
import pandas as pd
import random
import traceback

from typing import Callable, Optional, Union, Tuple, List, Dict
from sklearn.datasets import load_iris as dataset
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

### Section 2 - Utility Function Definitions
Defines utility functions for (re)use throughout the program.

In [58]:
def require(value: Optional[any], field: str) -> any:
    """
    Guards against a missing value: hands the value straight back when it is present,
    and raises when it is absent.
    :param value:
    The value that must not be None. Falsy values such as 0 or '' count as present.
    :param field:
    A diagnostic tag naming the value; it is embedded in the error message.
    :return:
    The given value, unchanged.
    :raises ValueError:
    If the value is None.
    """
    if value is not None:
        return value
    raise ValueError(f'Missing required value: "{field}".')


def default(value: Optional[any], otherwise: any) -> any:
    """
    Substitutes a fallback for a missing value.
    :param value:
    The value whose presence is being checked. Only None counts as absent; falsy
    values such as 0 or '' are returned as they are.
    :param otherwise:
    The fallback to return in the case of its absence.
    :return:
    The value when it is not None, otherwise the fallback.
    """
    if value is None:
        return otherwise
    return value


def value_counts(elements, normalise: bool = True) -> tuple:
    """
    Tallies how often each distinct value occurs in a collection of elements.
    :param elements:
    The collection of elements to tally.
    :param normalise:
    When True, each tally is divided by the total number of elements so that the
    relative frequencies are returned; when False, the raw counts are returned.
    :return:
    A tuple of two arrays: the distinct values (in the order produced by np.unique)
    and, position for position, their counts or relative frequencies.
    """
    uniques, tallies = np.unique(elements, return_counts=True)
    if not normalise:
        return uniques, tallies
    return uniques, tallies / np.sum(tallies)

def majority_class_index(elements):
    """
    Locates the most frequent class in a collection of elements.
    :param elements:
    The collection of elements.
    :return:
    The position of the most frequent value within the array of distinct values
    produced by value_counts (not a position within the elements themselves).
    """
    tallies = value_counts(elements, normalise=False)[1]
    return np.argmax(tallies)


### Section 3 - Data-structures, Interfaces and Implementations
Defines the API and data-structures used throughout this program. Where applicable, the code is written
against these interfaces rather than against their concrete implementations.

#### Section 3.1 - Split Criterion Metrics

In [59]:
class SplitCriterionMetric(metaclass=abc.ABCMeta):
    """
    The contract for scoring the class make-up of a subset of the dataset, as used to judge
    candidate splits at each step of the tree building process.
    """

    @abc.abstractmethod
    def compute(self, frequencies) -> float:
        """
        Scores a subset of the dataset from the relative frequencies of its target classes.
        :param frequencies:
        The frequencies of each target class instance within this subset.
        :return:
        A floating point score. For the implementations in this file (Entropy and GiniImpurity)
        the score is an impurity: it is lowest when a single class makes up the whole subset,
        and grows as the classes become more evenly mixed.
        """


class Entropy(SplitCriterionMetric):
    """
    Split criterion based on the (base-2) entropy of the class frequencies.
    """

    def compute(self, frequencies) -> float:
        """
        Computes the entropy of the target class within a subset of the dataset.
        :param frequencies:
        The frequencies of each class instance within this subset, as an array.
        :return:
        The negated sum of frequency * log2(frequency), a measure of how mixed the target
        classes are within this subset.
        """
        # The small offset keeps log2 finite when a frequency is exactly zero.
        epsilon = 1e-9
        weighted_logs = frequencies * np.log2(frequencies + epsilon)
        return -weighted_logs.sum()


class GiniImpurity(SplitCriterionMetric):
    """
    Split criterion based on the Gini impurity of the class frequencies.
    """

    def compute(self, frequencies) -> float:
        """
        Computes the Gini impurity of the target class within a subset of the dataset.
        :param frequencies:
        The frequencies of each class instance within this subset.
        :return:
        One minus the sum of the squared frequencies: how often a randomly chosen element
        would be labelled incorrectly if it were labelled at random according to the
        distribution of class instances within this subset.
        """
        squared = np.square(frequencies)
        return 1 - np.sum(squared)

#### Section 3.2 - Pivot

In [60]:
class Pivot:
    """
    Wraps an arbitrary predicate, together with a description of it, for use as the
    point at which a set of elements is split in two.
    i.e.
    elements = [{'x': v} for v in range(11)]
    pivot = Pivot.continuous('x', 5)
    splits = [pivot.split(e) for e in elements]
    [T, T, T, T, T, T, F, F, F, F, F]
    """

    def __init__(self, predicate: Callable[[any], bool], info: Tuple[any, any, str, str]):
        """
        :param predicate:
        The test applied to each element; must not be None.
        :param info:
        A description of the predicate, read positionally as
        (attribute, point, true-condition text, false-condition text); must not be None.
        """
        self.__predicate = require(predicate, 'predicate')
        self.__info = require(info, 'info')

    @property
    def predicate(self) -> Callable[[any], bool]:
        """
        The predicate wrapped by this pivot.
        """
        return self.__predicate

    def attribute(self) -> any:
        """
        The key of the variable being pivoted upon (first entry of the description).
        """
        return self.__info[0]

    def point(self) -> any:
        """
        The value(s) of the pivot point (second entry of the description).
        """
        return self.__info[1]

    def true_condition(self) -> str:
        """
        The text describing the predicate when it holds (third entry of the description).
        """
        return self.__info[2]

    def false_condition(self) -> str:
        """
        The text describing the predicate when it does not hold (fourth entry of the description).
        """
        return self.__info[3]

    def split(self, value: any) -> bool:
        """
        Applies the predicate to an element.
        :param value:
        The element handed to the predicate.
        :return:
        Whatever the predicate returns for that element.
        """
        return self.predicate(value)

    @staticmethod
    def continuous(attribute: Union[str, int], point: Union[int, float]) -> 'Pivot':
        """
        Factory for a pivot over continuous (numerical) values.
        :param attribute:
        The name (str) or index (int) under which the value being compared is found
        within each element.
        :param point:
        The threshold that the value is compared against.
        :return:
        A pivot whose predicate is: value[attribute] <= point, described with the
        operators '<=' (true) and '>' (false).
        """
        def at_most(value: any) -> bool:
            return value[attribute] <= point

        description = (attribute, point, '<=', '>')
        return Pivot(predicate=at_most, info=description)

    def __str__(self, condition: bool = True) -> str:
        """
        The human-readable form of this pivot.
        :param condition:
        True renders the predicate with its true-condition operator; False renders it
        with its false-condition operator.
        """
        if condition:
            operator: str = self.true_condition()
        else:
            operator = self.false_condition()
        # ljust(2) pads one-character operators so that both renderings line up.
        return f'x[{self.attribute()}] {operator.ljust(2)} {self.point()}'


class NumericalPivotCandidate:
    """
    Keeps track of the best (feature, gain, probe) combination seen so far while
    searching for the split of a continuous attribute.
    """

    def __init__(self, feature: Union[str, int], gain: float, probe: float):
        """
        :param feature:
        The name (str) or index (int) of the attribute being used as a feature.
        :param gain:
        The gain yielded by splitting that feature at the probe value.
        :param probe:
        The probe (threshold) value.
        """
        self.__feature = feature
        self.__gain = gain
        self.__probe = probe

    def feature(self) -> Union[str, int]:
        """
        The name (str) or index (int) of the attribute held by this candidate.
        :raises ValueError:
        If no feature is held (it is None).
        """
        return require(self.__feature, 'feature')

    def gain(self) -> float:
        """
        The gain yielded by the held combination of feature and probe value.
        :raises ValueError:
        If no gain is held (it is None).
        """
        return require(self.__gain, 'gain')

    def probe(self) -> float:
        """
        The probe value of the held combination.
        :raises ValueError:
        If no probe is held (it is None).
        """
        return require(self.__probe, 'probe')

    def update(self, feature: int, gain: float, probe: float) -> bool:
        """
        Offers a new combination, which replaces the held one unless its gain is lower.
        :param feature:
        The feature of the offered combination.
        :param gain:
        The gain of the offered combination.
        :param probe:
        The probe value of the offered combination.
        :return:
        True if the offered combination was adopted (its gain was not lower than the held
        gain, so ties are adopted too), otherwise, False.
        """
        # Deliberately "not <" rather than ">=": the two differ when a gain is NaN.
        adopted = not gain < self.gain()
        if adopted:
            self.__feature, self.__gain, self.__probe = feature, gain, probe
        return adopted

    @staticmethod
    def initial() -> 'NumericalPivotCandidate':
        """
        The starting candidate: feature 0, gain 0, probe 0.5.
        """
        return NumericalPivotCandidate(feature=0, gain=0, probe=0.5)

    def __str__(self):
        held = (self.__feature, self.__gain, self.__probe)
        return 'feature: {}, gain: {}, probe: {}'.format(*held)


In [61]:
class Node(metaclass=abc.ABCMeta):
    """
    The contract shared by every node of a decision tree, plus factory methods for
    the concrete node types.
    """

    @abc.abstractmethod
    def eval(self, element: any) -> any:
        """
        Evaluates an element against the (sub)tree rooted at this node.
        :param element:
        The element to evaluate.
        """
        raise NotImplementedError('Node#eval')

    @abc.abstractmethod
    def depth(self, level: int = 0) -> int:
        """
        Reports the depth reached beneath this node.
        :param level:
        The level at which this node itself sits.
        """

    def prune(self) -> 'Node':
        """
        Returns the node that should stand in place of this one; by default the node itself.
        """
        return self

    @staticmethod
    def terminate(value: any) -> 'Node':
        """
        Factory for a TerminalNode holding the given value.
        """
        return TerminalNode(value)

    @staticmethod
    def branch(pivot: 'Pivot', true_branch: Optional['Node'] = None, false_branch: Optional['Node'] = None) -> 'Node':
        """
        Factory for a BranchNode that routes elements through the given pivot.
        """
        return BranchNode(pivot, true_branch, false_branch)

    @staticmethod
    def lookup(mapping: Dict[any, 'Node'], feature: any) -> 'Node':
        """
        Factory for a LookupNode that routes elements by their value for the given feature.
        """
        return LookupNode(mapping, feature)


class TerminalNode(Node):
    """
    A leaf of the tree: every element evaluates to the single value it holds.
    """

    def __init__(self, value: any):
        """
        :param value:
        The value this leaf evaluates to; must not be None.
        """
        self.__value = require(value, 'value')

    @property
    def value(self) -> any:
        """
        The value held by this leaf.
        """
        return require(self.__value, 'value')

    def eval(self, element: any) -> any:
        """
        Returns the held value; the element itself is not inspected.
        """
        return self.value

    def depth(self, level: int = 0) -> int:
        """
        A leaf has nothing beneath it, so the depth is the level it sits at.
        """
        return level


class BranchNode(Node):
    """
    An internal node with two children: elements for which the pivot's predicate holds are
    routed to the true branch, all others to the false branch.
    """

    def __init__(self, pivot: 'Pivot', true_branch: 'Node', false_branch: 'Node'):
        """
        :param pivot:
        The pivot deciding which branch an element follows; must not be None.
        :param true_branch:
        The subtree for elements that satisfy the pivot.
        :param false_branch:
        The subtree for elements that do not satisfy the pivot.
        """
        self.__pivot = require(pivot, 'pivot')
        # Stored so that int(False) == 0 and int(True) == 1 index the matching branch.
        self.__nodes = np.asarray([false_branch, true_branch])

    @property
    def pivot(self) -> 'Pivot':
        """
        The pivot deciding which branch an element follows.
        """
        return self.__pivot

    @property
    def nodes(self):
        """
        Both branches as an array: the false branch at index 0, the true branch at index 1.
        """
        return self.__nodes

    @property
    def true_branch(self) -> 'Node':
        """
        The subtree for elements that satisfy the pivot.
        """
        return self.nodes[1]

    @property
    def false_branch(self) -> 'Node':
        """
        The subtree for elements that do not satisfy the pivot.
        """
        return self.nodes[0]

    def eval(self, element: any) -> any:
        """
        Routes the element to one of the two branches and evaluates it there.
        :param element:
        The element to evaluate; it is handed to the pivot and then to the chosen branch.
        :return:
        The result of evaluating the element against the chosen branch.
        """
        # The pivot's boolean outcome is used directly as the index of the branch to follow.
        chosen: Node = self.nodes[int(self.pivot.split(element))]
        return chosen.eval(element)

    def depth(self, level: int = 0) -> int:
        """
        The greater of the depths reached beneath the two branches.
        """
        below = level + 1
        return max(self.true_branch.depth(level=below), self.false_branch.depth(level=below))

    def prune(self) -> 'Node':
        """
        Collapses this node when both branches are leaves holding equal values.
        :return:
        The true branch in that case, otherwise this node itself.
        """
        kept, other = self.true_branch, self.false_branch
        if not isinstance(kept, TerminalNode):
            return self
        if not isinstance(other, TerminalNode):
            return self
        if kept.value == other.value:
            return kept
        return self


class LookupNode(Node):
    """
    An internal node with one child per known value of a feature: an element is routed
    to the child registered under the element's value for that feature.
    """

    __mapping: Dict[any, 'Node']
    __feature: any

    def __init__(self, mapping: Dict[any, 'Node'], feature: any):
        """
        :param mapping:
        The child node for each known value of the feature; must not be None.
        :param feature:
        The key under which the routing value is found within an element; must not be None.
        """
        self.__mapping = require(mapping, 'mapping')
        self.__feature = require(feature, 'feature')

    @property
    def mapping(self) -> Dict[any, 'Node']:
        """
        The child node for each known value of the feature.
        """
        return self.__mapping

    @property
    def feature(self) -> any:
        """
        The key under which the routing value is found within an element.
        """
        return self.__feature

    def eval(self, element: any) -> any:
        """
        Routes the element by its value for the feature and evaluates it against that child.
        :param element:
        The element to evaluate; indexed by the feature.
        :raises KeyError:
        If the element's value has no entry in the mapping.
        """
        key = element[self.feature]
        target: Node = require(self.mapping[key], 'lookup')
        return target.eval(element)

    def depth(self, level: int = 0) -> int:
        """
        The greatest depth reached beneath any child; with no children, one level below this node.
        """
        child_level = level + 1
        depths = [child.depth(level=child_level) for child in self.mapping.values()]
        return max(depths, default=child_level)

    def prune(self) -> 'Node':
        """
        Collapses this node when it has exactly one child.
        :return:
        That only child, otherwise this node itself.
        """
        if len(self.mapping) != 1:
            return self
        (only_child,) = self.mapping.values()
        return only_child


In [62]:
# Module-level metric instances (neither class holds any state of its own).
# `entropy` is bound as the default criterion of DecisionTreeBuilder.entropy;
# `gini` is not referenced elsewhere in the visible code.
entropy: SplitCriterionMetric = Entropy()
gini: SplitCriterionMetric = GiniImpurity()

class DecisionTreeBuilder(metaclass=abc.ABCMeta):
    """
    The contract for algorithms that build a decision tree from features and targets,
    together with the impurity / information gain computations they share.
    """

    @abc.abstractmethod
    def build(self, x, y) -> 'Node':
        """
        Builds a tree that predicts the targets from the features.
        :param x:
        The features.
        :param y:
        The targets.
        :return:
        The root node of the tree.
        """
        raise NotImplementedError('DecisionTreeBuilder#build')

    def entropy(self, attributes, criterion: SplitCriterionMetric = entropy) -> float:
        """
        Scores how mixed a collection of values is.
        :param attributes:
        The values whose distribution is scored.
        :param criterion:
        The metric applied to the relative frequencies of the distinct values;
        the module-level entropy metric unless stated otherwise.
        :return:
        The criterion's score for the relative frequencies of the distinct values.
        """
        _, probabilities = value_counts(attributes, normalise=True)
        return criterion.compute(frequencies=probabilities)

    def information_gain_categorical(self, data, target_attribute, feature_attribute):
        """
        Computes the information gained about the target by splitting on every distinct
        value of a feature: H(target) - sum over values v of p(v) * H(target | feature == v).
        :param data:
        The table holding both columns; must support column lookup by key, and its columns
        must support element-wise comparison and boolean-mask indexing.
        :param target_attribute:
        The key of the target column.
        :param feature_attribute:
        The key of the feature column.
        :return:
        The reduction in the target's entropy achieved by the split.
        """
        target, feature = data[target_attribute], data[feature_attribute]
        values, weights = value_counts(feature, normalise=True)
        # Entropy of the target that remains within each group of equal feature values,
        # weighted by the share of rows that fall into that group.
        conditional_entropy = sum(
            weight * self.entropy(target[feature == value])
            for value, weight in zip(values, weights)
        )
        return self.entropy(target) - conditional_entropy

    def information_gain_continuous(self, data, target_attribute, feature_attribute, probe):
        """
        Computes the information gained about the target by splitting a feature in two
        at a threshold: rows with feature <= probe against rows with feature > probe.
        :param data:
        The table holding both columns.
        :param target_attribute:
        The key of the target column.
        :param feature_attribute:
        The key of the feature column.
        :param probe:
        The threshold at which the feature is split.
        :return:
        The reduction in the target's entropy achieved by the split; 0.0 for empty data.
        """
        size, target, feature = len(data), data[target_attribute], data[feature_attribute]
        if size == 0:
            # Nothing to split; also avoids dividing by zero when weighting the halves.
            return 0.0

        total_entropy = self.entropy(target)
        lte, gt = feature <= probe, feature > probe

        lower_entropy = self.entropy(target[lte]) * (np.count_nonzero(lte) / size)
        upper_entropy = self.entropy(target[gt]) * (np.count_nonzero(gt) / size)

        return total_entropy - (lower_entropy + upper_entropy)

    @staticmethod
    def factory(implementation: str, **kwargs) -> 'DecisionTreeBuilder':
        """
        Instantiates a builder by the name of its algorithm.
        :param implementation:
        The name of the algorithm; 'ID3' is the only name registered.
        :param kwargs:
        Passed on to the builder's constructor.
        :raises ValueError:
        If no builder is registered under the given name.
        """
        factories = {
            'ID3': ID3DecisionTreeBuilder
        }
        constructor = require(factories.get(implementation, None), implementation)
        return constructor(**kwargs)

    @staticmethod
    def default() -> 'DecisionTreeBuilder':
        """
        Instantiates the default builder (ID3).
        """
        return DecisionTreeBuilder.factory('ID3')


class ID3DecisionTreeBuilder(DecisionTreeBuilder):
    """
    Builds a decision tree by recursively selecting the feature with the greatest information
    gain, as per: https://en.wikipedia.org/wiki/ID3_algorithm#Algorithm
    The selected feature is first split on a threshold (yielding a branch node); if that
    attempt raises, the feature is split per distinct value instead (yielding a lookup node).
    """

    # TODO: output classes in original format...
    # TODO: mean value
    def build(self, x, y) -> 'Node':
        """
        Builds a tree that predicts the target from the features.
        :param x:
        The feature table; used through the pandas DataFrame API (copy, columns, column
        and boolean-mask indexing).
        :param y:
        The target column; its name becomes the key of the target within the combined table.
        :return:
        The root node of the tree.
        """
        data = x.copy()  # copy so that attaching the target does not alter the caller's table
        data[y.name] = y
        return self._build(original=data, subset=data, features=x.columns, target=y.name)

    def _build(self, original, subset, features, target, parent_class=None) -> 'Node':
        """
        Recursively builds the (sub)tree for a subset of the data.
        :param original:
        The complete table; only passed along to the recursive calls.
        :param subset:
        The rows that reached this point of the tree.
        :param features:
        The keys of the features still available for splitting.
        :param target:
        The key of the target column.
        :param parent_class:
        The majority class of the parent's rows, used when the subset is empty.
        :return:
        The root node of the (sub)tree.
        :raises ValueError:
        If the subset is empty and there is no parent class to fall back upon.
        """
        # base case #1
        # There are no examples in the subset, which happens when no example in the parent set was found to match a
        # specific value of the selected attribute. This must be tested before the classes are inspected: an empty
        # subset has no classes, so there is no first class to read.
        if len(subset) <= 0:
            return Node.terminate(parent_class)

        classes = np.unique(subset[target])

        # base case #2
        # Every element of the subset belongs to the same class.
        if len(classes) <= 1:
            return Node.terminate(classes[0])

        majority_class = classes[majority_class_index(elements=subset[target])]

        # base case #3
        # There are no more attributes to be selected, but the examples still do not belong to the same class.
        if len(features) <= 0:
            return Node.terminate(majority_class)

        gains = np.asarray([
            self.information_gain_categorical(data=subset, target_attribute=target, feature_attribute=feature)
            for feature in features
        ])

        best_feature = features[np.argmax(gains)]
        attribute = subset[best_feature]

        # Each feature is used at most once along any path from the root.
        remaining_features = [feature for feature in features if feature != best_feature]

        subtree: Node

        # The threshold split is attempted first, and the per-value split is the fallback for attributes it
        # cannot handle (e.g. text values, which cannot be interpolated into probe values).
        # NOTE(review): any exception raised while building the descendants also triggers the fallback here.
        # Exception (rather than a bare except) at least lets KeyboardInterrupt and SystemExit through.
        try:
            subtree = self._build_continuous(original=original, subset=subset, features=remaining_features, target=target, parent_class=majority_class, best_feature=best_feature, attribute=attribute)
        except Exception:
            subtree = self._build_categorical(original=original, subset=subset, features=remaining_features, target=target, parent_class=majority_class, best_feature=best_feature, attribute=attribute)

        # Pruning may expose a node that can itself be pruned, so repeat until nothing changes.
        while (pruned := subtree.prune()) is not subtree:
            subtree = pruned

        return subtree

    def _build_continuous(self, original, subset, features, target, parent_class, best_feature, attribute) -> 'Node':
        """
        Splits the subset in two at the probe value of the selected feature that yields the
        greatest information gain, and builds a subtree for each half.
        :param best_feature:
        The key of the feature selected for this split.
        :param attribute:
        The column of values of that feature within the subset.
        :return:
        A branch node: rows with feature <= probe follow the true branch, the rest the false branch.
        """
        probes = self.create_probe_values(attribute.min(), attribute.max())

        # Seed the search with the selected feature and the lowest possible gain, so that the
        # candidate always names a real column of the subset, even if no probe scores >= 0.
        candidate: NumericalPivotCandidate = NumericalPivotCandidate(best_feature, float('-inf'), probes[0])
        for probe in probes:
            gain = self.information_gain_continuous(data=subset, target_attribute=target, feature_attribute=best_feature, probe=probe)
            candidate.update(feature=best_feature, gain=gain, probe=probe)

        feature, threshold = candidate.feature(), candidate.probe()
        values = subset[feature]

        def build_subtree(indices) -> 'Node':
            return self._build(original=original, subset=subset[indices], features=features, target=target, parent_class=parent_class)

        return Node.branch(
            pivot=Pivot.continuous(feature, threshold),
            true_branch=build_subtree(values <= threshold),
            false_branch=build_subtree(values > threshold),
        )

    def _build_categorical(self, original, subset, features, target, parent_class, best_feature, attribute) -> 'Node':
        """
        Splits the subset per distinct value of the selected feature, and builds a subtree
        for each group of rows.
        :param best_feature:
        The key of the feature selected for this split.
        :param attribute:
        The column of values of that feature within the subset.
        :return:
        A lookup node mapping each distinct value to the subtree of its rows.
        """
        mapping: Dict[any, Node] = {}

        for value in np.unique(attribute):
            # Boolean-mask selection keeps the column dtypes intact and only filters on the
            # selected feature (rows are not discarded for missing values in other columns).
            matching = subset[subset[best_feature] == value]
            mapping[value] = self._build(original=original, subset=matching, features=features, target=target, parent_class=parent_class)

        return Node.lookup(mapping=mapping, feature=best_feature)

    def create_probe_values(self, minima, maxima):
        """
        Generates the candidate thresholds tested when splitting a continuous attribute.
        :param minima:
        The smallest value of the attribute.
        :param maxima:
        The greatest value of the attribute.
        :return:
        The values one quarter, one half and three quarters of the way from minima to maxima.
        """
        return [v * minima + (1.0 - v) * maxima for v in [0.75, 0.5, 0.25]]  # TODO: expand values


In [63]:
class Model(metaclass=abc.ABCMeta):
    """
    The contract for a model that is optionally configured, then fitted to data, then
    used to make predictions.
    """

    def compile(self, *args, **kwargs):
        """
        Configures the model. Does nothing unless overridden.
        """
        return None

    @abc.abstractmethod
    def fit(self, x, y, *args, **kwargs):
        """
        Fits the model to the features and their targets.
        :param x:
        The features.
        :param y:
        The targets.
        """
        raise NotImplementedError('Model#fit')

    @abc.abstractmethod
    def predict(self, x, *args, **kwargs):
        """
        Predicts the targets of the features.
        :param x:
        The features.
        """
        raise NotImplementedError('Model#predict')


class DecisionTree(Model):
    """
    A decision tree classifier: a builder produces the tree when the model is fitted, and
    predictions are made by evaluating each sample against the root of that tree.
    """

    # TODO: default value (most common class..?)
    # Class-level defaults: the default builder, and no tree until fit is called.
    __builder: 'DecisionTreeBuilder' = DecisionTreeBuilder.default()
    __root: Optional['Node'] = None

    def compile(self, *args, **kwargs):
        """
        Selects the algorithm used to build the tree.
        :param kwargs:
        'implementation' names the algorithm; every other keyword is passed on to the
        builder's constructor. The current builder is kept when 'implementation' is
        absent, or when no builder is registered under that name.
        """
        options = dict(kwargs)
        # The name is handed to the factory positionally, so it must not remain among the
        # keywords as well (that would pass the same argument twice).
        implementation = options.pop('implementation', None)
        if implementation is None:
            return
        try:
            self.__builder = DecisionTreeBuilder.factory(implementation, **options)
        except ValueError:
            # Unknown implementation: keep the builder already in place.
            pass

    def fit(self, x, y, *args, **kwargs):
        """
        Builds the tree from the features and their targets, replacing any previous tree.
        :param x:
        The features.
        :param y:
        The targets.
        """
        self.__root = self.builder.build(x, y)

    def predict(self, x, *args, **kwargs):
        """
        Predicts the class of every sample.
        :param x:
        The samples; converted to one dictionary per row via to_dict(orient='records').
        :return:
        An array holding the tree's evaluation of each sample, in order.
        :raises ValueError:
        If the model has not been fitted.
        """
        tree: Node = self.root
        samples = x.to_dict(orient='records')
        return np.asarray([tree.eval(sample) for sample in samples])

    @property
    def builder(self) -> 'DecisionTreeBuilder':
        """
        The builder used to construct the tree.
        """
        return require(self.__builder, 'builder')

    @property
    def root(self) -> 'Node':
        """
        The root of the fitted tree.
        :raises ValueError:
        If the model has not been fitted.
        """
        return require(self.__root, 'root')

    def depth(self) -> int:
        """
        The depth of the fitted tree, or 0 when the model has not been fitted.
        """
        # Read the field directly: the root property raises for an unfitted model, which
        # would make the unfitted case unreachable.
        root = self.__root
        return 0 if root is None else root.depth()


### Section 4 - Implementation

In [64]:
def debug_tree(node, depth: int = 0, size: int = 1):
    """
    Prints an indented, line-per-node rendering of the (sub)tree rooted at a node.
    :param node:
    The node to render; a TerminalNode, BranchNode or LookupNode.
    :param depth:
    The depth printed for, and used to indent, this node's lines.
    :param size:
    How much the depth grows with each level of children.
    :raises ValueError:
    If the node is of none of the supported types.
    """
    prefix = str(depth).ljust(2) + ' |' + ('--' * depth) + '  '
    deeper = depth + size

    def emit(o):
        print(f'{prefix}{o}')

    if isinstance(node, TerminalNode):
        emit(f'class: {node.value}')
        return

    if isinstance(node, BranchNode):
        emit(f'truthy pivot: {node.pivot.__str__(condition=True)}')
        debug_tree(node.true_branch, depth=deeper, size=size)
        emit(f'falsey pivot: {node.pivot.__str__(condition=False)}')
        debug_tree(node.false_branch, depth=deeper, size=size)
        return

    if isinstance(node, LookupNode):
        for key, child in node.mapping.items():
            emit(f'lookup: {node.feature} {key}')
            debug_tree(child, depth=deeper, size=size)
        return

    raise ValueError(f'Unexpected node: {node}')


# The model under test: our own decision tree, with its default builder.
model: DecisionTree = DecisionTree()
# `dataset` is sklearn's load_iris (see the imports); features and targets are requested
# separately (return_X_y) and as pandas objects (as_frame).
[xxx, yyy] = dataset(return_X_y=True, as_frame=True)

# ddd = {
#     'wind_direction': ['N', 'S', 'E', 'W'],
#     'tide': ['Low', 'High'],
#     'swell_forecasting': ['small', 'medium', 'large'],
#     'good_waves': ['Yes', 'No'],
# }
#
# # create an empty dataframe
# df = pd.DataFrame(columns=ddd.keys())
#
# np.random.seed(42)
# for i in range(len(xxx)):
#     df.loc[i, 'wind_direction'] = str(np.random.choice(ddd['wind_direction'], 1)[0])
#     df.loc[i, 'tide'] = str(np.random.choice(ddd['tide'], 1)[0])
#     df.loc[i, 'swell_forecasting'] = str(np.random.choice(ddd['swell_forecasting'], 1)[0])
#     df.loc[i, 'good_waves'] = str(np.random.choice(ddd['good_waves'], 1)[0])
#     df.loc[i, 'temp'] = int(np.random.random() * 26) + 1
#     df.loc[i, 'hello'] = 'world'
#     for attr in xxx:
#         df.loc[i, attr] = xxx.iloc[i][attr]
#
# # xxx = df.drop('good_waves', 1)
# xxx = df
# # yyy = df['good_waves']

# Show the data being worked with.
print(f'Data\n{xxx}\n')
print(f'Targets\n{yyy}\n')

# Fit our tree on the full dataset, then print its depth and structure.
model.fit(xxx, yyy)

print(f'MAX DEPTH: {model.depth()}')
debug_tree(model.root, 1, 1)

# Predict on the same rows the tree was fitted on, so this is training accuracy.
predictions = model.predict(xxx)

accuracy = accuracy_score(yyy, predictions)

print(f'Predictions\n{predictions}')
print(f'Actual\n{yyy}')
print(f'Accuracy: {accuracy}')

# Reference point: sklearn's tree, fitted and scored on the same rows.
dt = DecisionTreeClassifier()
dt.fit(xxx, yyy)
p = dt.predict(xxx)

acc = accuracy_score(yyy, p)

print(f'Predictions\n{p}')
print(f'Actual\n{yyy}')
print(f'Accuracy: {acc}')

# differences = predictions.difference(p)
# differences = np.nonzero(predictions - p)

# differences = [f'Index: {idx}, Ours: {predictions[idx]}, Sklearns: {p[idx]}' for idx in index for index in np.nonzero(predictions - p)]
# Positions at which our predictions and sklearn's disagree. flatnonzero yields a flat,
# one-dimensional array of positions, so that the zip below pairs every position with its
# two predictions; comparing with != (rather than subtracting) also works for labels that
# are not numbers.
diff = np.flatnonzero(predictions != p)
ours = predictions[diff]
theirs = p[diff]

differences = [f'Index: {index}, Ours: {us}, Theirs: {them}' for (index, us, them) in zip(diff, ours, theirs)]

print(differences)

# TODO: python 3.6 compatibility (for google colab)
# TODO: test_train_split / sampling
# TODO: CRISP-DM
# TODO: cleanup
# TODO: documentation
# TODO: dummy dataset (demonstrating edge cases) and then proper dataset (demonstrating capabilities)

# interface Predicate:
#     apply(element: any) -> bool;
#
# interface Node:
#     eval(element: any) -> any;
#
# class BranchNode implements Node:
#     condition: Predicate;
#     left, right: Node;
#
#     eval(element: any) -> any:
#         if condition.apply(element):
#             return left.eval(element);
#         else:
#             return right.eval(element);
#
# class LeafNode implements Node:
#     value: any;
#
#     eval(element: any) -> any:
#         return value;

Data
     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                  5.1               3.5                1.4               0.2
1                  4.9               3.0                1.4               0.2
2                  4.7               3.2                1.3               0.2
3                  4.6               3.1                1.5               0.2
4                  5.0               3.6                1.4               0.2
..                 ...               ...                ...               ...
145                6.7               3.0                5.2               2.3
146                6.3               2.5                5.0               1.9
147                6.5               3.0                5.2               2.0
148                6.2               3.4                5.4               2.3
149                5.9               3.0                5.1               1.8

[150 rows x 4 columns]

Targets
0      0
1      0
2      0