In [1]:
import math
import random
from collections import Counter

class DecisionTree:
    """Classification tree grown by greedy binary splits on Gini impurity.

    An internal node is a dict with the keys 'feature', 'threshold', 'left'
    and 'right': samples whose value for 'feature' is <= 'threshold' go to
    'left', all others go to 'right'.  A leaf is a dict with the single key
    'class'.

    Args:
        max_depth: Nodes at this depth (root = 0) are always turned into
            leaves.
        min_samples_split: Nodes holding fewer samples than this become
            leaves.
        max_features: Number of randomly drawn features examined at each
            split.  'sqrt' (default) examines int(sqrt(n_features)) of them,
            None examines every feature, and an integer is clamped to the
            range [1, n_features].
    """

    def __init__(self, max_depth=5, min_samples_split=2, max_features='sqrt'):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.max_features = max_features
        self.tree = None

    def fit(self, X, y):
        """Grow the tree from the rows of ``X`` and their labels ``y``.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        if len(X) == 0:
            raise ValueError("cannot fit a decision tree on an empty dataset")
        if len(X) != len(y):
            raise ValueError(
                "X and y must have the same length (got %d and %d)"
                % (len(X), len(y))
            )
        self.tree = self._build_tree(X, y)

    def predict(self, X):
        """Return the list of predicted labels, one per row of ``X``.

        Raises:
            RuntimeError: If a row has to be classified before ``fit``.
        """
        return [self._predict_single(x, self.tree) for x in X]

    @staticmethod
    def _leaf(y):
        """Build a leaf predicting the most common label of ``y``."""
        # An empty label list has no majority; store None instead of crashing.
        label = Counter(y).most_common(1)[0][0] if len(y) > 0 else None
        return {'class': label}

    def _build_tree(self, X, y, depth=0):
        """Recursively build the node that covers the samples ``X``/``y``."""
        # Stopping criteria: depth limit, too few samples, or a pure node.
        if (depth >= self.max_depth
                or len(X) < self.min_samples_split
                or len(set(y)) == 1):
            return self._leaf(y)

        best_feature, best_threshold = self._best_split(X, y)
        if best_feature is None:
            # None of the examined features separates the samples.
            return self._leaf(y)

        # Partition the samples on the chosen split.
        left_X, left_y, right_X, right_y = [], [], [], []
        for row, label in zip(X, y):
            if row[best_feature] <= best_threshold:
                left_X.append(row)
                left_y.append(label)
            else:
                right_X.append(row)
                right_y.append(label)

        return {
            'feature': best_feature,
            'threshold': best_threshold,
            'left': self._build_tree(left_X, left_y, depth + 1),
            'right': self._build_tree(right_X, right_y, depth + 1),
        }

    def _n_candidate_features(self, n_features):
        """Translate ``self.max_features`` into a concrete feature count."""
        if n_features <= 0:
            return 0
        if self.max_features is None:
            return n_features
        if self.max_features == 'sqrt':
            wanted = int(math.sqrt(n_features))
        else:
            wanted = int(self.max_features)
        return max(1, min(wanted, n_features))

    def _best_split(self, X, y):
        """Find the (feature, threshold) pair with the lowest weighted Gini.

        Only a random subset of the features is examined (see
        ``max_features``).  Returns ``(None, None)`` when no examined feature
        yields two non-empty sides.
        """
        best_gini = float('inf')
        best_feature = None
        best_threshold = None

        n_features = len(X[0]) if len(X) > 0 else 0
        candidates = random.sample(
            range(n_features), self._n_candidate_features(n_features)
        )

        for feature in candidates:
            column = [row[feature] for row in X]
            # Sorted so that ties are always won by the smallest threshold,
            # independently of set iteration order.
            for threshold in sorted(set(column)):
                left_y, right_y = [], []
                for value, label in zip(column, y):
                    if value <= threshold:
                        left_y.append(label)
                    else:
                        right_y.append(label)

                # A split that leaves one side empty separates nothing.
                if not left_y or not right_y:
                    continue

                gini = self._gini_index(left_y, right_y)
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature
                    best_threshold = threshold

        return best_feature, best_threshold

    def _gini_index(self, left_y, right_y):
        """Return the size-weighted Gini impurity of a two-way split."""
        n_left = len(left_y)
        n_right = len(right_y)
        n_total = n_left + n_right

        def impurity(labels, size):
            # One counting pass instead of one list.count() call per class.
            return 1.0 - sum(
                (count / size) ** 2 for count in Counter(labels).values()
            )

        return ((n_left / n_total) * impurity(left_y, n_left)
                + (n_right / n_total) * impurity(right_y, n_right))

    def _predict_single(self, x, node):
        """Walk from ``node`` down to a leaf and return that leaf's label."""
        if node is None:
            raise RuntimeError(
                "this DecisionTree has not been fitted yet; call fit() first"
            )
        while 'class' not in node:
            side = 'left' if x[node['feature']] <= node['threshold'] else 'right'
            node = node[side]
        return node['class']


class RandomForest:
    """Majority-vote ensemble of ``DecisionTree`` classifiers.

    Every tree is trained on its own bootstrap sample: as many rows as the
    training set holds, drawn uniformly with replacement.

    Args:
        n_estimators: Number of trees to train.
        max_depth: Forwarded to every ``DecisionTree``.
        min_samples_split: Forwarded to every ``DecisionTree``.
    """

    def __init__(self, n_estimators=10, max_depth=5, min_samples_split=2):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.trees = []

    def fit(self, X, y):
        """Train ``n_estimators`` trees, replacing any previously fitted ones.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        n_samples = len(X)
        if n_samples == 0:
            raise ValueError("cannot fit a random forest on an empty dataset")
        if len(y) != n_samples:
            raise ValueError(
                "X and y must have the same length (got %d and %d)"
                % (n_samples, len(y))
            )

        self.trees = []
        for _ in range(self.n_estimators):
            # Bootstrap sample: n_samples row indices drawn with replacement.
            indices = [random.randrange(n_samples) for _ in range(n_samples)]
            X_sample = [X[i] for i in indices]
            y_sample = [y[i] for i in indices]

            tree = DecisionTree(max_depth=self.max_depth,
                                min_samples_split=self.min_samples_split)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        """Return, for every row of ``X``, the label most trees voted for.

        Raises:
            RuntimeError: If rows must be classified but the forest holds no
                trees (``fit`` was not called, or ``n_estimators`` is 0).
        """
        samples = list(X)
        if samples and not self.trees:
            raise RuntimeError(
                "this RandomForest has no trees; call fit() with "
                "n_estimators >= 1 first"
            )

        predictions = []
        for x in samples:
            votes = Counter(tree.predict([x])[0] for tree in self.trees)
            predictions.append(votes.most_common(1)[0][0])
        return predictions


# Usage example
if __name__ == "__main__":
    # Sample data: the truth table of logical OR, one row per bit pair
    X = [[a, b] for a in (0, 1) for b in (0, 1)]
    y = [a | b for a, b in X]

    # Build the random forest and train it
    rf = RandomForest(max_depth=3, n_estimators=5)
    rf.fit(X, y)

    # Predict on a copy of the training inputs
    test_X = [list(row) for row in X]
    predictions = rf.predict(test_X)

    print("Predicciones:", predictions)
    print("Real:", y)

Predicciones: [0, 1, 1, 1]
Real: [0, 1, 1, 1]


In [5]:
import math

# Compatibilidad entre Python normal y MicroPython
try:
    import urandom as random
except ImportError:
    import random

# Contador simple
def contar(lista):
    """Return a dict mapping every distinct item of ``lista`` to its count.

    Hand-written stand-in for ``collections.Counter`` so the module does not
    depend on it.
    """
    frecuencias = {}
    for elemento in lista:
        frecuencias[elemento] = frecuencias.get(elemento, 0) + 1
    return frecuencias

def mayoritario(lista):
    """Return the most frequent item of ``lista`` (None when it is empty).

    When several items share the highest count, the one that the dict
    returned by ``contar`` yields first is kept.
    """
    ganador, tope = None, -1
    for candidato, veces in contar(lista).items():
        # Strict comparison: a later item with an equal count never wins.
        if veces > tope:
            ganador, tope = candidato, veces
    return ganador

class DecisionTree:
    """Classification tree grown by greedy binary splits on Gini impurity.

    Written against the small API shared by CPython's ``random`` and
    MicroPython's ``urandom`` (only ``getrandbits`` is used) and relies on
    the module-level helper ``mayoritario`` to label leaves.

    An internal node is a dict with the keys 'feature', 'threshold', 'left'
    and 'right': samples whose value for 'feature' is <= 'threshold' go to
    'left', all others go to 'right'.  A leaf is a dict with the single key
    'class'.

    Args:
        max_depth: Nodes at this depth (root = 0) are always turned into
            leaves.
        min_samples_split: Nodes holding fewer samples than this become
            leaves.
    """

    def __init__(self, max_depth=5, min_samples_split=2):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.tree = None

    def fit(self, X, y):
        """Grow the tree from the rows of ``X`` and their labels ``y``."""
        self.tree = self._build_tree(X, y)

    def predict(self, X):
        """Return the list of predicted labels, one per row of ``X``."""
        return [self._predict_single(x, self.tree) for x in X]

    def _build_tree(self, X, y, depth=0):
        """Recursively build the node that covers the samples ``X``/``y``."""
        # Stopping criteria: depth limit, too few samples, or a pure node.
        if (depth >= self.max_depth
                or len(X) < self.min_samples_split
                or len(set(y)) == 1):
            return {'class': mayoritario(y)}

        best_feature, best_threshold = self._best_split(X, y)
        if best_feature is None:
            # None of the examined features separates the samples.
            return {'class': mayoritario(y)}

        # Partition the samples on the chosen split.
        left_X, left_y, right_X, right_y = [], [], [], []
        for row, label in zip(X, y):
            if row[best_feature] <= best_threshold:
                left_X.append(row)
                left_y.append(label)
            else:
                right_X.append(row)
                right_y.append(label)

        return {
            'feature': best_feature,
            'threshold': best_threshold,
            'left': self._build_tree(left_X, left_y, depth + 1),
            'right': self._build_tree(right_X, right_y, depth + 1),
        }

    @staticmethod
    def _rand_index(n):
        """Return a uniformly distributed random integer in ``range(n)``.

        Draws just enough random bits to cover ``n`` and rejects values that
        fall outside the range, so every index is reachable and equally
        likely (a fixed ``getrandbits(8) % n`` is neither once n > 256).

        Raises:
            ValueError: If ``n`` is smaller than 1.
        """
        if n < 1:
            raise ValueError("n must be at least 1")
        if n == 1:
            return 0
        bits = 1
        while (1 << bits) < n:
            bits += 1
        while True:
            value = random.getrandbits(bits)
            if value < n:
                return value

    def _sample_features(self, n_features):
        """Pick min(int(sqrt(n_features)) + 1, n_features) distinct features.

        Uses a partial Fisher-Yates shuffle, which always finishes after
        exactly that many draws.
        """
        m = min(int(math.sqrt(n_features)) + 1, n_features)
        pool = list(range(n_features))
        for i in range(m):
            j = i + self._rand_index(n_features - i)
            pool[i], pool[j] = pool[j], pool[i]
        return pool[:m]

    def _best_split(self, X, y):
        """Find the (feature, threshold) pair with the lowest weighted Gini.

        Only a random subset of the features is examined.  Returns
        ``(None, None)`` when no examined feature yields two non-empty sides.
        """
        # The weighted Gini impurity of a split is always below 1, so 1 is a
        # safe "worse than anything" starting value.
        best_gini = 1
        best_feature = None
        best_threshold = None

        n_features = len(X[0]) if len(X) > 0 else 0

        for feature in self._sample_features(n_features):
            column = [row[feature] for row in X]
            # Sorted so that ties are always won by the smallest threshold.
            for threshold in sorted(set(column)):
                left_y, right_y = [], []
                for value, label in zip(column, y):
                    if value <= threshold:
                        left_y.append(label)
                    else:
                        right_y.append(label)

                # A split that leaves one side empty separates nothing.
                if not left_y or not right_y:
                    continue

                gini = self._gini_index(left_y, right_y)
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature
                    best_threshold = threshold

        return best_feature, best_threshold

    def _gini_index(self, left_y, right_y):
        """Return the size-weighted Gini impurity of a two-way split."""
        n_left = len(left_y)
        n_right = len(right_y)
        n_total = n_left + n_right

        def gini(labels):
            # One counting pass instead of one list.count() call per class.
            counts = {}
            for label in labels:
                counts[label] = counts.get(label, 0) + 1
            size = len(labels)
            score = 1.0
            for count in counts.values():
                p = count / size
                score -= p * p
            return score

        return (n_left / n_total) * gini(left_y) + (n_right / n_total) * gini(right_y)

    def _predict_single(self, x, node):
        """Walk from ``node`` down to a leaf and return that leaf's label."""
        while 'class' not in node:
            if x[node['feature']] <= node['threshold']:
                node = node['left']
            else:
                node = node['right']
        return node['class']

class RandomForest:
    """Majority-vote ensemble of ``DecisionTree`` classifiers.

    Every tree is trained on its own bootstrap sample: as many rows as the
    training set holds, drawn uniformly with replacement.  Only
    ``random.getrandbits`` is used as a randomness source, and votes are
    tallied with the module-level helper ``mayoritario``.

    Args:
        n_estimators: Number of trees to train.
        max_depth: Forwarded to every ``DecisionTree``.
        min_samples_split: Forwarded to every ``DecisionTree``.
    """

    def __init__(self, n_estimators=5, max_depth=5, min_samples_split=2):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.trees = []

    @staticmethod
    def _bootstrap_indices(n_samples):
        """Draw ``n_samples`` row indices uniformly, with replacement.

        Uses just enough random bits to cover ``n_samples`` and rejects
        out-of-range draws, so every row is reachable and equally likely
        (a fixed ``getrandbits(8) % n_samples`` never reaches rows >= 256).
        """
        if n_samples <= 0:
            return []
        bits = 1
        while (1 << bits) < n_samples:
            bits += 1
        indices = []
        while len(indices) < n_samples:
            draw = random.getrandbits(bits)
            if draw < n_samples:
                indices.append(draw)
        return indices

    def fit(self, X, y):
        """Train ``n_estimators`` trees, replacing any previously fitted ones."""
        self.trees = []
        n_samples = len(X)
        for _ in range(self.n_estimators):
            indices = self._bootstrap_indices(n_samples)
            X_sample = [X[i] for i in indices]
            y_sample = [y[i] for i in indices]
            tree = DecisionTree(max_depth=self.max_depth, min_samples_split=self.min_samples_split)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        """Return, for every row of ``X``, the label most trees voted for."""
        predictions = []
        for x in X:
            votes = [tree.predict([x])[0] for tree in self.trees]
            predictions.append(mayoritario(votes))
        return predictions

# Quick smoke test
if __name__ == '__main__':
    # Truth table of logical OR, one row per bit pair
    X = [[a, b] for a in (0, 1) for b in (0, 1)]
    y = [a | b for a, b in X]

    rf = RandomForest(max_depth=3, n_estimators=5)
    rf.fit(X, y)

    # Predict on a copy of the training inputs
    test_X = [list(fila) for fila in X]
    pred = rf.predict(test_X)

    print("Predicciones:", pred)
    print("Reales:", y)
    print("")
    print("Jannet Ortiz Aguilar")


Predicciones: [0, 1, 1, 1]
Reales: [0, 1, 1, 1]

Jannet Ortiz Aguilar
