<a href="https://colab.research.google.com/github/Mariyselita/CIP/blob/main/Tarea8_Iris_Tree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.6-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.6


In [2]:
import pandas as pd
from ucimlrepo import fetch_ucirepo
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

In [3]:
# Fetch dataset id 53 through ucimlrepo; the class names in the report at the
# end of the notebook (Iris-setosa, ...) show this is the Iris dataset.
iris = fetch_ucirepo(id=53)

In [4]:
# Feature table exactly as provided by the fetched dataset object
# (it is converted to a NumPy array further down with .to_numpy()).
X = iris.data.features
# Collapse the targets table into a flat 1-D array of labels, one per row.
y = iris.data.targets.values.flatten()

In [5]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Convert the train/test feature tables to plain NumPy arrays before handing
# them to DecisionTree, whose split search uses NumPy-style X[:, feature] indexing.
X_train_dense = X_train.to_numpy()
X_test_dense = X_test.to_numpy()

In [7]:
class DecisionTree:
    """Small classification tree that splits on Gini impurity.

    Candidate thresholds for every feature are taken from a fixed set of
    percentiles of that feature's values in the current node. Samples whose
    feature value is strictly below the chosen threshold go to the left child,
    all others to the right child.

    The fitted tree is stored in ``self.tree`` as nested dicts:

    * leaf: ``{'label': <class label>}``
    * internal node: ``{'feature': int, 'threshold': float,
      'left': <node>, 'right': <node>}``

    Parameters
    ----------
    max_depth : int or None, default None
        Maximum depth of the tree. ``None`` grows until nodes are pure or no
        candidate threshold separates the samples.
    percentiles : sequence of float, default (10, 25, 50, 75, 90)
        Percentiles (0-100) of each feature evaluated as split thresholds.
    """

    def __init__(self, max_depth=None, percentiles=(10, 25, 50, 75, 90)):
        self.max_depth = max_depth
        self.percentiles = tuple(percentiles)
        # Explicit "not fitted" marker so predict() can detect the state.
        self.tree = None

    def fit(self, X, y):
        """Grow the tree from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric feature matrix (NumPy array, nested list or DataFrame).
        y : array-like of shape (n_samples,)
            Class labels; flattened to 1-D before use.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from ``y``.
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        if X.shape[0] == 0:
            raise ValueError("cannot fit a DecisionTree on an empty dataset")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
            )
        self.tree = self._grow_tree(X, y)

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in ``y``.

        ``np.unique`` returns labels sorted, and ``argmax`` picks the first
        maximum, so ties are always resolved to the smallest label. This keeps
        results reproducible, unlike iterating over a ``set`` of strings.
        """
        labels, counts = np.unique(y, return_counts=True)
        return labels[np.argmax(counts)]

    def _grow_tree(self, X, y, depth=0):
        """Recursively build the node dict for the samples ``X`` / ``y``."""
        # Depth budget exhausted: stop and vote.
        if self.max_depth is not None and depth >= self.max_depth:
            return {'label': self._majority_label(y)}
        # Pure node: nothing left to separate.
        if len(np.unique(y)) == 1:
            return {'label': y[0]}

        best_split = self._find_best_split(X, y)
        if best_split is None:
            # No candidate threshold separates the samples (e.g. constant features).
            return {'label': self._majority_label(y)}

        # _find_best_split only returns splits with two non-empty sides, so no
        # further emptiness check is needed here.
        left_idxs = X[:, best_split['feature']] < best_split['threshold']
        right_idxs = ~left_idxs

        return {
            'feature': best_split['feature'],
            'threshold': best_split['threshold'],
            'left': self._grow_tree(X[left_idxs], y[left_idxs], depth + 1),
            'right': self._grow_tree(X[right_idxs], y[right_idxs], depth + 1),
        }

    def _find_best_split(self, X, y):
        """Return the lowest-impurity split as ``{'feature', 'threshold'}``.

        Returns ``None`` when every candidate threshold would leave one side
        empty. Among equally good splits the first one found (lowest feature
        index, then lowest threshold) is kept.
        """
        best_gini = 1.0
        best_split = None
        n_samples = len(y)

        for feature in range(X.shape[1]):
            column = X[:, feature]
            # np.unique drops repeated percentile values so each distinct
            # threshold is scored only once.
            for threshold in np.unique(np.percentile(column, self.percentiles)):
                left_idxs = column < threshold
                n_left = np.count_nonzero(left_idxs)
                if n_left == 0 or n_left == n_samples:
                    continue
                # Impurity of the two children, weighted by their share of samples.
                gini = (
                    self._gini_impurity(y[left_idxs]) * (n_left / n_samples)
                    + self._gini_impurity(y[~left_idxs]) * ((n_samples - n_left) / n_samples)
                )
                if gini < best_gini:
                    best_gini = gini
                    best_split = {'feature': feature, 'threshold': threshold}
        return best_split

    def _gini_impurity(self, y):
        """Return the Gini impurity ``1 - sum(p_c ** 2)`` of the labels ``y``."""
        _, counts = np.unique(y, return_counts=True)
        class_probs = counts / len(y)
        return 1 - np.sum(class_probs ** 2)

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray
            One predicted label per row.

        Raises
        ------
        AttributeError
            If called before ``fit``. The exception type matches the
            missing-attribute error raised previously, so existing handlers
            keep working; only the message is now explicit.
        """
        if self.tree is None:
            raise AttributeError("DecisionTree is not fitted yet; call fit() before predict()")
        # Iterating an ndarray yields rows; iterating a DataFrame would yield column names.
        X = np.asarray(X)
        return np.array([self._predict_tree(x, self.tree) for x in X])

    def _predict_tree(self, x, tree):
        """Walk from node ``tree`` down to a leaf for sample ``x`` and return its label."""
        node = tree
        while 'label' not in node:
            if x[node['feature']] < node['threshold']:
                node = node['left']
            else:
                node = node['right']
        return node['label']

In [8]:
# Build the hand-written tree with a depth cap of 3, train it on the NumPy
# training arrays, then predict labels for the held-out test rows.
tree = DecisionTree(max_depth=3)
tree.fit(X_train_dense, y_train)
y_pred = tree.predict(X_test_dense)

In [9]:
# Fraction of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print("Precisión del modelo:", accuracy)
# Heading and per-class precision/recall/F1 table, emitted on separate lines.
print("Reporte de clasificación:", classification_report(y_test, y_pred), sep="\n")

Precisión del modelo: 1.0
Reporte de clasificación:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

