In [1]:
import numpy as np
import pandas as pd

# Árbol de decisión
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Laboratorio 8: Random Forest y despliegues

**Duración:** 2 horas  
**Formato:** Implementación, despliegue y competencia  

---

## Portada del equipo

**Integrantes:**
- Nombre 1 (Usuario GitHub)
- Nombre 2 (Usuario GitHub)
- Nombre 3 (Usuario GitHub)

**Repositorio del equipo:**  
<https://github.com/usuario/equipoX>

**Fecha de entrega:**  
__/__/____

## Elemento 1 - Implementación del Random Forest

In [2]:
df=pd.read_csv('iris_train.csv')
X,y=df.iloc[:,:-1].values,df.iloc[:,-1].values

In [3]:
class RandomForest:
  def __init__(self, n_estimators=100, max_depth='sqrt', random_state=17):
    self.n_estimators = n_estimators
    self.max_depth = max_depth
    self.random_state = random_state
    self.trees = []

  def bootstrap(self, X, y):
    n_samples = len(X)
    idxs = np.random.choice(n_samples, n_samples, replace=True)
    return X[idxs], y[idxs]

  def fit(self, X, y):
    self.trees = []
    if self.max_depth == 'sqrt':
      depth = int(np.sqrt(len(X[0])))
    elif self.max_depth == 'log2':
      depth = int(np.log2(len(X[0])))
    else:
      depth = int(np.sqrt(len(X[0])))
    for i in range(self.n_estimators):
      tree = DecisionTreeClassifier(max_depth=depth, random_state=self.random_state+i)
      X_sample, y_sample = self.bootstrap(X, y)
      tree.fit(X_sample, y_sample)
      self.trees.append(tree)

  def predict(self, X):
    tree_preds = np.array([tree.predict(X) for tree in self.trees])
    return np.array([np.argmax(np.bincount(tree_preds[:, i])) for i in range(len(X))])


  def fit_predict(self, X, y):
    self.fit(X, y)
    return self.predict(X)


In [4]:
df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
count,123.0,123.0,124.0,120.0,125.0
mean,5.821221,2.764442,3.909994,1.186667,0.984
std,2.428445,2.174626,2.484749,0.758474,0.822898
min,-11.601111,-14.870849,1.1,0.1,0.0
25%,5.1,2.7,1.6,0.3,0.0
50%,5.7,3.0,4.25,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,24.111271,4.4,23.439238,2.5,2.0


In [5]:
df[(df["sepal length (cm)"]>10) | (df["sepal length (cm)"]<0)]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
22,-11.601111,3.8,6.4,2.0,2
97,24.111271,2.3,4.4,1.3,1


In [6]:
# Importar knn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import KNNImputer

# Rellenar datos atípicos usando el promedio de su clase
df=pd.read_csv('iris_train.csv')
df[(df > 10) | (df < 0)] = None

# Rellenar nulos con KNN por clase
df_filled = df.copy()

for clase in df.iloc[:, -1].unique():
    mascara = df.iloc[:, -1] == clase
    imputer = KNNImputer(n_neighbors=5)
    df_filled.loc[mascara, df.columns[:-1]] = imputer.fit_transform(df.loc[mascara, df.columns[:-1]])


In [7]:
df_filled.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
count,125.0,125.0,125.0,125.0,125.0
mean,5.82096,3.04176,3.75312,1.19616,0.984
std,0.817912,0.446658,1.766489,0.754991,0.822898
min,4.3,2.0,1.1,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.7,3.0,4.2,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.7,4.4,6.9,2.5,2.0


In [8]:
X,y=df_filled.iloc[:,:-1].values,df_filled.iloc[:,-1].values

rf=RandomForest(n_estimators=100, max_depth='sqrt', random_state=17)
rf.fit(X,y)

y_hat=rf.predict(X)
accuracy_score(y,y_hat)

0.952

In [9]:
import pickle

with open("modelo.pkl", "wb") as f:
    pickle.dump(rf, f)

### Elemento 1 - Preguntas teóricas

## Elemento 2 - Comparativa con scikit-learn

In [10]:
# comparar con sklearn
from sklearn.ensemble import RandomForestClassifier
rf_sklearn=RandomForestClassifier(n_estimators=100, max_depth=None, random_state=17)
rf_sklearn.fit(X,y)

y_hat=rf_sklearn.predict(X)
accuracy_score(y,y_hat)

1.0

### Elemento 2 - Preguntas teóricas

## Elemento 3 - Creación y despliegue de la API

### Elemento 3 - Preguntas teóricas