# Ejercicio 3: Primera prueba con RandomForest

In [39]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist

# Load the full MNIST dataset (train and test splits) through Keras.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Subset sizes, named so they can be tuned in one place instead of being
# repeated as literals in every slice below.
N_TRAIN_SAMPLES = 10_000
N_TEST_SAMPLES = 2_000

# Keep only the first N_TRAIN_SAMPLES training images and their labels.
X_train_small = X_train[:N_TRAIN_SAMPLES]
y_train_small = y_train[:N_TRAIN_SAMPLES]

# The test split is reduced the same way; raise N_TEST_SAMPLES to evaluate
# on more images.
X_test_small = X_test[:N_TEST_SAMPLES]
y_test_small = y_test[:N_TEST_SAMPLES]

# Report the shapes of the reduced image arrays.
print("Shape del set de entrenamiento:", X_train_small.shape)
print("Shape del set de prueba:", X_test_small.shape)

Shape del set de entrenamiento: (10000, 28, 28)
Shape del set de prueba: (2000, 28, 28)


### Preprocesamiento

In [49]:
from sklearn.preprocessing import Binarizer, MinMaxScaler, StandardScaler

# Flatten every image into a single row of features; -1 lets NumPy infer the
# feature count from the remaining dimensions.
X_train_flattened = X_train_small.reshape(X_train_small.shape[0], -1)
X_test_flattened = X_test_small.reshape(X_test_small.shape[0], -1)

print("Shape del set de entrenamiento:", X_train_flattened.shape)
print("Shape del set de prueba:", X_test_flattened.shape)

# Variant: divide by 255.0, intended to bring the pixel values into [0, 1].
X_train_normalized = X_train_flattened / 255.0
X_test_normalized = X_test_flattened / 255.0

# Variant: binarize the raw pixel values around a threshold of 127.
binarizer = Binarizer(threshold=127)
X_train_binarized = binarizer.fit_transform(X_train_flattened)
X_test_binarized = binarizer.transform(X_test_flattened)

# Variant: z-score standardization. The scaler is fitted on the training set
# only and then applied unchanged to the test set.
standard_scaler = StandardScaler()
X_train_standardized = standard_scaler.fit_transform(X_train_flattened)
X_test_standardized = standard_scaler.transform(X_test_flattened)

# Variant: min-max scaling, again fitted on the training set only.
minmax_scaler = MinMaxScaler()
X_train_scaled = minmax_scaler.fit_transform(X_train_flattened)
X_test_scaled = minmax_scaler.transform(X_test_flattened)

# Backward-compatible alias: this cell previously reused the name `scaler`
# for both scalers and left it bound to the fitted MinMaxScaler.
scaler = minmax_scaler

Shape del set de entrenamiento: (10000, 784)
Shape del set de prueba: (2000, 784)


### Entrenamiento de RandomForest con cada variante de preprocesamiento

In [50]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Training matrices for every preprocessing variant, keyed by display label.
datasets = {
    "Raw (sin normalizar)": X_train_flattened,
    "Normalizado (/255)": X_train_normalized,
    "Binarizado": X_train_binarized,
    "Estandarizado (Z-score)": X_train_standardized,
    "Min-Max Scaling": X_train_scaled
}

# Matching test matrices; the keys must mirror those of `datasets` because
# the loop below pairs train and test data by label.
test_sets = {
    "Raw (sin normalizar)": X_test_flattened,
    "Normalizado (/255)": X_test_normalized,
    "Binarizado": X_test_binarized,
    "Estandarizado (Z-score)": X_test_standardized,
    "Min-Max Scaling": X_test_scaled
}

# Train and score one random forest per preprocessing variant.
for nombre, X_train_version in datasets.items():
    print(f"\n🔎 Evaluando: {nombre}")

    # Resolve the paired test matrix before training so that a label missing
    # from `test_sets` raises KeyError before the fit rather than after it.
    X_test_version = test_sets[nombre]

    # Same hyperparameters and seed for every variant, so the input
    # representation is the only thing that changes between runs.
    model = RandomForestClassifier(n_estimators=150, random_state=42)
    model.fit(X_train_version, y_train_small)

    y_pred = model.predict(X_test_version)

    # Accuracy on the reduced test set, printed as a percentage.
    acc = accuracy_score(y_test_small, y_pred)
    print(f"✅ Precisión: {acc * 100:.2f}%")



🔎 Evaluando: Raw (sin normalizar)
✅ Precisión: 93.35%

🔎 Evaluando: Normalizado (/255)
✅ Precisión: 93.35%

🔎 Evaluando: Binarizado
✅ Precisión: 92.30%

🔎 Evaluando: Estandarizado (Z-score)
✅ Precisión: 93.35%

🔎 Evaluando: Min-Max Scaling
✅ Precisión: 93.35%
