In [13]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from lazypredict.Supervised import LazyClassifier, LazyRegressor
from torchvision import datasets, transforms
import time

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from tqdm.notebook import tqdm 

In [2]:
import joblib

In [14]:
# Load the MNIST train/test splits from ./data (download=True fetches them
# if they are not already present).
transform = transforms.Compose([transforms.ToTensor()])
mnist_train = datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
mnist_test = datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Labels as NumPy arrays.
Y_train = mnist_train.targets.numpy()
Y_test = mnist_test.targets.numpy()

# Flatten every 28x28 image into one row of 784 values and divide by 255.0
# (presumably to bring 8-bit pixel intensities into [0, 1] -- confirm dtype).
# NOTE(review): the raw `.data` tensor is read directly, so `transform` does
# not appear to take part in building these arrays -- confirm.
X_train = mnist_train.data.numpy().reshape(-1, 28 * 28) / 255.0
X_test = mnist_test.data.numpy().reshape(-1, 28 * 28) / 255.0

## 1. Variamos criterios

In [5]:
def evaluar_criterios(X_train, Y_train, X_test, Y_test,
                      criterios=('gini', 'entropy', 'log_loss')):
    """Fit one random forest per split criterion and print its test accuracy.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels, passed to ``fit``.
    X_test, Y_test
        Held-out features and labels used to score each fitted model.
    criterios : iterable of str, optional
        Values forwarded as ``RandomForestClassifier(criterion=...)``.
        Defaults to the three criteria this cell originally hard-coded.

    Returns
    -------
    None
        One ``Criterio '<name>' | Accuracy: <score>`` line is printed per
        criterion.
    """
    for criterio in criterios:
        # random_state is fixed so that runs differ only by the criterion.
        rf = RandomForestClassifier(criterion=criterio, random_state=42)
        rf.fit(X_train, Y_train)
        Y_pred = rf.predict(X_test)
        # The score must be bound to the same name the f-string reads; the
        # previous code stored it as `accuracy` and then formatted `acc`.
        acc = accuracy_score(Y_test, Y_pred)
        print(f"Criterio '{criterio}' | Accuracy: {acc:.4f}")

# Run the criterion sweep on the train/test arrays loaded above.
evaluar_criterios(X_train=X_train, Y_train=Y_train, X_test=X_test, Y_test=Y_test)

Criterio 'gini' | Accuracy: 0.9704
Criterio 'entropy' | Accuracy: 0.9686
Criterio 'log_loss' | Accuracy: 0.9686


## 2. Variamos el número de estimadores

In [10]:
def evaluar_estimators(X_train, Y_train, X_test, Y_test, estimators=(10, 50)):
    """Fit a gini random forest for each forest size and print its accuracy.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels, passed to ``fit``.
    X_test, Y_test
        Held-out features and labels used to score each fitted model.
    estimators : iterable of int, optional
        Values forwarded as ``n_estimators``. Defaults to ``(10, 50)``, the
        grid this cell originally hard-coded, so existing calls behave the
        same. Passing a grid here avoids redefining the function per sweep.

    Returns
    -------
    None
        One ``n_estimators=<n> | Accuracy: <score>`` line is printed per size.
    """
    for n in estimators:
        # Criterion and seed are held fixed; only the number of trees varies.
        rf = RandomForestClassifier(criterion='gini', n_estimators=n, random_state=42)
        rf.fit(X_train, Y_train)
        acc = accuracy_score(Y_test, rf.predict(X_test))
        print(f"n_estimators={n} | Accuracy: {acc:.4f}")
        
# Run the forest-size sweep defined in this cell on the loaded arrays.
evaluar_estimators(X_train=X_train, Y_train=Y_train, X_test=X_test, Y_test=Y_test)

n_estimators=10 | Accuracy: 0.9492
n_estimators=50 | Accuracy: 0.9668


In [15]:
def evaluar_estimators(X_train, Y_train, X_test, Y_test, estimators=(100, 200, 500)):
    """Fit a gini random forest for each forest size and print its accuracy.

    This cell redefines ``evaluar_estimators`` from an earlier cell with a
    different grid of forest sizes.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels, passed to ``fit``.
    X_test, Y_test
        Held-out features and labels used to score each fitted model.
    estimators : iterable of int, optional
        Values forwarded as ``n_estimators``. Defaults to ``(100, 200, 500)``,
        the grid this cell originally hard-coded, so existing calls behave
        the same.

    Returns
    -------
    None
        One ``n_estimators=<n> | Accuracy: <score>`` line is printed per size.
    """
    for n in estimators:
        # Criterion and seed are held fixed; only the number of trees varies.
        rf = RandomForestClassifier(criterion='gini', n_estimators=n, random_state=42)
        rf.fit(X_train, Y_train)
        acc = accuracy_score(Y_test, rf.predict(X_test))
        print(f"n_estimators={n} | Accuracy: {acc:.4f}")
        
# Run the forest-size sweep defined in this cell on the loaded arrays.
evaluar_estimators(X_train=X_train, Y_train=Y_train, X_test=X_test, Y_test=Y_test)

n_estimators=100 | Accuracy: 0.9704
n_estimators=200 | Accuracy: 0.9707
n_estimators=500 | Accuracy: 0.9712


In [16]:
def evaluar_estimators(X_train, Y_train, X_test, Y_test, estimators=(750, 1000)):
    """Fit a gini random forest for each forest size and print its accuracy.

    This cell redefines ``evaluar_estimators`` from earlier cells with a
    different grid of forest sizes.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels, passed to ``fit``.
    X_test, Y_test
        Held-out features and labels used to score each fitted model.
    estimators : iterable of int, optional
        Values forwarded as ``n_estimators``. Defaults to ``(750, 1000)``,
        the grid this cell originally hard-coded, so existing calls behave
        the same.

    Returns
    -------
    None
        One ``n_estimators=<n> | Accuracy: <score>`` line is printed per size.
    """
    for n in estimators:
        # Criterion and seed are held fixed; only the number of trees varies.
        rf = RandomForestClassifier(criterion='gini', n_estimators=n, random_state=42)
        rf.fit(X_train, Y_train)
        acc = accuracy_score(Y_test, rf.predict(X_test))
        print(f"n_estimators={n} | Accuracy: {acc:.4f}")
        
# Run the forest-size sweep defined in this cell on the loaded arrays.
evaluar_estimators(X_train=X_train, Y_train=Y_train, X_test=X_test, Y_test=Y_test)

n_estimators=750 | Accuracy: 0.9718
n_estimators=1000 | Accuracy: 0.9725


## 3. Variamos la profundidad máxima

In [20]:
def evaluar_profundidad_max(X_train, Y_train, X_test, Y_test,
                            max_depths=(5, 10, 15, 20, 30, None)):
    """Fit a 500-tree gini random forest per depth limit and print accuracy.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels, passed to ``fit``.
    X_test, Y_test
        Held-out features and labels used to score each fitted model.
    max_depths : iterable of int or None, optional
        Values forwarded as ``RandomForestClassifier(max_depth=...)``.
        Defaults to the grid this cell originally hard-coded.

    Returns
    -------
    None
        One ``max_depth=<depth> | Accuracy: <score>`` line is printed per
        depth.
    """
    for depth in max_depths:
        # n_estimators stays at 500 for every run; only max_depth is swept.
        rf = RandomForestClassifier(criterion='gini', n_estimators=500,
                                    max_depth=depth, random_state=42)
        rf.fit(X_train, Y_train)
        acc = accuracy_score(Y_test, rf.predict(X_test))
        # Label the swept hyper-parameter correctly: the value printed here is
        # the depth limit, not the number of estimators.
        print(f"max_depth={depth} | Accuracy: {acc:.4f}")
        
# Run the max-depth sweep on the train/test arrays loaded above.
evaluar_profundidad_max(X_train=X_train, Y_train=Y_train, X_test=X_test, Y_test=Y_test)

n_estimators=5 | Accuracy: 0.8673
n_estimators=10 | Accuracy: 0.9521
n_estimators=15 | Accuracy: 0.9682
n_estimators=20 | Accuracy: 0.9707
n_estimators=30 | Accuracy: 0.9714
n_estimators=None | Accuracy: 0.9712
