<a href="https://colab.research.google.com/github/MarianiPedro/topicos_especiais_ANN/blob/main/ClassificarMNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import urllib.request, gzip, struct, numpy as np
import os
from google.colab import drive

# Folder, relative to "My Drive", used as the on-disk cache for this notebook.
FOLDERNAME = 'Colab Notebooks/ANN_2025_1/'
# Mount Google Drive under /content/drive (a remount is requested on every run).
drive.mount("/content/drive", force_remount=True)
# Absolute cache path on the mounted drive; also the default argument of
# download_and_parse_mnist below.
cache_dir = f"/content/drive/My Drive/{FOLDERNAME}"
os.makedirs(cache_dir, exist_ok=True)  # no error if the folder already exists

def download_and_parse_mnist(cache_dir=cache_dir):
    """Fetch the four MNIST IDX archives (reusing cached copies) and parse them.

    Each archive is downloaded into ``cache_dir`` only when a file with the
    same name is not already there. Downloads go to a temporary ``.part``
    file that is renamed on success, so an interrupted transfer is never
    mistaken for a valid cached archive on the next run.

    Args:
        cache_dir: Directory where the ``.gz`` archives are stored.

    Returns:
        Tuple ``(train_images, train_labels, test_images, test_labels)`` of
        ``uint8`` NumPy arrays. Images have shape ``(n, rows, cols)`` as
        declared in the file header; labels are 1-D. The arrays are built
        with ``np.frombuffer`` and are therefore read-only.

    Raises:
        ValueError: If an archive has an unexpected IDX magic number or its
            payload does not match the item count declared in its header.
    """
    # GitHub mirror; the original host (http://yann.lecun.com/exdb/mnist)
    # was returning errors.
    base = "https://raw.githubusercontent.com/fgnt/mnist/master/"
    files = {
        "train_images":"train-images-idx3-ubyte.gz",
        "train_labels":"train-labels-idx1-ubyte.gz",
        "test_images":"t10k-images-idx3-ubyte.gz",
        "test_labels":"t10k-labels-idx1-ubyte.gz"
    }
    paths = {}

    for key, fname in files.items():
        local_path = os.path.join(cache_dir, fname)
        if not os.path.exists(local_path):
            print(f"Baixando {fname} ...")
            # Write to a sibling temp file first and rename only after the
            # transfer finished; a half-written file must not look cached.
            partial_path = local_path + ".part"
            try:
                urllib.request.urlretrieve(base + fname, partial_path)
                os.replace(partial_path, local_path)
            finally:
                # Still present only when the download or rename failed.
                if os.path.exists(partial_path):
                    os.remove(partial_path)
        else:
            print(f"Usando cache de {fname}")
        paths[key] = local_path

    def load_images(fname):
        """Parse an IDX3 image archive into an ``(n, rows, cols)`` uint8 array."""
        with gzip.open(fname, 'rb') as f:
            # Header: four big-endian uint32 values.
            magic, n, rows, cols = struct.unpack(">IIII", f.read(16))
            if magic != 2051:  # 0x00000803 identifies an IDX3 unsigned-byte file
                raise ValueError(
                    f"{fname}: unexpected magic number {magic} for an image file "
                    "(expected 2051); delete the cached file and retry"
                )
            pixels = np.frombuffer(f.read(), dtype=np.uint8)
            if pixels.size != n * rows * cols:
                raise ValueError(
                    f"{fname}: expected {n * rows * cols} pixel bytes, got {pixels.size}"
                )
            return pixels.reshape(n, rows, cols)

    def load_labels(fname):
        """Parse an IDX1 label archive into a 1-D uint8 array."""
        with gzip.open(fname, 'rb') as f:
            # Header: two big-endian uint32 values.
            magic, n = struct.unpack(">II", f.read(8))
            if magic != 2049:  # 0x00000801 identifies an IDX1 unsigned-byte file
                raise ValueError(
                    f"{fname}: unexpected magic number {magic} for a label file "
                    "(expected 2049); delete the cached file and retry"
                )
            labels = np.frombuffer(f.read(), dtype=np.uint8)
            if labels.size != n:
                raise ValueError(f"{fname}: expected {n} labels, got {labels.size}")
            return labels

    return load_images(paths["train_images"]), load_labels(paths["train_labels"]), \
           load_images(paths["test_images"]), load_labels(paths["test_labels"])

# Load MNIST (from the cache when available) and print each array's shape
# as a quick sanity check.
X_train, y_train, X_test, y_test = download_and_parse_mnist()
print(*(arr.shape for arr in (X_train, y_train, X_test, y_test)))


Mounted at /content/drive
Baixando train-images-idx3-ubyte.gz ...
Baixando train-labels-idx1-ubyte.gz ...
Baixando t10k-images-idx3-ubyte.gz ...
Baixando t10k-labels-idx1-ubyte.gz ...
(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)


In [12]:
import torch, torch.nn as nn, torch.nn.functional as F, torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Usando:", device)

Usando: cpu


In [13]:
class Normalize(TransformerMixin, BaseEstimator):
    """Stateless transformer that divides its input by 255.0."""

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` divided element-wise by 255.0."""
        scaled = X / 255.0
        return scaled

class AddChannel(TransformerMixin, BaseEstimator):
    """Reshape a batch of images to ``(N, 1, height, width)`` float32.

    Inserts a single channel axis so the data matches the NCHW layout
    expected by ``nn.Conv2d``.

    Args:
        height: Image height in pixels. Defaults to 28.
        width: Image width in pixels. Defaults to 28.
    """

    def __init__(self, height=28, width=28):
        # Stored under the same names as the constructor arguments, as
        # scikit-learn's get_params/set_params/clone require.
        self.height = height
        self.width = width

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` reshaped to ``(-1, 1, height, width)`` as float32.

        Raises:
            ValueError: If the number of elements in ``X`` is not a multiple
                of ``height * width`` (raised by ``reshape``).
        """
        # np.asarray is a no-op for ndarrays and lets array-likes through.
        return np.asarray(X).reshape(-1, 1, self.height, self.width).astype(np.float32)

In [17]:
class DeepCNN(nn.Module):
    """Small CNN: three 3x3 convolutions followed by one linear classifier.

    Layer order: conv(32) -> ReLU -> max-pool(2) -> conv(64) -> ReLU ->
    max-pool(2) -> conv(128) -> ReLU -> flatten -> linear(num_classes).

    Args:
        input_shape: Shape of a single sample as ``(channels, height, width)``
            (any sequence, e.g. a tuple or ``torch.Size``).
        num_classes: Number of output logits.
    """

    def __init__(self, input_shape, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(input_shape[0], 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.flatten = nn.Flatten()

        # Probe the convolutional stack with one all-zero sample to learn the
        # flattened feature size. no_grad: this pass only measures a shape,
        # so there is no reason to record it for autograd.
        with torch.no_grad():
            flattened_size = self._features(torch.zeros(1, *input_shape)).size(1)

        self.fc = nn.Linear(flattened_size, num_classes)

    def _features(self, x):
        """Apply the convolutional stack and flatten to ``(batch, features)``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        return self.flatten(x)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        return self.fc(self._features(x))

In [15]:
class TorchCNNClassifier(BaseEstimator, ClassifierMixin):
    """scikit-learn style wrapper that trains a ``DeepCNN`` with Adam.

    Uses the module-level ``device`` for both training and inference.

    Args:
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size for training; also the chunk size used
            by ``predict``.
        lr: Learning rate passed to ``optim.Adam``.
    """

    def __init__(self, epochs=5, batch_size=128, lr=1e-3):
        self.epochs = epochs
        self.batch_size = batch_size
        self.lr = lr

    def fit(self, X, y):
        """Train a fresh ``DeepCNN`` on ``X`` / ``y``.

        Args:
            X: NumPy array of samples; ``X.shape[1:]`` is handed to
                ``DeepCNN`` as the per-sample input shape.
            y: NumPy array of integer labels, one per sample.

        Returns:
            self. Sets ``classes_`` (sorted unique labels, a torch tensor)
            and ``model`` (the trained network).
        """
        X = torch.from_numpy(X).float()
        y = torch.from_numpy(y).long()
        # Map arbitrary label values to contiguous ids 0..C-1 for the loss.
        self.classes_, y_ids = torch.unique(y, return_inverse=True)
        ds = TensorDataset(X, y_ids)
        loader = DataLoader(ds, batch_size=self.batch_size, shuffle=True)
        self.model = DeepCNN(X.shape[1:], len(self.classes_)).to(device)
        opt = optim.Adam(self.model.parameters(), lr=self.lr)
        loss_fn = nn.CrossEntropyLoss()
        self.model.train()
        for ep in range(self.epochs):
            for xb, yb in loader:
                xb, yb = xb.to(device), yb.to(device)
                opt.zero_grad()
                logits = self.model(xb)
                loss = loss_fn(logits, yb)
                loss.backward()
                opt.step()
            # Reports the loss of the last mini-batch of the epoch only.
            print(f"Epoca {ep+1}/{self.epochs}, Loss final: {loss.item():.4f}")
        return self

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Inference runs in chunks of ``batch_size`` so that the activations
        for the whole input never have to fit in device memory at once.

        Args:
            X: NumPy array of samples shaped like the training data.

        Returns:
            1-D torch tensor with the predicted label of each sample, taken
            from ``classes_``.
        """
        X = torch.from_numpy(X).float()
        self.model.eval()
        id_chunks = []
        with torch.no_grad():
            for start in range(0, X.shape[0], self.batch_size):
                xb = X[start:start + self.batch_size].to(device)
                id_chunks.append(self.model(xb).argmax(dim=1).cpu())
        # torch.cat rejects an empty list, so handle zero samples explicitly.
        ids = torch.cat(id_chunks) if id_chunks else torch.empty(0, dtype=torch.long)
        return self.classes_[ids]

In [18]:
# End-to-end model: divide pixels by 255, add the channel axis, train the CNN.
# NOTE(review): no random seed is set in the visible cells, so the reported
# loss and accuracy can vary between runs.
pipeline = Pipeline(steps=[
    ("norm", Normalize()),
    ("channel", AddChannel()),
    ("cnn", TorchCNNClassifier(epochs=5)),
])

# Pipeline.fit returns the pipeline itself, so fit and predict can be chained.
ypred = pipeline.fit(X_train, y_train).predict(X_test)
print("Acurácia:", accuracy_score(y_test, ypred))

Epoca 1/5, Loss final: 0.0987
Epoca 2/5, Loss final: 0.0190
Epoca 3/5, Loss final: 0.0941
Epoca 4/5, Loss final: 0.0269
Epoca 5/5, Loss final: 0.0075
Acurácia: 0.989
