In [1]:
import torch
import torchvision

In [2]:
print("torch:", torch.__version__)

torch: 2.3.0


In [3]:
from torchvision.datasets import MNIST

In [4]:
from torchvision.transforms import Lambda, ToTensor, Compose
trasform = Compose([
    ToTensor(),
    Lambda(lambda image: image/255),
    Lambda(lambda image: image.view(784))
])

data_train = MNIST(root="./", download=True, train=True, transform=trasform)
data_test = MNIST(root="./", download=True, train=False, transform=trasform)

In [5]:
data_train[0][0].shape

torch.Size([784])

In [6]:
from torch import nn, optim

class Model(nn.Module):
    def __init__(self, sizes):      #[128, 64, 10]
        super().__init__()
        self.hidden_layer_1 = nn.Linear(784,sizes[0])
        self.act1= nn.Sigmoid()
        self.hidden_layer_2 = nn.Linear(sizes[0], sizes[1])
        self.act2 = nn.Sigmoid()
        self.output_layer = nn.Linear(sizes[1], sizes[2])
        self.act3 = nn.Softmax(dim=1)

        self.loss = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.parameters())

    
    def forward(self, inputs):
        x = self.hidden_layer_1(inputs)
        x = self.act1(x)
        x = self.hidden_layer_2(x)
        x = self.act2(x)
        x = self.output_layer(x)
        x = self.act3(x)
        return x

    def fit(self, X, Y):
        self.optimizer.zero_grad()
        y_pred = self.forward(X)
        loss = self.loss(y_pred, Y)
        loss.backward()
        self.optimizer.step()
        return loss.item()
    
    def predict(self, X):
        with torch.no_grad():
            return torch.argmax(self.forward(X), axis=1)
        
    
    def evaluate(self, test_dataloader):
        correct = 0
        for X, Y in test_dataloader:
            y_pred = self.predict(X)
            correct += (Y==y_pred).sum()
        acc = correct/(len(test_dataloader)*16)
        print(f"Accuracy : {acc}")

In [7]:
model = Model([128, 64, 10])

In [8]:
print(model)

Model(
  (hidden_layer_1): Linear(in_features=784, out_features=128, bias=True)
  (act1): Sigmoid()
  (hidden_layer_2): Linear(in_features=128, out_features=64, bias=True)
  (act2): Sigmoid()
  (output_layer): Linear(in_features=64, out_features=10, bias=True)
  (act3): Softmax(dim=1)
  (loss): CrossEntropyLoss()
)


In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)     

cuda


Model(
  (hidden_layer_1): Linear(in_features=784, out_features=128, bias=True)
  (act1): Sigmoid()
  (hidden_layer_2): Linear(in_features=128, out_features=64, bias=True)
  (act2): Sigmoid()
  (output_layer): Linear(in_features=64, out_features=10, bias=True)
  (act3): Softmax(dim=1)
  (loss): CrossEntropyLoss()
)

In [10]:
from torch.utils.data import DataLoader

BATCH = 512

train_dataloader  = DataLoader(data_train, batch_size=BATCH, shuffle=True)
test_dataloader = DataLoader(data_test, batch_size=BATCH, shuffle=True)

In [11]:
from tqdm import tqdm

epochs = 1000

for i in range(epochs):
    total_loss = 0
    for X, Y in tqdm(train_dataloader, desc=f"Fitting epoch {i}"):
        loss = model.fit(X, Y)
        total_loss += loss
    avg_loss = total_loss/len(train_dataloader)
    print(f"Epoch {i}: {avg_loss:.4f}")    

Fitting epoch 0:   0%|          | 0/118 [00:00<?, ?it/s]


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

In [None]:
model.evaluate(test_dataloader)

In [None]:
path = "./mnist_model.pth"
torch.save(model, path)