In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from tqdm import tqdm

In [2]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, (3, 3), (1, 1), (1, 1))
        self.conv2 = nn.Conv2d(32, 32, (3, 3), (1, 1), (1, 1))
        self.conv3 = nn.Conv2d(32, 64, (3, 3), (1, 1), (1, 1))
        self.conv4 = nn.Conv2d(64, 128, (3, 3), (1, 1), (1, 1))
        self.fc1 = nn.Linear(128*8*8, 512)
        self.fc2 = nn.Linear(512, 10)
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=(2, 2))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=(2, 2))
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, kernel_size=(2, 2))
        x = F.relu(self.conv4(x))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = torch.dropout(x, 0.2, train=True)
        x = self.fc2(x)
        x = torch.softmax(x, dim=1)
        return x 

    def accuracy(self, preds, labels):
        maxs, indices = torch.max(preds, 1)
        acc = torch.sum(indices == labels) / len(preds)
        return acc.cpu()

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Model().to(device)

In [4]:
# Hyper Parameters
batch_size = 64
epochs = 20
lr = 0.001

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [5]:
# Dataset
transform = transforms.Compose([
                        transforms.RandomRotation(10),
                        transforms.Resize((70, 70)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

dataset = torchvision.datasets.ImageFolder(root="/content/drive/MyDrive/datasets/MNIST_persian", transform=transform)
train_data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [6]:
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_function = nn.CrossEntropyLoss()

In [7]:
# train
model.train()

for epoch in range(epochs):
    train_loss = 0.0
    train_acc = 0.0
    for images, labels in tqdm(train_data_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        preds = model(images)

        loss = loss_function(preds, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss
        train_acc += model.accuracy(preds, labels)
    
    total_loss = train_loss / len(train_data_loader)
    total_acc = train_acc / len(train_data_loader)

    print(f"Epoch: {epoch}, Loss: {total_loss}, Acc: {total_acc}")

100%|██████████| 19/19 [00:04<00:00,  4.72it/s]


Epoch: 0, Loss: 2.2980849742889404, Acc: 0.12911184132099152


100%|██████████| 19/19 [00:03<00:00,  5.01it/s]


Epoch: 1, Loss: 2.0690135955810547, Acc: 0.390350878238678


100%|██████████| 19/19 [00:03<00:00,  4.96it/s]


Epoch: 2, Loss: 1.8805313110351562, Acc: 0.5844298601150513


100%|██████████| 19/19 [00:03<00:00,  5.11it/s]


Epoch: 3, Loss: 1.8536678552627563, Acc: 0.6047148704528809


100%|██████████| 19/19 [00:03<00:00,  4.77it/s]


Epoch: 4, Loss: 1.7609652280807495, Acc: 0.7050438523292542


100%|██████████| 19/19 [00:03<00:00,  4.99it/s]


Epoch: 5, Loss: 1.7253291606903076, Acc: 0.7417762875556946


100%|██████████| 19/19 [00:03<00:00,  5.01it/s]


Epoch: 6, Loss: 1.685822606086731, Acc: 0.7768640518188477


100%|██████████| 19/19 [00:03<00:00,  4.98it/s]


Epoch: 7, Loss: 1.66959547996521, Acc: 0.7927631735801697


100%|██████████| 19/19 [00:04<00:00,  3.92it/s]


Epoch: 8, Loss: 1.6703969240188599, Acc: 0.7878289222717285


100%|██████████| 19/19 [00:03<00:00,  5.17it/s]


Epoch: 9, Loss: 1.6448310613632202, Acc: 0.8185306787490845


100%|██████████| 19/19 [00:04<00:00,  4.58it/s]


Epoch: 10, Loss: 1.6438826322555542, Acc: 0.8193530440330505


100%|██████████| 19/19 [00:03<00:00,  5.02it/s]


Epoch: 11, Loss: 1.6280637979507446, Acc: 0.8341556787490845


100%|██████████| 19/19 [00:03<00:00,  4.98it/s]


Epoch: 12, Loss: 1.6077061891555786, Acc: 0.8536184430122375


100%|██████████| 19/19 [00:03<00:00,  5.03it/s]


Epoch: 13, Loss: 1.6093270778656006, Acc: 0.8500548005104065


100%|██████████| 19/19 [00:03<00:00,  5.02it/s]


Epoch: 14, Loss: 1.6139248609542847, Acc: 0.8475877046585083


100%|██████████| 19/19 [00:03<00:00,  4.75it/s]


Epoch: 15, Loss: 1.571777105331421, Acc: 0.893366277217865


100%|██████████| 19/19 [00:04<00:00,  4.23it/s]


Epoch: 16, Loss: 1.5563642978668213, Acc: 0.9081689119338989


100%|██████████| 19/19 [00:04<00:00,  4.75it/s]


Epoch: 17, Loss: 1.5359998941421509, Acc: 0.921875


100%|██████████| 19/19 [00:03<00:00,  5.08it/s]


Epoch: 18, Loss: 1.531252145767212, Acc: 0.9276315569877625


100%|██████████| 19/19 [00:04<00:00,  4.35it/s]

Epoch: 19, Loss: 1.5210882425308228, Acc: 0.941063642501831





In [None]:
# inference
import cv2
import numpy as np


model.eval()

# preprocess
img = cv2.imread("three.jpg")
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.resize(img, (28, 28))
tensor = transform(img).unsqueeze(0).to(device)

# process
preds = model(tensor)

# postprocess
preds = preds.cpu().detach().numpy()
output = np.argmax(preds)
print(output)