In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from matplotlib import pyplot as plt
import numpy as np
from torch.utils.data import DataLoader, TensorDataset,random_split
import random
import os
import cv2

In [2]:
def set_seed(seed: int):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
set_seed(42)


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [4]:
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform, download=True)

In [5]:
from torch.utils.data import random_split
total_train_size = len(train_dataset)
val_size = int(0.25* total_train_size)
train_size = int(0.75*total_train_size)
train_dataset,val_dataset = random_split(train_dataset,[train_size,val_size])

In [6]:
class CNN(nn.Module):
    def __init__(self,kernel_size=3,stride=2,dropout=0.5):
        super(CNN,self).__init__()
        self.con_layer1=nn.Conv2d(1,16,kernel_size=kernel_size)
        self.con_layer2= nn.Conv2d(16,32, kernel_size=kernel_size)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2,stride=stride)
        self.maxpool2= nn.MaxPool2d(kernel_size=2,stride=stride)
        self.dropout = nn.Dropout2d(dropout)
        self.size= int((27-kernel_size*(stride+1))/(stride**2) +1)
        self.fc = nn.Linear(32*self.size*self.size,10)
    def forward(self,X):
        X = self.maxpool1(F.relu(self.con_layer1(X)))
        X = self.maxpool2(F.relu(self.con_layer2(X)))
        X = self.dropout(X)
        X = X.reshape(-1,32*(self.size**2))
        X = self.fc(X)
        return X

In [7]:
batch_size=64
train_loader=DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)
val_loader = DataLoader(dataset=val_dataset,batch_size=batch_size,shuffle=True)

In [8]:
# model = CNN().to(device=device)
model = torch.load('model.pth')
learn_rate = 0.001
loss_func = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(model.parameters(),lr= learn_rate)
num_epochs = 1
train_losses=[]
train_accuracies = []
val_losses=[]
val_accuracies = []
for epoch in range(num_epochs):
    loss_epoch=0
    total_train=0
    correct_train=0
    for images,labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        output = model(images)
        optimiser.zero_grad()
        loss = loss_func(output,labels)
        loss.backward()
        optimiser.step()
        loss_epoch += loss.item()
        _, predicted = torch.max(output.data, 1)
        total_train += len(labels)
        correct_train += (predicted==labels).sum().item()
    train_loss = loss_epoch/len(train_loader)
    train_accuracy = 100*(correct_train/total_train)
    train_accuracies.append(train_accuracy)
    train_losses.append(train_loss)

    with torch.no_grad():
        loss_val=0
        total_val=0
        correct_val=0
        for images,labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            output = model(images)
            loss_epoch += loss_func(output,labels)
            
            _, predicted = torch.max(output.data, 1)
            total_val += len(labels)
            correct_val += (predicted==labels).sum().item()
            
        val_loss = loss_epoch/len(val_loader)
        val_accuracy = 100*(correct_val/total_val)
        val_accuracies.append(val_accuracy)
        val_losses.append(val_loss)
    print(f"Epoch {epoch+1}/{num_epochs} - "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}% - "
          f"Validation Loss: {val_loss:.4f}, Validation Acc: {val_accuracy:.2f}%")
        

  model = torch.load('model.pth')


Epoch 1/1 - Train Loss: 0.0456, Train Acc: 98.57% - Validation Loss: 0.2049, Validation Acc: 98.06%


In [9]:
torch.save(model,"model.pth")

In [10]:
data1 = np.load('../data/data0.npy')
lab1 = np.load('../data/lab0.npy')
data2 = np.load('../data/data1.npy')
lab2 = np.load('../data/lab1.npy')
data3 = np.load('../data/data2.npy')
lab3 = np.load('../data/lab2.npy')

In [11]:
data_cumulative = np.concat([data1,data2,data3])
labels_cumulative = np.concat([lab1,lab2,lab3])
labels = torch.tensor(labels_cumulative, dtype=torch.long).to(device)

In [12]:
import cv2
import numpy as np
import torch

def process_images(data_cumulative):
    processed_images = []
    for img in data_cumulative:
        _, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        kernel = np.ones((2,1), np.uint8)
        binary = cv2.dilate(binary, kernel, iterations=1)
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary, connectivity=8)
        digit_regions = []
        min_area = 20        
        for i in range(1, num_labels):
            x = stats[i, cv2.CC_STAT_LEFT]
            y = stats[i, cv2.CC_STAT_TOP]
            w = stats[i, cv2.CC_STAT_WIDTH]
            h = stats[i, cv2.CC_STAT_HEIGHT]
            area = stats[i, cv2.CC_STAT_AREA]
            
            if area > min_area:
                digit_regions.append((x, y, w, h))
        digit_regions.sort(key=lambda x: x[0])
        digit_images = []
        for x, y, w, h in digit_regions:
            digit = binary[y:y+h, x:x+w]
            digit_with_border = cv2.copyMakeBorder(digit, 10, 10, 10, 10, 
                                                 cv2.BORDER_CONSTANT, value=0)
            resized_digit = cv2.resize(digit_with_border, (28, 28))
            digit_tensor = torch.tensor(resized_digit/255.0, dtype=torch.float32).unsqueeze(0)
            digit_images.append(digit_tensor)
            
        processed_images.append(digit_images)
    
    return processed_images

processed_images = process_images(data_cumulative)

In [13]:
model.eval()
total = 0
correct = 0
for idx,images_dig in enumerate(processed_images):
    total+=1
    dig_sum = 0
    for dig in images_dig:
        # plt.imshow(dig.squeeze(0).cpu().numpy())
        # plt.show()
        dig = dig.to(device)
        output = model(dig.unsqueeze(0))
        # print(torch.argmax(torch.softmax(output[0],dim=0)).item())
        dig_sum += torch.argmax(torch.softmax(output[0],dim=0)).item()
    # print(dig_sum,end=" ")
    # print(labels[idx].item() )
    if labels[idx].item() == dig_sum:
        correct+=1


In [14]:
print(correct/total)

0.5673
