In [1]:
import os, shutil, random
import numpy as np
import torch
import cv2

from torchmetrics.classification import AveragePrecision, MulticlassAccuracy, MulticlassF1Score
from sklearn.model_selection import train_test_split
import pandas as pd
import torchvision as tv
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import seaborn as sns
from ntm.encapsulated import EncapsulatedNTM
from config import configuration as configs

In [3]:
class ConvBlock(nn.Module):
    def __init__(self, input_channels: int, out_channels: int, kernel_size: tuple = (3, 3)):
        """Basic Conv Block with convolution layer, etc.

        Args:
            input_channels (int): _description_
            out_channels (int): _description_
            kernel_size (tuple, optional): _description_. Defaults to (3, 3).
        """
        super().__init__()
        
        self.input_channels = input_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        
        self.conv = nn.Conv2d(self.input_channels, self.out_channels, self.kernel_size)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(self.kernel_size)
        self.dropout = nn.Dropout2d()
        self.batchnorm = nn.BatchNorm2d(self.out_channels)
    
    def forward(self, x, training:bool = True):
        conv_res = self.conv(x)
        activated = self.relu(conv_res)
        pooled = self.maxpool(activated)
        if training:
            pooled = self.dropout(pooled)
        y = self.batchnorm(pooled)
        return y
    
    def __repr__(self):
        return f"ConvBlock({self.input_channels, self.out_channels, self.kernel_size})"


class FeedforwardController(nn.Module):
    def __init__(self, num_inputs:int, num_layers:int) -> None:
        super().__init__()
        self.num_inputs = num_inputs
        # self.num_outputs = num_outputs
        self.num_layers = num_layers
        # self.batch_size = batch_size
        self.device_ = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        self.model_ = nn.Sequential(
            ConvBlock(self.num_inputs, 32, (2,2)),
            ConvBlock(32, 64, (2,2))
        )
        self.flatten = nn.Flatten()
        self.final_layer = nn.LazyLinear(44)

    def forward(self, x, training:bool=True):
        y = self.model_(x)
        y = self.flatten(y)
        y = self.final_layer(y)
        return F.softmax(y)


In [24]:
num_inputs = 1
num_outputs = 44
controller_size = 3136 #1024     3136 for FeedforwardController in the cell above 12800 for feedforwardcontroller with 
controller_layers = 1
num_heads = 4
num_classes = 44
N, M = 10, 10
controller_ = FeedforwardController
num_epochs = 5

batch_size = 2 #220

# defining the network
# net = EncapsulatedNTM(
#     num_inputs,
#     num_outputs,
#     controller_size,
#     controller_layers,
#     num_heads,
#     N,
#     M,
#     controller_= controller_, #FeedforwardController,
#     vanilla_heads=True
# )
net = FeedforwardController(1, 3)
device_ = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")




In [14]:

class dataset(torch.utils.data.Dataset):
    def __init__(self, annotations_file, img_dir):
        self.images_csv = annotations_file.reset_index(drop=True) #pd.read_csv(annotations_file).reset_index(drop=True)
        self.img_dir = img_dir
        # self.transforms_ = tv.transforms.Compose([
        #     tv.transforms.Resize(40),
        #     # tv.transforms.CenterCrop(40),
        #     tv.transforms.ConvertImageDtype(torch.float),
        #     tv.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        # ])
        
    def transforms_(self, image):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if image.ndim == 3 else image
        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 21, 10)
        # normed= cv2.normalize(thresh, None, 0, 1.0, cv2.NORM_MINMAX, dtype=cv2.CV_32F)
        skeleton = cv2.ximgproc.thinning(thresh, None, 1)
        image = cv2.resize(skeleton, (32, 32))
        return image

    def __len__(self):
        return len(self.images_csv)

    def __getitem__(self, index):
        img_path = os.path.join(self.img_dir, self.images_csv.iloc[index, 0])
        # image = tv.io.image.read_image(img_path)
        image = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        image_ = self.transforms_(image) #.to(device_)
        image = torch.from_numpy(image_)
        label = self.images_csv.iloc[index, 1]
        label = torch.nn.functional.one_hot(torch.tensor(label).to(torch.int64), num_classes=num_outputs)
        return image.unsqueeze(0), label

In [15]:
Config = configs()

PATH_PRINTED_TRAIN_CSV, PATH_PRINTED_TRAIN_IMGS = Config.paths(printed=True, train=True)
PATH_PRINTED_VAL_CSV, PATH_PRINTED_VAL_IMGS = Config.paths(printed=True, train=False)

PATH_HW_TRAIN_CSV, PATH_HW_TRAIN_IMGS = Config.paths(printed=False, train=True)
PATH_HW_VAL_CSV, PATH_HW_VAL_IMGS = Config.paths(printed=False, train=False)


In [16]:
train_dataset = dataset(pd.read_csv(PATH_PRINTED_TRAIN_CSV), PATH_PRINTED_TRAIN_IMGS)
val_dataset = dataset(pd.read_csv(PATH_PRINTED_VAL_CSV), PATH_PRINTED_VAL_IMGS)

# creating dataloader
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

In [25]:
# loss
loss_fn = torch.nn.BCELoss()

# optimizer
optimizer = torch.optim.Adam(net.parameters())

# metric
# metric = AveragePrecision(task="multiclass", num_classes=num_outputs)
metric = MulticlassAccuracy(num_classes=44)

In [29]:
num_epochs = 50


# training loop
cnn_train_loss = []
cnn_train_acc = []
cnn_val_loss = []
cnn_val_acc = []


for epoch in range(num_epochs):
    print("Epoch {}".format(epoch))

    last_loss = 0.0
    list_outputs = []
    list_labels = []
    
    net.train()
    for i, data in enumerate(train_dataloader):
        running_loss = 0.0
        
        inputs, labels = data[0].to(device_), data[1].to(device_)
        # print(inputs.size())
        labels = labels.type(torch.float)
        optimizer.zero_grad()

        # net.init_sequence(batch_size)


        training = True
        outputs = net(inputs.type(torch.float))
        avg_loss = loss_fn(outputs, labels)

        list_outputs.append(outputs)
        list_labels.append(torch.argmax(labels, dim=1))
        
        # avg_prec = metric(outputs, torch.argmax(labels, dim=1))
        # print(torch.argmax(outputs, dim=1), torch.argmax(labels, dim=1))
        avg_loss.backward()

        optimizer.step()

        running_loss += avg_loss.item()
        last_loss = running_loss
        # last_avg_prec = avg_prec

        # appending the train loss for grpah plotting
        cnn_train_loss.append(avg_loss.item())

    outputs = torch.cat(list_outputs, dim=0)
    labels = torch.cat(list_labels).squeeze()
    # print(labels)
    # print(torch.argmax(outputs, dim=1))
    acc = metric(outputs, labels)

    # adding for the graph 
    cnn_train_acc.append(acc)

    print("epoch {}, loss {:.3f}, train acc {:.3f}".format(epoch, last_loss, acc))
    # model_scripted = torch.jit.script(net)
    # torch.save(net.state_dict(), "model_state_convnet_01-06-23_.pt")
    # torch.save(net, "model_convnet_{}_{}.pt".format("12-05-23", epoch))
    
    # validation
    list_outputs = []
    list_labels = []
    
    with torch.no_grad():
        running_vloss = 0.0
        for i, data in enumerate(val_dataloader):
            # net.init_sequence(batch_size)

            vinputs, vlabels = data[0].to(device_), data[1].to(device_)
            vlabels = vlabels.type(torch.float)
            voutputs = net(vinputs.type(torch.float))
            list_outputs.append(voutputs)
            list_labels.append(torch.argmax(vlabels, dim=1))
            vloss = loss_fn(voutputs, vlabels)
            running_vloss += vloss

            # appending the val loss for the graph plotting
            cnn_val_loss.append(vloss.item())

        voutputs = torch.cat(list_outputs, dim=0)
        vlabels = torch.cat(list_labels).squeeze()
        vacc = metric(voutputs, vlabels)
        
        # appending the val accuracy for plotting graph
        cnn_val_acc.append(vacc)

        avg_vloss = running_vloss / (i+1)
        print("Loss train {:.3f},  validation {:.3f},  val acc {:.3f}".format(avg_loss, avg_vloss, vacc))


Epoch 0


  return F.softmax(y)


epoch 0, loss 0.002, train acc 0.994
Loss train 0.002,  validation 0.002,  val acc 0.986
Epoch 1
epoch 1, loss 0.000, train acc 0.991
Loss train 0.000,  validation 0.002,  val acc 0.988
Epoch 2
epoch 2, loss 0.010, train acc 0.991
Loss train 0.010,  validation 0.001,  val acc 0.991
Epoch 3
epoch 3, loss 0.000, train acc 0.992
Loss train 0.000,  validation 0.001,  val acc 0.993
Epoch 4
epoch 4, loss 0.000, train acc 0.991
Loss train 0.000,  validation 0.002,  val acc 0.987
Epoch 5
epoch 5, loss 0.000, train acc 0.992
Loss train 0.000,  validation 0.001,  val acc 0.993
Epoch 6


KeyboardInterrupt: 

In [30]:
train_dataset = dataset(pd.read_csv(PATH_HW_TRAIN_CSV), PATH_HW_TRAIN_IMGS)
val_dataset = dataset(pd.read_csv(PATH_HW_VAL_CSV), PATH_HW_VAL_IMGS)

# creating dataloader
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

In [31]:
num_epochs = 50


# training loop
cnn_train_loss = []
cnn_train_acc = []
cnn_val_loss = []
cnn_val_acc = []


for epoch in range(num_epochs):
    print("Epoch {}".format(epoch))

    last_loss = 0.0
    list_outputs = []
    list_labels = []
    
    net.train()
    for i, data in enumerate(train_dataloader):
        running_loss = 0.0
        
        inputs, labels = data[0].to(device_), data[1].to(device_)
        # print(inputs.size())
        labels = labels.type(torch.float)
        optimizer.zero_grad()

        # net.init_sequence(batch_size)


        training = True
        outputs = net(inputs.type(torch.float))
        avg_loss = loss_fn(outputs, labels)

        list_outputs.append(outputs)
        list_labels.append(torch.argmax(labels, dim=1))
        
        # avg_prec = metric(outputs, torch.argmax(labels, dim=1))
        # print(torch.argmax(outputs, dim=1), torch.argmax(labels, dim=1))
        avg_loss.backward()

        optimizer.step()

        running_loss += avg_loss.item()
        last_loss = running_loss
        # last_avg_prec = avg_prec

        # appending the train loss for grpah plotting
        cnn_train_loss.append(avg_loss.item())

    outputs = torch.cat(list_outputs, dim=0)
    labels = torch.cat(list_labels).squeeze()
    # print(labels)
    # print(torch.argmax(outputs, dim=1))
    acc = metric(outputs, labels)

    # adding for the graph 
    cnn_train_acc.append(acc)

    print("epoch {}, loss {:.3f}, train acc {:.3f}".format(epoch, last_loss, acc))
    # model_scripted = torch.jit.script(net)
    # torch.save(net.state_dict(), "model_state_convnet_01-06-23_.pt")
    # torch.save(net, "model_convnet_{}_{}.pt".format("12-05-23", epoch))
    
    # validation
    list_outputs = []
    list_labels = []
    
    with torch.no_grad():
        running_vloss = 0.0
        for i, data in enumerate(val_dataloader):
            # net.init_sequence(batch_size)

            vinputs, vlabels = data[0].to(device_), data[1].to(device_)
            vlabels = vlabels.type(torch.float)
            voutputs = net(vinputs.type(torch.float))
            list_outputs.append(voutputs)
            list_labels.append(torch.argmax(vlabels, dim=1))
            vloss = loss_fn(voutputs, vlabels)
            running_vloss += vloss

            # appending the val loss for the graph plotting
            cnn_val_loss.append(vloss.item())

        voutputs = torch.cat(list_outputs, dim=0)
        vlabels = torch.cat(list_labels).squeeze()
        vacc = metric(voutputs, vlabels)
        
        # appending the val accuracy for plotting graph
        cnn_val_acc.append(vacc)

        avg_vloss = running_vloss / (i+1)
        print("Loss train {:.3f},  validation {:.3f},  val acc {:.3f}".format(avg_loss, avg_vloss, vacc))


Epoch 0


  return F.softmax(y)


epoch 0, loss 3.443, train acc 0.051
Loss train 3.443,  validation 2.539,  val acc 0.026
Epoch 1
epoch 1, loss 4.545, train acc 0.034
Loss train 4.545,  validation 4.400,  val acc 0.023
Epoch 2
epoch 2, loss 4.545, train acc 0.023
Loss train 4.545,  validation 4.400,  val acc 0.023
Epoch 3
epoch 3, loss 4.545, train acc 0.023
Loss train 4.545,  validation 4.399,  val acc 0.023
Epoch 4
epoch 4, loss 4.545, train acc 0.023
Loss train 4.545,  validation 4.399,  val acc 0.023
Epoch 5
epoch 5, loss 4.545, train acc 0.023


KeyboardInterrupt: 

# NTM

In [33]:
class ConvBlock(nn.Module):
    def __init__(self, input_channels: int, out_channels: int, kernel_size: tuple = (3, 3)):
        """Basic Conv Block with convolution layer, etc.

        Args:
            input_channels (int): _description_
            out_channels (int): _description_
            kernel_size (tuple, optional): _description_. Defaults to (3, 3).
        """
        super().__init__()
        
        self.input_channels = input_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        
        self.conv = nn.Conv2d(self.input_channels, self.out_channels, self.kernel_size)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(self.kernel_size)
        self.dropout = nn.Dropout2d()
        self.batchnorm = nn.BatchNorm2d(self.out_channels)
    
    def forward(self, x, training:bool = True):
        conv_res = self.conv(x)
        activated = self.relu(conv_res)
        pooled = self.maxpool(activated)
        if training:
            pooled = self.dropout(pooled)
        y = self.batchnorm(pooled)
        return y
    
    def __repr__(self):
        return f"ConvBlock({self.input_channels, self.out_channels, self.kernel_size})"


class FeedforwardController(nn.Module):
    def __init__(self, num_inputs:int, num_layers:int) -> None:
        super().__init__()
        self.num_inputs = num_inputs
        # self.num_outputs = num_outputs
        self.num_layers = num_layers
        # self.batch_size = batch_size
        self.device_ = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        self.model_ = nn.Sequential(
            ConvBlock(self.num_inputs, 32, (2,2)),
            ConvBlock(32, 64, (2,2))
        )
        self.flatten = nn.Flatten()
        self.final_layer = nn.LazyLinear(44)

    def forward(self, x, training:bool=True):
        y = self.model_(x)
        y = self.flatten(y)
        # y = self.final_layer(y)
        return y

In [42]:
num_inputs = 1
num_outputs = 44
controller_size = 3136 #1024     3136 for FeedforwardController in the cell above 12800 for feedforwardcontroller with 
controller_layers = 1
num_heads = 4
num_classes = 44
N, M = 10, 10
controller_ = FeedforwardController
num_epochs = 5

batch_size = 2 #220

# defining the network
net = EncapsulatedNTM(
    num_inputs,
    num_outputs,
    controller_size,
    controller_layers,
    num_heads,
    N,
    M,
    controller_= controller_, #FeedforwardController,
    vanilla_heads=False
)
# net = EncapsulatedNTM
device_ = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")




In [43]:
train_dataset = dataset(pd.read_csv(PATH_PRINTED_TRAIN_CSV), PATH_PRINTED_TRAIN_IMGS)
val_dataset = dataset(pd.read_csv(PATH_PRINTED_VAL_CSV), PATH_PRINTED_VAL_IMGS)

# creating dataloader
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

In [44]:
num_epochs = 50


# training loop
cnn_train_loss = []
cnn_train_acc = []
cnn_val_loss = []
cnn_val_acc = []


for epoch in range(num_epochs):
    print("Epoch {}".format(epoch))

    last_loss = 0.0
    list_outputs = []
    list_labels = []
    
    net.train()
    for i, data in enumerate(train_dataloader):
        running_loss = 0.0
        
        inputs, labels = data[0].to(device_), data[1].to(device_)
        # print(inputs.size())
        labels = labels.type(torch.float)
        optimizer.zero_grad()

        net.init_sequence(batch_size)


        training = True
        outputs, _ = net(inputs.type(torch.float))
        avg_loss = loss_fn(outputs, labels)

        list_outputs.append(outputs)
        list_labels.append(torch.argmax(labels, dim=1))
        
        # avg_prec = metric(outputs, torch.argmax(labels, dim=1))
        # print(torch.argmax(outputs, dim=1), torch.argmax(labels, dim=1))
        avg_loss.backward()

        optimizer.step()

        running_loss += avg_loss.item()
        last_loss = running_loss
        # last_avg_prec = avg_prec

        # appending the train loss for grpah plotting
        cnn_train_loss.append(avg_loss.item())

    outputs = torch.cat(list_outputs, dim=0)
    labels = torch.cat(list_labels).squeeze()
    # print(labels)
    # print(torch.argmax(outputs, dim=1))
    acc = metric(outputs, labels)

    # adding for the graph 
    cnn_train_acc.append(acc)

    print("epoch {}, loss {:.3f}, train acc {:.3f}".format(epoch, last_loss, acc))
    # model_scripted = torch.jit.script(net)
    # torch.save(net.state_dict(), "model_state_convnet_01-06-23_.pt")
    # torch.save(net, "model_convnet_{}_{}.pt".format("12-05-23", epoch))
    
    # validation
    list_outputs = []
    list_labels = []
    
    with torch.no_grad():
        running_vloss = 0.0
        for i, data in enumerate(val_dataloader):
            net.init_sequence(batch_size)

            vinputs, vlabels = data[0].to(device_), data[1].to(device_)
            vlabels = vlabels.type(torch.float)
            voutputs, _ = net(vinputs.type(torch.float))
            list_outputs.append(voutputs)
            list_labels.append(torch.argmax(vlabels, dim=1))
            vloss = loss_fn(voutputs, vlabels)
            running_vloss += vloss

            # appending the val loss for the graph plotting
            cnn_val_loss.append(vloss.item())

        voutputs = torch.cat(list_outputs, dim=0)
        vlabels = torch.cat(list_labels).squeeze()
        vacc = metric(voutputs, vlabels)
        
        # appending the val accuracy for plotting graph
        cnn_val_acc.append(vacc)

        avg_vloss = running_vloss / (i+1)
        print("Loss train {:.3f},  validation {:.3f},  val acc {:.3f}".format(avg_loss, avg_vloss, vacc))


Epoch 0


  return F.softmax(o), self.state


epoch 0, loss 0.098, train acc 0.022
Loss train 0.098,  validation 0.111,  val acc 0.026
Epoch 1
epoch 1, loss 0.113, train acc 0.025
Loss train 0.113,  validation 0.111,  val acc 0.025
Epoch 2


KeyboardInterrupt: 