In [None]:
# !pip install opencv-python
# !pip install tqdm

In [None]:
import numpy as np
import pandas as pd
import math
import os
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle

In [None]:
# Notebook-wide configuration.
# Path of the text file that lists the network layers (passed to Model below).
MODEL_INIT_FILE = 'model_desc.txt'
# Directory the training images are loaded from.
IMAGE_DATASET_DIR = './TRAIN_IMAGES/'
# CSV file with the training annotations (read by CustomImageDataset).
CSV_FILE = 'training-a_revised.csv'
# Number of samples per mini-batch.
MINI_BATCH_SIZE = 64
IMAGE_DIM = 180 # Height and width of the image


<h2>SOFTMAX LAYER</h2>

In [None]:
class Softmax_Layer:
    """Softmax layer: turns each row of scores into a probability distribution.

    The input is treated as a 2-D array of shape (batch, classes); every row of
    the output is non-negative and sums to 1.
    """

    def __init__(self):
        self.layer_type = 'Softmax'

    def __str__(self):
        return f"{self.layer_type} Layer"

    def forward(self, X):
        """Return the row-wise softmax of ``X``.

        Args:
            X: array of shape (batch, classes) holding raw scores.

        Returns:
            Array of the same shape whose rows each sum to 1.
        """
        # Subtracting the per-row maximum leaves the softmax unchanged but keeps
        # np.exp from overflowing on large scores.
        shifted = X - np.max(X, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        # Normalise over the class axis (axis 1), one distribution per sample.
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def backward(self, dZ, learning_rate=0.0001):
        """Pass the incoming gradient through unchanged (as a copy).

        The layer has no parameters, so ``learning_rate`` is unused. No softmax
        Jacobian is applied here; the caller is expected to supply a gradient
        that is already taken with respect to this layer's input.
        """
        return np.copy(dZ)

<h2>ReLU ACTIVATION</h2>

In [None]:
class ReLU_Activation:
    """Element-wise rectified linear unit: negatives become 0, the rest pass through."""

    def __init__(self):
        self.layer_type = 'ReLU'

    def __str__(self):
        return "{} Activation".format(self.layer_type)

    def forward(self, X):
        """Cache the input and return a new array with negative entries zeroed."""
        self.X = X
        return np.maximum(X, 0)

    def backward(self, dZ, learning_rate=0.0001):
        """Multiply ``dZ`` by the ReLU derivative of the cached input.

        The derivative is 1 where the cached input was strictly positive and 0
        where it was negative or exactly zero. ``learning_rate`` is unused
        because the layer has no parameters.
        """
        # Clamp negatives to 0, then sign() maps every positive entry to 1.
        gate = np.sign(np.maximum(self.X, 0))
        return gate * dZ

<h2>FULLY CONNECTED LAYER</h2>

In [None]:
class Fully_Connected_Layer:
    """Dense layer computing ``Z = X @ W + b`` on 2-D input of shape (batch, features).

    ``W`` (features, output_dim) and ``b`` (1, output_dim) are created lazily on
    the first forward pass, once the number of input features is known.
    """

    def __init__(self, output_dim):
        self.layer_type = 'Fully Connected'
        self.output_dim = output_dim
        self.W = None
        self.b = None

    def __str__(self):
        return f"Fully Connected Layer(output_dim={self.output_dim})"

    def forward(self, X):
        """Cache ``X`` and return the affine transform ``X @ W + b``.

        Args:
            X: array of shape (batch, features).

        Returns:
            Array of shape (batch, output_dim).
        """
        self.X = X
        fan_in = X.shape[1]

        if self.W is None:
            # He initialisation scales by the number of inputs per unit
            # (fan-in), not by the batch size.
            self.W = np.random.randn(fan_in, self.output_dim) * math.sqrt(2 / fan_in)

        if self.b is None:
            self.b = np.zeros((1, self.output_dim))

        return X @ self.W + self.b

    def backward(self, dZ, learning_rate=0.0001):
        """Apply one gradient-descent step and return the gradient w.r.t. the input.

        Args:
            dZ: gradient w.r.t. this layer's output, shape (batch, output_dim).
            learning_rate: step size for the parameter update.

        Returns:
            Gradient w.r.t. the cached input, shape (batch, features). It is
            computed with the weights as they were before the update.
        """
        batch_size = self.X.shape[0]

        # Parameter gradients are averaged over the batch (axis 0 of X and dZ).
        dW = self.X.T @ dZ / batch_size
        # keepdims keeps db at (1, output_dim), the same shape as self.b.
        db = np.sum(dZ, axis=0, keepdims=True) / batch_size
        dX = dZ @ self.W.T

        self.W = self.W - learning_rate * dW
        self.b = self.b - learning_rate * db

        return dX

<h2>FLATTENING LAYER</h2>

In [None]:
class Flatenning_Layer:
    """Reshapes each sample to a flat vector and restores the shape on the way back."""

    def __init__(self):
        self.layer_type = 'Flatten'

    def __str__(self):
        return "{} Layer".format(self.layer_type)

    def forward(self, X):
        """Remember the input shape and return ``X`` reshaped to (batch, -1)."""
        self.input_shape = X.shape
        batch_size = self.input_shape[0]
        return X.reshape(batch_size, -1)

    def backward(self, dZ, learning_rate=0.0001):
        """Return a copy of ``dZ`` reshaped to the shape seen in ``forward``.

        ``learning_rate`` is unused because the layer has no parameters.
        """
        # np.array copies by default, so the result never aliases dZ.
        restored = np.array(dZ).reshape(self.input_shape)
        return restored

<h1>MAX POOLING</h1>

In [None]:
class Max_Pooling:
    """Max pooling over square windows of a (batch, height, width, channels) array.

    The output size is derived from ``X.shape[1]`` and used for both spatial
    axes, so inputs are expected to be square.
    """

    def __init__(self, filter_dim, stride):
        self.layer_type = 'Max Pooling'
        self.filter_dim = filter_dim
        self.stride = stride
        self.X = None
        # For every output cell: flat index (row * filter_dim + col) of the
        # maximum inside its window. Filled in by forward().
        self.Z_Max_idx = None

    def __str__(self):
        return f"{self.layer_type} (filter_dim={self.filter_dim}, stride={self.stride})"

    def forward(self, X):
        """Return the window-wise maxima and remember where each maximum was.

        Args:
            X: array of shape (batch, height, width, channels).

        Returns:
            Array of shape (batch, output_dim, output_dim, channels) with
            ``output_dim = (height - filter_dim) // stride + 1``.
        """
        self.X = X
        size, step = self.filter_dim, self.stride
        batch, channels = X.shape[0], X.shape[3]
        output_dim = (X.shape[1] - size) // step + 1

        Z = np.zeros((batch, output_dim, output_dim, channels))
        self.Z_Max_idx = np.zeros(Z.shape, dtype=np.int32)

        for i in range(output_dim):
            for j in range(output_dim):
                # All samples and channels at once: flatten each window to
                # (batch, size*size, channels) so argmax yields row*size + col.
                window = X[:, i * step: i * step + size,
                              j * step: j * step + size, :]
                window = window.reshape(batch, size * size, channels)
                Z[:, i, j, :] = window.max(axis=1)
                self.Z_Max_idx[:, i, j, :] = window.argmax(axis=1)

        return Z

    def backward(self, dZ, learning_rate=0.0001):
        """Route each output gradient back to the input cell that held the maximum.

        Gradients are scattered per output cell, so this is valid for any
        stride: overlapping windows accumulate, and input cells covered by no
        window (or never selected as a maximum) receive 0.

        Args:
            dZ: gradient w.r.t. the output of ``forward``.
            learning_rate: unused; the layer has no parameters.

        Returns:
            Gradient with the same shape as the cached input.
        """
        dX = np.zeros(self.X.shape)
        size, step = self.filter_dim, self.stride
        batch, out_rows, out_cols, channels = self.Z_Max_idx.shape

        # Index grids that pair every (sample, channel) with its own argmax.
        sample_idx = np.arange(batch)[:, None]
        channel_idx = np.arange(channels)[None, :]

        for i in range(out_rows):
            for j in range(out_cols):
                # Decode the flat window index back into (row, col) offsets.
                row_off, col_off = np.divmod(self.Z_Max_idx[:, i, j, :], size)
                dX[sample_idx, i * step + row_off, j * step + col_off, channel_idx] += dZ[:, i, j, :]

        return dX

<h1>CONVOLUTION</h1>

In [None]:
class Convolution:
    """2-D convolution over a (batch, height, width, channels) array.

    Filters ``W`` have shape (num_output_channels, filter_dim, filter_dim,
    in_channels) and ``b`` has shape (num_output_channels,). Both are created
    lazily on the first forward pass. The output size is derived from
    ``X.shape[1]`` and used for both spatial axes, so inputs are expected to be
    square.
    """

    def __init__(self, num_output_channels, filter_dim, stride=1, padding=0):
        self.layer_type = 'Convolution'
        self.num_output_channels = num_output_channels
        self.filter_dim = filter_dim
        self.stride = stride
        self.padding = padding
        self.W = None
        self.b = None

    def __str__(self):
        return f"{self.layer_type} (num_output_channels={self.num_output_channels}, filter_dim={self.filter_dim}, stride={self.stride}, padding={self.padding})"

    def forward(self, X):
        """Convolve ``X`` with the layer's filters.

        Args:
            X: array of shape (batch, height, width, in_channels).

        Returns:
            Array of shape (batch, output_dim, output_dim, num_output_channels)
            with ``output_dim = (height - filter_dim + 2*padding) // stride + 1``.
        """
        self.X = X
        size, step, pad = self.filter_dim, self.stride, self.padding

        self.output_dim = (X.shape[1] - size + 2 * pad) // step + 1

        # Zero-pad the two spatial axes only. The padded copy is cached because
        # backward() must slice exactly the windows that were used here.
        X_padded = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant')
        self.X_padded = X_padded

        in_channels = X.shape[3]
        Z = np.zeros((X.shape[0], self.output_dim, self.output_dim, self.num_output_channels))

        if self.W is None:
            # He initialisation scales by the number of inputs feeding one
            # output value (fan-in), not by the batch size.
            fan_in = size * size * in_channels
            self.W = np.random.randn(self.num_output_channels, size, size, in_channels) * math.sqrt(2 / fan_in)
        if self.b is None:
            self.b = np.zeros((self.num_output_channels))

        for i in range(self.output_dim):
            for j in range(self.output_dim):
                window = X_padded[:, i * step:i * step + size, j * step:j * step + size, :]
                # Contract (row, col, in_channel) of every window with every
                # filter at once -> (batch, num_output_channels).
                Z[:, i, j, :] = np.tensordot(window, self.W, axes=([1, 2, 3], [1, 2, 3])) + self.b

        return Z

    def backward(self, dZ, learning_rate=0.0001):
        """Apply one gradient-descent step and return the gradient w.r.t. the input.

        Parameter gradients (``dW``, ``db``) are averaged over the batch, which
        matches the fully connected layer. The returned input gradient is not
        rescaled, so stacking layers does not shrink it by the batch size at
        every step.

        Args:
            dZ: gradient w.r.t. the output of ``forward``,
                shape (batch, output_dim, output_dim, num_output_channels).
            learning_rate: step size for the parameter update.

        Returns:
            Gradient with the same shape as the (unpadded) cached input,
            computed with the filters as they were before the update.
        """
        size, step, pad = self.filter_dim, self.stride, self.padding
        batch_size = dZ.shape[0]

        dW = np.zeros(self.W.shape)
        # Accumulate in padded coordinates; the border is cropped at the end.
        dX_padded = np.zeros(self.X_padded.shape)

        for i in range(self.output_dim):
            for j in range(self.output_dim):
                rows = slice(i * step, i * step + size)
                cols = slice(j * step, j * step + size)
                window = self.X_padded[:, rows, cols, :]
                grad = dZ[:, i, j, :]  # (batch, num_output_channels)

                # Sum over the batch axis -> same shape as W.
                dW += np.tensordot(grad, window, axes=([0], [0])) / batch_size
                # Sum over output channels -> same shape as the window.
                dX_padded[:, rows, cols, :] += np.tensordot(grad, self.W, axes=([1], [0]))

        db = np.sum(dZ, axis=(0, 1, 2)) / batch_size

        self.W = self.W - learning_rate * dW
        self.b = self.b - learning_rate * db

        # Drop the gradient that fell on the zero padding.
        height, width = self.X.shape[1], self.X.shape[2]
        return dX_padded[:, pad:pad + height, pad:pad + width, :]

<h1>MODEL</h1>

In [None]:
class Model:
    """Sequential network built from a plain-text layer description.

    Each non-empty, non-comment line of the description file names one layer,
    followed by its whitespace-separated integer arguments:

        CONV <num_output_channels> <filter_dim> <stride> <padding>
        MAXPOOL <filter_dim> <stride>
        FC <output_dim>
        FLATTEN
        RELU
        SOFTMAX

    Layer names are case-insensitive; lines starting with ``#`` are comments.
    """

    def __init__(self, filePath):
        self.layers = []
        self.filePath = filePath
        self.build_model()

    def __str__(self):
        string = 'MODEL DETAILS:\n\n'
        for i, layer in enumerate(self.layers):
            string += f"Layer {i+1}: {layer}\n"
        return string

    def build_model(self):
        """Parse ``self.filePath`` and append the described layers to ``self.layers``.

        If the file is missing, a message is printed and no layers are added.
        On an unknown layer name a message is printed and parsing stops; layers
        parsed before that line are kept.
        """
        # check if file exists
        if not os.path.exists(self.filePath):
            print('File does not exist')
            return
        with open(self.filePath, 'r') as file:
            for raw_line in file:
                # Strip first so blank lines and indented comments are skipped.
                line = raw_line.strip()
                if not line or line.startswith('#'):
                    continue

                # split() with no argument tolerates tabs and repeated spaces.
                tokens = line.split()
                layer_name = tokens[0].upper()

                if layer_name == 'FC':
                    output_dim = int(tokens[1])
                    self.layers.append(Fully_Connected_Layer(output_dim))

                elif layer_name == 'CONV':
                    num_output_channels = int(tokens[1])
                    filter_dim = int(tokens[2])
                    stride = int(tokens[3])
                    padding = int(tokens[4])
                    self.layers.append(Convolution(num_output_channels, filter_dim, stride, padding))

                elif layer_name == 'MAXPOOL':
                    filter_dim = int(tokens[1])
                    stride = int(tokens[2])
                    self.layers.append(Max_Pooling(filter_dim, stride))

                elif layer_name == 'FLATTEN':
                    self.layers.append(Flatenning_Layer())

                elif layer_name == 'RELU':
                    self.layers.append(ReLU_Activation())

                elif layer_name == 'SOFTMAX':
                    self.layers.append(Softmax_Layer())

                else:
                    print('Invalid layer name')
                    return

    def forward(self, X):
        """Feed ``X`` through every layer in order and return the final output."""
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def backward(self, dZ, learning_rate=0.0001):
        """Propagate ``dZ`` through the layers in reverse order.

        Each layer's ``backward`` also updates that layer's own parameters.
        Returns the gradient produced by the first layer.
        """
        for layer in reversed(self.layers):
            dZ = layer.backward(dZ, learning_rate)
        return dZ

    def train(self, X, Y, learning_rate=0.0001, epochs=10):
        """Run ``epochs`` forward/backward updates on the single batch (X, Y).

        The gradient fed to the last layer is ``output - Y``.
        NOTE(review): that is the softmax + cross-entropy gradient only if the
        last layer is a softmax and ``Y`` is one-hot; confirm for other setups.
        """
        for epoch in tqdm(range(epochs)):
            Z = self.forward(X)
            dZ = Z - Y
            self.backward(dZ, learning_rate)

    def predict(self, X):
        """Return, for each row of the network output, the index of its largest entry."""
        Z = self.forward(X)
        return np.argmax(Z, axis=1)

    def evaluate(self, X, Y):
        """Return the accuracy in percent of ``predict(X)`` against the argmax of ``Y``."""
        Y_pred = self.predict(X)
        Y_true = np.argmax(Y, axis=1)
        return np.sum(Y_pred == Y_true) / len(Y_true) * 100

    

# PROCESS DATASET

In [None]:
def read_image(path):
    """Load an image as a normalised grayscale array.

    The image is read in grayscale, resized to IMAGE_DIM x IMAGE_DIM, given a
    trailing channel axis and scaled from [0, 255] to [0, 1].

    Args:
        path: location of the image file.

    Returns:
        float32 array of shape (IMAGE_DIM, IMAGE_DIM, 1).

    Raises:
        FileNotFoundError: if OpenCV could not read an image from ``path``.
    """
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if img is None:
        raise FileNotFoundError(f"Could not read image (missing or unsupported file): {path}")
    # Resizing the image
    img = cv2.resize(img, (IMAGE_DIM, IMAGE_DIM))
    # Add the channel axis (1 for grayscale) and switch to float for scaling.
    img = img.reshape(IMAGE_DIM, IMAGE_DIM, 1).astype('float32')
    img /= 255
    return img

In [None]:
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html#creating-a-custom-dataset-for-your-files
class CustomImageDataset:
    """Index-addressable image dataset described by an annotations CSV.

    Column 0 of the CSV holds the image file name (relative to ``img_dir``) and
    column 3 holds the integer class label. ``transform`` and
    ``target_transform`` are stored on the instance but are not applied.
    Modelled on the custom dataset from the PyTorch data tutorial:
    https://pytorch.org/tutorials/beginner/basics/data_tutorial.html#creating-a-custom-dataset-for-your-files
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir, self.transform, self.target_transform = img_dir, transform, target_transform

    def __len__(self):
        """Number of annotation rows."""
        return self.img_labels.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, one_hot_label)`` for annotation row ``idx``.

        The image comes from ``read_image``; the label is row ``label`` of a
        10x10 identity matrix, i.e. a length-10 one-hot vector.
        """
        file_name = self.img_labels.iloc[idx, 0]
        image = read_image(os.path.join(self.img_dir, file_name))
        class_id = self.img_labels.iloc[idx, 3]  # column 3 carries the label
        one_hot = np.identity(10)[class_id]
        return image, one_hot

In [None]:
class CustomDataLoader:
    """Iterates over a dataset in mini-batches of stacked NumPy arrays.

    ``dataset`` must support ``len()`` and integer indexing, with each item
    being an ``(image, label)`` pair. The loader can be iterated repeatedly:
    every ``for`` loop starts again from the first batch. With ``shuffle=True``
    a fresh random sample order is drawn for each pass.
    """

    def __init__(self, dataset, batch_size=32, shuffle=False):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.current_idx = 0
        # Sample order for the current pass; created when iteration starts.
        self._order = None

    def _new_order(self):
        """Return the dataset indices in the order they will be visited."""
        count = len(self.dataset)
        if self.shuffle:
            return np.random.permutation(count)
        return np.arange(count)

    def __iter__(self):
        # Rewind so the loader is reusable across passes (e.g. several epochs).
        self.current_idx = 0
        self._order = self._new_order()
        return self

    def __next__(self):
        """Return the next ``(images, labels)`` batch; the last one may be smaller.

        Raises:
            StopIteration: when every sample of the current pass was returned.
        """
        if self._order is None:
            # next() was called without iter(); start a pass on demand.
            self._order = self._new_order()

        total = len(self._order)
        if self.current_idx >= total:
            raise StopIteration

        # Get the next batch.
        stop = min(self.current_idx + self.batch_size, total)
        batch = [self.dataset[int(i)] for i in self._order[self.current_idx:stop]]
        self.current_idx = stop

        images, labels = zip(*batch)
        return np.stack(images), np.stack(labels)


In [None]:
# Training data: annotations come from CSV_FILE, images from IMAGE_DATASET_DIR.
dataset = CustomImageDataset(annotations_file=CSV_FILE, img_dir=IMAGE_DATASET_DIR)
# Batch size comes from the config cell instead of a repeated literal.
dataloader = CustomDataLoader(dataset, batch_size=MINI_BATCH_SIZE, shuffle=False)

# BUILD MODEL

In [None]:
# Build the network from the layer description file and print its layer list.
model = Model(MODEL_INIT_FILE)
print(model)

# Walk through the training loader once. For every mini-batch, model.train
# runs `epochs` forward/backward updates on that batch before moving on.
for images, labels in dataloader:
    print(images.shape, labels.shape)
    # print(labels)
    model.train(images, labels, learning_rate=0.0001, epochs=10)

# SAVE MODEL

In [None]:
# Serialise the whole Model object (layers and their learned parameters) to disk.
# NOTE: pickle files can execute code when loaded; only load ones you trust.
with open('model.pkl', 'wb') as file:
    pickle.dump(model, file)

# TESTING

In [None]:
# Location of the test images and of the CSV holding their annotations.
TESTING_DIR = './TEST/'
TEST_CSV_FILE = './TEST_csv.csv'

test_dataset = CustomImageDataset(annotations_file= TEST_CSV_FILE, img_dir=TESTING_DIR)
test_dataloader = CustomDataLoader(test_dataset, batch_size=48, shuffle=False)

# For each test batch, print its shapes, the predicted class indices and the
# accuracy (in percent) on that batch alone. Nothing is aggregated across batches.
for images, labels in test_dataloader:
    print(images.shape, labels.shape)
    print(model.predict(images))
    print(model.evaluate(images, labels))