In [1]:
import torch
import torch.nn as nn
import torch.functional as F

import os
import numpy as np

from PIL import Image
import cv2

In [2]:
def read_data():
    image_path = 'dataset'
    sub_dir = os.listdir(image_path)
    path = [image_path+'/'+str(sub)+'/'+str(sub_path) for sub in sub_dir for sub_path in os.listdir(image_path+'/'+str(sub))]
    return(path)

In [3]:
def convert_vec(path):
    for sample in path[:5]:
        img = Image.open(sample).convert('RGBA')
        arr = np.array(img) #(Batch, Height, Width, Depth)
        # print(arr.shape)  Original Size        

In [4]:
def image_preprocessing(path):
    image = []
    for i in path:
        img = cv2.imread(i, cv2.IMREAD_UNCHANGED) 
        
        ### Resize ###
        height = 220
        width = 220
        dim = (width, height)
        res = cv2.resize(img, dim, interpolation=cv2.INTER_LINEAR)
        res = res.reshape(3, 220, 220)
        
        ### Gaussian Smoothening ###
        blur = cv2.GaussianBlur(res, (5,5), 0)
        image.append(blur)
        
        # Final shape [batch_size, input_channels, height, width] 
    return image

In [35]:
class EncoderCNN(nn.Module):
    def __init__(self):
        super(EncoderCNN, self).__init__()
        #---------------#
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=4, stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(8)
        self.relu = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size = 2)
        #---------------#
        self.cnn2 = nn.Conv2d(in_channels=8, out_channels=32, kernel_size=6, stride=1, padding = 2)
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.maxpool2 = nn.MaxPool2d(kernel_size = 2)
        #---------------#
        self.fc1 = nn.Linear(in_features = 466560, out_features = 600)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features = 600, out_features = 10)
        
    def forward(self, x):
        out = self.cnn1(x)
        out = self.batchnorm1(out)
        out = self.relu(out)
        out = self.maxpool1(out)
        out = self.cnn2(out)
        out = self.batchnorm2(out)
        out = self.relu(out)
        out = self.maxpool2(out)
        print(out.shape)
        out = out.view(-1, 466560)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out

In [36]:
def run_model(inputs):
    model = EncoderCNN()
    outputs = model(inputs)
    print(outputs)

In [37]:
def main():
    path = read_data()
    pre_image = torch.tensor(image_preprocessing(path[:5]), dtype=torch.float)
    print(pre_image.shape)
    run_model(pre_image)

In [38]:
if __name__=="__main__":
    main()

torch.Size([5, 3, 220, 220])
torch.Size([5, 32, 54, 54])
tensor([[-0.2072,  0.1306, -0.0814, -0.2503, -0.0023, -0.0441, -0.5391, -0.1127,
         -0.1916,  0.1281]], grad_fn=<AddmmBackward>)
