In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models


# Step 1: Load and preprocess image data

input1 = 'alpaca/'
filename1 = []
for filename in os.listdir(input1):
    filename1.append(input1 + filename)

input2 = 'not alpaca/'
filename2 = []
for filename in os.listdir(input2):
    filename2.append(input2 + filename)

# Load and preprocess images
def load_and_preprocess_image(filename):
    image = cv2.imread(filename)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB
    image = cv2.resize(image, (32, 32))  # Resize to 32x32
    image = image.astype('float32') / 255.0  # Normalize to [0, 1]
    return image

X1 = np.array([load_and_preprocess_image(filename) for filename in filename1])
Y1 = np.ones(len(X1), dtype=np.int32)

X2 = np.array([load_and_preprocess_image(filename) for filename in filename2])
Y2 = np.zeros(len(X2), dtype=np.int32)

# Concatenate and split data
X = np.concatenate((X1, X2), axis=0)
Y = np.concatenate((Y1, Y2), axis=0)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=42)
X_test, X_val, Y_test, Y_val = train_test_split(X_test, Y_test, test_size=0.50, random_state=42)



array([[[[0.70980394, 0.827451  , 0.93333334],
         [0.7254902 , 0.84313726, 0.9490196 ],
         [0.7647059 , 0.85882354, 0.95686275],
         ...,
         [0.99607843, 0.99607843, 0.99607843],
         [0.99607843, 0.99607843, 0.99607843],
         [0.9647059 , 0.9882353 , 0.9882353 ]],

        [[0.7411765 , 0.8352941 , 0.93333334],
         [0.7607843 , 0.8666667 , 0.95686275],
         [0.78431374, 0.8745098 , 0.96862745],
         ...,
         [0.99607843, 0.99607843, 0.99607843],
         [0.99607843, 0.99607843, 0.99607843],
         [0.9843137 , 0.99607843, 0.99215686]],

        [[0.7607843 , 0.8627451 , 0.94509804],
         [0.78431374, 0.88235295, 0.9607843 ],
         [0.8039216 , 0.8862745 , 0.96862745],
         ...,
         [0.99607843, 0.99607843, 0.99607843],
         [0.99607843, 0.99607843, 0.99607843],
         [0.99607843, 0.99607843, 0.99607843]],

        ...,

        [[0.3647059 , 0.32941177, 0.27058825],
         [0.32941177, 0.29411766, 0.2627451 ]

In [3]:
np.shape(X_train)

(228, 32, 32, 3)

In [4]:
import torch
import torch.nn as nn
import numpy as np

class ConvolutionalLayer(nn.Module):
    def __init__(self, input_size, num_channels, filter_size):
        super(ConvolutionalLayer, self).__init__()
        self.input_size = input_size
        self.num_channels = num_channels
        self.filter_size = filter_size
        #make weights matrix the same size as the input
        self.weight_matrix = nn.Parameter(torch.randn(batch_size, num_channels, input_size[0], input_size[1]))
        self.output_size = (input_size[0] - filter_size[0] + 1, input_size[1] - filter_size[1] + 1)
        self.output_feature_map = torch.zeros((self.num_channels, self.output_size[0], self.output_size[1]))

    def forward(self, input_feature_map):
        batch_size = input_feature_map.size(0)
        output_feature_maps = []
        for i in range(batch_size):
            output_feature_map = torch.zeros((self.num_channels, self.output_size[0], self.output_size[1]))
            for k in range(self.num_channels):
                for j in range(self.output_size[0]):
                    for l in range(self.output_size[1]):
                        #the same receptive field is applied to the weights as the input
                        receptive_field = input_feature_map[i, :, j:j+self.filter_size[0], l:l+self.filter_size[1]]
                        receptive_field_weight = self.weight_matrix[i, :, j:j+self.filter_size[0], l:l+self.filter_size[1]]
                        weighted_output = torch.sum(receptive_field * receptive_field_weight, dim=(1,2))
                        output_feature_map[k, j, l] = weighted_output[k]
            output_feature_maps.append(output_feature_map)
        output_feature_maps = torch.stack(output_feature_maps, dim=0)
        return output_feature_maps
    
    def backward(self, grad_output):
        batch_size = grad_output.size(0)
        grad_input = torch.zeros((batch_size, self.input_size[0], self.input_size[1], self.filter_size[0], self.filter_size[1]), device=self.weight_matrix.device)
        grad_weight = torch.zeros_like(self.weight_matrix)
        for i in range(batch_size):
            for k in range(self.num_channels):
                for j in range(self.output_size[0]):
                    for l in range(self.output_size[1]):
                        # compute the gradient of the output w.r.t. the receptive field
                        grad_weight[k] += grad_output[i, k, j, l] * self.input_feature_map[i, :, j:j+self.filter_size[0], l:l+self.filter_size[1]]
                        # compute the gradient of the output w.r.t. the input feature map
                        grad_input[i, :, j:j+self.filter_size[0], l:l+self.filter_size[1]] += grad_output[i, k, j, l] * self.weight_matrix[k]
        self.weight_matrix.grad = torch.sum(grad_weight, dim=0, keepdim=True)
        return grad_input


In [5]:
    
batch_size = 2
num_channels = 3
input_size = (4, 4)
#input_feature_map = torch.randn(batch_size, num_channels, input_size[0], input_size[1])
input_feature_map = X_train

# Create a ConvolutionalLayer instance
conv_layer = ConvolutionalLayer(input_size, num_channels, filter_size=(3, 3))

# Forward pass
output_feature_map = conv_layer(input_feature_map)

# Print the shapes of the input and output feature maps
print("Input feature map shape:", input_feature_map.shape)
print("Output feature map shape:", output_feature_map.shape)
output_feature_map

TypeError: 'int' object is not callable

Now try on X_train

In [6]:

batch_size = 2
num_channels = 3
input_size = (32,32)
#input_feature_map = torch.randn(batch_size, num_channels, input_size[0], input_size[1])
input_feature_map = X_train

# Create a ConvolutionalLayer instance
conv_layer = ConvolutionalLayer(input_size, num_channels, filter_size=(3, 3))

# Forward pass
output_feature_map = conv_layer(input_feature_map)

# Print the shapes of the input and output feature maps
print("Input feature map shape:", input_feature_map.shape)
print("Output feature map shape:", output_feature_map.shape)
output_feature_map

TypeError: 'int' object is not callable