In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm 

In [None]:
REBUILD_DATA = True

class DogsVSCats:
    IMG_SIZE = 50
    CATS = "./data/PetImages/Cat"
    DOGS = "./data/PetImages/Dog"
    LABELS = {CATS: 0, DOGS: 1}
    training_data = []
    dog_count = 0
    cat_count = 0
    
    
    def build_training_data(self):
        for label in self.LABELS:
            for f in tqdm(os.listdir(label)):
                try:
                    path = os.path.join(label, f)
                    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                    img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE))
                    self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])
                    if label == self.CATS:
                        self.cat_count +=1
                    if label == self.DOGS:
                        self.dog_count +=1
                except Exception as e:
                    pass
        np.random.shuffle(self.training_data)
        np.save("training_data.npy",np.array(self.training_data, dtype=object), True, fix_imports=True)
        # print("CATS: ", self.cat_count)
        # print("DOGS: ", self.dog_count+ self.cat_count)

if REBUILD_DATA:
    dogs_vs_cats = DogsVSCats()
    dogs_vs_cats.build_training_data()

  0%|          | 0/12501 [00:00<?, ?it/s]

  1%|          | 130/12501 [00:00<00:09, 1292.56it/s]Corrupt JPEG data: 128 extraneous bytes before marker 0xd9
 66%|██████▌   | 8234/12501 [00:05<00:03, 1370.48it/s]Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
 77%|███████▋  | 9656/12501 [00:06<00:02, 1409.63it/s]Corrupt JPEG data: 239 extraneous bytes before marker 0xd9
 85%|████████▌ | 10679/12501 [00:07<00:01, 1430.96it/s]Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9
Corrupt JPEG data: 99 extraneous bytes before marker 0xd9
100%|██████████| 12501/12501 [00:08<00:00, 1418.80it/s]
  2%|▏         | 292/12501 [00:00<00:08, 1469.03it/s]Corrupt JPEG data: 399 extraneous bytes before marker 0xd9
  7%|▋         | 832/12501 [00:00<00:09, 1283.18it/s]Corrupt JPEG data: 65 extraneous bytes before marker 0xd9
 21%|██▏       | 2674/12501 [00:02<00:09, 999.25it/s] Corrupt JPEG data: 2230 extraneous bytes before marker 0xd9
 33%|███▎      | 4116/12501 [00:03<00:06, 1261.41it/s]Corrupt JPEG data: 254 extraneous bytes be

In [None]:
training_data = np.load("training_data.npy", allow_pickle=True)
training_data[0]

array([array([[240, 254, 235, ..., 251, 255, 245],
              [254, 227, 238, ..., 234, 224, 244],
              [252, 254, 249, ..., 248, 239, 238],
              ...,
              [200, 251, 251, ..., 154, 144, 218],
              [235, 210, 247, ..., 235, 218, 124],
              [253, 212, 212, ..., 151, 123, 243]], dtype=uint8),
       array([0., 1.])], dtype=object)

In [None]:
from torch._tensor import Tensor


from torch._tensor import Tensor
from torch.nn.modules.conv import Conv2d


import torch as T
import torch.nn as nn
import torch.nn.functional as F



class ConvolutionalNeuralNetWork(nn.Module):
    def __init__(self):
        super().__init__()
        self.convolutional_layer_one: Conv2d = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.convolutional_layer_two: Conv2d = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.convolutional_layer_three: Conv2d = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5)
        
        # Initializing a final layer for distribution
        """
        The first argument -1 means 'for any size of tensor'.
        The second (hint 1) represent the entire batch of data.
        The 50 by 50 represents the size of images within the batch of data.
        """
        x: Tensor = T.randn(50, 50).view(-1, 1, 50, 50)
        self._to_linear = 0
        self.layer_pooling(x)
        self.fully_connected_layer_one = nn.Linear(self._to_linear, 512)
        # Final layer
        self.fully_connected_layer_two = nn.Linear( 512, 2)
        
    def layer_pooling(self, layer: Tensor) -> Tensor:
        output_layer_one: Tensor=  F.max_pool2d(F.relu(self.convolutional_layer_one(layer)), (2,2))
        output_layer_two: Tensor=  F.max_pool2d(F.relu(self.convolutional_layer_two(output_layer_one)), (2,2))
        layer_pool: Tensor=  F.max_pool2d(F.relu(self.convolutional_layer_three(output_layer_two)), (2,2))

        if self._to_linear == 0:
            layer_one_shape = layer_pool[0].shape[0]
            layer_two_shape = layer_pool[0].shape[1]
            layer_three_shape = layer_pool[0].shape[2]
            product_of_layer_dimensions = (layer_one_shape * layer_two_shape * layer_three_shape)
            self._to_linear =  product_of_layer_dimensions
        return layer_pool
    
    def flattenFlattening(self, layer: Tensor) -> Tensor | None:
        layer_hat: Tensor = self.layer_pooling(layer)
        if self._to_linear != 0:
            layer_hat = layer_hat.view(-1, self._to_linear)
            return  layer_hat
        return None

    def forward(self, layer) -> Tensor | None:
        flatten_final_layer: Tensor | None = self.flattenFlattening(layer)
        if flatten_final_layer is not None:
            flatten_final_layer = F.relu(input=self.fully_connected_layer_one(flatten_final_layer))
            final_layer = self.fully_connected_layer_two(flatten_final_layer)
            return F.softmax(final_layer, dim=1)
neural_net: ConvolutionalNeuralNetWork = ConvolutionalNeuralNetWork()


In [None]:
import torch.optim as optim
optimizer = optim.Adam(neural_net.parameters(), lr=0.001)
loss_func = nn.MSELoss()

images = Tensor(np.array([image[0] for image in training_data]))
images = images/255.0
y = Tensor([y[1] for y in training_data])

# The percentage of data we take from the set for sampling
sample_percent = 0.1
sample_end_index = int(len(images) * sample_percent)
Image_sample = images[:-sample_end_index]
y_sample = y[:-sample_end_index]

Image_test_sample = images[-sample_end_index:]
y_test = y[-sample_end_index:]

print(len(Image_sample))
print(len(Image_test_sample))



22452
2494


In [None]:
BATCH_SIZE = 100
EPOCHS = 2
loss = 0
for epoch in range(EPOCHS):
    for index in tqdm(range(0, len(Image_sample), BATCH_SIZE)):
        batch_images = Image_sample[index: index+BATCH_SIZE].view(-1, 1, 50, 50)
        batch_y = y_sample[index: index+BATCH_SIZE]
        neural_net.zero_grad()
        outputs = neural_net(batch_images)
        loss = loss_func(outputs, batch_y)
        loss.backward()
        optimizer.step()

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
100%|██████████| 225/225 [00:55<00:00,  4.07it/s]
100%|██████████| 225/225 [00:53<00:00,  4.22it/s]

tensor(0.2078, grad_fn=<MseLossBackward0>)





# Run our predictions

In [None]:
correct = 0
total = 0
with T.no_grad():
    for index in tqdm(range(len(Image_test_sample))):
        real_class = T.argmax(y_test[index])
        net_out = neural_net(Image_test_sample[index].view(-1, 1, 50, 50))[0]
        
        predicted_class = T.argmax(net_out)
        if predicted_class == real_class:
            correct +=1
        total +=1
print(f"Accuracy: {(round(correct/total,3)*100)}%")

100%|██████████| 2494/2494 [00:05<00:00, 418.14it/s]

Accuracy: 69.1%





In [2]:
pip freeze


absl-py==2.1.0
accelerate==1.8.1
acme==1.21.0
aiofiles==24.1.0
aiohttp==3.9.5
aiosignal==1.3.1
alembic==1.16.4
altair==5.5.0
anaconda==0.0.1.1
annotated-types==0.7.0
ano-code==8.0.34
anyio==4.4.0
argcomplete==1.8.1
argon2-cffi==25.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
ase==3.22.1
asttokens==3.0.0
astunparse==1.6.3
async-lru==2.0.5
async-timeout==4.0.3
atari-py==0.2.9
attrs==23.2.0
Automat==20.2.0
babel==2.17.0
backoff==2.2.1
bcrypt==3.2.0
beautifulsoup4==4.13.4
bitsandbytes==0.46.1
bleach==4.1.0
blinker==1.9.0
box2d-py==2.3.8
Brotli==1.1.0
CacheControl==0.12.10
cachetools==5.5.2
cachy==0.3.0
certbot==1.21.0
certbot-nginx==1.21.0
certifi==2024.6.2
cffi==1.17.1
chardet==4.0.0
charset-normalizer==3.3.2
cleo==0.8.1
click==8.1.7
clikit==0.6.2
cloud-init==25.1.4
cloudpickle==3.0.0
cmake==3.29.6
cmarkgfm==0.8.0
colorama==0.4.4
colorlog==6.9.0
comm==0.2.2
command-not-found==0.3
ConfigArgParse==1.5.3
configobj==5.0.6
constantly==15.1.0
contourpy==1.3.2
crashtest==0.3.1
cryptography==3.4