In [1]:
import os
import cv2
#real-time computer vision
import numpy as np
from tqdm import tqdm
#smart progress bar

REBUILD_DATA = True
#flag, we dont want to run preprocessing data every single time we run the code, we just want to run it once

class DogsVSCats():
    IMG_SIZE = 50
    #resize the images to 50 by 50
    CATS = "datasets/PetImages/Cat"
    DOGS = "datasets/PetImages/Dog"
    LABELS = {CATS: 0, DOGS : 1}
    training_data = []
    catcount = 0
    dogcount = 0
    
    def make_training_data(self):
        for label in self.LABELS:
            print(label)
            for f in tqdm(os.listdir(label)):
                try:
                    path = os.path.join(label, f)
                    #joining the path + the name of the file
                    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                    #convert to grayscale
                    img = cv2.resize(img,(self.IMG_SIZE, self.IMG_SIZE))
                    #image resize
                    self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])
                    #combine the images with the one hot vector

                    if label == self.CATS:
                        self.catcount += 1
                    elif label == self.DOGS:
                        self.dogcount += 1
                except Exception as e:
                    pass
                
        np.random.shuffle(self.training_data)
        np.save("training_data.npy", self.training_data)
        print("Cats: ", self.catcount)
        print("Dogs: ", self.dogcount)

if REBUILD_DATA:
    dogsvcats = DogsVSCats()
    dogsvcats.make_training_data()
    
training_data = np.load("training_data.npy", allow_pickle = True)
#print(len(training_data))

datasets/PetImages/Cat


100%|███████████████████████████████████████████████████████████████████████████| 12501/12501 [01:11<00:00, 175.11it/s]


datasets/PetImages/Dog


100%|███████████████████████████████████████████████████████████████████████████| 12501/12501 [01:38<00:00, 127.35it/s]
  arr = np.asanyarray(arr)


Cats:  12476
Dogs:  12470


NameError: name 'torch' is not defined

In [167]:
import torch
import torch.nn as nn 
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        #2 dimensional data (images)
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, 5),
            #nn.BatchNorm2d(32)
        )
        #input is 1, output is 32, window size 5
        #torch.nn.Conv2d(in_channels, out_channels, kernel_size
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 5),
            #nn.BatchNorm2d(64)
        )
        
        #self.conv3 = nn.Sequential(
            #nn.Conv2d(64, 128, 5),
            #nn.BatchNorm2d(128)
        #)
        
        x = torch.randn(50,50).view(-1,1,50,50)
        self._to_linear = None
        self.convs(x)
        
        self.fc1 = nn.Linear(self._to_linear ,512)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 2)
        self.relu = nn.ReLU()
        
    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2,2))
        #x = F.max_pool2d(F.relu(self.conv3(x)), (2,2))
        #print(x[0].shape)
        
        if self._to_linear is None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
            print(self._to_linear)
        return x
    
    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self._to_linear)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
        
net = Net()

5184


In [168]:
device = torch.device("cuda:0")
device

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("running on the GPU")
else:
    device = torch.device("cpu")
    print("running on the CPU")

net = Net().to(device)

running on the GPU
5184


In [169]:
import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr = 0.001)
loss_function = nn.MSELoss()

X = torch.Tensor([i[0] for i in training_data]).view(-1,50,50)
X = X/255.0
y = torch.Tensor([i[1] for i in training_data])

VAL_PCT = 0.1
#10 percent test set
val_size = int(len(X)*VAL_PCT)
print(val_size)
#2494

2494


In [170]:
train_X = X[:-val_size]
train_y = y[:-val_size]

test_X = X[-val_size:]
test_y = y[-val_size:]
#negative val size onward

In [192]:
BATCH_SIZE = 100
EPOCHS = 5

for epoch in range(EPOCHS):
    print(loss)
    for i in tqdm(range(0, len(train_X), BATCH_SIZE)):
        
        batch_X = train_X[i:i+BATCH_SIZE].view(-1,1,50,50).to(device)
        batch_y = train_y[i:i+BATCH_SIZE].to(device)
        
        net.zero_grad()
        outputs = net(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()
        
print(loss)        



tensor(0.0367, device='cuda:0', grad_fn=<MseLossBackward>)


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:04<00:00, 48.00it/s]


tensor(0.0437, device='cuda:0', grad_fn=<MseLossBackward>)


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:04<00:00, 48.51it/s]


tensor(0.0385, device='cuda:0', grad_fn=<MseLossBackward>)


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:04<00:00, 48.63it/s]


tensor(0.0469, device='cuda:0', grad_fn=<MseLossBackward>)


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:04<00:00, 48.34it/s]


tensor(0.0368, device='cuda:0', grad_fn=<MseLossBackward>)


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [00:04<00:00, 48.22it/s]

tensor(0.0480, device='cuda:0', grad_fn=<MseLossBackward>)





In [193]:
correct = 0
total = 0
with torch.no_grad():
    for i in tqdm(range(len(test_X))):
        real_class = torch.argmax(test_y[i].to(device))
        net_out = net(test_X[i].view(-1,1,50,50).to(device))[0]
        predicted_class = torch.argmax(net_out)
        if predicted_class == real_class:
            correct += 1
        total += 1
    
print("Accuracy:", round(correct/total,3))

100%|█████████████████████████████████████████████████████████████████████████████| 2494/2494 [00:04<00:00, 602.40it/s]

Accuracy: 0.792



