In [6]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

REBUILD_DATA = False # Flag to ensure that we don't build data everytime we run the code



In [7]:
class DogsVSCats():
    IMG_SIZE = 50
    CATS = "PetImages/Cat"
    DOGS = "PetImages/Dog"
    TESTING = "PetImages/Testing"
    LABELS = {CATS: 0, DOGS: 1}
    training_data = []

    catcount = 0
    dogcount = 0

    def make_training_data(self):
        for label in self.LABELS:
            print(label)
            for f in tqdm(os.listdir(label)):
                if "jpg" in f:
                    try: # We do this because there is some problem with some of the images
                        path = os.path.join(label, f)
                        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE) # We ask the question, is color (an added data) required to classify cats and dogs? if answer is no, then we try to simplify the model as much as possible
                        img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE)) # We convert every image to 50X50 size
                        self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])  # do something like print(np.eye(2)[1]), just makes one_hot 
                        #print(np.eye(2)[self.LABELS[label]])

                        if label == self.CATS:
                            self.catcount += 1
                        elif label == self.DOGS:
                            self.dogcount += 1

                    except Exception as e:
                        pass
                        #print(label, f, str(e))

        np.random.shuffle(self.training_data)
        np.save("training_data.npy", self.training_data)
        print('Cats:',dogsvcats.catcount) # Pretty sus for what we count but otherwise showing 0 for some reason
        print('Dogs:',dogsvcats.dogcount)



In [8]:
# Now we are ready to load the data
if REBUILD_DATA:
    dogsvcats = DogsVSCats()
    dogsvcats.make_training_data()

training_data = np.load("training_data.npy", allow_pickle=True)
print(training_data[0])

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5) # inputs, Outputs and Kernel size
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)
        
        # Now at some point we would want to go to a linear layer. But that is tricky because we don't have a flatening function and we dont know how the dimensions will change
        # self.fc1 = nn.Linear(?, 512) # This is how we need to proceede
        # self.fc2 = nn.Linear(512, 2)
                
        # What we do is just pass random data and check the dimension that should replace the '?'
        x = torch.randn(50,50).view(-1, 1, 50, 50) # 50*50 is the image size, 1 is the input to the conv1
        self._to_linear = None # We will populate this to the dimension that is needed
        self.convs(x) # This convs will serve as forward method but is actually not the forward method
        
        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 2)

        
    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2)) # (2,2) is the shape of pooling layer
        x = F.max_pool2d(F.relu(self.conv2(x)), (2,2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2,2))
        
        # print (x[0].shape)
        if self._to_linear == None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
        return x
    
    def forward(self, x):
        x = self.convs(x) # Instead of writing the 3 pooling layers again, we just write the function # Convolutional Layers
        # Since self._to_linear got populated, it's no longer None
        x = x.view(-1, self._to_linear)
        x = F.relu(self.fc1(x)) # Linear layers
        x = self.fc2(x) # Final linear layer to output cat/dog
        
        return F.softmax(x, dim = 1) # The x is a batch of data, so how will you reference the batches? The 0th dimensions will be all of the batches, so dim = 1 is required to specify the classification points
      
net = Net()

[array([[ 73,  58,  72, ..., 115,  76,  61],
       [ 72,  67,  66, ..., 118, 124,  62],
       [ 66,  65,  66, ..., 130, 154,  68],
       ...,
       [ 84,  93,  62, ...,  65,  47,  52],
       [ 91,  89,  72, ...,  52,  47,  53],
       [ 85,  83,  80, ...,  58,  55,  55]], dtype=uint8)
 array([1., 0.])]


To check if cuda is available for us, we can use the following line

In [9]:
torch.cuda.is_available()

True

In [10]:
device = torch.device("cuda:0")
device

device(type='cuda', index=0)

We can write a short code to set the device if cuda is available:

In [None]:
if torch.cuda.is_available():
  device = torch.device("cuda:0")
  print("Running on the GPU")
else:
  device = torch.device("cpu")
  print("Running on the CPU")

It's easy in Pytorch to assign multiple layers to multiple GPUs and really common taks is like an encoder and decoder network.

In [11]:
torch.cuda.device_count() # To check how many GPUs we actually have

1

We can take our entire neural network to the devise that we just set using. Tensors on the GPU can only interact with the tensors on the GPU, you can't cross interact, you will have to convert and put them exactly where you want them.

In [12]:
net.to(device) # Or net = Net().to(device) whenever we define net

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=2, bias=True)
)

In [13]:
X = torch.Tensor(np.array([i[0] for i in training_data])).view(-1, 50, 50) # We are making a numpy array of the list because it is extreemely slow to convert a list to tensor (got as a warning)
X = X/255.0 # To scale the values to lie in 0 to 1
y = torch.Tensor(np.array([i[1] for i in training_data])) 

VAL_PCT = 0.1 # Value percent - testing for 10% of our dataset
val_size = int(len(X)*VAL_PCT)

In [14]:
train_X = X[:-val_size]
test_X = X[-val_size:]

train_y = y[:-val_size]
test_y = y[-val_size:]

print(len(train_X))
print(len(test_X))

22452
2494


In [19]:
BATCH_SIZE = 100 # If you get a memory error, always anywhere lower the batch size
EPOCHS = 10

def train(net):
  optimizer = optim.Adam(net.parameters(), lr=0.001)
  loss_function = nn.MSELoss()
  for epoch in range(EPOCHS):
      for i in tqdm(range(0, len(train_X), BATCH_SIZE)):
          # print(i, i+BATCH_SIZE)
          batch_X = train_X[i:i+BATCH_SIZE].view(-1,1,50,50).to(device) # We add .to(device) here to take the data to the GPU
          batch_y = train_y[i:i+BATCH_SIZE].to(device) # We add .to(device) here to take the data to the GPU
          
          # Now we plan to do a fitnet, and whenever we do that, we need to zero the gradients
          # You can use model.zero_grad (our model is net) or optimizer.zero_grad, the difference between the two is based on the parameters that optimizer controls, in this case it controls all the parameters, no there's no difference
          # There might be a case where we have 2 models with different optimizers on each side. We have to choose the way to do it based on the case
          net.zero_grad() # Is the safest way to do
          outputs = net(batch_X)
          loss = loss_function(outputs, batch_y)
          loss.backward()
          optimizer.step()

      print(f"Epoch: {epoch}. Loss: {loss}")

def test(net):
  correct = 0
  total = 0

  with torch.no_grad():
      for i in tqdm(range(len(test_X))):
          real_class = torch.argmax(test_y[i]).to(device) # We send the testing to the device as well
          net_out = net(test_X[i].view(-1, 1, 50, 50).to(device))[0] # We need test_X to be on the device as well
          predicted_class = torch.argmax(net_out)
          if predicted_class == real_class:
              correct += 1
          total += 1
          
  accuracy = correct/total
  print("")
  print("Accuracy: ", round(accuracy, 3))


train(net)
test(net)

100%|██████████| 225/225 [00:01<00:00, 133.78it/s]


Epoch: 0. Loss: 0.16294144093990326


100%|██████████| 225/225 [00:01<00:00, 141.59it/s]


Epoch: 1. Loss: 0.13670778274536133


100%|██████████| 225/225 [00:01<00:00, 141.66it/s]


Epoch: 2. Loss: 0.09709067642688751


100%|██████████| 225/225 [00:01<00:00, 141.53it/s]


Epoch: 3. Loss: 0.07227069139480591


100%|██████████| 225/225 [00:01<00:00, 141.13it/s]


Epoch: 4. Loss: 0.07734041661024094


100%|██████████| 225/225 [00:01<00:00, 141.39it/s]


Epoch: 5. Loss: 0.0645008236169815


100%|██████████| 225/225 [00:01<00:00, 141.24it/s]


Epoch: 6. Loss: 0.05797215923666954


100%|██████████| 225/225 [00:01<00:00, 141.33it/s]


Epoch: 7. Loss: 0.06194337457418442


100%|██████████| 225/225 [00:01<00:00, 141.43it/s]


Epoch: 8. Loss: 0.04426567628979683


100%|██████████| 225/225 [00:01<00:00, 141.00it/s]


Epoch: 9. Loss: 0.052392859011888504


100%|██████████| 2494/2494 [00:01<00:00, 1779.64it/s]

Accuracy:  0.736





100%|██████████| 2494/2494 [00:01<00:00, 1730.57it/s]

Accuracy:  0.633



