In [1]:
import os
import numpy as np
import cv2
from PIL import Image
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.optim import Adam
import matplotlib.pyplot as plt

In [2]:
TRAIN_DIR = "Data/dogs-vs-cats/train/train/"
TEST_DIR = "Data/dogs-vs-cats/test1/test1/"

In [3]:
def getlables(dirctory):
    traindata = []
    for file in os.listdir(dirctory):
        lable = file.split('.')[0]
         # 1 = Cat, 0 = Dog
        if lable == 'cat':
            lable = [1,0]
        else:
            lable = [0,1]
            
        img = cv2.imread(os.path.join(dirctory, file))
        img = cv2.resize(img, (200,200), cv2.INTER_AREA)
        img = np.array(img)
        img = np.transpose(img, (2,0,1))
        lable = np.array(lable)
        traindata.append([img, lable])
    return traindata
        
train_data = getlables(TRAIN_DIR)
test_data = getlables(TEST_DIR)

In [4]:
class dog_cat_dataset(Dataset):
    def __init__(self, data, label):
        self.data = data
        self.label = label
    def __getitem__(self, index):
        return self.data[index], self.label[index]
    def __len__(self):
        return len(self.data)

In [5]:
# Define a transform to normalize the data
transform = transforms.Compose([
    transforms.Resize(256),                    
    transforms.CenterCrop(224),                
    transforms.ToTensor(),                     
    transforms.Normalize(                      
        mean=[0.485, 0.456, 0.406],                
        std=[0.229, 0.224, 0.225]                  
    )])
trainer=DataLoader(dataset=train_data,batch_size=32,shuffle=True)
tester = DataLoader(dataset=test_data, batch_size=32, shuffle=True)
# img, lal = next(iter(test))

In [6]:
# Build NN 
# Mini network to be part of larger network
class ConvUnit(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ConvUnit, self).__init__()
        self.conv = nn.Conv2d(in_channels=in_channels, kernel_size = 3,
                             out_channels=out_channels, stride=1, padding=1)
        self.bn = nn.BatchNorm2d(num_features = out_channels)
        self.relu = nn.ReLU()
    
    def forward(self, input):
        output = self.conv(input)
        output = self.bn(output)
        output = self.relu(output)
        return output
        


In [7]:
class DogCatNet(nn.Module):
    def __init__(self, num_classes = 2):
        super(DogCatNet, self).__init__()
        
        self.conv1 = ConvUnit(in_channels=3, out_channels=32)
        self.conv2 = ConvUnit(in_channels=32, out_channels=32)
        self.conv3 = ConvUnit(in_channels=32, out_channels=32)
        
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        
        self.conv4 = ConvUnit(in_channels=32, out_channels=64)
        self.conv5 = ConvUnit(in_channels=64, out_channels=64)
        self.conv6 = ConvUnit(in_channels=64, out_channels=64)
        self.conv7 = ConvUnit(in_channels=64, out_channels=64)
        
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        
        self.conv8 = ConvUnit(in_channels=64, out_channels=128)
        self.conv9 = ConvUnit(in_channels=128, out_channels=128)
        self.conv10 = ConvUnit(in_channels=128, out_channels=128)
        self.conv11 = ConvUnit(in_channels=128, out_channels=128)
        
        self.pool3 = nn.MaxPool2d(kernel_size=2)
        
        self.conv12 = ConvUnit(in_channels=128, out_channels=128)
        self.conv13 = ConvUnit(in_channels=128, out_channels=128)
        self.conv14 = ConvUnit(in_channels=128, out_channels=128)
        
        self.avgpool = nn.AvgPool2d(kernel_size=4)
        
        # now add it into a sequntial model
        self.net = nn.Sequential(self.conv1, self.conv2, self.conv3,
                                self.pool1,
                                self.conv4, self.conv5, self.conv6,self.conv7,
                                self.pool2,
                                self.conv8, self.conv9, self.conv10, self.conv11,
                                self.pool3,
                                self.conv12, self.conv13, self.conv14,
                                self.avgpool)
        
        self.fc = nn.Linear(in_features=128, out_features=num_classes)
    def forward(self, input):
        print(input.shape)
        output = self.net(input)
        output = output.view(-1, 128)
        output = self.fc(output)
        return output
        
        
        
        

In [8]:
# Check if gpu support is available
cuda_avail = torch.cuda.is_available()
print(cuda_avail)

False


In [9]:
# create model
model = DogCatNet(num_classes=2)

# If cuda is there
if cuda_avail:
    model.cuda()

opt = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_function = nn.CrossEntropyLoss()

# Create a learning rate adjustment function that divides the learning rate by 10 every 30 epochs
def adjust_learning_rate(epoch):
    lr = 0.001

    if epoch > 180:
        lr = lr / 1000000
    elif epoch > 150:
        lr = lr / 100000
    elif epoch > 120:
        lr = lr / 10000
    elif epoch > 90:
        lr = lr / 1000
    elif epoch > 60:
        lr = lr / 100
    elif epoch > 30:
        lr = lr / 10

    for param_group in optimizer.param_groups:
        param_group["lr"] = lr

In [10]:
def test():
    model.eval()
    test_acc = 0.0
    for i, (images, lables) in enumerate(tester):
        if cuda_avail:
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
        
        # Predict the class
        outputs = model(images.float())
        _, predictions = torch.max(outputs.data, 1)
        test_acc += torch.sum(predictions == lables.data)
    
    test_acc = test_acc/tester.sampler
    return test_acc

def train(num_epochs):
    best_acc = 0.0
    for epoch in range(num_epochs):
        model.train()
        train_acc = 0.0
        train_loss = 0.0
        for i, (images, lables) in enumerate(trainer):
            if cuda_avail:
                images = Variable(images.cuda())
                labels = Variable(labels.cuda())

            opt.zero_grad()
            outputs = model(images.float())
            loss = loss_function(outputs, lables)
            loss.backward()

            opt.step()
            train_loss += loss.cpu().data[0] * images.size(0)
            _, prediction = torch.max(outputs.data, 1)
            train_acc += torch.sum(prediction == lables.data)

        adjust_learning_rate(epoch)
        train_acc = train_acc/trainer.sampler
        train_loss = train_loss/trainer.sampler
        # evaluate the model
        test_acc = test()
        
        # Print the metrics
        print("Epoch {}, Train Accuracy: {} , TrainLoss: {} , Test Accuracy: {}".format(epoch, train_acc, train_loss,test_acc))
                        
 

In [11]:
train(10)

torch.Size([32, 3, 200, 200])


ValueError: Expected input batch_size (1152) to match target batch_size (32).