In [None]:
import numpy as np
import pandas as pd
import os
import torch
import torchvision
import torch.nn as nn
from torchvision import transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
from torch import optim
from PIL import Image

In [None]:
# Colab-only setup: mount Google Drive so the dataset archive stored there is readable.
from google.colab import drive
drive.mount('/content/drive')
# Extract the dogs-vs-cats archive from Drive (7z extracts into the current working directory).
!7z x /content/drive/MyDrive/Colab\ Notebooks/Data\ Science/Learning/Artificial\ Neural\ Networks/PyTorch/dogs-vs-cats.zip
# Extract the train / test1 archives. Presumably these were produced by the extraction
# above, which assumes the working directory is /content — TODO confirm.
!7z x /content/train.zip
!7z x /content/test1.zip 

In [None]:
import glob

# Re-arrange the extracted images into
#   /content/dogs-vs-cats/{train,valid}/{dog,cat}/<image>.jpg
# with 30% of the images held out for validation.
# The cell is written to be re-runnable: directories are created with
# exist_ok=True and folders are only moved when they have not been moved yet.
path = '/content/dogs-vs-cats'
SPLIT_SEED = 42  # fixed seed so the train/valid split is reproducible across runs
os.makedirs(path, exist_ok = True)

# Move the extracted folders under the dataset root (skipped on a re-run).
for extracted in ['train', 'test1']:
  source_dir = os.path.join('/content', extracted)
  target_dir = os.path.join(path, extracted)
  if os.path.isdir(source_dir) and not os.path.exists(target_dir):
    os.rename(source_dir, target_dir)

# Fail loudly instead of silently creating an empty training set.
if not os.path.isdir(os.path.join(path, 'train')):
  raise FileNotFoundError(os.path.join(path, 'train'))

# Sorting makes the seeded permutation independent of the filesystem listing order.
files = sorted(glob.glob(os.path.join(path, 'train/*.jpg')))
no_of_images = len(files)
shuffle = np.random.default_rng(SPLIT_SEED).permutation(no_of_images)
valid_size = 3 * (no_of_images // 10)

for i in ['train', 'valid']:
  for folder in ['dog', 'cat']:
    os.makedirs(os.path.join(path, i, folder), exist_ok = True)

# The first `valid_size` shuffled images go to 'valid', the rest stay in 'train'.
# The class folder is the file-name prefix before the first dot.
for position, i in enumerate(shuffle):
  split = 'valid' if position < valid_size else 'train'
  image = os.path.basename(files[i])
  folder = image.split('.')[0]
  os.rename(files[i], os.path.join(path, split, folder, image))

In [None]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)

# Shared preprocessing settings: resize target and per-channel normalisation.
_image_size = (224, 224)
_channel_mean = [0.5, 0.5, 0.5]
_channel_std = [0.5, 0.5, 0.5]

# Training and validation use the same deterministic pipeline
# (resize -> tensor -> normalise); each gets its own Compose instance.
train_transform = transforms.Compose([
    transforms.Resize(_image_size),
    transforms.ToTensor(),
    transforms.Normalize(list(_channel_mean), list(_channel_std)),
])
valid_transform = transforms.Compose([
    transforms.Resize(_image_size),
    transforms.ToTensor(),
    transforms.Normalize(list(_channel_mean), list(_channel_std)),
])

# ImageFolder derives the class labels from the sub-directory names.
train_data = ImageFolder(root = '/content/dogs-vs-cats/train', transform = train_transform)
valid_data = ImageFolder(root = '/content/dogs-vs-cats/valid', transform = valid_transform)

In [None]:
class ImageLoader(Dataset):
  """Dataset wrapper that keeps only the RGB images of an indexable collection.

  Each entry of ``dataset`` is indexed as ``entry[0]`` (passed to
  ``Image.open``, so presumably a file path) and ``entry[1]`` (returned
  unchanged as the class label).

  Args:
    dataset: indexable, sized collection of ``(image source, label)`` entries.
    transform: optional callable applied to the loaded PIL image.
  """
  def __init__(self, dataset, transform = None):
    self.dataset = self.checkChannel(dataset)
    self.transform = transform

  def __len__(self):
    """Number of entries that passed the RGB filter."""
    return len(self.dataset)

  def __getitem__(self, idx):
    """Return ``(image, label)`` for entry ``idx``; ``image`` is transformed if a transform was given."""
    entry = self.dataset[idx]
    classCategory = entry[1]
    # Image.open is lazy and keeps the file open; convert() forces the pixel
    # data to be read into a new image so the handle can be closed right away.
    with Image.open(entry[0]) as handle:
      image = handle.convert('RGB')
    if self.transform:
      image = self.transform(image)
    return image, classCategory

  def checkChannel(self, dataset):
    """Return the entries of ``dataset`` whose image bands are exactly ('R', 'G', 'B')."""
    datasetRGB = []
    for index in range(len(dataset)):
      entry = dataset[index]
      # The context manager closes every inspected file; without it one file
      # handle per image stays open for the duration of the scan.
      with Image.open(entry[0]) as handle:
        is_rgb = handle.getbands() == ('R', 'G', 'B')
      if is_rgb:
        datasetRGB.append(entry)
    return datasetRGB

In [None]:
# Both loaders use the same batching configuration: batches of 64, reshuffled every epoch.
_loader_options = dict(batch_size = 64, shuffle = True)
train_loader = DataLoader(train_data, **_loader_options)
valid_loader = DataLoader(valid_data, **_loader_options)

In [None]:
from tqdm import tqdm
from torchvision.models import resnet50

# Start from a ResNet-50 with pretrained weights.
# NOTE(review): newer torchvision releases may prefer a `weights=` argument
# over `pretrained=` — confirm against the installed version.
model = resnet50(pretrained = True)

# Freeze every parameter of the loaded network.
for weight in model.parameters():
  weight.requires_grad = False

# Swap the classification head for a new 2-output linear layer; being freshly
# created, its parameters have requires_grad=True and are the only trainable ones.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features = num_ftrs, out_features = 2)

# Last expression of the cell: moves the model to `device` and displays it.
model.to(device)

In [None]:
# Loss for the 2-class classification head.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters with a learning rate of 0.01.
optimizer = optim.Adam(params = model.parameters(), lr = 1e-2)

def train(num_epoch, model, train_loader):
  """Train `model` for `num_epoch` epochs and save a checkpoint after each one.

  Relies on the notebook-level `criterion`, `optimizer`, `device` and `tqdm`.

  Args:
    num_epoch: number of passes over `train_loader`.
    model: network to optimise; switched to training mode every epoch.
    train_loader: iterable of `(data, target)` batches that supports `len()`.

  Side effects:
    Prints the per-sample average loss and the accuracy of each epoch and
    writes `checkpoint_epoch<epoch>.pt` (zero-based epoch index) holding the
    model and optimizer state dicts to the current working directory.
  """
  for epoch in range(0, num_epoch):
    current_loss = 0.0      # sum of per-sample losses seen this epoch
    current_corrects = 0    # number of correctly classified samples
    seen_samples = 0
    model.train()
    loop = tqdm(enumerate(train_loader), total = len(train_loader))
    for batch_idx, (data, target) in loop:
      data = data.to(device = device)
      target = target.to(device = device)
      scores = model(data)

      loss = criterion(scores, target)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      _, preds = torch.max(scores, 1)
      batch_size = target.size(0)
      # `criterion` averages over the batch (default 'mean' reduction), so the
      # batch loss is weighted by the batch size; dividing by the number of
      # samples at the end then yields a true per-sample average.
      current_loss += loss.item() * batch_size
      current_corrects += (preds == target).sum().item()
      seen_samples += batch_size
      loop.set_description(f'Epoch {epoch + 1} / {num_epoch} process: {int((batch_idx / len(train_loader)) * 100)}')
      # Only Python floats are kept; storing the loss tensors themselves would
      # keep device memory alive for the whole epoch.
      loop.set_postfix(loss = loss.item())
    denominator = max(seen_samples, 1)  # avoid ZeroDivisionError on an empty loader
    print(f'Epoch: {epoch} Loss: {current_loss / denominator}. Accuracy: {current_corrects / denominator}')
    torch.save({
          'model_state_dict' : model.state_dict(),
          'optimizer_state_dict' : optimizer.state_dict(),
    }, 'checkpoint_epoch'+str(epoch) +'.pt')


In [None]:
def test(model, valid_loader):
  model.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for x, y in valid_loader:
      x = x.to(device)
      y = y.to(device)
      output = model(x)
      _, predictions = torch.max(output, 1)
      correct += (predictions == y).sum().item()
      test_loss += criterion(output, y)
  test_loss /= len(valid_loader.dataset) 
  print('Average Loss:', test_loss, 'Accuracy:', correct, '/'), len(valid_loader.dataset), ' ', int(correct / len(valid_loader.dataset) * 100 ), '%'

In [None]:
# Train for five epochs, then report loss and accuracy on the validation split.
train(num_epoch = 5, model = model, train_loader = train_loader)
test(model = model, valid_loader = valid_loader)

In [None]:
# Restore the model and optimizer state saved after the fifth epoch
# (checkpoint files are numbered with the zero-based epoch index).
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU runtime also loads on a CPU-only runtime.
checkpoint = torch.load('/content/checkpoint_epoch4.pt', map_location = device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [None]:
# Same deterministic preprocessing as the one used for training/validation.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

path = '/content/dogs-vs-cats/test1'
files = glob.glob(os.path.join(path, '*.jpg'))
# ImageFolder reads images from sub-directories of its root, so the unlabeled
# test images are moved into a single placeholder folder named 'test'.
# exist_ok keeps the cell re-runnable: on a second run the folder already
# exists and `files` is empty because the images were already moved.
os.makedirs(os.path.join(path, 'test'), exist_ok = True)

for source in files:
  os.rename(source, os.path.join(path, 'test', os.path.basename(source)))

test_dataset = ImageFolder('/content/dogs-vs-cats/test1', transform = test_transform)
# shuffle=False keeps the predictions in the dataset's own order.
test_loader = DataLoader(test_dataset, batch_size = 64, shuffle = False)

In [None]:
def predict(model, test_loader):
  """Run `model` over `test_loader` and return the predicted class indices.

  Relies on the notebook-level `device`.

  Args:
    model: network to run; switched to evaluation mode.
    test_loader: iterable of `(data, target)` batches; `target` is ignored.

  Returns:
    A list with one predicted class index (int) per input sample, in loader
    order. As before, the first prediction of every batch is also printed.
  """
  predictions = []
  model.eval()
  with torch.no_grad():
    for data, _ in test_loader:
      data = data.to(device)
      output = model(data)
      _, pred = torch.max(output, 1)
      print(f'predict: {pred[0]}')
      # Keep every prediction of the batch, not only the first one printed above.
      predictions.extend(pred.tolist())
  return predictions

In [None]:
# (Re)create the loss and a fresh Adam optimizer over all model parameters.
_learning_rate = 0.01
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = _learning_rate)

def train(num_epoch, model, train_loader):
  """Train `model` for `num_epoch` epochs and save a checkpoint after each one.

  NOTE(review): this re-defines the `train` function declared in an earlier
  cell of the notebook; the later definition wins once this cell has run.

  Relies on the notebook-level `criterion`, `optimizer`, `device` and `tqdm`.

  Args:
    num_epoch: number of passes over `train_loader`.
    model: network to optimise; switched to training mode every epoch.
    train_loader: iterable of `(data, target)` batches that supports `len()`.

  Side effects:
    Prints the per-sample average loss and the accuracy of each epoch and
    writes `checkpoint_epoch<epoch>.pt` (zero-based epoch index) holding the
    model and optimizer state dicts to the current working directory.
  """
  for epoch in range(0, num_epoch):
    current_loss = 0.0      # sum of per-sample losses seen this epoch
    current_corrects = 0    # number of correctly classified samples
    seen_samples = 0
    model.train()
    loop = tqdm(enumerate(train_loader), total = len(train_loader))
    for batch_idx, (data, target) in loop:
      data, target = data.to(device), target.to(device)
      output = model(data)
      optimizer.zero_grad()
      loss = criterion(output, target)
      _, preds = torch.max(output, 1)
      # Gradients come from the loss tensor; nn.Module has no backward() method.
      loss.backward()
      optimizer.step()
      batch_size = target.size(0)
      # `criterion` averages over the batch (default 'mean' reduction), so the
      # batch loss is weighted by the batch size before the final division.
      current_loss += loss.item() * batch_size
      current_corrects += (preds == target).sum().item()
      seen_samples += batch_size
      loop.set_description(f'Epoch: {epoch + 1} / {num_epoch} process: {int((batch_idx / len(train_loader)) * 100)}')
      loop.set_postfix(loss = loss.item())
    denominator = max(seen_samples, 1)  # avoid ZeroDivisionError on an empty loader
    print(f'Epoch: {epoch} Loss: {current_loss / denominator}. Accuracy: {current_corrects / denominator}')
    torch.save({
        'model_state_dict' : model.state_dict(),
        'optimizer_state_dict' : optimizer.state_dict(),
    }, 'checkpoint_epoch' + str(epoch) + '.pt')


def valid(num_epoch, model, valid_loader):
  """Evaluate `model` on `valid_loader` without tracking gradients.

  Relies on the notebook-level `criterion` and `device`.

  Args:
    num_epoch: unused; kept so existing calls keep working.
    model: network to evaluate; switched to evaluation mode.
    valid_loader: iterable of `(data, target)` batches.

  Returns:
    `(average_loss, accuracy)`: the per-sample average loss and the fraction
    of correctly classified samples, both as Python floats. Both are 0.0 when
    the loader yields no samples.
  """
  model.eval()
  loss = 0.0
  correct = 0
  seen_samples = 0
  # no_grad must be called to obtain the context manager.
  with torch.no_grad():
    for data, target in valid_loader:
      data, target = data.to(device), target.to(device)
      output = model(data)
      _, preds = torch.max(output, 1)
      correct += (preds == target).sum().item()
      # Accumulate the loss for every batch (weighted by batch size because
      # `criterion` returns a batch mean), not only for the last one.
      loss += criterion(output, target).item() * target.size(0)
      seen_samples += target.size(0)
  denominator = max(seen_samples, 1)  # avoid ZeroDivisionError on an empty loader
  return loss / denominator, correct / denominator

# Restore the model and optimizer state saved after the first epoch
# (checkpoint files are numbered with the zero-based epoch index).
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU runtime also loads on a CPU-only runtime.
checkpoint = torch.load('/content/checkpoint_epoch0.pt', map_location = device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])