<a href="https://colab.research.google.com/github/Huxwell/ColabNNs/blob/main/cats_n_dogs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch.nn as nn
import torch
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import zipfile
import glob
from tqdm import tqdm
from PIL import Image
import random # do random.seed(13) before every shuffle. order of shuffle() execution changes results order.

In [None]:
# NOTE(review): `cp` needs a source AND a destination; only the source (`runs`)
# is given, so this command fails as written — TODO add the intended target.
!cp -rv runs

Notes:
- I have no labels for the test set here, so I am dropping the 'test.zip'-related code. Instead, I can split the train set into train, val and test subsets; in fact, I don't want a lot of examples in the training set anyway.
- The classes are almost balanced, so accuracy is an acceptable metric here.

In [None]:
# Show which GPU (if any) is attached to this runtime.
!nvidia-smi

In [None]:
# Choose the compute device once; everything below moves tensors to it.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Seed torch's RNG (and every CUDA device's RNG when a GPU is used) so runs
# are repeatable.
torch.manual_seed(13)
if device != 'cpu':
    torch.cuda.manual_seed_all(13)

In [None]:
# Folder on Google Drive that holds the competition archives.
# NOTE(review): this path only exists once Drive is mounted under
# /content/drive; no mount step is visible in this file — confirm.
dataset_dir = '/content/drive/MyDrive/cats_n_dogs_unsure/dogs-vs-cats-redux-kernels-edition'
os.listdir(dataset_dir)  # display what is available there

In [None]:
# Images are read from the `train` subfolder of a working directory on the
# runtime's local disk; make sure that working directory exists.
train_dir = 'local_data/train'
os.makedirs('local_data/', exist_ok=True)

In [None]:
# Unpack the training archive from the dataset folder into the local
# working directory.
archive_path = os.path.join(dataset_dir, 'train.zip')
with zipfile.ZipFile(archive_path) as train_zip:
    train_zip.extractall(path='local_data')

In [None]:
# Peek at a few of the extracted file names.
os.listdir(train_dir)[:5]

In [None]:
# Gather the image paths of each class. Sorting first gives a fixed starting
# order, so the seeded shuffles below always produce the same permutation.
cats_list = sorted(glob.glob(os.path.join(train_dir, 'cat*.jpg')))
dogs_list = sorted(glob.glob(os.path.join(train_dir, 'dog*.jpg')))
for class_files in (cats_list, dogs_list):
    print(len(class_files))

# Re-seed before every shuffle on purpose: each list is then shuffled
# independently of how many shuffles ran before it.
for class_files in (cats_list, dogs_list):
    random.seed(13)
    random.shuffle(class_files)

for class_files in (cats_list, dogs_list):
    print(class_files[:3])

# Sanity checks: the shuffled order is the expected one (no uncontrolled
# randomness beyond this point) and both classes have the same size.
assert cats_list[2] == 'local_data/train/cat.801.jpg'
assert dogs_list[2] == 'local_data/train/dog.801.jpg'
assert len(cats_list) == len(dogs_list)


In [None]:
# Class-balanced splits, cut from the already shuffled per-class lists:
# the first 7500 of each class for training (or only the first 250 for the
# small subset), the next 2500 for validation and the remainder for testing.
full_train_list = [*cats_list[:7500], *dogs_list[:7500]]
subset_train_list = [*cats_list[:250], *dogs_list[:250]]
val_list = [*cats_list[7500:10000], *dogs_list[7500:10000]]
test_list = [*cats_list[10000:], *dogs_list[10000:]]
print(len(full_train_list), len(val_list), len(test_list))

# Mix cats and dogs inside every split; re-seeding before each shuffle keeps
# every split's order independent of the others.
for split_files in (full_train_list, subset_train_list, val_list, test_list):
    random.seed(13)
    random.shuffle(split_files)

for split_files in (full_train_list, subset_train_list, val_list, test_list):
    print(split_files[:9])

# Guard against silent changes in the resulting order.
expected_fifth_items = (
    (full_train_list, 'local_data/train/cat.1612.jpg'),
    (subset_train_list, 'local_data/train/cat.1787.jpg'),
    (val_list, 'local_data/train/dog.12023.jpg'),
    (test_list, 'local_data/train/dog.6485.jpg'),
)
for split_files, expected_path in expected_fifth_items:
    assert split_files[4] == expected_path

In [None]:
train_list = subset_train_list # 500 images -> 5 batches at batch_size = 100

In [None]:
# Preview ten randomly chosen training images in a 2x5 grid.
np.random.seed(13131313)  # fixed seed: the same images are shown on every run
# low=0 so that the first image of the list can be drawn as well.
random_idx = np.random.randint(0, len(train_list), size=10)
print(random_idx)
fig = plt.figure(figsize=(20., 10.))

for i, img_idx in enumerate(random_idx):
    ax = fig.add_subplot(2, 5, i + 1)
    # Draw while the file is open, then release the handle.
    with Image.open(train_list[img_idx]) as img:
        ax.imshow(img)
    ax.axis('off')  # hide the ticks of every panel, not only the last one

plt.show()

In [None]:
# The class name is the part of the file name before the first dot
# (file names look like 'cat.801.jpg' / 'dog.801.jpg').
train_list[0].split('/')[-1].split('.')[0]

# Data Augmentation

In [None]:
# Channel statistics that torchvision's ImageNet-pretrained models were
# trained with; inputs must be normalized the same way.
imagenet_normalize = transforms.Normalize(
  mean=[0.485, 0.456, 0.406],
  std=[0.229, 0.224, 0.225],
)

# Training pipeline: random crop + horizontal flip as augmentation.
train_transforms = transforms.Compose([
  transforms.Resize((224, 224)),
  transforms.RandomResizedCrop(224),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
  imagenet_normalize,
])

# Evaluation pipelines are deterministic: random augmentation here would make
# validation/test metrics noisy and not comparable between epochs or runs.
val_transforms = transforms.Compose([
  transforms.Resize((224, 224)),
  transforms.ToTensor(),
  imagenet_normalize,
])

test_transforms = transforms.Compose([
  transforms.Resize((224, 224)),
  transforms.ToTensor(),
  imagenet_normalize,
])

In [None]:
class dataset(torch.utils.data.Dataset):
    """Map-style dataset over cat/dog image files.

    The label comes from the file name: the text before the first dot
    ('cat' or 'dog') is mapped to 0 or 1 respectively.

    Args:
        file_list: sequence of image file paths.
        transform: optional callable applied to the loaded RGB PIL image.
            When None, the PIL image is returned as is.
    """

    # File-name prefix -> integer class index.
    _LABELS = {'cat': 0, 'dog': 1}

    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.file_list)

    @classmethod
    def _label_from_path(cls, img_path):
        """Return 0 for a 'cat.*' file and 1 for a 'dog.*' file.

        Raises:
            ValueError: if the file name starts with any other prefix, so a
                stray file fails here instead of leaking a string label
                into a batch.
        """
        prefix = os.path.basename(img_path).split('.')[0]
        try:
            return cls._LABELS[prefix]
        except KeyError:
            raise ValueError(
                f"cannot derive a cat/dog label from file name: {img_path!r}"
            ) from None

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the file at position ``idx``."""
        img_path = self.file_list[idx]
        # Image.open is lazy; convert() forces the pixel data to be read, so
        # the file can be closed right away. Converting to RGB also gives
        # every sample three channels.
        with Image.open(img_path) as img:
            img = img.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, self._label_from_path(img_path)

In [None]:
# One dataset per split, each paired with its own file list and transform
# pipeline (the test set must wrap test_list, not the validation files).
train_data = dataset(train_list, transform=train_transforms)
val_data = dataset(val_list, transform=val_transforms)
test_data = dataset(test_list, transform=test_transforms)

In [None]:
batch_size = 100 # number of samples per mini-batch

In [None]:
# Batch iterators for the three splits.
# NOTE(review): shuffle=False keeps the training batch order identical in
# every epoch (the file list itself was shuffled once, with a fixed seed) —
# confirm this is intentional.
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=False)
val_loader = torch.utils.data.DataLoader(dataset=val_data, batch_size=batch_size, shuffle=False)
# The test loader must iterate over test_data, not over the validation set.
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

In [None]:
# Number of training samples, of training batches and of test batches.
print(len(train_data), len(train_loader), len(test_loader))

In [None]:
# Class balance check: for each split print how many paths contain 'cat',
# then how many contain 'dog'.
for split_data in (train_data, val_data, test_data):
    for animal in ('cat', 'dog'):
        matching = [filename for filename in split_data.file_list if animal in filename]
        print(len(matching))

In [None]:
# Sanity-check the shape of one transformed training image.
train_data[0][0].shape

In [None]:
# Load the ImageNet-pretrained MobileNetV2 and replace its 1000-way classifier
# layer with a 2-way one: the dataset only produces labels 0 (cat) and 1 (dog),
# so output column 1 then really is the "dog" score.
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

In [None]:
# Move the weights to the selected device and switch to training mode.
model = model.to(device)
model.train()

In [None]:
# Adam over all model parameters, i.e. the whole network is fine-tuned.
optimizer = optim.Adam(params = model.parameters(),lr=0.001)
# Cross-entropy between the raw model outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()

In [None]:
# Move the logs of earlier runs out of the way before starting a new one.
# NOTE(review): presumably `runs/` is the SummaryWriter log directory — confirm.
!mv -v runs/ temp_runs_backup/

In [None]:
# Fine-tune the model: one training pass and one validation pass per epoch,
# TensorBoard logging, and early stopping on the validation loss.
writer = SummaryWriter()
epochs = 10 #10
min_val_loss = float('inf')
epochs_since_min_loss = 0
patience = 7

try:
    for epoch in range(epochs):
        print("\n","="*30,"\n")

        # ---- training pass ----
        # Validation below switches the model to eval mode, so training mode
        # has to be restored at the start of every epoch.
        model.train()
        epoch_loss = 0.0
        epoch_accuracy = 0.0
        with tqdm(train_loader, unit="iteration") as tepoch:
            tepoch.set_description(f"Training epoch {epoch}")
            for data, label in tepoch:
                data = data.to(device)
                label = label.to(device)

                output = model(data)
                loss = criterion(output, label)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Accumulate plain Python floats: summing the loss tensors
                # themselves would also keep their autograd history alive.
                acc = (output.argmax(dim=1) == label).float().mean().item()
                epoch_accuracy += acc / len(train_loader)
                epoch_loss += loss.item() / len(train_loader)
                tepoch.set_postfix(loss=loss.item(), accuracy=100. * epoch_accuracy)
        print('Epoch : {}, train accuracy : {}, train loss : {}'.format(epoch, epoch_accuracy,epoch_loss))

        # ---- validation pass ----
        # eval() freezes the batch-norm statistics and disables dropout, so
        # validation data neither perturbs the model nor gets noisy outputs.
        model.eval()
        epoch_val_accuracy = 0.0
        epoch_val_loss = 0.0
        with torch.no_grad(), tqdm(val_loader, unit="iteration") as tqdm_wrapped_valid_loader:
            tqdm_wrapped_valid_loader.set_description(f"Validation after epoch {epoch}")
            for data, label in tqdm_wrapped_valid_loader:
                data = data.to(device)
                label = label.to(device)

                val_output = model(data)
                val_loss = criterion(val_output, label).item()

                acc = (val_output.argmax(dim=1) == label).float().mean().item()
                epoch_val_accuracy += acc / len(val_loader)
                epoch_val_loss += val_loss / len(val_loader)
                tqdm_wrapped_valid_loader.set_postfix(epoch_val_accuracy=epoch_val_accuracy, val_loss=val_loss)
        print('Epoch : {}, val_accuracy : {}, val_loss : {}'.format(epoch, epoch_val_accuracy,epoch_val_loss))

        # ---- logging ----
        writer.add_scalar("train_accuracy", epoch_accuracy, epoch)
        writer.add_scalar("val_accuracy", epoch_val_accuracy, epoch)
        writer.add_scalar("train_loss_avg_iteration", epoch_loss, epoch)
        writer.add_scalar("val_loss_avg_iteration", epoch_val_loss, epoch)

        # ---- early stopping ----
        scalar_val_loss = epoch_val_loss
        if scalar_val_loss < min_val_loss:
            min_val_loss = scalar_val_loss
            epochs_since_min_loss = 0
            print("New best model, min_val_loss:", min_val_loss)
        else:
            epochs_since_min_loss += 1
            print("epochs_since_min_loss",epochs_since_min_loss)
        if epochs_since_min_loss > patience:
            print("Early stopping.")
            break
finally:
    # Flush and close the event file even when training is interrupted.
    writer.flush()
    writer.close()

In [None]:
# Collect (true label, softmax score of class 1 = dog) pairs for the first
# few validation batches.
dog_probs = []
model.eval()
max_batches = 10  # only look at the first ten batches
with torch.no_grad():
    # The dataset yields (image, label) pairs, so the second item is the
    # integer class label, not a file id.
    for i, (data, label) in enumerate(val_loader, start=1):
        if i > max_batches:
            break
        data = data.to(device)
        preds = model(data)
        print(preds)
        preds_list = F.softmax(preds, dim=1)[:, 1].tolist() #https://stats.stackexchange.com/questions/542054/why-does-torchvision-models-resnet18-not-use-softmax
        # .tolist() stores plain ints rather than 0-d tensors.
        dog_probs += list(zip(label.tolist(), preds_list))