# Importing Libraries

In [1]:
import numpy as np 
import pandas as pd

import zipfile
import os
import shutil

import random


import torch
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
import torchvision.transforms as T
from torchvision.models import resnet18 ,ResNet18_Weights
from torch import nn 
from torch.optim import SGD ,lr_scheduler
import torch.cuda

import time
from torch.autograd import Variable

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.device(device)

device(type='cuda')

# Data Preparation 

In [3]:
# setting file paths
train_zip_path = "/kaggle/input/dogs-vs-cats/train.zip"
train_images_path = "/kaggle/working/training/"

# creating training directory
os.makedirs(train_images_path, exist_ok=True)

# unzipping files to train folder

with zipfile.ZipFile(train_zip_path) as zip_f:
    zip_f.extractall(train_images_path)
    
#creating folder for Training 
os.makedirs("/kaggle/working/training/Train/", exist_ok=True)
#creating folder for Validation 
os.makedirs("/kaggle/working/training/Valid/", exist_ok=True)

In [4]:
images = os.listdir("training/train")
L = len(images)
# creating directories for each class
os.makedirs("training/Train/cat", exist_ok=True)
os.makedirs("training/Train/dog", exist_ok=True)



for i, file in enumerate(images):
    # getting category
    dir_name = file.split(".")[0].lower()
    
    # copying to correct folder
    os.rename(f"/kaggle/working/training/train/{file}",
              f"/kaggle/working/training/Train/{dir_name}/{file}")

In [5]:
#deleting the old train folder
shutil.rmtree("/kaggle/working/training/train")

In [6]:
#creating dog & cat folders in Valid folder
os.makedirs("training/Valid/cat", exist_ok=True)
os.makedirs("training/Valid/dog", exist_ok=True)

In [7]:
source_dog = 'training/Train/dog'
source_cat = 'training/Train/cat'
dest_dog = 'training/Valid/dog'
dest_cat = 'training/Valid/cat'
files_dog = os.listdir(source_dog)
files_cat = os.listdir(source_cat)
no_of_files_dog = int(len(files_dog) // 5)
no_of_files_cat = int(len(files_dog) // 5)

for file_name in random.sample(files_dog, no_of_files_dog):
    shutil.move(os.path.join(source_dog, file_name), dest_dog)
    
for file_name in random.sample(files_cat, no_of_files_cat):
    shutil.move(os.path.join(source_cat, file_name), dest_cat)

In [8]:
simple_transform = T.Compose([T.Resize((224,224)) , T.ToTensor(), 
                              T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])])

In [9]:
train_image = ImageFolder("/kaggle/working/training/Train/",simple_transform)
valid_image = ImageFolder("/kaggle/working/training/Valid/",simple_transform)

train_image.class_to_idx

{'cat': 0, 'dog': 1}

In [10]:
train_image

Dataset ImageFolder
    Number of datapoints: 20000
    Root location: /kaggle/working/training/Train/
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [11]:
train_dataloader = torch.utils.data.DataLoader(
    train_image,batch_size = 32 ,num_workers = 2, shuffle = True)

valid_dataloader = torch.utils.data.DataLoader(
    valid_image,batch_size = 32 ,num_workers = 2, shuffle = False)

dataloaders = {'training':train_dataloader,'validation':valid_dataloader}
dataset_sizes = {'training':len(train_dataloader.dataset),'validation':len(valid_dataloader.dataset)}
print(dataset_sizes)

{'training': 20000, 'validation': 5000}


In [12]:
torch.cuda.is_available()

True

# Model Creating

In [13]:
model = resnet18(ResNet18_Weights)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features,2)

if device:
    model = model.cuda()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 83.7MB/s]


In [14]:
#loss and optimizer
LR = 0.001

criterion = nn.CrossEntropyLoss()

optimizer = SGD(model.parameters(), lr=LR, momentum=0.9)
exp_LR_scheduler = lr_scheduler.StepLR(optimizer,step_size=7,gamma=0.1) 
#StepLR: function helps in dynamically changing the learningrate.

# Training the model

In [15]:
def train_model(model,criterion,optimizer,scheduler,num_epochs=6):
    since = time.time()
    
    best_model_weights = model.state_dict()
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print("epoch {}/{}".format(epoch+1,num_epochs))
        print("*" * 10)
        
        #training and validation for each epoch
        for x in ['training','validation']:
            if x == 'training':
                model.train()
            else: 
                model.eval()
            
            running_loss = 0.0
            running_corrects = 0
            
            #iterating over data
            for data in dataloaders[x]:
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                if torch.cuda.is_available():
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)
                    
                # zero the parameter gradients
                optimizer.zero_grad()
                
                # forward
                outputs = model(inputs)

                _, preds = torch.max(outputs.data, 1)
                
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if x == 'training':
                    loss.backward()
                    optimizer.step()
                    
                
                # statistics
                running_loss += loss.data
                running_corrects += torch.sum(preds == labels.data)
                

            epoch_loss = running_loss / dataset_sizes[x]
            epoch_acc = running_corrects / dataset_sizes[x]
            
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(x, epoch_loss, epoch_acc))

            # deep copy the model
            if x == 'validation' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_weights = model.state_dict()

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_weights)
    return model

In [16]:
model = train_model(model, criterion, optimizer, exp_LR_scheduler, num_epochs=6)

epoch 1/6
**********
training Loss: 0.0024 Acc: 0.9695
validation Loss: 0.0012 Acc: 0.9872

epoch 2/6
**********
training Loss: 0.0010 Acc: 0.9883
validation Loss: 0.0012 Acc: 0.9876

epoch 3/6
**********
training Loss: 0.0006 Acc: 0.9936
validation Loss: 0.0012 Acc: 0.9876

epoch 4/6
**********
training Loss: 0.0004 Acc: 0.9961
validation Loss: 0.0012 Acc: 0.9892

epoch 5/6
**********
training Loss: 0.0002 Acc: 0.9978
validation Loss: 0.0012 Acc: 0.9882

epoch 6/6
**********
training Loss: 0.0002 Acc: 0.9982
validation Loss: 0.0012 Acc: 0.9900

Training complete in 8m 55s
Best val Acc: 0.990000
