In [None]:
import torch
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision.utils import make_grid
import torch.nn as nn
from torchvision import transforms, models, datasets
import matplotlib.pyplot as plt
from google.colab import drive
import torch.optim as optim
import numpy as np
from torch.optim import lr_scheduler
from sklearn.model_selection import train_test_split
from time import time
from copy import deepcopy
from tqdm.notebook import tqdm
import io

%matplotlib inline

from pylab import rcParams
rcParams['figure.figsize'] = 5, 5

In [None]:
!pip install efficientnet_pytorch

In [None]:
!pip install google

In [None]:
# Run on the GPU when the runtime has one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# --- Reproducibility / data split ---
seed = 1337        # random_seed handed to train_valid_loader
batch_size = 4     # images per batch in both loaders
test_size = 0.25   # NOTE(review): not referenced by any visible cell - confirm it is still needed

# --- Optimisation ---
num_epochs = 25        # epochs passed to train_model
learning_rate = 0.001  # Adam learning rate
step_size = 10         # StepLR: epochs between learning-rate decays
gamma = 0.1            # LR decay factor (matches the gamma given to StepLR)
bias = True            # whether the replacement classification layer has a bias term

# --- Bookkeeping ---
steps_to_checkpoint = 1  # write a checkpoint every N epochs
test_num = 4             # number of batches shown in the final prediction preview

In [None]:
# Root folder of the image dataset; handed to train_valid_loader, which reads it
# through datasets.ImageFolder.
data_dir = "/content/gdrive/MyDrive/Colab Notebooks/data/flowers_classification"
# Folder and file name under which train_model stores its checkpoint via save_to_drive.
path_to_load_info = "/content/gdrive/MyDrive/Colab Notebooks/pretrained_weights"
info_name = "NNPractice_1.pth"
# Mount Google Drive at /content/gdrive so the paths above resolve inside Colab.
drive.mount('/content/gdrive')

In [None]:
def train_valid_loader(data_dir, batch_size, random_seed, valid_size=0.1, 
                       shuffle=True, num_workers=2, mean=[0.5, 0.5, 0.5],
                       std=[0.25, 0.25, 0.25], normalize=True):
    """Build training and validation DataLoaders over one ImageFolder dataset.

    Args:
        data_dir: Root directory laid out for ``datasets.ImageFolder``.
        batch_size: Number of images per batch in both loaders.
        random_seed: Seed for the train/validation index shuffle.
        valid_size: Fraction (0..1) of the images held out for validation.
        shuffle: Shuffle the indices before splitting them.
        num_workers: Worker processes used by each DataLoader.
        mean: Per-channel mean used for normalisation.
        std: Per-channel standard deviation used for normalisation.
        normalize: Apply ``transforms.Normalize(mean, std)`` when True.

    Returns:
        Tuple ``(train_loader, valid_loader, image_dataset)``.

    Raises:
        ValueError: If ``valid_size`` is outside the range [0, 1].
    """
    if not 0 <= valid_size <= 1:
        raise ValueError(f"valid_size must be in [0, 1], got {valid_size}")

    steps = [
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
    # Honour the `normalize` flag; it used to be overwritten and silently ignored.
    if normalize:
        steps.append(transforms.Normalize(mean=mean, std=std))
    transform = transforms.Compose(steps)

    # NOTE(review): both loaders share this dataset, so validation images also go
    # through the random crop/flip augmentation - confirm that this is intended.
    image_dataset = datasets.ImageFolder(data_dir, transform)

    num_images = len(image_dataset)
    indices = list(range(num_images))
    split = int(np.floor(valid_size * num_images))

    if shuffle:
        # A private RandomState yields the same permutation as seeding the global
        # NumPy RNG, without reseeding it for the rest of the notebook.
        np.random.RandomState(random_seed).shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(image_dataset, batch_size=batch_size,
                                sampler=train_sampler, num_workers=num_workers)
    valid_loader = torch.utils.data.DataLoader(image_dataset, batch_size=batch_size,
                                sampler=valid_sampler, num_workers=num_workers)
    return (train_loader, valid_loader, image_dataset)

In [None]:
def imshow(input, title, mean=[0.5, 0.5, 0.5], std=[0.25, 0.25, 0.25]):
    """Undo the normalisation of an image tensor and display it with matplotlib.

    Args:
        input: Channel-first image tensor (it is transposed with ``(1, 2, 0)``
            to channel-last before plotting).
        title: Title shown above the image.
        mean: Per-channel mean that was used to normalise the image.
        std: Per-channel standard deviation that was used to normalise the image.
    """
    # detach()/cpu() make this work for CUDA tensors and tensors that track
    # gradients, on which a bare .numpy() raises.
    inp = input.detach().cpu().numpy().transpose(1,2,0)
    # Invert Normalize: x_norm = (x - mean) / std  =>  x = x_norm * std + mean.
    inp = inp * std + mean
    # Clamp to the [0, 1] range matplotlib expects for float images.
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    plt.title(title)
    plt.show()

In [None]:
# Template for a training snapshot: the best accuracy seen, the epoch it was
# reached in, and the state dicts needed to restore that point.
best_statement = dict(
    accuracy=0.0,
    epoch=0,
    weights={},
    criterion={},
    optimizer={},
    scheduler={},
)

In [None]:
def copy_info(model, criterion, optimizer, scheduler, accuracy=0.0, epoch=0):
    """Return an independent snapshot of the current training state.

    The snapshot starts as a deep copy of the module-level ``best_statement``
    template and is filled with deep copies of the four state dicts, so later
    training steps cannot alter it.

    Args:
        model, criterion, optimizer, scheduler: Objects exposing ``state_dict()``.
        accuracy: Accuracy to record with the snapshot.
        epoch: Epoch index to record with the snapshot.

    Returns:
        dict with keys ``accuracy``, ``epoch``, ``weights``, ``criterion``,
        ``optimizer`` and ``scheduler``.
    """
    snapshot = deepcopy(best_statement)
    snapshot.update(
        weights=deepcopy(model.state_dict()),
        criterion=deepcopy(criterion.state_dict()),
        optimizer=deepcopy(optimizer.state_dict()),
        scheduler=deepcopy(scheduler.state_dict()),
        accuracy=accuracy,
        epoch=epoch,
    )
    return snapshot

In [None]:
def save_to_drive(file_name, data_dir, statement):
    """Serialise ``statement`` with ``torch.save`` to ``<data_dir>/<file_name>``."""
    destination = "{}/{}".format(data_dir, file_name)
    torch.save(statement, destination)

In [None]:
def load_from_drive(file_name, data_dir, map_location="gpu"):
    """Load an object saved with ``torch.save`` from ``<data_dir>/<file_name>``.

    Args:
        file_name: Name of the file inside ``data_dir``.
        data_dir: Directory that holds the file.
        map_location: Forwarded to ``torch.load``. The legacy default ``"gpu"``
            is not a device tag torch understands, so it is translated to
            ``"cuda"`` when CUDA is available and to ``"cpu"`` otherwise.

    Returns:
        The deserialised object.
    """
    # Only rewrite the exact string alias; devices, dicts and callables pass through.
    if isinstance(map_location, str) and map_location == "gpu":
        map_location = "cuda" if torch.cuda.is_available() else "cpu"
    path = f"{data_dir}/{file_name}"
    statement = torch.load(path, map_location=map_location)
    return statement

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return the snapshot of its best validation epoch.

    Every epoch runs a training pass over ``train_dataset`` followed by an
    evaluation pass over ``valid_dataset``. Whenever the validation accuracy
    improves, the current state is captured with ``copy_info``. The best
    snapshot is written with ``save_to_drive`` every ``steps_to_checkpoint``
    epochs and after the final epoch.

    Uses these notebook-level names: ``train_dataset`` and ``valid_dataset``
    (the two DataLoaders), ``device``, ``steps_to_checkpoint``, ``info_name``
    and ``path_to_load_info``.

    Args:
        model: Network to train; expected to live on ``device`` already.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Number of epochs to run.

    Returns:
        The snapshot dict built by ``copy_info`` for the best validation epoch
        (the initial state if validation accuracy never rose above 0).
    """
    start = time()
    best_statement = copy_info(model, criterion, optimizer, scheduler)
    loaders = {"train": train_dataset, "validate": valid_dataset}
    for epoch in tqdm(range(num_epochs)):
        for phase in tqdm(["train", "validate"]):
            is_training = phase == "train"
            # train(False) is what eval() does: it switches dropout/batch-norm
            # layers to inference behaviour.
            model.train(is_training)
            current_loss = 0.0
            current_correct = 0
            samples_seen = 0
            for inputs, labels in tqdm(loaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if is_training:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                batch_len = inputs.size(0)
                # Weight the batch loss by the batch length so that dividing by
                # the sample count below gives a per-sample average.
                current_loss += loss.item() * batch_len
                current_correct += torch.sum(preds == labels).item()
                samples_seen += batch_len
            if is_training:
                scheduler.step()
            # Divide by the samples actually processed: len(loader) * batch_size
            # over-counts whenever the last batch is smaller than batch_size.
            denominator = max(samples_seen, 1)
            epoch_loss = current_loss / denominator
            epoch_accuracy = current_correct / denominator
            print(f"{phase} loss: {epoch_loss:.4f} accuracy: {epoch_accuracy:.4f}")
            # The phase is named "validate"; comparing against "val" never
            # matched, so the best snapshot was never refreshed.
            if phase == "validate" and epoch_accuracy > best_statement["accuracy"]:
                best_statement = copy_info(model, criterion, optimizer,
                                           scheduler, epoch_accuracy, epoch)
        # Checkpoint once per epoch, after validation has had the chance to
        # update the best snapshot; always persist the result of the last epoch.
        is_last_epoch = epoch + 1 == num_epochs
        if (epoch + 1) % steps_to_checkpoint == 0 or is_last_epoch:
            save_to_drive(info_name, path_to_load_info, best_statement)
    time_passed = time() - start
    print(f"Training complete in {time_passed//60}m:{time_passed%60}")
    print(f"Best accuracy is: {best_statement['accuracy']}")
    return best_statement

In [None]:
# Build both loaders (despite the *_dataset names these are DataLoaders) plus
# the underlying ImageFolder dataset.
train_dataset, valid_dataset, image_dataset = train_valid_loader(
    data_dir=data_dir,
    batch_size=batch_size,
    random_seed=seed,
)

# len() of a DataLoader is its number of batches, not its number of images.
dataset_sizes = [len(train_dataset), len(valid_dataset)]
class_names = image_dataset.classes

In [None]:
# Show the class labels taken from image_dataset.classes.
class_names

In [None]:
# Show [train, validation] loader lengths; these count batches, not images.
dataset_sizes

In [None]:
# Pull one batch from the training loader to inspect the augmented inputs.
inputs, classes = next(iter(train_dataset))
# make_grid tiles the images of the batch into a single image tensor that
# imshow can plot in one call.
out = make_grid(inputs)
imshow(out, title=[class_names[x] for x in classes])
# The rendered size follows rcParams['figure.figsize'] (set to 5, 5 in the
# imports cell); raise that value to enlarge the preview.

In [None]:
# Load an ImageNet-pretrained VGG16 (batch-norm variant) through the `models`
# module imported at the top; the bare name `torchvision` is never imported, so
# `torchvision.models...` raised NameError.
model = models.vgg16_bn(pretrained=True, progress=True)
# Freeze every pretrained parameter; only layers created afterwards will train.
for param in model.parameters():
    param.requires_grad = False

In [None]:
# Replace the pretrained classification layer with one sized for our classes.
# Parameters of newly constructed modules have requires_grad=True by default,
# so the new layer is the only trainable part of the frozen network.
if hasattr(model, "fc"):
    # ResNet-style models expose the final layer as `model.fc`.
    num_features = model.fc.in_features
    model.fc = nn.Linear(in_features=num_features, out_features=len(class_names), bias=bias)
else:
    # VGG-style models have no `fc`; the final layer is the last entry of the
    # `model.classifier` Sequential.
    head_index = len(model.classifier) - 1
    num_features = model.classifier[head_index].in_features
    model.classifier[head_index] = nn.Linear(in_features=num_features,
                                             out_features=len(class_names), bias=bias)

In [None]:
# Move the network to the training device before building the optimizer.
model = model.to(device)
criterion = nn.CrossEntropyLoss()
# Frozen parameters never receive gradients, so Adam leaves them untouched.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Use the `gamma` hyper-parameter from the config cell instead of a second,
# hard-coded copy of its value.
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

In [None]:
# train_model returns a snapshot dict, not a network. Keep it under its own name
# so `model` stays an nn.Module for the load_state_dict()/eval() calls that follow.
best_checkpoint = train_model(model, criterion, optimizer, scheduler, num_epochs)

In [None]:
# Reload the checkpoint written during training, using the same helper and
# path constants that saved it rather than a second hard-coded copy of the path.
best_weights = load_from_drive(info_name, path_to_load_info, map_location="cpu")["weights"]

# load_state_dict copies the values into the existing parameters, so the model
# stays on whatever device it already lives on.
model.load_state_dict(best_weights)

# Switch to inference mode; the trailing ';' hides the long model repr.
model.eval();

In [None]:
# Preview predictions on up to `test_num` held-out batches. Training batches
# would overstate how well the model generalises.
with torch.no_grad():
    # zip(range(...), loader) iterates the loader once and stops after test_num
    # batches, instead of building a fresh iterator for every batch.
    for _, (inputs, classes) in zip(range(test_num), valid_dataset):
        # The model lives on `device`; feeding it CPU tensors fails on a GPU runtime.
        outputs = model(inputs.to(device))
        _, preds = torch.max(outputs, 1)
        # Bring predictions back to the CPU to compare them with the labels.
        preds = preds.cpu()
        print([class_names[x] for x in preds])
        imshow(make_grid(inputs), title=[class_names[x] for x in classes])
        # Divide by the real batch length; the last batch may be smaller than batch_size.
        print(torch.sum(preds == classes).item() / len(classes))