<a href="https://colab.research.google.com/github/Rosie-Brigham/Art-Genie/blob/master/model_1c.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Imports
import torch
import torch.nn as nn # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim # For all Optimization algorithms, SGD, Adam, etc.
import torchvision.transforms as transforms # Transformations we can perform on our dataset
import torchvision
from torch.optim import lr_scheduler
import time
import copy

import os
import pandas as pd
from skimage import io
from torch.utils.data import Dataset, DataLoader # Gives easier dataset managment and creates mini batches
from PIL import Image

In [None]:
!git clone https://github.com/monumentalconservation/training-set-circle-6.git
import os
os.getcwd()

labels = pd.read_csv(r'training-set-circle-6/image-list.csv')
test_labels = pd.read_csv(r'training-set-circle-6/testing-image-list.csv')

train_path = r'training-set-circle-6/images/'
test_path = r'training-set-circle-6/testing-images/'

Cloning into 'training-set-circle-6'...
remote: Enumerating objects: 68, done.[K
remote: Counting objects: 100% (68/68), done.[K
remote: Compressing objects: 100% (58/58), done.[K
remote: Total 176 (delta 11), reused 66 (delta 10), pack-reused 108[K
Receiving objects: 100% (176/176), 152.16 MiB | 12.30 MiB/s, done.
Resolving deltas: 100% (11/11), done.


In [None]:
def quantify_rainfall(row):
    """Collapse the five one-hot label columns of ``row`` into one class id.

    The columns are checked in a fixed order and the first one equal to 1
    wins: 'none' -> 0, 'some' / 'a lot' -> 1, 'substantial' / 'extensive' -> 2.
    Returns None when no column is set to 1.
    """
    # Ordered (column, class id) pairs; order matters because the first hit
    # is returned and later columns are never read.
    column_to_class = (
        ('none', 0),
        ('some', 1),
        ('a lot', 1),
        ('substantial', 2),
        ('extensive', 2),
    )
    for column, level in column_to_class:
        if row[column] == 1:
            return level
    return None

In [None]:
# Create nice datasets to pass into dataloader

# One-hot label columns that quantify_rainfall reads and that are dropped
# once the single 'class' column has been derived from them.
RAINFALL_COLUMNS = ['none', 'some', 'a lot', 'substantial', 'extensive']


def _with_class_column(frame):
    """Return ``frame`` with a 'class' column replacing the one-hot columns.

    Idempotent: a frame that already has a 'class' column is returned
    unchanged, so re-running this cell does not raise a KeyError on the
    columns that were dropped the first time.
    """
    if 'class' in frame.columns:
        return frame
    classes = frame.apply(quantify_rainfall, axis=1)
    return frame.assign(**{'class': classes}).drop(RAINFALL_COLUMNS, axis=1)


labels = _with_class_column(labels)
test_labels = _with_class_column(test_labels)


Third attempt at classification, following this tutorial: https://www.youtube.com/watch?v=ZoZHd0Zm3RY&ab_channel=AladdinPersson

In [None]:
# Dataloader
class DampDataset(Dataset):
    """Dataset that pairs each annotation row with its '.jpg' image on disk.

    Column 0 of ``dataframe`` supplies the image file stem and column 1 the
    integer class label. ``transform``, when truthy, is applied to the image
    array returned by ``io.imread``.
    """

    def __init__(self, dataframe, root_dir, transform=None):
        self.annotations, self.root_dir, self.transform = dataframe, root_dir, transform

    def __len__(self):
        # One sample per annotation row.
        return len(self.annotations)

    def __getitem__(self, index):
        # Positional (row, column) lookups keep each cell's own dtype.
        file_name = str(self.annotations.iloc[index, 0]) + '.jpg'
        image = io.imread(os.path.join(self.root_dir, file_name))
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))

        if not self.transform:
            return (image, y_label)
        return (self.transform(image), y_label)


In [None]:

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Transforms: io.imread yields an array, so convert to PIL first, force every
# image to 256x256, then scale to a tensor and normalise each of the three
# channels with mean 0.5 / std 0.5.
damp_transform = transforms.Compose(
    [transforms.ToPILImage(),
     transforms.Resize((256, 256)),  # needed as tuple because images in dataset are different
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Hyperparameters
num_epochs = 15
batch_size = 10
learning_rate = 0.0005
num_classes = 3  # quantify_rainfall maps the five label columns onto classes 0, 1 and 2

# Load Data (reuse the directories defined next to the CSV paths instead of
# repeating the literals)
dataset = DampDataset(dataframe=labels, root_dir=train_path, transform=damp_transform)
test_dataset = DampDataset(dataframe=test_labels, root_dir=test_path, transform=damp_transform)

dataset_sizes = {'train': len(dataset), 'val': len(test_dataset)}

In [None]:

# Create the dataloaders
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)
# Evaluation metrics do not depend on sample order, so keep the held-out
# loader unshuffled: the preview batch and per-batch results are then
# reproducible from run to run.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Keyed by phase name, as expected by the training loop.
dataloaders = {'train': train_loader, 'val': test_loader}
print(dataloaders)

{'train': <torch.utils.data.dataloader.DataLoader object at 0x7f47b8ed5e50>, 'val': <torch.utils.data.dataloader.DataLoader object at 0x7f47b8ed5bd0>}


In [None]:
# This is an AlexNet, not a plain CNN - TODO: find out what the difference is
class AlexNet(nn.Module):
    """AlexNet-style image classifier built from plain torch.nn layers.

    Five convolutions (with ReLU and three max-pools) feed an adaptive
    average pool that always yields 256 x 6 x 6 features, followed by a
    three-layer fully connected head producing ``num_classes`` logits.
    """

    def __init__(self, num_classes=5):
        super().__init__()

        # Layers are created in the same order as they are applied.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Fixes the spatial size at 6x6 whatever the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        head = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the class logits for a batch of 3-channel images."""
        pooled = self.avgpool(self.features(x))
        # Flatten everything but the batch dimension for the linear head.
        flat = pooled.view(pooled.size(0), 256 * 6 * 6)
        return self.classifier(flat)


In [None]:
# Instance of the hand-written AlexNet; kept because later evaluation cells
# still reference `cnn`. Moved with .to(device) so the cell also runs without
# a GPU.
cnn = AlexNet()
cnn.to(device)

# Model: ImageNet-pretrained GoogLeNet. Its final layer has 1000 outputs, so
# replace it with a fresh head sized for this task; otherwise the loss is
# computed over 1000 logits and the accuracy stays at zero.
model = torchvision.models.googlenet(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)

# Loss function and optimizer. The optimizer must hold the parameters of the
# network that train_model actually trains (`model`); it previously held
# `cnn`'s parameters, so GoogLeNet's weights were never updated.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth


HBox(children=(FloatProgress(value=0.0, max=52147035.0), HTML(value='')))




In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, loaders=None,
                save_path="/content/drive/MyDrive/PhD/models/model_five/best_model"):
    """Train and validate ``model``, then restore and save its best weights.

    Each epoch runs a 'train' phase (gradients on, optimizer stepped per
    batch, scheduler stepped once at the end of the phase) followed by a
    'val' phase (gradients off). The weights of the epoch with the highest
    validation accuracy are loaded back into ``model`` and written to
    ``save_path`` with ``torch.save``.

    Args:
        model: network to train; batches are moved to the device its
            parameters live on.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer holding ``model``'s parameters.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: number of train/val epochs to run.
        loaders: dict with 'train' and 'val' iterables of (inputs, labels)
            batches; defaults to the module-level ``dataloaders``.
        save_path: file the best state dict is written to; missing parent
            directories are created.

    Returns:
        ``model`` with the best-validation weights loaded.
    """
    since = time.time()

    if loaders is None:
        loaders = dataloaders  # module-level dict built in the dataloader cell

    # Follow the model's own device instead of assuming CUDA is available.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    # Stays None only when no validation phase ran (num_epochs == 0). It was
    # previously unbound when validation accuracy never rose above 0, which
    # crashed the summary below after the whole training run had finished.
    best_epoch = None

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            # Iterate over data.
            for inputs, labels in loaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics (loss is a batch mean, so weight it by batch size)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()
                samples_seen += inputs.size(0)
            if phase == 'train':
                scheduler.step()

            # Average over the samples actually seen; max() avoids dividing
            # by zero when a phase has no batches.
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model; the first validation result always counts
            # as the best so far, even at 0% accuracy
            if phase == 'val' and (best_epoch is None or epoch_acc > best_acc):
                best_acc = epoch_acc
                best_epoch = epoch
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # '{:.4f}' (four decimals) replaces the mistyped '{:4f}' (width four).
    print('Best val Acc: {:.4f}'.format(best_acc))
    print(f'Best epoch: {best_epoch}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    # Create the target directory first so a missing folder cannot discard
    # the result of a finished training run.
    target_dir = os.path.dirname(save_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    torch.save(model.state_dict(), save_path)
    print(f"saved {best_epoch}")
    return model

In [None]:
# Mount Google Drive at /content/drive; train_model saves its checkpoint
# under that path, so this cell has to run before training.
# NOTE(review): `google.colab` is presumably only importable inside Colab,
# which is why this import is not in the top import cell - confirm.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Train `model` for 50 epochs (this overrides the num_epochs value set with the
# other hyperparameters). train_model returns the same model object with its
# best-validation weights loaded.
trained_model = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=50)

Epoch 0/49
----------
train Loss: 7.4244 Acc: 0.0000
val Loss: 7.7987 Acc: 0.0000

Epoch 1/49
----------
train Loss: 7.4440 Acc: 0.0000
val Loss: 7.8637 Acc: 0.0000

Epoch 2/49
----------
train Loss: 7.5109 Acc: 0.0000
val Loss: 7.8151 Acc: 0.0000

Epoch 3/49
----------
train Loss: 7.5202 Acc: 0.0000
val Loss: 7.7609 Acc: 0.0000

Epoch 4/49
----------
train Loss: 7.4961 Acc: 0.0000
val Loss: 7.7694 Acc: 0.0000

Epoch 5/49
----------
train Loss: 7.5330 Acc: 0.0000
val Loss: 7.7561 Acc: 0.0000

Epoch 6/49
----------
train Loss: 7.3918 Acc: 0.0000
val Loss: 7.7625 Acc: 0.0000

Epoch 7/49
----------
train Loss: 7.5223 Acc: 0.0160
val Loss: 7.7535 Acc: 0.0000

Epoch 8/49
----------
train Loss: 7.4873 Acc: 0.0000
val Loss: 7.7422 Acc: 0.0000

Epoch 9/49
----------
train Loss: 7.4477 Acc: 0.0000
val Loss: 7.7380 Acc: 0.0000

Epoch 10/49
----------
train Loss: 7.5761 Acc: 0.0000
val Loss: 7.7868 Acc: 0.0000

Epoch 11/49
----------
train Loss: 7.5701 Acc: 0.0000
val Loss: 7.7624 Acc: 0.0000

Ep

UnboundLocalError: ignored

In [None]:
# from torch.autograd import Variable


# losses = [];
# for epoch in range(num_epochs):
#     for i, (images, labels) in enumerate(train_loader):
#         # images, labels = images.cuda(), labels.cuda() - #UNCOMMENT when training with googlenet!
#         images = images.float()
#         # print(images.shape)
#         # Forward + Backward + Optimize
#         optimizer.zero_grad()
#         outputs = cnn(images)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()
        
#         losses.append(loss.item());
        
#         if (i+1) % 10 == 0:
#             print ('Epoch : %d/%d, Iter : %d/%d,  Loss: %.4f' 
#                    %(epoch+1, num_epochs, i+1, len(dataset)//batch_size, loss.item()))

In [None]:


#Saving model to drive
# model_save_name = 'alexnet-15e-b10.pt'
# path = F"/content/drive/MyDrive/PhD/models/model_four/{model_save_name}" 
# torch.save(model.state_dict(), path)


In [None]:
# LOAD PREVIOUSLY TRAINED MODEL HERE
# path = "/content/drive/MyDrive/PhD/models/model_four/[].pt" 
# model.load_state_dict(torch.load(path))
# model.eval()

In [None]:
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Check accuracy on training to see how good our model is
def check_accuracy(loader, model):
    """Print how many samples from ``loader`` the model classifies correctly.

    The model is evaluated in eval mode with gradients disabled, on whatever
    device its parameters already live on, and is put back into the mode
    (train or eval) it was in before the call.

    Args:
        loader: iterable of (inputs, labels) batches.
        model: network returning one row of class scores per input.

    Prints 'Got <correct> / <total> with accuracy <percent>'; an empty loader
    is reported as 0.00 instead of raising ZeroDivisionError.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    model.eval()

    num_correct = 0
    num_samples = 0
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device)
                y = y.to(device)
                scores = model(x)
                _, predictions = scores.max(1)
                # .item() keeps the counter a plain int, so the summary prints
                # '5' rather than 'tensor(5, ...)'.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode even if a batch raised.
        model.train(was_training)

    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
    print(f'Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}')

# Report accuracy for each split in turn: announce the split, then evaluate it.
for announcement, split_loader in (
    ("Checking accuracy on Training Set", train_loader),
    ("Checking accuracy on Test Set", test_loader),
):
    print(announcement)
    check_accuracy(split_loader, model)

In [None]:
# test-the-model
# NOTE(review): this measures `cnn` (the AlexNet instance), not the GoogLeNet
# `model` that is passed to train_model - confirm which network is intended.
eval_device = next(cnn.parameters()).device  # run wherever cnn already lives
cnn.eval()  # switch off dropout so the measurement is deterministic

correct = 0
total = 0
with torch.no_grad():
    for images, labels in train_loader:
        images = images.to(eval_device)
        labels = labels.to(eval_device)
        outputs = cnn(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

if total:
    print('Train Accuracy of the model: {} %'.format(100 * correct / total))
else:
    # An empty loader would otherwise end in a ZeroDivisionError.
    print('Train Accuracy of the model: n/a (no samples)')

# Save 
# torch.save(model.state_dict(), 'model.ckpt')

From here, these snippets are taken from this colab: https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/17a7c7cb80916fcdf921097825a0f562/cifar10_tutorial.ipynb#scrollTo=TzrMwxulq03d

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# see if this works...

# Display names indexed by class id. quantify_rainfall merges the five label
# columns into three classes (0: none, 1: some / a lot, 2: substantial /
# extensive), so the names follow that mapping; with the five raw column
# names, class 2 was shown as 'a lot'.
classes = ('none', 'some / a lot', 'substantial / extensive')

# Take one batch for display. The builtin next() is used because DataLoader
# iterators no longer provide a .next() method in current PyTorch.
dataiter = iter(test_loader)
images, labels = next(dataiter)

def imshow(img):
    """Display a (channels, height, width) image tensor in a 20x20 figure.

    The tensor is first mapped back from the mean-0.5 / std-0.5 normalised
    range, then reordered to (height, width, channels) for matplotlib.
    """
    pixels = (img / 2 + 0.5).numpy()     # unnormalize
    plt.figure(figsize = [20,20])
    plt.imshow(pixels.transpose(1, 2, 0))
    plt.show()


# print images
imshow(torchvision.utils.make_grid(images, nrow=5))
# Label every image actually present in the batch: the hard-coded range(10)
# raised IndexError for batches shorter than 10 and truncated longer ones.
print('GroundTruth: ', ' '.join('%5s' % classes[int(label)] for label in labels))

In [None]:
# Per-class accuracy on the held-out set.
# NOTE(review): this measures `cnn` (the AlexNet instance), not the GoogLeNet
# `model` that is passed to train_model - confirm which network is intended.
class_correct = [0.0] * len(classes)
class_total = [0.0] * len(classes)

eval_device = next(cnn.parameters()).device  # run wherever cnn already lives
cnn.eval()  # switch off dropout so the measurement is deterministic

with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images = images.to(eval_device)
        labels = labels.to(eval_device)
        outputs = cnn(images)
        _, predicted = torch.max(outputs, 1)
        hits = (predicted == labels)
        # Count every sample of the batch. The previous fixed range(4) looked
        # at only the first four and failed on batches shorter than that.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += hit
            class_total[label] += 1


for i, class_name in enumerate(classes):
    if class_total[i] == 0:
        # No sample of this class in the set: report it instead of dividing by zero.
        print('Accuracy of %5s : n/a (no samples)' % class_name)
        continue
    print('Accuracy of %5s : %2d %%' % (class_name, float(100) * class_correct[i] / class_total[i]))