# Resnet

The labeled data should be stored according to a specified folder structure as follows:

<img src="assets/FolderStructure.jpg" width="500"/>

We will experiment with training against this dataset using resnet18, resnet34, resnet50, and so forth.

The experiments will cover both `pretrained=True` and `pretrained=False`,
with various hyperparameters.

These models will be trained against data collected from central New Mexico. The waypoint information is NOT provided. These models will achieve accuracies above 90% and do a great job of mapping the central New Mexico Jurassic Morrison formation. 

The real challenge is to SCORE similar geological depositional environments in other parts of the US, for example in Utah.



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision
from torchvision import *
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import time
import copy
import os
import matplotlib.pyplot as plt
import logging

# ---------------------------------------------------------------------------
# Experiment configuration, preprocessing pipeline, datasets and data loaders.
# Everything below is a module-level global consumed by the later cells.
# ---------------------------------------------------------------------------
classes = 3  # number of output classes of the classifier head
lr = 3e-4  # optimizer learning rate
batch_size = 64  # mini-batch size for both data loaders
modelName = "resnet34"  # selects which torchvision ResNet variant is built
# Log file name encodes the model name, batch size and learning rate.
logfn = f"logfile{modelName}_Bat{batch_size:03d}_LR{lr:5.0g}"

# Only configure file logging when this code runs as the main module;
# messages are appended ("a+") to the file named above.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, filename= logfn, filemode="a+",
                        format="%(asctime)-15s %(levelname)-8s %(message)s")

n_epochs = 20  # number of passes over the training set

# SGD
momentum = .9  # only used if the SGD optimizer is selected

# Adam
betas = (0.9, 0.999)   # Adam betas: coefficients for the running averages of the gradient and its square
weight_decay = 1e-4   # L2 regularization strength passed to Adam

padding = 47  # pixels of padding added on every side of each image

# NOTE(review): this rebinds the name `transforms` (presumably the
# torchvision.transforms module brought in by the star import) to a Compose
# instance. Re-running this statement without re-importing would then fail,
# because a Compose object has no `.Compose`/`.Pad` attributes.
transforms = transforms.Compose(
[
    transforms.Pad(padding),
    # Random rotation over the full circle plus +/-10% random scaling.
    transforms.RandomAffine(degrees=360,scale=(.9, 1.1)),
    transforms.ToTensor(),
    # Per-channel mean/std; these are the values commonly used for
    # ImageNet-pretrained torchvision models.
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# NOTE(review): the validation set uses the same *random* augmentation as the
# training set, so validation loss/accuracy vary from run to run - confirm
# this is intended.
train_dataset = datasets.ImageFolder(root= "data/ThreeClassManualRemove0s/train/", transform=transforms)
test_dataset = datasets.ImageFolder(root= "data/ThreeClassManualRemove0s/val/", transform=transforms)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def imshow(inp, title=None):
    """Display a normalized, channels-first image tensor with matplotlib.

    The per-channel normalization (mean/std below) is undone and the result
    is clipped to [0, 1] before plotting.

    Args:
        inp: Image tensor with three axes laid out as (channels, height,
            width) and three channels. May live on any device.
        title: Optional title drawn above the image.
    """
    # Always detach and copy to host memory. The original guarded `.cpu()`
    # with `if device`, but a torch.device object is always truthy, so the
    # guard never did anything; `.cpu()` is a no-op for CPU tensors and
    # `.detach()` makes `.numpy()` safe for tensors that require grad.
    img = inp.detach().cpu().numpy().transpose((1, 2, 0))

    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    # Invert Normalize(mean, std), then clamp to the displayable range.
    img = np.clip(std * img + mean, 0, 1)

    plt.imshow(img)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)
    
# Build the backbone selected by `modelName`. The branches are mutually
# exclusive, and an unknown name now fails loudly here instead of leaving
# `net` undefined (or silently reusing a stale `net` from an earlier run).
if modelName == 'resnet18':
    net = models.resnet18(pretrained=True)
elif modelName == 'resnet34':
    net = models.resnet34(pretrained=False)
elif modelName == 'resnet50':
    net = models.resnet50(pretrained=False)
else:
    raise ValueError(
        f"Unsupported modelName {modelName!r}; "
        "expected 'resnet18', 'resnet34' or 'resnet50'"
    )

In [None]:
def accuracy(out, labels):
    """Return the number of correct top-1 predictions in a batch.

    Despite the name this is a count, not a ratio: the caller must divide by
    the batch size to obtain an accuracy.

    Args:
        out: Model outputs; the predicted class is the argmax over dim 1.
        labels: Ground-truth class indices, one per row of `out`.

    Returns:
        Python int with the number of rows where prediction equals label.
    """
    _, pred = torch.max(out, dim=1)
    return torch.sum(pred == labels).item()


# Replace the classification head so the network emits `classes` logits.
# This has to happen BEFORE the optimizer is created: the optimizer captures
# the parameter tensors that exist at construction time, so a head swapped in
# afterwards would never be updated.
num_ftrs = net.fc.in_features
# Alternative head with dropout, kept for experimentation:
# net.fc = nn.Sequential(
#     nn.Dropout(0.25),
#     nn.Linear(num_ftrs, classes)
# )
net.fc = nn.Linear(num_ftrs, classes)
# pick up where last one died - code below
#net.load_state_dict(torch.load('resnet-GOLD94Perc_ThreeClass.pt'))
#net.load_state_dict(torch.load(f'{modelName}.pt'))

# The training loop moves every batch to `device`, so the model must live
# there as well; do it before building the optimizer.
net = net.to(device)

criterion = nn.CrossEntropyLoss()

#optimizer = optim.SGD(net.parameters(), lr= lr, momentum= momentum)
optimizer = optim.Adam(net.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)

# Record the run configuration in the log file.
logging.info("model %s", modelName)
logging.info("optim.Adam")
logging.info("batch_size = %s", batch_size)
logging.info("lr = %s", lr)
logging.info("betas = %s", betas)
logging.info("weight_decay = %s", weight_decay)
logging.info("image padding = %s", padding)

# Last expression of the cell: shows the new head in a notebook.
net.fc

In [None]:
import time

# ---------------------------------------------------------------------------
# Train for `n_epochs`, validating after every epoch and checkpointing the
# model whenever the validation loss improves. The per-epoch histories
# (train_loss, train_acc, val_loss, val_acc) are kept for plotting.
# ---------------------------------------------------------------------------
print_every = 10  # not used by the loop below; progress cadence is `batchPrint`
valid_loss_min = np.inf  # `np.Inf` was removed in NumPy 2.0
val_loss = []
val_acc = []
train_loss = []
train_acc = []
total_step = len(train_dataloader)
EpochStart = time.time()
# Print progress about five times per epoch. Clamp to 1 so that a small
# dataset cannot produce 0 and a modulo-by-zero in the loop.
batchPrint = max(1, int(len(train_dataset) / batch_size / 5))
startEpoch = time.time()
# Checkpoints are written below `models/`; make sure the folder exists.
os.makedirs('models', exist_ok=True)

for epoch in range(1, n_epochs + 1):
    lastEpoch = time.time()
    # Set training mode explicitly at the start of every epoch so a previous
    # evaluation/scoring call cannot leave the network in eval mode.
    net.train()
    running_loss = 0.0
    correct = 0
    total = 0
    print(f'Model: {modelName}\t Batch size: {batch_size}\tEpoch: {epoch}\tLearning rate: {lr}\n')
    logging.info(f'Model: {modelName}\t Batch size: {batch_size}\tEpoch: {epoch}\tLearning rate: {lr}\n')
    logging.info('-' * 10)
    lastStep = time.time()

    for batch_idx, (data_, target_) in enumerate(train_dataloader):
        data_, target_ = data_.to(device), target_.to(device)
        optimizer.zero_grad()
        outputs = net(data_)
        loss = criterion(outputs, target_)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred == target_).item()
        total += target_.size(0)
        if batch_idx % batchPrint == 0:
            print('Epoch [{}/{}]\tStep [{}/{}]\tLoss: {:.4f}\tStep Time {:4.1f}'
                  .format(epoch, n_epochs, batch_idx, total_step, loss.item(), time.time() - lastStep))
            lastStep = time.time()

    epoch_train_acc = 100 * correct / total
    epoch_train_loss = running_loss / total_step
    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    # Report THIS epoch's loss; the previous code printed the mean over all
    # epochs so far, which hides the actual per-epoch trend.
    temp = f'train-loss: {epoch_train_loss:.4f}, train-acc: {epoch_train_acc:.4f}'
    print(temp)
    logging.info(temp)

    # ---- validation pass (no gradients, eval mode) ----
    batch_loss = 0
    total_t = 0
    correct_t = 0
    net.eval()
    with torch.no_grad():
        for data_t, target_t in test_dataloader:
            data_t, target_t = data_t.to(device), target_t.to(device)
            outputs_t = net(data_t)
            loss_t = criterion(outputs_t, target_t)
            batch_loss += loss_t.item()
            _, pred_t = torch.max(outputs_t, dim=1)
            correct_t += torch.sum(pred_t == target_t).item()
            total_t += target_t.size(0)

    epoch_val_acc = 100 * correct_t / total_t
    epoch_val_loss = batch_loss / len(test_dataloader)
    val_acc.append(epoch_val_acc)
    val_loss.append(epoch_val_loss)
    # Improvement is judged on the summed validation loss; the number of
    # validation batches is the same every epoch, so this is equivalent to
    # comparing the mean.
    network_learned = batch_loss < valid_loss_min
    temp = f'validation loss: {epoch_val_loss:.4f}, validation acc: {epoch_val_acc:.4f}\n'
    print(temp)
    logging.info(temp)
    if network_learned:
        valid_loss_min = batch_loss
        # Save both the weights only and the full pickled module.
        torch.save(net.state_dict(), f'models/{modelName}.pt')
        torch.save(net, f'models/{modelName}.full.pt')
        print('Improvement-Detected, save-model')
        logging.info('Improvement-Detected, save-model')

    print(f"Epoch time: {time.time() - lastEpoch:5.1f}")

# Leave the network in training mode, as the original loop did.
net.train()

In [None]:
# dataiter = iter(test_dataloader)
# images, labels = dataiter.next()
# images = images.numpy() 

# fig = plt.figure(figsize=(25,4))
# for idx in np.arange(10):
#     ax = fig.add_subplot(2, 10/2, idx+1, xticks=[], yticks=[])
#     plt.imshow(np.transpose(images[idx], (1,2,0)).astype('uint8'))

In [None]:
# Plot the per-epoch training and validation accuracy histories on one axes.
fig = plt.figure(figsize=(20, 10))
_acc_axes = fig.gca()
_acc_axes.set_title("Train-Validation Accuracy")
_acc_axes.plot(train_acc, label='train')
_acc_axes.plot(val_acc, label='validation')
_acc_axes.set_xlabel('num_epochs', fontsize=12)
_acc_axes.set_ylabel('accuracy', fontsize=12)
_acc_axes.legend(loc='best')

In [None]:
# for Violence Class
from torchvision import datasets, models, transforms

def scoreSingleImage(ImagePath, model, dataset_classes):
    """Classify a single image file.

    The image is preprocessed with the module-level ``data_transforms['val']``
    pipeline and scored as a batch of one.

    Args:
        ImagePath: Path of the image file to score.
        model: Classifier module; it is switched to eval mode by this call.
        dataset_classes: Sequence of class names indexed by the position of
            the corresponding model output.

    Returns:
        Two-element list ``[class_name, confidence]`` where ``confidence`` is
        the softmax probability (a Python float) of the predicted class.
    """
    from PIL import Image
    import torch.nn.functional as F

    model.eval()

    # Score on whatever device the model currently lives on.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    with Image.open(ImagePath) as img:
        # Force three channels BEFORE the transform: the pipeline normalizes
        # with three means, so an RGBA or grayscale image would fail inside
        # Normalize (slicing channels afterwards is too late).
        rgb = img.convert('RGB')

    x_test = data_transforms['val'](rgb).unsqueeze(0).to(model_device)  # add batch dimension

    with torch.no_grad():
        output = model(x_test)

    # Softmax over the class dimension. The original used dim=0, i.e. the
    # batch dimension of size one, which always produced a confidence of 1.0.
    probabilities = F.softmax(output, dim=1)[0]
    best = int(torch.argmax(probabilities).item())

    return [dataset_classes[best], float(probabilities[best])]

def calc_metrics(tp_rowcol, cm):
    """Compute one-vs-rest classification metrics from a confusion matrix.

    The class at index ``tp_rowcol`` is treated as the positive class and all
    other classes are pooled as negative.

    Args:
        tp_rowcol: Row/column index of the positive class.
        cm: Square confusion matrix (array-like). Rows are taken as true
            classes and columns as predicted classes, which is the layout
            produced by sklearn's ``confusion_matrix``.

    Returns:
        Tuple ``(accuracy, precision, sensitivity_recall, specificity, f1)``.
        A metric whose denominator is zero is returned as ``nan``.
    """
    def _ratio(numerator, denominator):
        # Undefined metrics become nan instead of raising or warning.
        return numerator / denominator if denominator else float('nan')

    # Rotate rows and columns so the positive class sits at [0, 0]; the input
    # array itself is not modified.
    rolled = np.roll(np.asarray(cm), (-tp_rowcol, -tp_rowcol), axis=(0, 1))

    # Use np.sum explicitly: the builtin `sum` has no `axis` argument and
    # would not reduce the 2-D true-negative block to a scalar.
    tp = float(rolled[0, 0])
    fn = float(np.sum(rolled[0, 1:]))    # positive rows predicted as another class
    fp = float(np.sum(rolled[1:, 0]))    # other classes predicted as positive
    tn = float(np.sum(rolled[1:, 1:]))   # everything not involving the positive class

    sensitivity_recall = _ratio(tp, tp + fn)
    specificity = _ratio(tn, tn + fp)
    precision = _ratio(tp, tp + fp)
    accuracy = _ratio(tp + tn, tp + fp + fn + tn)
    f1 = _ratio(2.0 * precision * sensitivity_recall, precision + sensitivity_recall)
    return (accuracy, precision, sensitivity_recall, specificity, f1)
def print_metrics(accuracy, precision, sensitivity_recall, specificity, f1):
    """Print the five classification metrics to stdout, one per line.

    The output order is accuracy, recall, specificity, precision, F1.
    """
    report = (
        ('accuracy: ', accuracy),
        ('sensitivity_recall: ', sensitivity_recall),
        ('specificity: ', specificity),
        ('precision: ', precision),
        ('f1: ', f1),
    )
    for caption, value in report:
        print(caption, value)
def metricsAsDataframe(accuracy, precision, sensitivity_recall, specificity, f1):
    """Return the five metrics as a pandas DataFrame.

    Args:
        accuracy, precision, sensitivity_recall, specificity, f1: Metric
            values to tabulate.

    Returns:
        DataFrame with columns ``metric``, ``Value`` and ``Description``
        (the formula of each metric), one row per metric.
    """
    # Imported locally: `pd` was referenced here without pandas ever being
    # imported, which raised NameError on the first call.
    import pandas as pd

    rows = [
        ('accuracy', accuracy, '(tp+tn)/(tp+fp+fn+tn)'),
        ('precision', precision, 'tp/(tp+fp)'),
        ('sensitivity_recall', sensitivity_recall, 'tp  / (tp + fn)'),
        ('specificity', specificity, 'tn / (tn + fp)'),
        ('F1', f1, '2*precision*recall/(precision+recall)'),
    ]
    return pd.DataFrame(rows, columns=['metric', 'Value', 'Description'])

# Scoring-time configuration: images are resized to a fixed square, converted
# to tensors and normalized per channel. The 'train' and 'val' entries use
# the same steps, so each is built from one recipe (one Compose per split).
input_size = 224
data_dir = "data/ThreeClassManualRemove0s/"

batch_size = 64
data_transforms = {
    split: transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    for split in ('train', 'val')
}

In [None]:
# Smoke test: score two known images and print the predicted class of each.
my_classes = ['0', '1', '2']
for filename in (
    'data/ThreeClassBalanced5000/train/2/Batwing07.png',
    'data/ThreeClassBalanced5000/val/0/HFNoBone029.png',
):
    pred = scoreSingleImage(filename, net, my_classes)
    print(filename, pred[0])

# Score val folder to print metrics

In [None]:
import json
import glob
import os
from torchvision import datasets, models, transforms
# Score every PNG in the validation folders and collect predicted vs. true
# labels (class_pred / class_true) for the confusion matrix.
my_classes = ['0', '1', '2']
data_dir = "data/ThreeClassManualRemove0s/"
files = []
class_pred = []
class_true = []
for fl in my_classes:
    path = data_dir + 'val/' + fl + '/'
    for filename in glob.glob(os.path.join(path, '*.png')):
        files.append(filename)
        try:
            pred = scoreSingleImage(filename, net, my_classes)
        except Exception as err:
            # Report the real cause instead of assuming a channel problem,
            # and let KeyboardInterrupt/SystemExit propagate.
            print("Could not score file", filename, "-", repr(err))
            continue
        class_pred.append(pred[0])
        # The true label is the class folder being iterated; no need to parse
        # it back out of the path with a hard-coded separator.
        class_true.append(fl)
        if class_pred[-1] != class_true[-1]:
            print("pred: ", class_pred[-1], "  true: ", class_true[-1], filename)

In [None]:
%matplotlib inline
import matplotlib as plt
from matplotlib import *
import sys
from pylab import *
from sklearn.metrics import confusion_matrix

# Build and print the confusion matrix of the validation predictions, with
# rows/columns ordered as in `my_classes`.
y_true, y_pred = class_true, class_pred

# Distinct labels that actually occur in the ground truth.
myset = set(y_true)
labels = list(myset)

cm = confusion_matrix(y_true, y_pred, labels=my_classes)
cmd = cm.copy()  # independent copy of the matrix
print(cm)