<a href="https://colab.research.google.com/github/Allan-Perez/AIMisc/blob/master/terra_Nova.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Terra Nova

## <a name="Content">Contents</a>
* [Setup](#setup)
  * [Environment](#env)
  * [Load Configurations](#assembly)
* [DataSet](#DataSet)
* [Pre-Processing](#pre)
* [Architectures](#Arch)


## <a name="setup">Setup</a>
[Return to Top](#Content)

Import appropriate libraries and configure visualisation

### <a name="env">Environment</a>

In [1]:
# Standard imports
import numpy as np
import cv2
import pandas as pd
import random
import matplotlib.pyplot as plt
import matplotlib as mpl
import sys
import os
from os import path
from queue import *

## ML imports
import skimage.io, scipy.ndimage, scipy.interpolate, scipy.signal
import skimage.morphology, skimage.transform, skimage.feature
%matplotlib inline


mpl.rcParams['figure.figsize'] = (12.0, 6.0)
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

random_seed = 0
use_cuda = not False and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
learning_rate=1e-5

# Allow read/write to Drive
from google.colab import drive
drive.mount('/content/gdrive/', force_remount=True)
root_dir = "/content/gdrive/Shared drives/GU Orbit"
base_dir = "/content/gdrive/Shared drives/GU Orbit"
sys.path.insert(0, os.path.join(base_dir,"scripts"))

from torch_assembly import *

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive/


## <a name="assembly">Load Configurations</a>
[Return to Top](#Content)<br/>
**Options**: Specifies which hyper-parameters may be changed. Maps hyper-parameter name to option (type or list)<br />
**Configuration**: Specific set of hyper-parameter values. Maps hyper-parameter name to value (function or primitive)


In [3]:
# ToDo: Ignore any configurations that have already been trained (check models dir)
# place
q = Queue()
options = Options("terranova",filePath=path.join(base_dir,"options","options_terranova.pickle"))
Config(options=options)
for file_name in os.listdir(path.join(base_dir,"configurations")):
   print(f"Loading Configuration: {file_name}")
   q.put(Config(file_path=path.join(base_dir,"configurations",file_name)))
# q.get()
config = q.get().as_dict() #Drop as_dict()?
print(config)

<class 'dict'> 0
Setting Options for all Config Instances
Loading Configuration: configuration_default.pickle
Loading configuration from configuration_default.pickle
configured True <class 'bool'>
loss torch.nn.modules.loss.BCEWithLogitsLoss <class 'str'>
activation torch.nn.modules.activation.Sigmoid <class 'str'>
optimizer torch.optim.Adam <class 'str'>
no_epochs 20 <class 'int'>
batch_size 5 <class 'int'>
shuffle True <class 'bool'>
architecture torchvision.models.segmentation.fcn_resnet101 <class 'str'>
dropout_probability 0.5 <class 'float'>
learning_rate 1e-05 <class 'float'>
{'configured': (True, None), 'loss': ('torch.nn.modules.loss.BCEWithLogitsLoss', {}), 'activation': ('torch.nn.modules.activation.Sigmoid', {}), 'optimizer': ('torch.optim.Adam', {'lr': 1e-05}), 'no_epochs': (20, None), 'batch_size': (5, None), 'shuffle': (True, None), 'architecture': ('torchvision.models.segmentation.fcn_resnet101', {'lr': 1e-05}), 'dropout_probability': (0.5, None), 'learning_rate': (1e-05

## <a name="DataSet">Dataset</a>
[Return to Top](#Content)

Reference DataSet Configuration
* 18 Unique SceneIDs in DataSet
* No. of Patches is NOT consistent between Scenes
* PatchID + SceneID == UniqueID


In [0]:
# train_patches = pd.read_csv(base_dir + "/data/train/training_patches_38-Cloud.csv",
#                             sep="_LC08_",names=["Patch","SceneID"],header=None,skiprows=[0]).iloc[0:8401]
# train_patches.head()

In [0]:
# print("Number of Unique SceneIDs:\n{}".format(len(train_patches.SceneID.unique())))

# print("\nPatch Count:")
# patch_counts = train_patches.groupby(['SceneID'], as_index=False).count()
# print(patch_counts.head(18))

# print("Sum:{}".format(patch_counts.sum(axis=0)))

In [0]:
class CloudDataset(Dataset):

  def __init__(self, datatype = "train", ids=None,transforms=None):
    
    self.datatype = datatype;

    if datatype == 'test':
        self.data_folder = base_dir+"/data/test"
    else:
        self.data_folder = base_dir+"/data/train"
    self.ids = ids # patchidx + "_" + sceneidx
    self.transforms = transforms 
        
  def __getitem__(self, ID):
    # Given SceneIDX - a string
    # Find all Channel Files (RGB) + NIR (Near-infrared)
    # Combine Channels into 1 Tensor (4,W,H)
    if type(ID) in [int,np.int32,np.int64]: # Allow indexing
      ID = self.ids[ID]
    print(ID,type(ID))
    channels = []
    for color in ["red","green","blue"]:#,"nir"]:
      #print(self.data_folder+"/"+color+"/"+color + "_" + ID+".TIF")
      channels.append(skimage.io.imread(self.data_folder+"/"+color+"/"+color+"_"+ID+".TIF"))
    img = np.stack(channels)
    #img = img.astype(np.int32)
    #img = img.astype(np.int64)
    img = img.astype(np.float32)

    ground_folder = self.data_folder + "/"+ "gt"
    #label = skimage.io.imread(ground_folder+"/"+"gt_"+ID+".TIF")
    label = np.stack([skimage.io.imread(ground_folder+"/"+ ("gt_" if self.datatype == "train" else "edited_corrected_gts")+ID+".TIF")])
    label = label.astype(np.float32)
    #label.astype(np.float32)

    if self.transforms is not None:
        img = self.transforms(img)

    return (img,label)

  def __len__(self):
    return len(os.listdir(self.data_folder+"/red/"))

## <a name="pre">Pre-Processing</a>
[Return to Top](#Content)

In [7]:
for color in ["red","green","blue","nir"]:
  train_ids = os.listdir(base_dir+"/data/train/"+color)
  train_ids = [ID.strip(color+"_")[:-4] for ID in train_ids]
  print("Training Set Size {}: {}".format(color,len(train_ids)))
'''
# it's too big. Just want to try tensorboard. Not necessary rn.
for color in ["red","green","blue","nir"]:
  test_ids = os.listdir(base_dir+"/data/test/"+color)
  test_ids = [ID.strip(color+"_")[:-4] for ID in test_ids]
  print("Testing Set Size {}: {}".format(color,len(test_ids)))
'''

Training Set Size red: 8400
Training Set Size green: 8400
Training Set Size blue: 8400
Training Set Size nir: 8400


'\nfor color in ["red","green","blue","nir"]:\n  test_ids = os.listdir(base_dir+"/data/test/"+color)\n  test_ids = [ID.strip(color+"_")[:-4] for ID in test_ids]\n  print("Testing Set Size {}: {}".format(color,len(test_ids)))\n'

In [10]:
normalize = transforms.Compose([transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize(mean = [0.485, 0.456, 0.406], 
                                            std = [0.229, 0.224, 0.225])                                           
])

trainset = CloudDataset(datatype="train",ids=train_ids,transforms=normalize)

train_loader = torch.utils.data.DataLoader(trainset,
                                           batch_size=config.get("batch_size",(5,None))[0],
                                           shuffle=config.get("shuffle",(True,None))[0])

'''
testset = CloudDataset(datatype="test",ids=test_ids,transforms=normalize)

test_loader = torch.utils.data.DataLoader(testset,
                                          batch_size=config.get("batch_size",(5,None))[0],
                                          shuffle=config.get("shuffle",(True,None))[0])
'''

'\ntestset = CloudDataset(datatype="test",ids=test_ids,transforms=normalize)\n\ntest_loader = torch.utils.data.DataLoader(testset,\n                                          batch_size=config.get("batch_size",(5,None))[0],\n                                          shuffle=config.get("shuffle",(True,None))[0])\n'

## <a name="Arch">Architectures</a>

### Load Architecture from Configuration

In [11]:
config.get("architecture")

('torchvision.models.segmentation.fcn_resnet101', {'lr': 1e-05})

In [12]:
architecture = config.get("architecture",("torchvision.models.segmentation.fcn_resnet101",None))[0]
print(f"Using {architecture} architecture")
try:
  net = eval(f"{architecture}(pretrained=False,num_classes=1)")#models.segmentation.fcn_resnet101(pretrained=False,num_classes=1)
  print(f"Loaded {architecture} with pretrained=False and num_classes=1")
except:
  net = eval(f"{architecture}(pretrained=False")
  print("Loaded architecture does not support num_classes parameter, override the input & output layers")

Using torchvision.models.segmentation.fcn_resnet101 architecture


Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to /root/.cache/torch/checkpoints/resnet101-5d3b4d8f.pth


HBox(children=(IntProgress(value=0, max=178728960), HTML(value='')))


Loaded torchvision.models.segmentation.fcn_resnet101 with pretrained=False and num_classes=1


**Input**: \\
**3-channel RGB Images** of shape (N,3,H,W).

*Where N is the Number of Images, H and W are $>=224px$*

Normalization:
* $\mu$ = \[0.485,0.456,0.406\]
* $\sigma$ = \[0.229,0.224,0.225\]

**Output**: \\
OrderedDict with 2 Tensors of W and H as Input Tensor but with 2 classes (Cloud or No Cloud)


## Load Previously Saved Model


## Training


In [0]:
def decode_segmap(image, classes=1):
   
  label_colors = np.array([(255, 255, 255)])
 
  r = np.zeros_like(image).astype(np.uint8)
  g = np.zeros_like(image).astype(np.uint8)
  b = np.zeros_like(image).astype(np.uint8)
   
  for l in range(0, classes):
    idx = image == l
    r[idx] = label_colors[l, 0]
    g[idx] = label_colors[l, 1]
    b[idx] = label_colors[l, 2]
  try:
    rgb = np.stack([r, g, b], axis=2)
  except Exception as e:
    print("Axis Error r:{}g:{}b:{}".format(r.shape,g.shape,b.shape))
    return np.full((224,224,3),0)
  finally:
    passtorch.nn.MSELoss
  return rgb

def test(model, device, test_loader, loss_func=F.binary_cross_entropy_with_logits):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)['out']
            # ToDo: Substitute loss
            test_loss = loss_func(output, target, reduction='sum').item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            pixel_map = torch.argmax(output.squeeze(), dim=0).detach().cpu().numpy()
            rgb_img = decode_segmap(pixel_map,classes=1)
            plt.imshow(rgb_img); plt.show()

    
    # test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}\n'.format(
        test_loss))

In [0]:
def train(model, device, train_loader, epoch, criterion, optimizer=torch.optim.Adam):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)['out']
        #print(output.dtype,target.dtype,output.shape,target.shape)
        loss = criterion(output,target)
        #loss = loss_func(output,target)
        #loss = F.poisson_nll_loss(output, target)
        #loss = F.mse_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    return data,output
        
device = "cuda"     
model = net.to(device)
optimizer, kwargs = config.get('optimizer',('torch.optim.Adam',{}))        
optimizer = eval(f"{optimizer}(model.parameters(),**{kwargs})")
loss_func, kwargs = config.get('loss',('F.binary_cross_entropy_with_logits',{}))   
print(f"Kwargs:{kwargs}")   
criterion = eval(f"{loss_func}(**{kwargs})")
print("Loss Function: {}, Optimizer: {}".format(config.get('loss','F.binary_cross_entropy_with_logits'),config.get("optimizer","torch.optim.Adam")))

for epoch in range(1, config.get('no_epochs',(20,None))[0]):
  train(model, device, train_loader, epoch, criterion, optimizer=optimizer)

In [15]:
## ATTEMPT TO IMPLEMENT TENSORBOARD
from torch.utils.tensorboard import SummaryWriter

def train(model, device, train_loader, epoch, criterion, optimizer=torch.optim.Adam, local_writer=None):
    model.train()
      
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)['out']
        #print(output.dtype,target.dtype,output.shape,target.shape)
        loss = criterion(output,target)
        #loss = loss_func(output,target)
        #loss = F.poisson_nll_loss(output, target)
        #loss = F.mse_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            local_writer.add_scalar('Loss/train', loss.item(), epoch*batch_idx)
    return data,output

writer = SummaryWriter()
device = "cuda"
model = net.to(device)
optimizer, kwargs = config.get('optimizer',('torch.optim.Adam',{}))        
optimizer = eval(f"{optimizer}(model.parameters(),**{kwargs})")
loss_func, kwargs = config.get('loss',('F.binary_cross_entropy_with_logits',{}))   
print(f"Kwargs:{kwargs}")   
criterion = eval(f"{loss_func}(**{kwargs})")
print("Loss Function: {}, Optimizer: {}".format(config.get('loss','F.binary_cross_entropy_with_logits'),config.get("optimizer","torch.optim.Adam")))

for epoch in range(1, config.get('no_epochs',(2,None))[0]):
  train(model, device, train_loader, epoch, criterion, optimizer=optimizer, local_writer=writer)

writer.close()

Kwargs:{}
Loss Function: ('torch.nn.modules.loss.BCEWithLogitsLoss', {}), Optimizer: ('torch.optim.Adam', {'lr': 1e-05})
patch_34_2_by_12_LC08_L1TP_064017_20160420_20170223_01_T1 <class 'str'>


TypeError: ignored

In [0]:
tensorboard --logdir=runs

In [0]:
config.Options

In [0]:
config.get("batch_size")

(5, None)

# Do not run the following code


In [0]:
# this is for the loss evaluation
import matplotlib.pyplot as plt
import numpy as np

def train(model, device, train_loader, optimizer, epoch):
    model.train()
    max_loss=float("-inf")
    min_loss=float("inf")
    total=0
    n=0
    for batch_idx, (data, target) in enumerate(train_loader):
        n+=1
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)['out']
        #print(output.dtype,target.dtype,output.shape,target.shape)
        loss = F.binary_cross_entropy_with_logits(output,target)
        if max_loss < loss:
            max_loss = loss
        if min_loss > loss:
            min_loss = loss
        total += loss
        n += 1
        #loss = F.poisson_nll_loss(output, target)
        #loss = F.mse_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    average_loss = total/n
    loss_eval = (average_loss, max_loss, min_loss)
    return data,output,loss_eval


            
#device = "cpu"     
model = resnet.to(device)        
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  
#train(model, device, train_loader, optimizer, 24)

# 1. Read loss_eval of each element
# 2. Extract the average, max, min
# 3. Plot in Graph

n_epochs = 7
x = np.array([x for x in range(1,n_epochs)])
losses_average = np.zeros(n_epochs)
losses_max = np.zeros(n_epochs)
losses_min = np.zeros(n_epochs)
for epoch in range(1, 7):
    out = train(model, device, train_loader, optimizer, epoch)
    loss_eval = out[2]
    losses_average[epoch] = loss_eval[0]
    losses_max[epoch] = loss_eval[1]
    losses_min[epoch] = loss_eval[2]

def graph_the_losses():
    plt.plot(x, losses_min, label = "minimum")
    plt.plot(x, losses_average, label = "average")
    plt.plot(x, losses_max, label = "maximum")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legeng()
    plt.show()

graph_the_losses()

In [0]:
test(model,device,test_loader)

In [0]:
torch.save(model.state_dict(),base_dir+"/models/model7.pth")

In [0]:
print(max_loss)

# Archive

**Test** with Pre-Trained Model (unrelated to cloud segmentation)

Load Image 

Resize Image to be 256x256, then Centre Crop to get 224x224. Finally normalize the Tensor values.

Check size of output

Extract 2D image where each pixel corresponds to a different class.

Map Class to Colour in 2D image

View Final Segmented Output