In [120]:
import sys
import random
import numpy as np
import cv2
from matplotlib import pyplot as plt
from collections import Counter

In [112]:
# Image paths
transect1 = '../Images/Transect 1 Hi-Res.tiff'
transect1_truth = '../Images/Transect 1 Truth data.tif'

# Load images (IMREAD_UNCHANGED: do not let OpenCV convert the stored channels/bit depth)
img1 = cv2.imread(transect1, cv2.IMREAD_UNCHANGED)
img1_truth = cv2.imread(transect1_truth, cv2.IMREAD_UNCHANGED)

# cv2.imread signals a missing or unreadable file by returning None instead of raising,
# so fail here with the offending path rather than later with an opaque TypeError.
if img1 is None:
    raise IOError('Could not read image: ' + transect1)
if img1_truth is None:
    raise IOError('Could not read image: ' + transect1_truth)

# Remap truth data: raw pixel value in the truth file -> class id.
# The remap is applied in place, one entry at a time; this is safe because no class id
# (0-3) is also a raw value that a later entry would match.
if transect1_truth == '../Images/Transect 1 Truth data.tif':
    truth_remap = (
        (16, 0),   # Sand
        (160, 1),  # Branching
        (198, 2),  # Mounding
        (38, 3),   # Rock
    )
    for raw_value, class_id in truth_remap:
        img1_truth[img1_truth == raw_value] = class_id

def reformat(imgset, toremove, labels, num_labels):
    '''Reformat images and labels into input vector style.

    Input:
        imgset: set of images, N_images x nrow x ncol x n_channels
        toremove: index (or indices) of the channel dimension to remove, or None to keep every channel
        labels: set of labels, N_images x N_labels (N_labels > 1 when we want to classify a number of pixels per image)
        num_labels: number of possible labels for the entire dataset (4 for morphology, 2 for coral cover)
    Output:
        dataset: set of vectorized images, N_images x (nrow*ncol) x (n_channels-n_toremove), float32
        labels: set of one-hot encoded labels, N_images x N_labels x num_labels, float32
            (a label value outside 0..num_labels-1 yields an all-zero vector)
    Raises:
        ValueError: if labels is not 2-D or does not have exactly one row per image
    '''
    imgsetcut = imgset
    if toremove is not None:
        imgsetcut = np.delete(imgset, toremove, -1)  # drop the requested entries of the last (channel) axis
    print(imgsetcut.shape)

    # Flatten each image's pixel grid into a single axis, keeping the channel axis
    n_pixels = imgsetcut.shape[1] * imgsetcut.shape[2]
    dataset = imgsetcut.reshape((-1, n_pixels, imgsetcut.shape[3])).astype(np.float32)

    labels = np.asarray(labels)
    # Images and labels are paired by position, so a row-count mismatch means misaligned data;
    # refuse it instead of silently ignoring the surplus label rows.
    if labels.ndim != 2 or labels.shape[0] != dataset.shape[0]:
        raise ValueError('labels must have shape (N_images, N_labels) with N_images = %d, got %s'
                         % (dataset.shape[0], labels.shape))

    # Broadcast each label against [0, 1, ..., num_labels-1] to build the one-hot vectors in one pass
    onehot = (labels[:, :, np.newaxis] == np.arange(num_labels)).astype(np.float32)
    return dataset, onehot

In [197]:
num_labels = 4
image_size = 25 # side length of one sample image
N_samples = 20000 # number of training samples per class (same as N_images)
sample_seed = 0 # fixed seed so every run draws the same training pixels

# Randomly select points
# Crop the truth map by crop_len on every side so that a full image_size x image_size patch
# fits around each remaining pixel. An index (r, c) in the cropped map is then used directly
# as the top-left corner of the patch cut from img1; for odd image_size the labelled pixel
# is the patch centre.
# NOTE(review): this assumes img1 and img1_truth have the same height and width - confirm.
crop_len = image_size // 2
img1_truth_crop = img1_truth[crop_len:img1_truth.shape[0]-crop_len, crop_len:img1_truth.shape[1]-crop_len]

# Dedicated generator: reproducible and independent of the global `random` state
sampler = random.Random(sample_seed)

train_datasets = []
train_labels = []
for k in range(num_labels):
    rows, cols = np.where(img1_truth_crop == k)
    if len(rows) < N_samples:
        raise ValueError('class %d has only %d candidate pixels, fewer than N_samples = %d'
                         % (k, len(rows), N_samples))
    # Sample without replacement among the pixels of class k
    idx = np.asarray(sampler.sample(range(len(rows)), N_samples), dtype=int)
    train_datasets.append([img1[r:r+image_size, c:c+image_size, :] for r, c in zip(rows[idx], cols[idx])])
    train_labels.append(img1_truth_crop[rows[idx], cols[idx]])

train_datasets = np.asarray(train_datasets) # train_datasets is in the format of num_labels x N_samples x nrows x ncols x n_channels
train_labels = np.asarray(train_labels) # train_labels is in the format of num_labels x N_samples

12


In [201]:
# TEST STUFF
# Sanity check: shape of the sampled patches, shape of the labels, then the label row at index 1
print(train_datasets.shape, train_labels.shape, train_labels[1], sep='\n')

(4, 20000, 25, 25, 4)
(4, 20000)
[1 1 1 ..., 1 1 1]


In [113]:
# TEST STUFF
# Smoke test for reformat(): two 50x50 crops of img1 (offset by one pixel), one label each
temp = np.asarray([img1[100:150,100:150,:], img1[101:151,101:151,:]])
templabels = np.asarray([[1],[2]])

print(temp.shape)
print(templabels.shape)
temp2,temp2labels = reformat(temp,3,templabels,4) # remove channel index 3; 4 possible labels

# Tally the truth image once and reuse the result: counting every pixel is expensive,
# so do not rebuild the Counter for each print.
truth_counts = Counter(img1_truth.flatten())
print(truth_counts.keys()) # distinct pixel values present in the truth image
print(truth_counts.values()) # number of pixels carrying each of those values


(2, 50, 50, 4)
(2, 1)
(2, 50, 50, 3)
dict_keys([0, 2, 1, 3])
dict_values([25298837, 7651431, 7360654, 551068])
