## Data Preparation

Mount Google Drive to access our data in Colab.

In [None]:
#import colab package and mount you accounts associated google drive
from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/MyDrive/Data

Set up the python environment

Install and load all the necessary packages and libraries

In [None]:
# Install the third-party packages that the import cell below relies on.
# NOTE(review): no versions are pinned, so a fresh runtime may pull newer releases — consider pinning.
%pip install opencv-python tifffile scikit-image rasterio scipy imagecodecs

In [None]:
import cv2
import os
import tifffile
import numpy as np
import matplotlib.pyplot as plt
import glob
from skimage.transform import resize
import rasterio
from rasterio.plot import show
import scipy
import imagecodecs

Load the scenes and masks into colab




In [None]:
# Gather the paths of all training scene TIFFs and sort them so the order is stable between runs.
top_train_list = np.sort(glob.glob('./scenes/*.tif'))
print(top_train_list)

In [None]:
# Gather the validation scene paths (TIFFs directly inside ./val), sorted by path.
val_scene_pattern = './val/*.tif'
top_test_list = np.sort(glob.glob(val_scene_pattern))
print(top_test_list)

Let's see what our data looks like.

In [None]:
# Preview band 1 of one training scene in grayscale.
# Opening the dataset in a `with` block closes the file as soon as the plot has
# been drawn, instead of leaving the handle open for the rest of the session.
with rasterio.open('./scenes/T19XEG_20210926T180019_B01.tif') as src:
    # `show` returns the matplotlib Axes it drew on; the name `img0` is kept from the original cell.
    img0 = show(src.read(1), transform=src.transform, cmap='gray')

Reshaping parameters and directories

In [None]:
# Target geometry for every resized scene: imgResize x imgResize pixels with `channels` bands.
imgResize = 512
channels = 3

# Output folders for the processed training and validation scenes.
save_dir_train = './train_images/'
save_dir_test = './test_images/'

# Make sure both folders exist; a folder that is already there is left untouched.
for out_dir in (save_dir_train, save_dir_test):
    os.makedirs(out_dir, exist_ok=True)


Save the reshaped images to the output directories

In [None]:
# Resize every training scene, normalise it, and write it to save_dir_train.
for i, scene_path in enumerate(top_train_list):
    img0 = tifffile.imread(scene_path)  # Read the image
    img_reshaped = resize(img0, (imgResize, imgResize, channels))  # Resize it

    # Per-image normalisation: subtract the image mean, divide by half the
    # standard deviation, then clip the result to [0, 1].
    half_std = 0.5 * img_reshaped.std()
    if half_std == 0:
        # A constant image has zero spread, so the division would be 0/0 and
        # fill the output with NaN. Every pixel equals the mean, so store zeros.
        img_norm = np.zeros_like(img_reshaped)
    else:
        img_norm = np.clip((img_reshaped - img_reshaped.mean()) / half_std, 0, 1)

    # Save the processed image as image_<index>.tif
    save_path = os.path.join(save_dir_train, f'image_{i}.tif')
    tifffile.imwrite(save_path, img_norm)

In [None]:
# Resize every validation scene, normalise it, and write it to save_dir_test.
for i, scene_path in enumerate(top_test_list):
    img0 = tifffile.imread(scene_path)  # Read the image
    img_reshaped = resize(img0, (imgResize, imgResize, channels))  # Resize it

    # Per-image normalisation: subtract the image mean, divide by half the
    # standard deviation, then clip the result to [0, 1].
    half_std = 0.5 * img_reshaped.std()
    if half_std == 0:
        # A constant image has zero spread, so the division would be 0/0 and
        # fill the output with NaN. Every pixel equals the mean, so store zeros.
        img_norm = np.zeros_like(img_reshaped)
    else:
        img_norm = np.clip((img_reshaped - img_reshaped.mean()) / half_std, 0, 1)

    # Save the processed image as image_<index>.tif
    save_path = os.path.join(save_dir_test, f'image_{i}.tif')
    tifffile.imwrite(save_path, img_norm)

Let us process the labels

In [None]:
# Paths of the training label masks, sorted by path.
train_mask_pattern = './masks/*.tif'
label_train_list = np.sort(glob.glob(train_mask_pattern))
print(label_train_list)

In [None]:
# Paths of the validation label masks (stored under ./val/masks), sorted by path.
label_test_list = np.sort(glob.glob('./val/masks/*.tif'))
print(label_test_list)

In [None]:
# Preview the second training mask (index 1) using a blue colour map.
sample_mask_path = label_train_list[1]
img = tifffile.imread(sample_mask_path)
plt.imshow(img, cmap='Blues')



Let's start reshaping our labels.

In [None]:
# Optional: imagecodecs is already part of the package-install cell near the top of the notebook.
# Uncomment the line below only if it is missing from your environment.
# ! pip install imagecodecs

In [None]:
# Output folders for the individual training and validation label images.
label_save_dir_train = './train_label_images/'
label_save_dir_test = './test_label_images/'

# Make sure both folders exist; a folder that is already there is left untouched.
for label_dir in (label_save_dir_train, label_save_dir_test):
    os.makedirs(label_dir, exist_ok=True)

Reshape the Labels

In [None]:
import tifffile

# One (imgResize, imgResize, 1) slot per training mask, initialised to zero.
label_train_total = np.zeros((len(label_train_list), imgResize, imgResize, 1))
for idx, mask_path in enumerate(label_train_list):
    mask = tifffile.imread(mask_path)
    # As the original comment notes, resizing is not needed for this data; the call
    # is used to give each mask the trailing channel axis of the array above.
    # NOTE(review): resize runs with its default interpolation settings; for a mask that is
    # not already imgResize x imgResize this may blend values along class borders — confirm.
    label_train_total[idx] = resize(mask, (imgResize, imgResize, 1))

In [None]:
# One (imgResize, imgResize, 1) slot per validation mask, initialised to zero.
n_test_labels = len(label_test_list)
label_test_total = np.zeros((n_test_labels, imgResize, imgResize, 1))
for idx, mask_path in enumerate(label_test_list):
    # Read the mask and bring it to the (imgResize, imgResize, 1) layout of the array above.
    # NOTE(review): resize runs with its default interpolation settings; for a mask that is
    # not already imgResize x imgResize this may blend values along class borders — confirm.
    label_test_total[idx] = resize(tifffile.imread(mask_path), (imgResize, imgResize, 1))

TASK 3: What are the label values and dimensions before and after reshaping? And why do we need to one-hot-encode our images?

Let us encode our Label images

In [None]:
# One-hot encode the training labels into two channels.
# Whole-array comparisons give the same result as visiting every pixel in nested
# Python loops, without executing len(label_train_list) * imgResize * imgResize iterations.
onehot_label_train_total = np.zeros((len(label_train_list), imgResize, imgResize, 2), dtype=float)
train_label_values = label_train_total[..., 0]
# channel 0 -> ocean: label value is exactly 0
onehot_label_train_total[..., 0] = train_label_values == 0.
# channel 1 -> ice sheet: any positive label value
onehot_label_train_total[..., 1] = train_label_values > 0
# Pixels matching neither test (negative or NaN) keep 0 in both channels.

In [None]:
# One-hot encode the validation labels into two channels.
# Whole-array comparisons give the same result as visiting every pixel in nested
# Python loops, without executing len(label_test_list) * imgResize * imgResize iterations.
onehot_label_test_total = np.zeros((len(label_test_list), imgResize, imgResize, 2), dtype=float)
test_label_values = label_test_total[..., 0]
# channel 0 -> ocean: label value is exactly 0
onehot_label_test_total[..., 0] = test_label_values == 0.
# channel 1 -> ice sheet: any positive label value
onehot_label_test_total[..., 1] = test_label_values > 0
# Pixels matching neither test (negative or NaN) keep 0 in both channels.

Let's check what our one-hot-encoded labels look like

In [None]:
# Show both one-hot channels of the third validation label (index 2) side by side.
fig, axes = plt.subplots(1, 2, figsize=(10,10))
for channel, ax in enumerate(axes):
    ax.imshow(onehot_label_test_total[2,:,:,channel], cmap='gray')
# Print the distinct values stored in the training one-hot array.
print(np.unique(onehot_label_train_total))

Let's save our Labels

In [None]:
# Write both one-hot label arrays to .npy files in the current working directory.
for npy_path, onehot_labels in (
    ('./label_train.npy', onehot_label_train_total),
    ('./label_test.npy', onehot_label_test_total),
):
    np.save(npy_path, onehot_labels)