In [1]:
#Data-preparation notebook: scaling, binarization, and augmentation. A code snippet for each of these tasks is included below.

from google.colab import drive
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import glob
import cv2 as cv
import os
#mount Google Drive at /content/drive/ -- every data path in the cells below starts with "/content/drive/My Drive/"
drive.mount('/content/drive/')
from scipy.ndimage import gaussian_filter
from osgeo import gdal

Mounted at /content/drive/


Method for Augmentation

In [2]:
def data_augmentation(X_train,Y_train):
    """Augment image/label batches eightfold with mirrors and rotations, then shuffle.

    Every sample is emitted in eight orientations: the original and its mirror
    image (flip along axis 1), each rotated by 0, 90, 180 and 270 degrees in
    the plane of axes 1 and 2. X and Y receive identical transforms and are
    shuffled together, so sample i of the returned X still belongs to sample i
    of the returned Y.

    Parameters
    ----------
    X_train : 4-D array, (samples, axis 1, axis 2, channels)
    Y_train : 4-D array with the same first three dimensions as X_train
        Axes 1 and 2 must have equal length: the 90/270-degree rotations swap
        them, and the rotated copies are concatenated with the unrotated ones.

    Returns
    -------
    X, Y : slices of the tensor returned by ``tf.random.shuffle``, each holding
        8x the input samples. X keeps all channels of X_train and Y keeps all
        channels of Y_train. Because both are joined into one array for the
        shuffle, they come back in their common dtype.
    """
    x_variants = []
    y_variants = []
    #original first, then the mirrored copy; each in all four 90-degree rotations
    for x_base, y_base in ((X_train,Y_train),(np.flip(X_train,axis=1),np.flip(Y_train,axis=1))):
        for quarter_turns in range(4):
            x_variants.append(np.rot90(x_base,k=quarter_turns,axes=(1,2)))
            y_variants.append(np.rot90(y_base,k=quarter_turns,axes=(1,2)))
    X = np.concatenate(x_variants)
    Y = np.concatenate(y_variants)
    #shuffle X and Y jointly by stacking them along the channel axis
    #the split point is taken from the data instead of being hard-coded to 3,
    #so inputs with any number of channels are separated correctly afterwards
    n_x_channels = X.shape[-1]
    stacked = np.concatenate((X,Y),axis=-1)
    shuffled = tf.random.shuffle(stacked)
    X = shuffled[:,:,:,:n_x_channels]
    Y = shuffled[:,:,:,n_x_channels:]
    return X,Y

Code for Binarization

In [None]:
#gather the saved *_output.npy files from each of the three training folders
atlantic, pacific, indian = (
    glob.glob(pattern)
    for pattern in (
        "/content/drive/My Drive/atlantic_train/*_output.npy",
        "/content/drive/My Drive/pacific_train/*_output.npy",
        "/content/drive/My Drive/indian_train/*_output.npy",
    )
)
#one flat list of paths: atlantic first, then pacific, then indian
training_data = [*atlantic, *pacific, *indian]

In [None]:
#binarize outputs
#each output array is smoothed per channel, thresholded on its absolute value,
#and saved as an int8 mask (.npy) plus one preview PNG per channel
BINARY_DIR = '/content/drive/My Drive/binary/'
SMOOTHING_SIGMA = 3     #sigma passed to gaussian_filter
CHL_THRESHOLD = 0.005   #cut-off applied to |smoothed channel 0|
SST_THRESHOLD = 0.1     #cut-off applied to |smoothed channel 1|

#np.save and plt.imsave do not create missing folders, so make sure the target exists
os.makedirs(BINARY_DIR, exist_ok=True)

for img in training_data:
  output = np.load(img)
  chl = (abs(gaussian_filter(output[:,:,0],sigma=SMOOTHING_SIGMA))>CHL_THRESHOLD).astype(np.int8)
  sst = (abs(gaussian_filter(output[:,:,1],sigma=SMOOTHING_SIGMA))>SST_THRESHOLD).astype(np.int8)

  output = np.stack([chl,sst], axis=2)

  #file name without folder or extension, computed once and reused for all three files
  stem = os.path.splitext(os.path.basename(img))[0]

  np.save(BINARY_DIR + stem + '.npy', output)

  plt.imsave(BINARY_DIR + stem + '_chl.png', chl)
  plt.imsave(BINARY_DIR + stem + '_sst.png', sst)

Some Code for Scaling

In [None]:
#gather the saved *_input.npy files from each of the three training folders
atlantic, pacific, indian = (
    glob.glob(pattern)
    for pattern in (
        "/content/drive/My Drive/atlantic_train/*_input.npy",
        "/content/drive/My Drive/pacific_train/*_input.npy",
        "/content/drive/My Drive/indian_train/*_input.npy",
    )
)
#one flat list of paths: atlantic first, then pacific, then indian
training_data = [*atlantic, *pacific, *indian]

In [None]:
#per-band pixel collections: each list receives one flattened array per training image
b1 = []
b2 = []
b3 = []
b4 = []
b10 = []

#(list that receives the band, channel index inside the loaded image array)
band_channels = ((b1, 0), (b2, 1), (b3, 2), (b4, 3), (b10, 4))

#the loop variable is not called `input`: that would overwrite the builtin
#input() for the rest of the kernel session
for input_path in training_data:
  img = np.load(input_path)
  for band, channel in band_channels:
    band.append(img[:,:,channel].flatten())

In [None]:
#report mean, minimum and maximum of every band over all collected pixels
for label, band in (("B1: ", b1), ("B2: ", b2), ("B3: ", b3), ("B4: ", b4), ("B10: ", b10)):
  print(label, np.mean(band), np.min(band), np.max(band))

B1:  73.8655378887719 62.231609502711784 125.88908410358262
B2:  66.2964271222989 56.528149110075624 126.63707890917424
B3:  54.88118509206154 48.15216560996104 130.15109617294323
B4:  49.26527626205458 44.08494385455657 137.01550683675808
B10:  195.6448890329182 139.5166144679551 231.23947750362845


In [None]:
#scale data
def _min_max_scale(band, upper=255):
  """Linearly rescale `band` so its minimum maps to 0 and its maximum to `upper`.

  The minimum and maximum are taken over every value in `band` and are each
  computed only once. A band whose values are all equal has zero range, so
  the division below is a division by zero.
  """
  values = np.asarray(band)
  lowest = np.min(values)
  highest = np.max(values)
  return (values-lowest)/(highest-lowest)*upper

new_b1 = _min_max_scale(b1)
new_b2 = _min_max_scale(b2)
new_b3 = _min_max_scale(b3)
new_b4 = _min_max_scale(b4)
new_b10 = _min_max_scale(b10)

#sanity check: after scaling every band should report a minimum of 0 and a maximum of 255
for label, scaled in (("B1: ", new_b1), ("B2: ", new_b2), ("B3: ", new_b3), ("B4: ", new_b4), ("B10: ", new_b10)):
  print(label, np.mean(scaled), np.min(scaled), np.max(scaled))

B1:  46.603352662764216 0.0 255.0
B2:  35.529152994558395 0.0 255.0
B3:  20.925882278643744 0.0 255.0
B4:  14.214750470897139 0.0 255.0
B10:  156.04299233985975 0.0 255.0
