In [1]:
import torch
from torch.utils import data
from torchvision import transforms
import numpy as np
import datetime
import matplotlib.pyplot as plt
from models.stclassifier import dLtae
from models.ltae import LTAE
from learning.focal_loss import FocalLoss
import torchnet as tnt
from learning.metrics import mIou, confusion_matrix_analysis
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score 

from utils import *
# from learn_curve import plot_curve
import json
import os
import time
import random

In [2]:
# file_ = "../../../results/ltae/results/2019/Seed_0/seed_0_batchsize_2048_epochs_15_factor_5266_trainlog.json"
# plot_curve(file_, "")

In [3]:
from dataset import SITSData

In [4]:
def recursive_todevice(x, device):
    if isinstance(x, torch.Tensor):
        return x.to(device)
    else:
        return [recursive_todevice(c, device) for c in x]

In [19]:
def evaluation(model, criterion, loader, device, config, mode='val'):
    y_true = []
    y_pred = []

    acc_meter = tnt.meter.ClassErrorMeter(accuracy=True)
    loss_meter = tnt.meter.AverageValueMeter()

    for (x, y) in loader:
        start_time = time.time()
        y_true.extend(list(map(int, y)))
        x = recursive_todevice(x, device)
        y = y.to(device)

        with torch.no_grad():
            prediction = model(x)
            loss = criterion(prediction, y)

        acc_meter.add(prediction, y)
        loss_meter.add(loss.item())

        y_p = prediction.argmax(dim=1).cpu().numpy()
        y_pred.extend(list(y_p))
        
        print("evaluation iteration completed in {:.4f} seconds".format(time.time() - start_time))
    metrics = {'{}_accuracy'.format(mode): acc_meter.value()[0],
               '{}_loss'.format(mode): loss_meter.value()[0],
               '{}_IoU'.format(mode): mIou(y_true, y_pred, config['num_classes'])}

    if mode == 'val':
        return metrics
    elif mode == 'test':
        return metrics, confusion_matrix(y_true, y_pred, labels=list(range(config['num_classes']))), f1_score(y_true, y_pred, average='weighted')

In [6]:
# 2019
m = '../../../results/ltae/results/2019/Seed_0/model.pth.tar'
state_dict = torch.load(m)['state_dict']
c = "../../../results/ltae/results/2019/Seed_0/conf.json"
config = json.load(open(c))

In [6]:
# 2018
m = '../../../results/ltae/results/2018/Seed_0/model.pth.tar'
state_dict = torch.load(m)['state_dict']
c = "../../../results/ltae/results/2018/Seed_0/conf.json"
config = json.load(open(c))

In [7]:
# c = "../../../results/ltae/results/2019/Seed_0/conf.json"
# config = json.load(open(c))

In [7]:
# call the model
model = dLtae(in_channels = config['in_channels'], n_head = config['n_head'], d_k= config['d_k'], n_neurons=config['n_neurons'], dropout=config['dropout'], d_model= config['d_model'],
                 mlp = config['mlp4'], T =config['T'], len_max_seq = config['len_max_seq'], 
              positions=None, return_att=False)

In [8]:
device = "cuda"

In [9]:
model = model.to(device)
model = model.double()
model.load_state_dict(state_dict)
    
model.eval()

dLtae(
  (temporal_encoder): LTAE(
    (inconv): Sequential(
      (0): Conv1d(10, 256, kernel_size=(1,), stride=(1,))
      (1): LayerNorm((256, 33), eps=1e-05, elementwise_affine=True)
    )
    (position_enc): Embedding(34, 256)
    (inlayernorm): LayerNorm((10,), eps=1e-05, elementwise_affine=True)
    (outlayernorm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (attention_heads): MultiHeadAttention(
      (fc1_k): Linear(in_features=256, out_features=128, bias=True)
      (attention): ScaledDotProductAttention(
        (dropout): Dropout(p=0.1, inplace=False)
        (softmax): Softmax(dim=2)
      )
    )
    (mlp): Sequential(
      (0): Linear(in_features=256, out_features=128, bias=True)
      (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (dropout): Dropout(p=0.2, inplace=False)
  )
  (decoder): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): BatchNorm1d(64, eps=1

In [11]:
# case 2018
# case = "2"
# mean = np.loadtxt('../ltae/mean_std/source_mean.txt')
# std = np.loadtxt('../ltae/mean_std/source_std.txt')

In [10]:
# 2019
case = "3"
mean = np.loadtxt('../ltae/mean_std/target_mean.txt')
std = np.loadtxt('../ltae/mean_std/target_std.txt')

In [11]:
# case 2019
case = "2"
# mean = np.loadtxt('../ltae/mean_std/source_mean.txt')
# std = np.loadtxt('../ltae/mean_std/source_std.txt')
npz_ = "../../../data/theiaL2A_zip_img/output/2019/2019_SITS_data.npz" 


In [20]:
# case 2018
case = "3"
# mean = np.loadtxt('../ltae/mean_std/target_mean.txt')
# std = np.loadtxt('../ltae/mean_std/target_std.txt')
npz_ = "../../../data/theiaL2A_zip_img/output/2018/2018_SITS_data.npz"

In [12]:
transform = transforms.Compose([standardize(mean, std)])

dataset_test_alt = SITSData(npz_, config['seed'], config['dates'], partition='test', transform = transform)

In [13]:
test_loader = data.DataLoader(dataset_test_alt, batch_size=config['batch_size'],
                                       num_workers=config['num_workers'], 
                                        shuffle=True,
                                         pin_memory=True)

In [14]:
# for (x, y) in test_loader:
#     print(x.shape)

In [15]:
print(len(dataset_test_alt))
print(len(test_loader))
print(len(dataset_test_alt)/2048)
print(len(test_loader)*2048)

1518343
742
741.37841796875
1519616


In [16]:
criterion = FocalLoss(config['gamma'])

In [20]:
test_metrics, conf_mat, fs= evaluation(model, criterion, test_loader, device=device, mode='test', config=config)

evaluation iteration completed in 0.1270 seconds
evaluation iteration completed in 0.0845 seconds
evaluation iteration completed in 0.1099 seconds
evaluation iteration completed in 0.1479 seconds
evaluation iteration completed in 0.0510 seconds
evaluation iteration completed in 0.0431 seconds
evaluation iteration completed in 0.0346 seconds
evaluation iteration completed in 0.0326 seconds
evaluation iteration completed in 0.0392 seconds
evaluation iteration completed in 0.0368 seconds
evaluation iteration completed in 0.0361 seconds
evaluation iteration completed in 0.0355 seconds
evaluation iteration completed in 0.0416 seconds
evaluation iteration completed in 0.0408 seconds
evaluation iteration completed in 0.0643 seconds
evaluation iteration completed in 0.0298 seconds
evaluation iteration completed in 0.0491 seconds
evaluation iteration completed in 0.0504 seconds
evaluation iteration completed in 0.0423 seconds
evaluation iteration completed in 0.0738 seconds
evaluation iteration

In [21]:
print('Loss {:.4f},  Acc {:.2f},  IoU {:.4f}'.format(test_metrics['test_loss'], test_metrics['test_accuracy'], test_metrics['test_IoU']))

Loss 0.1887,  Acc 93.71,  IoU 0.6380


In [22]:
fs

0.9323446892828722

In [45]:
config['res_dir']

'../../../results/ltae/results/2019'

In [23]:
def save_results(metrics, conf_mat, config):
    with open(os.path.join(config['res_dir'], 'Seed_{}'.format(config['seed']), 'seed_{}_case_{}_batchsize_{}_epochs_{}_factor_{}_test_metrics.json'.format(config['seed'], case, config['batch_size'], config['epochs'], config['factor'])), 'w') as outfile:
        json.dump(metrics, outfile, indent=4)
    # pkl.dump(conf_mat, open(os.path.join(config['res_dir'], 'Seed_{}'.format(config['seed']), 'seed_{}_batchsize_{}_epochs_{}_factor_{}_conf_mat.pkl'.format(config['seed'], config['batch_size'], config['epochs'], config['factor'])), 'wb'))

In [24]:
save_results(test_metrics, conf_mat, config)

In [23]:
# m = '../../../results/ltae/trials/Seed_0/model.pth.tar'

# model.load_state_dict(torch.load(m))

In [5]:
def date_positions(gfdate_path):
    with open(gfdate_path, "r") as f:
        out_date_list = f.readlines()
    out_date_list = [x.strip() for x in out_date_list]
    out_date_list = [datetime.datetime.strptime(x, "%Y%m%d").timetuple().tm_yday for x in out_date_list]
    string_date_list = [x for x in out_date_list]
    return string_date_list

In [6]:
# st = time.time()
# t = date_positions("dates.txt")
# s = date_positions("dates.txt")
# print(t)
# print(s)
# print("time: ", time.time() - st)

In [7]:
n_channel = 10

In [8]:
def generate_ids():
    """
    Descr: 
        Aim: To write and returns the partition (Train, Validation and Test) ids 
        with respect to the grid split index (range 1 - 100)
        
        - A random seed value is set within a random intger 1-10,
        - the set is spltted into 80:10:10,
        - save into a text file with the seed value used
    """
    # set a random seed value within the range 1 -10 
    start_time = time.time()
    seed_value = 9
    # seed_value = np.random.randint(0,10)
    np.random.seed(seed_value)
    # # block id range 1 - 100 (splitted grid)
    block_range = np.arange(1, 101)

    # Train, Validation and Test
    random.shuffle(block_range)
    train_id = block_range[:80] # 80%
    val_id = block_range[80:90] # 10%
    test_id = block_range[90:] # 10%
    print("Seed value: ", seed_value)
    
    if not os.path.exists("./ids/train_val_eval_seed_" + str(seed_value)+".txt"):
        with open("./ids/train_val_eval_seed_" + str(seed_value)+".txt", "w") as f:
            f.write("Training: " + str(list(train_id)) + "\n")
            f.write("Validation: " + str(list(val_id)) + "\n")
            f.write("Testing: " + str(list(test_id)) + "\n")
            f.close()
    print('Read set ids completed: %s second' % (time.time() - start_time))
# generate_ids()

Seed value:  9
Read set ids completed: 0.009698629379272461 second


In [9]:
def read_ids(seed_value):
    """
    Read ids from file
    """
    assert seed_value >= 0 and seed_value <= 10
    
    with open("./ids/train_val_eval_seed_" + str(seed_value)+".txt", "r") as f:
        lines = f.readlines()
        Train_ids = eval(lines[0].split(":")[1])
        Val_ids = eval(lines[1].split(":")[1])
        test_ids = eval(lines[2].split(":")[1])
    return Train_ids, Val_ids, test_ids

In [10]:
Train_ids, Val_ids, test_ids = read_ids(0)

In [9]:
j = Val_ids + test_ids

In [10]:
len(test_ids)

10

In [11]:
test_ids

[80, 39, 18, 65, 81, 8, 94, 76, 93, 32]

In [21]:
j

[79, 82, 34, 30, 46, 20, 9, 7, 23, 45, 80, 39, 18, 65, 81, 8, 94, 76, 93, 32]

In [2]:
def compute_mean_std(source_sits, target_sits, case):
    """
    Descr: Compute mean and std for each channel
    Input: both SITS dataset(.npz) paths
            Case[1 - 3]:
            1 - concatenate both dataset, while 2 & 3 rep source and target respectively
    The data(from N,LxD) is reshaped into (N,D,L);
        where N - pixel, D - Bands (10), L - Time (33)
    
    """
    
    # case = 1: both, case = 2: target, case = 3: target
    if case == 1:
        sits = [source_sits, target_sits]
    elif case == 2:
        sits = source_sits
    elif case == 3:
        sits = target_sits
    else:
        print('Select case between 1-3')
        return None
    
    # if sits is a list, then it's a list of paths
    if isinstance(sits, list):
        # load data
        X_source = np.load(sits[0])['X']
        X_target = np.load(sits[1])['X']
        # concatenate the data
        X = np.concatenate((X_source, X_target), axis=0)
    # if sits is a string, then it's a path
    else: 
        with np.load(sits) as data:
            X = data['X']

    X = X.reshape(X.shape[0], n_channel, int(X.shape[1]/n_channel))
    # compute mean and std
    X_mean = np.mean(X, axis=(0,2))
    X_std = np.std(X, axis=(0,2))
    print('mean shape: ', X_mean.shape)
    print('std shape: ', X_std.shape)
    # save X_mean and X_std sepearately for sits as txt file
    np.savetxt(os.path.join('mean_'+ str(case) +'.txt'), X_mean)
    np.savetxt(os.path.join('std_'+ str(case) +'.txt'), X_std)

# for i in [1,2,3]:
#     start_time = time.time()
#     source_path = "../../../data/theiaL2A_zip_img/output/2018/2018_SITS_data.npz"
#     target_path = "../../../data/theiaL2A_zip_img/output/2019/2019_SITS_data.npz"
#     compute_mean_std(source_path, target_path, i)
#     print("run time: ", time.time() - start_time)

In [3]:
def compute_mean_stdv2(sits, domain='source'):
    """
    Descr: Compute mean and std for each channel
    Input: both SITS dataset(.npz) paths
    The data(from N,LxD) is reshaped into (N,D,L);
        where N - pixel, D - Bands (10), L - Time (33)
    """
    with np.load(sits) as data:
            X = data['X']

    X = X.reshape(X.shape[0], n_channel, int(X.shape[1]/n_channel))
    # compute mean and std
    X_mean = np.mean(X, axis=(0,2))
    X_std = np.std(X, axis=(0,2))
    print('mean shape: ', X_mean.shape)
    print('std shape: ', X_std.shape)
    # save X_mean and X_std sepearately for sits as txt file
    np.savetxt(os.path.join('./mean_std/', domain + '_mean.txt'), X_mean)
    np.savetxt(os.path.join('./mean_std/', domain + '_std.txt'), X_std)

source_path = '../../../data/theiaL2A_zip_img/output/2018/2018_SITS_subset_data.npz'
target_path = '../../../data/theiaL2A_zip_img/output/2019/2019_SITS_subset_data.npz'
# compute_mean_stdv2(source_path, 'source')
# compute_mean_stdv2(target_path, domain = 'target')

In [13]:
class SITSDatav2(data.Dataset):
    def __init__(self, sits, seed, partition='train', transform=None):
        
        self.sits = sits
        self.seed = seed
        self.transform = transform
        
        # get partition ids using the read_id() func
        start_time = time.time()
        self.train_ids, self.val_ids, self.test_ids = read_ids(self.seed)
        print("read ids completed: %s second" % (time.time() - start_time))

        # select partition
        if partition == 'train':
            self.ids = self.train_ids
        elif partition == 'val':
            self.ids = self.val_ids
        elif partition == 'test':
            self.ids = self.test_ids
        else:
            raise ValueError('Invalid partition: {}'.format(partition))
        
        start_time = time.time()
        print('reading files....')
        X, y, block_ids = load_npz(self.sits)
        print("load npz: %s seconds" % (time.time() - start_time))
        
        start_time = time.time()
        y = np.unique(y, return_inverse=True)[1]
        print("reassigning %s seconds" % (time.time() - start_time))
        
        # concatenate the data
        start_time = time.time()
        data_ = np.concatenate((X, y[:, None], block_ids[:, None]), axis=1)
        print("Concatenating completed: %s seconds" % (time.time() - start_time))

        # filter by block_id
        start_time = time.time()
        data_ = data_[np.isin(data_[:, -1], self.ids)]
        print("filtering ids completed: %s seconds" % (time.time() - start_time))
        
        self.X_ = data_[:, :-2]
        self.y_ = data_[:, -2]                          
        
        del X
        del y
        del block_ids
        del data_

    def __len__(self):
        return len(self.y_)

    def __getitem__(self, idx):
        start_time = time.time()
        self.X = self.X_[idx]
        self.y = self.y_[idx]
        print("getting data: %s seconds" % ((time.time() - start_time)*100))

        start_time = time.time()
        self.X = np.array(self.X).astype('float32')
        self.y = np.array(self.y).astype('float32')
        print("conversion: %s seconds" % ((time.time() - start_time)*100))
        
        start_time = time.time()
        self.X = self.X.reshape(int(self.X.shape[0]/n_channel), n_channel)
        print("reshape data: %s seconds" % ((time.time() - start_time)*100))

        # transform
        start_time = time.time()
        if self.transform:
            self.X = self.transform(self.X)
        print("transform data: %s seconds" % ((time.time() - start_time)*100))
        print(self.X.shape)
        
        start_time = time.time()
        torch_x = torch.from_numpy(self.X)
        torch_y = torch.from_numpy(self.y)
        print("tensor: %s seconds" % ((time.time() - start_time)*100))
        
        return torch_x, torch_y

In [8]:
class standardize(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, sample):
        return (sample - self.mean) / self.std

In [9]:
mean = np.loadtxt('./mean_std/source_mean.txt')
std = np.loadtxt('./mean_std/source_std.txt')
seed = 0 
transform = transforms.Compose([standardize(mean, std)])

# paths
source_path = '../../../data/theiaL2A_zip_img/output/2018/2018_SITS_subset_data.npz'
# target_path = '../../../data/theiaL2A_zip_img/output/2019/2019_SITS_subset_data.npz'

train_dataset = SITSDatav2(source_path, seed, partition='train', transform=transform)
# val_dataset = SITSDatav2(source_path, seed, partition='val', transform=transform)
# test_dataset = SITSDatav2(source_path, seed, partition='test', transform=transform)

read ids completed: 0.001961231231689453 second
reading files....
load npz: 7.151129961013794 seconds
reassigning 0.0646812915802002 seconds
Concatenating completed: 1.7340800762176514 seconds
filtering ids completed: 1.6481618881225586 seconds


In [31]:
source_path = '../../../data/theiaL2A_zip_img/output/2018/2018_SITS_subset_data.npz'
X, y, _ = load_npz(source_path)

In [32]:
# dict_ = {0:1, 
#         1:2, 
#         2:3, 
#         3:4, 
#         4:5, 
#         5:6, 
#         6:7,
#         7:8,
#         8:9,
#         9:10,
#         10:12,
#         11:13,
#         12:14,
#         13:15,
#         14:16,
#         15:17,
#         16:18,
#         17:19,
#         18:23}

In [24]:
y_uniq = np.unique(y)

In [28]:
y_revs_uniq = np.unique(np.unique(y, return_inverse=True)[1])

In [36]:
dict__ = dict(zip(y_uniq, y_revs_uniq))
dict__

{1: 0,
 2: 1,
 3: 2,
 4: 3,
 5: 4,
 6: 5,
 7: 6,
 8: 7,
 9: 8,
 10: 9,
 12: 10,
 13: 11,
 14: 12,
 15: 13,
 16: 14,
 17: 15,
 18: 16,
 19: 17,
 23: 18}

In [38]:
yy = [dict__[i] for i in y]

In [39]:
np.unique(yy)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18])

In [19]:
st = time.time()
y = np.unique(y, return_inverse=True)[1]

y_train_mapped = np.zeros(y.shape)
for i in range(y.shape[0]):
        y_train_mapped[i] = dict_[y[i]]
print("stop time", time.time() - st)

stop time 2.462660789489746


In [40]:
str_model =['RF', 'LTAE']
str_model[1]

'LTAE'

In [21]:
np.unique(y_train_mapped)

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 12., 13., 14.,
       15., 16., 17., 18., 19., 23.])

In [25]:
for x, y, z in l_se:
    print('Train {}, Val {}, Test {}'.format(len(x), len(y), len(z)))
    # print('Starting Fold {}'.format(i + 1))

Train 10784283, Val 1606360, Test 1701000


In [11]:
batch_size = 2
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True)

In [12]:
def recursive_todevice(x, device):
    if isinstance(x, torch.Tensor):
        return x.to(device)
    else:
        return [recursive_todevice(c, device) for c in x]