### First experiments with U-Net on CESM slp data

So far, we look for the patch (or grid point), that leads to strongest loss reduction, when added. This patch (or grid points) is then fixed, before we successively add more patches (or grid points). But the question remains: Is that optimal? To answer this question, we start with large patch sizes (48 and 24) and compute loss reduction for *ALL* possible combination of patches.

In [11]:
import os
import sys
sys.path.append('../reconstruct_missing_data')

from pathlib import Path
from json import dump, load

import math
import numpy as np
import pandas as pd
from itertools import permutations
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt

from data_loading import find_data_files, load_data_set, get_anomalies, clone_data, create_missing_mask, split_and_scale_data
from models import build_unet_4conv
from relevance import compute_single_relevance_map

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Lambda, concatenate, Conv1D, Conv2D, MaxPool2D, UpSampling2D, BatchNormalization, LeakyReLU
from tensorflow.keras.optimizers import Adam, SGD
import tensorflow.keras.initializers as tfi
import tensorflow.keras.regularizers as tfr
from tensorflow.keras.utils import plot_model

# Suppress Tensorflow warnings
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [2]:
# Set working directory, according to working directory in scripts:
os.chdir('/gxfs_work1/geomar/smomw511')

In [3]:
## Reload parameters and pre-trained model:

# Specify experiment:
model_config = 'unet_4conv'
feature = 'sea-level-pressure' # Choose either 'sea-level-pressure' or 'sea-surface-temperature' as feature.
feature_short = 'slp' # Free to set short name, to store results, e.g. 'slp' and 'sst'.
source = 'CESM' # Choose Earth System Model, either 'FOCI' or 'CESM'.
mask_type = 'variable'
missing_type = 'range'
range_string = '_50_999'
augmentation_factor = 3
run = '_final'

# Get path to parameters:
path_to_parameters = Path('GitGeomar/marco-landt-hayen/reconstruct_missing_data_results/'+model_config+'_'+feature_short+'_'+source+'_'
                      +mask_type+'_'+missing_type+range_string+'_factor_'+str(augmentation_factor)+run)

print(path_to_parameters)

# Reload parameters relevant for data pre-processing for this experiment:
with open(path_to_parameters / 'parameters.json', 'r') as f:
    parameters=load(f)

seed = parameters['seed']
train_val_split = parameters['train_val_split']
scale_to = parameters['scale_to']
missing_values = parameters['missing_values']

# Get path to pre-trained model:
path_to_model = Path('GitGeomar/marco-landt-hayen/reconstruct_missing_data_results/'+model_config+'_'+feature_short+'_'+source+'_'
                      +mask_type+'_'+missing_type+range_string+'_factor_'+str(augmentation_factor)+run+'/missing'+range_string+'/model')

print(path_to_model)

# Reload final model, trained on range:
model = tf.keras.models.load_model(path_to_model)


GitGeomar/marco-landt-hayen/reconstruct_missing_data_results/unet_4conv_slp_CESM_variable_range_50_999_factor_3_final
GitGeomar/marco-landt-hayen/reconstruct_missing_data_results/unet_4conv_slp_CESM_variable_range_50_999_factor_3_final/missing_50_999/model


2023-02-15 09:32:51.407716: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /.singularity.d/libs
2023-02-15 09:32:51.407738: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2023-02-15 09:32:51.407750: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (neshcl124): /proc/driver/nvidia/version does not exist
2023-02-15 09:32:51.407915: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
## Prepare input samples:

# Path to full data:
path_to_data = 'climate_index_collection/data/raw/2022-08-22/'

# Load data:
data = load_data_set(data_path=path_to_data, data_source_name=source)

# Select single feature and compute anomalies, using whole time span as climatology:
data = get_anomalies(feature=feature, data_set=data)

# Create synthetic missing_mask of ONEs, to load FULL validation samples:
missing_mask_1 = (np.ones(data.shape)==1)

# Get scaled validation inputs and targets. Note: Using missing_mask of ONEs, validation inputs and targets are 
# identical. Only difference is found in dimensionality: inputs have channel number (=1) as final dimension, targets don't.
train_input, val_input, train_target, val_target, train_min, train_max, _, _ = split_and_scale_data(
    data, 
    missing_mask_1,
    train_val_split, 
    scale_to
)

In [48]:
## Set further parameters:

# Set sample number to start from:
start_sample = 5

# Define number of validation samples to consider:
n_samples = 1

# Define list of patch sizes:
patch_sizes = [48,]

## Optionally define stopping criteria:

# Specify maximum number of patches to include (or set -1, to include ALL patches):
max_patch_num = 4

# Specify threshold for maximum accumulated rel. loss reduction (or set 1.0, for NO threshold):
max_acc_rel_loss_reduction = 1.0             

In [49]:
## Loop over list of patch sizes:
for p in range(len(patch_sizes)):
    
    # Get status:
    print("patch size: ",p+1," of ",len(patch_sizes))
    
    # Get current patch size:
    patch_size = patch_sizes[p]
    
    # Get parameters for patches, enumerated line-by-line, from left to right, from top to bottom, starting with ZERO.
    n_lat = int(val_input[0:1].shape[1] / patch_size)
    n_lon = int(val_input[0:1].shape[2] / patch_size)

    # Obtain total number of patches:
    n_patches = int(n_lat * n_lon)
    
    # Check for maximum number of desired patches: If given as -1, set to total number of patches.
    if max_patch_num == -1:
        max_patch_num = n_patches
    
    ## Loop over samples:
    for s in range(n_samples):
    
        # Initialize storage for found orders:
        found_orders = []
        
        # Initialize storage for optimal patch orders:
        patch_orders = []
        
        # Initialize storage for checking identity of found and optimal orders:
        identical_checks = [] 
        
        # Get status:
        print("  sample: ",s+1," of ",n_samples)
    
        # Get current input sample:
        input_sample = train_input[start_sample+s:start_sample+s+1]
        
        # Compute relevance map, patch order and (acc.) rel. and abs. loss reduction for current sample with given patch size:
        (
            rel_loss_reduction_map, 
            patch_order, 
            abs_loss_reduction, 
            rel_loss_reduction, 
            acc_rel_loss_reduction
        ) = compute_single_relevance_map(input_sample=input_sample,
                                         patch_size=patch_size, 
                                         model=model,
                                         max_patch_num=max_patch_num,
                                         max_acc_rel_loss_reduction=max_acc_rel_loss_reduction,
                                        )
        
        ## Loop over possible number of patches to include. 
        ## Max. include max_patch_num patches. And start from 2, since single patch is not relevant. Look for permutations!
        for n in np.arange(2,max_patch_num+1):
            
            # Get status:
            print("    num. of included patches: ",n," of ",max_patch_num)
            
            # Get permutations of specified number of patches:
            permutation_list = list(permutations(range(n_patches),int(n)))
            
            # Create list of patch indices:
            patch_indices = list(np.arange(n_patches))

            # Create empty sample of just ZEROs:
            empty_sample = np.zeros((1, input_sample.shape[1], input_sample.shape[2]))

            ## Create patches:

            # Initialize storage for patches as boolean array. Dimension (# of permutations, latitude, longitude)
            patches = (np.zeros((len(permutation_list), input_sample.shape[1], input_sample.shape[2])) != 0)
            
            # Run over list of permutations:
            for l in range(len(permutation_list)):

                # Get current permutation:
                perm = permutation_list[l]

                # Loop over patch indices in current permutations:
                for patch_index in perm:

                    # Get x and y coordinate from current patch index:
                    y = patch_index // n_lon
                    x = patch_index % n_lon    

                    # Store mask for current patch:
                    patches[l,int(y*patch_size):int((y+1)*patch_size),int(x*patch_size):int((x+1)*patch_size)] = True 
                    
            # Expand dimensions of patches: Have last dimension for channel (=1), to match requirements for CNN inputs.
            patches_extended = np.expand_dims(patches, axis=-1)

            # Create input samples from first validation sample:
            patchy_input = patches_extended * input_sample

            # Get mean state on empty sample as input:
            mean_state = model.predict(empty_sample)
            
            ## Get patch from optimal patch order up to n included patches:
            
            # Initialize storage for optimal patch:
            optimal_patch = (np.zeros((1,input_sample.shape[1], input_sample.shape[2])) != 0)
            
            # Extract sub-order:
            optimal_order = patch_order[:n]
                        
            # Loop over patch indices in optimal order:
            for patch_index in optimal_order:

                # Get x and y coordinate from current patch index:
                y = patch_index // n_lon
                x = patch_index % n_lon    

                # Store mask for optimal patch:
                optimal_patch[0,int(y*patch_size):int((y+1)*patch_size),int(x*patch_size):int((x+1)*patch_size)] = True
            
            # Get prediction from complete sample as input:
            optimal_pred = model.predict(np.expand_dims(optimal_patch, axis=-1)*input_sample)

            # Get model predictions on patchy inputs:
            patchy_pred = model.predict(patchy_input)
            
            # Compute mean state loss from prediction on empty sample compared to target (= complete input sample):
            mean_state_loss = np.mean((mean_state[:,:,:,0] - input_sample[0,:,:,0])**2)

            # Compute min loss from prediction on complete sample compared to target (= complete input sample):
            min_loss = np.mean((optimal_pred[:,:,:,0] - input_sample[0,:,:,0])**2)

            # Compute loss of patchy predictions compared to targets (= complete input sample):
            patchy_loss = np.mean((patchy_pred[:,:,:,0] - input_sample[0,:,:,0])**2,axis=(1,2))
            
            # Get index for patch leading to lowest loss, when adding:
            min_index = np.argsort(patchy_loss)[0]
            
            # Store found and optimal patch order and check, if both include the same initial patches:
            found_orders.append(permutation_list[min_index])
            patch_orders.append(patch_order[:n].astype(int))
            identical_checks.append(all(np.sort(permutation_list[min_index])==np.sort(patch_order[:n].astype(int))))
            
            # Print results:
            print('    found_order: ',permutation_list[min_index])
            print('    patch_order: ',patch_order[:n].astype(int))
            print('    identical: ',all(np.sort(permutation_list[min_index])==np.sort(patch_order[:n].astype(int))))
            print('    min_loss: ', min_loss)
            print('    found loss: ', patchy_loss[min_index])
            
#         # Define filenames to store informormation:
#         found_orders_filename = 'found_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
#         patch_orders_filename = 'patch_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
#         identical_checks_filename = 'identity_checks_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'

#         # Save files:
#         np.save(path_to_parameters / found_orders_filename, found_orders)
#         np.save(path_to_parameters / patch_orders_filename, patch_orders)
#         np.save(path_to_parameters / identical_checks_filename, identical_checks)

patch size:  1  of  1
  sample:  1  of  1
    num. of included patches:  2  of  4
    found_order:  (2, 0)
    patch_order:  [0 2]
    identical:  True
    min_loss:  0.0007736901
    found loss:  0.0007736901
    num. of included patches:  3  of  4
    found_order:  (0, 1, 2)
    patch_order:  [0 2 1]
    identical:  True
    min_loss:  0.0005818426
    found loss:  0.0005818426
    num. of included patches:  4  of  4
    found_order:  (3, 0, 4, 2)
    patch_order:  [0 2 1 4]
    identical:  False
    min_loss:  0.0004328457
    found loss:  0.00042778993


In [30]:
## Reload results:

# Set experiment name:
exp_name = 'order_5'

# Reload parameters relevant for this experiment:
with open(path_to_parameters / exp_name / 'parameters.json', 'r') as f:
    parameters=load(f)
    
# Extract relevant parameters for reloading results:
start_sample = parameters['start_sample']
n_samples = parameters['n_samples']
patch_sizes = parameters['patch_sizes']
n_patches = parameters['n_patches']
max_patch_num = parameters['max_patch_num']

print("run:",exp_name)
print("n_samples:", n_samples)
print("patch_sizes:", patch_sizes)
print("max_patch_num:", max_patch_num)
print('\n')


# Check for maximum number of desired patches: If given as -1, set to total number of patches.
if max_patch_num == -1:
    max_patch_num = n_patches

## Loop over list of patch sizes:
for p in range(len(patch_sizes)):
    
    # Get current patch size:
    patch_size = patch_sizes[p]

    # Initialize storage for identical checks. Dimension: (max_patch_num - 1).
    identical_checks_all = np.zeros(max_patch_num-1)
    
    ## Loop over possible number of patches to include. 
    ## Max. include max_patch_num patches. And start from 2, since single patch is not relevant. Look for permutations!
    for n in np.arange(2,max_patch_num+1):
            
        ## Loop over samples:
        for s in range(n_samples):

            # Define filenames to store informormation:
            found_orders_filename = 'found_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
            patch_orders_filename = 'patch_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
#            identical_checks_filename = 'identity_checks_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'

            # Reload files:
            found_orders = np.load(path_to_parameters / exp_name / found_orders_filename,allow_pickle=True)
            patch_orders = np.load(path_to_parameters / exp_name / patch_orders_filename,allow_pickle=True)
#            identical_checks = np.load(path_to_parameters / exp_name / identical_checks_filename)

            # Store identical check for current sample and number of included patches:
            if all(np.sort(found_orders[n-2])==np.sort(patch_orders[n-2])):
                identical_checks_all[n-2]+=1
            
#             print(
#                 'sample: ',s,', patches: ',n,', identical: ',all(np.sort(found_orders[n-2])==np.sort(patch_orders[n-2])),
#                 '\nfound_order: ',found_orders[n-2],
#                 '\npatch_order: ',patch_orders[n-2]
#             )
#         print('\n')

        # Output results:
        print("Checked",n_samples,"samples with patch size",patch_size,", find first",n,"patches to be identical for",
             int(identical_checks_all[n-2]),"samples, that's",np.round(identical_checks_all[n-2]/n_samples*100,1),"%")
    print('\n')

run: order_5
n_samples: 100
patch_sizes: [48]
max_patch_num: 5
Checked 100 samples with patch size 48 , find first 2 patches to be identical for 95 samples, that's 95.0 %
Checked 100 samples with patch size 48 , find first 3 patches to be identical for 85 samples, that's 85.0 %
Checked 100 samples with patch size 48 , find first 4 patches to be identical for 86 samples, that's 86.0 %
Checked 100 samples with patch size 48 , find first 5 patches to be identical for 86 samples, that's 86.0 %




In [31]:
## Reload results:

# Set experiment name:
exp_name = 'order_6'

# Reload parameters relevant for this experiment:
with open(path_to_parameters / exp_name / 'parameters.json', 'r') as f:
    parameters=load(f)
    
# Extract relevant parameters for reloading results:
start_sample = parameters['start_sample']
n_samples = parameters['n_samples']
patch_sizes = parameters['patch_sizes']
n_patches = parameters['n_patches']
max_patch_num = parameters['max_patch_num']

print("run:",exp_name)
print("n_samples:", n_samples)
print("patch_sizes:", patch_sizes)
print("max_patch_num:", max_patch_num)
print('\n')


# Check for maximum number of desired patches: If given as -1, set to total number of patches.
if max_patch_num == -1:
    max_patch_num = n_patches

## Loop over list of patch sizes:
for p in range(len(patch_sizes)):
    
    # Get current patch size:
    patch_size = patch_sizes[p]

    # Initialize storage for identical checks. Dimension: (max_patch_num - 1).
    identical_checks_all = np.zeros(max_patch_num-1)
    
    ## Loop over possible number of patches to include. 
    ## Max. include max_patch_num patches. And start from 2, since single patch is not relevant. Look for permutations!
    for n in np.arange(2,max_patch_num+1):
            
        ## Loop over samples:
        for s in range(n_samples):

            # Define filenames to store informormation:
            found_orders_filename = 'found_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
            patch_orders_filename = 'patch_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
#            identical_checks_filename = 'identity_checks_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'

            # Reload files:
            found_orders = np.load(path_to_parameters / exp_name / found_orders_filename,allow_pickle=True)
            patch_orders = np.load(path_to_parameters / exp_name / patch_orders_filename,allow_pickle=True)
#            identical_checks = np.load(path_to_parameters / exp_name / identical_checks_filename)

            # Store identical check for current sample and number of included patches:
            if all(np.sort(found_orders[n-2])==np.sort(patch_orders[n-2])):
                identical_checks_all[n-2]+=1
            
#             print(
#                 'sample: ',s,', patches: ',n,', identical: ',all(np.sort(found_orders[n-2])==np.sort(patch_orders[n-2])),
#                 '\nfound_order: ',found_orders[n-2],
#                 '\npatch_order: ',patch_orders[n-2]
#             )
#         print('\n')

        # Output results:
        print("Checked",n_samples,"samples with patch size",patch_size,", find first",n,"patches to be identical for",
             int(identical_checks_all[n-2]),"samples, that's",np.round(identical_checks_all[n-2]/n_samples*100,1),"%")
    print('\n')

run: order_6
n_samples: 100
patch_sizes: [24]
max_patch_num: 3


Checked 100 samples with patch size 24 , find first 2 patches to be identical for 84 samples, that's 84.0 %
Checked 100 samples with patch size 24 , find first 3 patches to be identical for 75 samples, that's 75.0 %




In [32]:
## Reload results:

# Set experiment name:
exp_name = 'order_8'

# Reload parameters relevant for this experiment:
with open(path_to_parameters / exp_name / 'parameters.json', 'r') as f:
    parameters=load(f)
    
# Extract relevant parameters for reloading results:
start_sample = parameters['start_sample']
n_samples = parameters['n_samples']
patch_sizes = parameters['patch_sizes']
n_patches = parameters['n_patches']
max_patch_num = parameters['max_patch_num']

print("run:",exp_name)
print("n_samples:", n_samples)
print("patch_sizes:", patch_sizes)
print("max_patch_num:", max_patch_num)
print('\n')


# Check for maximum number of desired patches: If given as -1, set to total number of patches.
if max_patch_num == -1:
    max_patch_num = n_patches

## Loop over list of patch sizes:
for p in range(len(patch_sizes)):
    
    # Get current patch size:
    patch_size = patch_sizes[p]

    # Initialize storage for identical checks. Dimension: (max_patch_num - 1).
    identical_checks_all = np.zeros(max_patch_num-1)
    
    ## Loop over possible number of patches to include. 
    ## Max. include max_patch_num patches. And start from 2, since single patch is not relevant. Look for permutations!
    for n in np.arange(2,max_patch_num+1):
            
        ## Loop over samples:
        for s in range(n_samples):

            # Define filenames to store informormation:
            found_orders_filename = 'found_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
            patch_orders_filename = 'patch_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
#            identical_checks_filename = 'identity_checks_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'

            # Reload files:
            found_orders = np.load(path_to_parameters / exp_name / found_orders_filename,allow_pickle=True)
            patch_orders = np.load(path_to_parameters / exp_name / patch_orders_filename,allow_pickle=True)
#            identical_checks = np.load(path_to_parameters / exp_name / identical_checks_filename)

            # Store identical check for current sample and number of included patches:
            if all(np.sort(found_orders[n-2])==np.sort(patch_orders[n-2])):
                identical_checks_all[n-2]+=1
            
#             print(
#                 'sample: ',s,', patches: ',n,', identical: ',all(np.sort(found_orders[n-2])==np.sort(patch_orders[n-2])),
#                 '\nfound_order: ',found_orders[n-2],
#                 '\npatch_order: ',patch_orders[n-2]
#             )
#         print('\n')

        # Output results:
        print("Checked",n_samples,"samples with patch size",patch_size,", find first",n,"patches to be identical for",
             int(identical_checks_all[n-2]),"samples, that's",np.round(identical_checks_all[n-2]/n_samples*100,1),"%")
    print('\n')

run: order_8
n_samples: 100
patch_sizes: [12]
max_patch_num: 2


Checked 100 samples with patch size 12 , find first 2 patches to be identical for 75 samples, that's 75.0 %




In [33]:
## Reload results:

# Set experiment name:
exp_name = 'order_7'

# Reload parameters relevant for this experiment:
with open(path_to_parameters / exp_name / 'parameters.json', 'r') as f:
    parameters=load(f)
    
# Extract relevant parameters for reloading results:
start_sample = parameters['start_sample']
n_samples = parameters['n_samples']
patch_sizes = parameters['patch_sizes']
n_patches = parameters['n_patches']
max_patch_num = parameters['max_patch_num']

print("run:",exp_name)
print("n_samples:", n_samples)
print("patch_sizes:", patch_sizes)
print("max_patch_num:", max_patch_num)
print('\n')


# Check for maximum number of desired patches: If given as -1, set to total number of patches.
if max_patch_num == -1:
    max_patch_num = n_patches

## Loop over list of patch sizes:
for p in range(len(patch_sizes)):
    
    # Get current patch size:
    patch_size = patch_sizes[p]

    # Initialize storage for identical checks. Dimension: (max_patch_num - 1).
    identical_checks_all = np.zeros(max_patch_num-1)
    
    ## Loop over possible number of patches to include. 
    ## Max. include max_patch_num patches. And start from 2, since single patch is not relevant. Look for permutations!
    for n in np.arange(2,max_patch_num+1):
            
        ## Loop over samples:
        for s in range(n_samples):

            # Define filenames to store informormation:
            found_orders_filename = 'found_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
            patch_orders_filename = 'patch_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
#            identical_checks_filename = 'identity_checks_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'

            # Reload files:
            found_orders = np.load(path_to_parameters / exp_name / found_orders_filename,allow_pickle=True)
            patch_orders = np.load(path_to_parameters / exp_name / patch_orders_filename,allow_pickle=True)
#            identical_checks = np.load(path_to_parameters / exp_name / identical_checks_filename)

            # Store identical check for current sample and number of included patches:
            if all(np.sort(found_orders[n-2])==np.sort(patch_orders[n-2])):
                identical_checks_all[n-2]+=1
            
#             print(
#                 'sample: ',s,', patches: ',n,', identical: ',all(np.sort(found_orders[n-2])==np.sort(patch_orders[n-2])),
#                 '\nfound_order: ',found_orders[n-2],
#                 '\npatch_order: ',patch_orders[n-2]
#             )
#         print('\n')

        # Output results:
        print("Checked",n_samples,"samples with patch size",patch_size,", find first",n,"patches to be identical for",
             int(identical_checks_all[n-2]),"samples, that's",np.round(identical_checks_all[n-2]/n_samples*100,1),"%")
    print('\n')

run: order_7
n_samples: 1
patch_sizes: [6]
max_patch_num: 2


Checked 1 samples with patch size 6 , find first 2 patches to be identical for 0 samples, that's 0.0 %




In [18]:
# Compute total number of permutations, given patch size and number of patches to include:
patch_sizes = [48,24,12,6]
max_patch_nums = [5,3,2,2]

## Loop over list of patch sizes:
for p in range(len(patch_sizes)):
    
    # Get current patch size:
    patch_size = patch_sizes[p]
    
    # Get parameters for patches, enumerated line-by-line, from left to right, from top to bottom, starting with ZERO.
    n_lat = int(val_input[0:1].shape[1] / patch_size)
    n_lon = int(val_input[0:1].shape[2] / patch_size)

    # Obtain total number of patches:
    n_patches = int(n_lat * n_lon)
    
    # Output:
    print("For patch size",patch_size,", have",n_patches,"patches.")
    
    # Get max. number of patches to include for current patch size:
    max_patch_num = max_patch_nums[p]
    
    # Loop over number of patches to include, starting from 2:
    for n in np.arange(2,max_patch_num+1):
        
        print("  Including",n,"patches, have",int(math.factorial(n_patches)/math.factorial(n_patches-n)),"possible permutations.")
        
    print('\n')
    


For patch size 48 , have 6 patches.
  Including 2 patches, have 30 possible permutations.
  Including 3 patches, have 120 possible permutations.
  Including 4 patches, have 360 possible permutations.
  Including 5 patches, have 720 possible permutations.


For patch size 24 , have 24 patches.
  Including 2 patches, have 552 possible permutations.
  Including 3 patches, have 12144 possible permutations.


For patch size 12 , have 96 patches.
  Including 2 patches, have 9120 possible permutations.


For patch size 6 , have 384 patches.
  Including 2 patches, have 147072 possible permutations.




In [6]:
# Check for maximum number of desired patches: If given as -1, set to total number of patches.
if max_patch_num == -1:
    max_patch_num = n_patches

## Loop over list of patch sizes:
for p in range(len(patch_sizes)):
    
    # Get current patch size:
    patch_size = patch_sizes[p]
    
    ## Loop over samples:
    for s in range(n_samples):

        ## Loop over possible number of patches to include. 
        ## Max. include max_patch_num patches. And start from 2, since single patch is not relevant. Look for permutations!
        for n in np.arange(2,max_patch_num+1):

            # Define filenames to store informormation:
            found_orders_filename = 'found_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
            patch_orders_filename = 'patch_orders_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'
            identical_checks_filename = 'identity_checks_sample_'+str(start_sample+s)+'_patchsize_'+str(patch_size)+'_patches_'+str(max_patch_num)+'.npy'

            # Reload files:
            found_orders = np.load(path_to_parameters / exp_name / found_orders_filename,allow_pickle=True)
            patch_orders = np.load(path_to_parameters / exp_name / patch_orders_filename,allow_pickle=True)
            identical_checks = np.load(path_to_parameters / exp_name / identical_checks_filename)
            print(
                'sample: ',s,', patches: ',n,', identical: ',all(identical_checks[-2:]),
                '\nfound_order: ',found_orders[-2:],
                '\npatch_order: ',patch_orders[-2:]
            )
        print('\n')

sample:  0 , patches:  2 , identical:  True 
found_order:  [(3, 0, 5, 4) (3, 0, 1, 4, 5)] 
patch_order:  [array([4, 0, 5, 3]) array([4, 0, 5, 3, 1])]
sample:  0 , patches:  3 , identical:  True 
found_order:  [(3, 0, 5, 4) (3, 0, 1, 4, 5)] 
patch_order:  [array([4, 0, 5, 3]) array([4, 0, 5, 3, 1])]
sample:  0 , patches:  4 , identical:  True 
found_order:  [(3, 0, 5, 4) (3, 0, 1, 4, 5)] 
patch_order:  [array([4, 0, 5, 3]) array([4, 0, 5, 3, 1])]
sample:  0 , patches:  5 , identical:  True 
found_order:  [(3, 0, 5, 4) (3, 0, 1, 4, 5)] 
patch_order:  [array([4, 0, 5, 3]) array([4, 0, 5, 3, 1])]


sample:  1 , patches:  2 , identical:  True 
found_order:  [(1, 0, 4, 5) (5, 1, 0, 4, 3)] 
patch_order:  [array([4, 1, 5, 0]) array([4, 1, 5, 0, 3])]
sample:  1 , patches:  3 , identical:  True 
found_order:  [(1, 0, 4, 5) (5, 1, 0, 4, 3)] 
patch_order:  [array([4, 1, 5, 0]) array([4, 1, 5, 0, 3])]
sample:  1 , patches:  4 , identical:  True 
found_order:  [(1, 0, 4, 5) (5, 1, 0, 4, 3)] 
patch_o

In [226]:
permutation_list = list(permutations(range(n_patches),4))

In [227]:
len(permutation_list)

255024

In [218]:
# Get parameters for patches, enumerated line-by-line, from left to right, from top to bottom, starting with ZERO.
n_lat = int(val_input[0:1].shape[1] / patch_size)
n_lon = int(val_input[0:1].shape[2] / patch_size)

# Obtain total number of patches:
n_patches = int(n_lat * n_lon)
