## Fatbox for numerical modelling - correlation

The following IPython notebook details the workflow used to **correlate faults** in the **numerical modelling** application of the Fatbox paper. The process is the same as the one used by Derek Neuharth in his study (he kindly agreed to the distribution of the code).

The 2D continental rifting model was made using the geodynamic code ASPECT coupled to the landscape evolution code FastScape. This model simulates a continental rift while incorporating sedimentation and erosion processes.

In this workflow we use strain data from the cross section view of the model. We show how to **correlate the faults extracted** using Fatbox.
This is tutorial 2 of the numerical modelling application.

Neuharth, D., Brune, S., Wrona, T., Glerum, A., Braun, J., & Yuan, X. (2022). Evolution of rift systems and their fault networks in response to surface processes. Tectonics, 41, e2021TC007166. https://doi.org/10.1029/2021TC007166

In [None]:
# COMMENT IF RUNNING OUTSIDE GOOGLE COLAB
from google.colab import drive
drive.mount('/content/drive')
!pwd

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Fatbox/modules


In [None]:
# COMMENT IF RUNNING OUTSIDE GOOGLE COLAB
!pip install earthpy
!pip install cv-algorithms
!pip install vtk



In [None]:
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
import pickle
from scipy.spatial import distance_matrix
import pandas as pandas
import math
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from joblib import Parallel, delayed
import multiprocessing
import cv2
import timeit
from tqdm import tqdm
from pathlib import Path
import os

# Paste your own directory here (the folder that contains 'modules' and 'tutorials')
path_folder=Path('/content/drive/MyDrive/Fatbox')
path_modules=path_folder/'modules'
os.chdir(path_modules) # make the modules folder the working directory (the Fatbox imports below are done from there)
# On Colab, in a new cell: print(path_modules) and make sure it ends with '/Fatbox/modules'
# In your IDE: type pwd in the console and make sure it ends with '/Fatbox/modules'

# Fatbox import
import preprocessing
import metrics
import plots
import utils
import structural_analysis
import edits


# Folder holding the model output as CSV files (data_<time>.csv)
data_path=Path(path_folder)/'tutorials'/'num'/'data_num'

# Folder where the figures are written
save_path=Path(path_folder)/'tutorials'/'num'/'plots_num'

# Folder holding the pickled graphs (G<time>.pickle)
array_path=Path(path_folder)/'tutorials'/'num'/'array_num'


Let's set the parameters.
The first group of settings is the same as for the extraction.

Then comes R, which is the important one. Two faults are considered similar, and are therefore correlated, when the average distance between them is lower than the set distance R (in pixels).
High R values loosen the correlation.
Low R values tighten the correlation.

In [None]:
# Data from the parameter file (= bash file) of the model
# endtime = 0        # in Myr, 0 will take the last available time
# starttime = 0.015  # in Myr
x_pixels = 2880             # image width in pixels
xlength = 450               # model width in km
y_pixels = 448              # image height in pixels
strain_rate_factor = 0.1
minimum_distance = 1.5
R = 5                       # default correlation radius in pixels
num_proc = 12               # number of processors of the computer, if using parallelisation
min_fault_length = 1.5      # in km
scale = xlength / x_pixels  # km per pixel

# Time steps to process: start, start+step, ... (end is excluded)
start = 200
end = 450
step = 50
times = list(range(start, end, step))

# Simplification factor handed to edits.simplify before correlating
factor = 1

# Correlation radius R (in pixels) for each iteration of the time-stepping
# loop. The loop runs over times[:-2], so it needs len(times) - 2 values;
# deriving the length from `times` keeps both in sync when start/end/step
# change. Replace individual entries to tune R for a given time step.
R_new = [6] * max(len(times) - 2, 0)

Definition of some functions

In [None]:

### Define functions
def get_nodes(G):
    """Collect the node positions of every fault in G.

    Returns a list with one entry per label given by
    metrics.get_fault_labels(G), in that order. Each entry is the list of
    'pos' attributes of the nodes of the corresponding fault, as returned by
    metrics.get_fault.
    """
    def positions_of(label):
        fault = metrics.get_fault(G, label)
        return [fault.nodes[node]['pos'] for node in fault]

    return [positions_of(label) for label in metrics.get_fault_labels(G)]

def is_A_in_B(set_A, set_B, R):
    """Tell whether point set A lies, on average, within R of point set B.

    For every point of set_A the Euclidean distance (first two coordinates)
    to its nearest point of set_B is computed; the result is False when the
    mean of those nearest distances exceeds R, and True otherwise.
    """
    gaps = np.zeros((len(set_A), len(set_B)))
    for row, point_a in enumerate(set_A):
        for col, point_b in enumerate(set_B):
            dx = point_a[0] - point_b[0]
            dy = point_a[1] - point_b[1]
            gaps[row, col] = math.sqrt(dx**2 + dy**2)

    mean_nearest = np.mean(np.min(gaps, axis=1))
    return not mean_nearest > R

def compute_similarity(set_A, set_B):
    """Return the mean nearest-neighbour distance from set_A to set_B.

    For each point of set_A, the Euclidean distance (first two coordinates)
    to the closest point of set_B is taken; the mean of these values is
    returned. The measure is not symmetric in its arguments.
    """
    table = np.zeros((len(set_A), len(set_B)))
    for i, p in enumerate(set_A):
        # fill one row at a time: distances from p to every point of set_B
        table[i] = [
            math.sqrt((p[0] - q[0])**2 + (p[1] - q[1])**2)
            for q in set_B
        ]
    nearest = np.min(table, axis=1)
    return np.mean(nearest)

def correlation_slow(G_0, G_1, R):
    """Find which faults of G_1 correspond to which faults of G_0.

    R is the distance threshold: a pair of faults is correlated when the
    similarity (see compute_similarity) in either direction is below R, so
    higher values loosen the correlation and lower values tighten it.

    Returns (correlations, smf, smb) where
      - correlations is a set of (label in G_0, label in G_1) tuples,
      - smf[n, m] is the forward similarity from fault n of G_0 to fault m
        of G_1,
      - smb[m, n] is the backward similarity from fault m of G_1 to fault n
        of G_0.
    """
    # Labels and node positions of both networks
    labels_old = metrics.get_fault_labels(G_0)
    labels_new = metrics.get_fault_labels(G_1)

    points_old = get_nodes(G_0)
    points_new = get_nodes(G_1)

    count_old = len(labels_old)
    count_new = len(labels_new)

    # Forward (smf) and backward (smb) similarity matrices
    smf = np.zeros((count_old, count_new))
    smb = np.zeros((count_new, count_old))

    for i in tqdm(range(count_old), desc='   Compute similarities'):
        for j in range(count_new):
            smf[i, j] = compute_similarity(points_old[i], points_new[j])
            smb[j, i] = compute_similarity(points_new[j], points_old[i])

    # A pair is kept as soon as one of the two directions is closer than R
    correlations = set()
    for i in tqdm(range(count_old), desc='   Find correlations'):
        for j in range(count_new):
            if smf[i, j] < R or smb[j, i] < R:
                correlations.add((labels_old[i], labels_new[j]))

    return correlations, smf, smb

def relabel(G_1, correlations, G_ref=None):
    """Relabel the faults of G_1 according to `correlations`.

    Parameters
    ----------
    G_1 : graph
        Network to relabel; it is modified in place and also returned. Its
        nodes must already carry the 'component', 'fault' and 'family'
        attributes.
    correlations : iterable of (reference label, G_1 label) tuples
        Pairs as produced by correlation_slow. May be empty, in which case
        every fault of G_1 is treated as new.
    G_ref : graph, optional
        Network the reference labels belong to; only used to measure the
        length of the reference faults. When omitted, the notebook-level
        variable G_0 is used, as this function has always done.

    Node attributes written
    -----------------------
    'correlated' : 1 when the node received a reference label, else 0.
    'family'     : extended with the labels the node carried before being
                   relabelled (duplicates removed).
    'fault'      : the reference label, or a fresh label for nodes that
                   were not correlated.
    """
    for node in G_1:
        G_1.nodes[node]['correlated'] = 0

    # Order the pairs by the length of the reference fault (ascending), so
    # that when several reference faults match the same component the
    # longest one is applied last and therefore wins.
    pairs = list(correlations)
    if pairs:
        if G_ref is None:
            G_ref = G_0  # notebook-level global, kept for backward compatibility
        lengths = [metrics.total_length(metrics.get_fault(G_ref, pair[0]))
                   for pair in pairs]
        pairs = [pair for _, pair in sorted(zip(lengths, pairs))]

    # Group the reference labels per G_1 component (order preserved) so each
    # node only visits the pairs that concern it.
    labels_by_component = {}
    for pair in pairs:
        labels_by_component.setdefault(pair[1], []).append(pair[0])

    for node in G_1:
        attributes = G_1.nodes[node]
        for new_label in labels_by_component.get(attributes['component'], ()):
            attributes['family'].extend([attributes['fault']])
            # remove duplicates
            attributes['family'] = list(set(attributes['family']))
            attributes['fault'] = new_label
            attributes['correlated'] = 1

    max_comp = max(metrics.get_fault_labels(G_1))

    # Faults without a counterpart get new labels above the current maximum,
    # one per connected component of the uncorrelated nodes.
    G_1_sub = nx.subgraph(G_1, [node for node in G_1 if G_1.nodes[node]['correlated']==0])
    # NOTE(review): sorted() is kept from the original; the components are
    # sets, which compare by subset relation, so for disjoint components
    # this presumably leaves the iteration order unchanged - confirm.
    for label, cc in enumerate(sorted(nx.connected_components(G_1_sub))):
        for n in cc:
            G_1.nodes[n]['fault'] = label+max_comp+1

    return G_1

class fault_database():
    """Small container pairing fault networks with their time steps.

    Graphs and times are kept in two parallel lists (`Gs` and `times`):
    entry i of `Gs` is the network of time step `times[i]`.
    """

    def __init__(self):
        self.Gs = []            # stored networks
        self.times = []         # time step of each stored network
        self.matrices = []      # not used by the methods of this class
        self.correlations = []  # not used by the methods of this class

    def add_graph(self, G, time):
        """Append network G for time step `time`."""
        self.Gs.append(G)
        self.times.append(time)

    def replace_graph(self, G, time):
        """Store G in place of every network registered for `time`.

        Nothing happens when `time` is not in the database.
        """
        # Loop over the times only: unpacking the stored graphs into a
        # variable called G would shadow the argument and turn this method
        # into a no-op.
        for index, t in enumerate(self.times):
            if t == time:
                self.Gs[index] = G

    def get_graphs_by_time(self, times):
        """Look up networks by time step.

        When `times` is a list or tuple, return the list of all networks
        whose time step is contained in it (in storage order). Otherwise
        `times` is treated as a single time step and the first matching
        network is returned, or None when there is no match.
        """
        if isinstance(times, (list, tuple)):
            return [G for G, t in zip(self.Gs, self.times) if t in times]

        for G, t in zip(self.Gs, self.times):
            if t == times:
                return G
        return None


def plot_results(G, time):
    """Save an image of the strain field of `time` with the faults of G on top.

    The non-initial plastic strain is loaded with get_nonstrain(time), drawn
    in grey scale and overlaid with the labelled faults of G using
    plots.plot_faults. The figure is written to
    save_path/'correlated'/image_<time>.png (the folder is created when
    missing).

    Only the figure created here is closed afterwards, so a figure the
    caller is still drawing on stays open and remains the current one.
    """
    non_strain = get_nonstrain(time)

    fig, ax = plt.subplots(1, 1, figsize=(16,8))
    ax.imshow(non_strain, cmap='gray_r', aspect="equal")

    plots.plot_faults(G, ax=ax, node_size=0.8, label=True)

    # Zoom on the central part of the model; axis units are pixels and the
    # y axis is inverted so that row 0 is at the top.
    ax.set_xlim([1000, 2000])
    ax.set_ylim([y_pixels, 0])
    ax.set_title('Non-initial plastic strain with faults correlated' + 'time=' + str(time), fontweight='bold')
    ax.set_ylabel('Depth (km)')
    ax.set_xlabel('Distance from model center (km)')
    ax.xaxis.tick_top()

    out_dir = save_path/'correlated'
    out_dir.mkdir(parents=True, exist_ok=True)
    fig.savefig(out_dir/str('image_' + str(time) + '.png'), dpi=200)

    plt.close(fig)

def get_nonstrain(time):
    """Load the non-initial plastic strain of one time step as a 2D array.

    Reads data_<time>.csv from data_path, takes the
    "noninitial_plastic_strain" column with the row order reversed, reshapes
    it to (y_pixels, x_pixels) and mirrors the result along its second axis.
    """
    csv_file = str(data_path)+'/data_' + str(time) + '.csv'
    table = pandas.read_csv(csv_file, delimiter=',')

    # Column index of the field we need.
    column = table.columns.get_loc("noninitial_plastic_strain")

    # Convert to numpy, reverse the rows and keep only the strain column.
    reversed_rows = table.to_numpy()[::-1]
    strain = reversed_rows[:, column].reshape(y_pixels, x_pixels)

    # Mirror left/right.
    return strain[:, ::-1]


Load the Graph stored after the extraction.

In [None]:
# Load the networks stored by the extraction notebook.
# NOTE: pickle can execute arbitrary code on load - only open pickle files
# that you produced yourself.
FD = fault_database()

# Iterate over a snapshot of `times`: removing an entry from the list that is
# being iterated would silently skip the time step that follows it.
for time in tqdm(list(times), desc='Load graphs'):
    with open(array_path/str('G'+ str(time)+'.pickle'), 'rb') as handle:
        G = pickle.load(handle)
    if nx.is_empty(G):
        # No edges in this network: drop the time step from the workflow
        times.remove(time)
        # print('Removed time ' + str(time))
    else:
        FD.add_graph(G, time)

Load graphs: 100%|██████████| 5/5 [00:02<00:00,  2.40it/s]


The fault extraction assigns arbitrary labels at every time step independently. However, to follow the evolution of a fault, it needs to be identified by the same label (the same name, if you want) during the whole workflow. This is the goal of the correlation.


## Correlation between two time steps
For more details about the correlation, see the step-by-step tutorial made by Thilo Wrona: https://github.com/thilowrona/fatbox_tutorials/tree/main/Numerical_models
The Fatbox library has been considerably extended since his published version, but the principle and the correlation_slow function remain unchanged.


## Correlation to n+2 and n-2

For every time step (times[n]), the faults are compared to all the faults of the following time step (times[n+1]) and of the time step after that (times[n+2]): this is the forward correlation. The idea is to recognise a fault in the following time steps as the same structure that has evolved, for example by becoming a bit longer. Once recognised based on the best similarity, the fault is relabelled according to the previous time step. Comparing with two time steps ahead ensures that small-scale variations of the network are taken into account. Similarly, the correlation is also made backward, 1 and 2 time steps before, to ensure consistency of the labels.

In [None]:

### Time stepping

# Folder receiving the figures of this cell
out_dir = Path(save_path)/'correlated'
out_dir.mkdir(parents=True, exist_ok=True)


def _show_strain(ax, strain, title):
    """Draw a strain image on ax with the common zoom window and a title."""
    ax.imshow(strain, cmap='gray_r',aspect="equal")
    ax.set_title(title)
    ax.set_xlim([1000, 2000])
    ax.set_ylim([y_pixels, 0])


def _reset_labels(G):
    """Start from the extraction labels: fault = component, empty family."""
    for node in G:
        G.nodes[node]['fault'] = G.nodes[node]['component']
        #initialise family with empty list
        G.nodes[node]['family'] = []


for n, time in enumerate(times[:-2]): #we need to stop 2 steps before end for upwind comparison

    print('\nTime step = ' + str(times[n]))

    # G_0 must stay a notebook-level variable: relabel() looks it up by name
    # to measure the length of the reference faults.
    G_0 = FD.get_graphs_by_time(times[n])
    if n == 0:
        _reset_labels(G_0)
        FD.replace_graph(G_0, times[n])

    G_1 = FD.get_graphs_by_time(times[n+1])
    G_2 = FD.get_graphs_by_time(times[n+2])

    _reset_labels(G_1)
    _reset_labels(G_2)

    # Read each strain field once; every field is drawn on two panels
    strain_0 = get_nonstrain(times[n])
    strain_1 = get_nonstrain(times[n+1])
    strain_2 = get_nonstrain(times[n+2])

    fig, axs = plt.subplots(2, 3, figsize=(19.2,10),num=str(times[n])+' R='+str(R_new[n]))

    # Top row: the three networks before correlation. This must be drawn now
    # because relabel() modifies G_1 and G_2 in place.
    _show_strain(axs[0,0], strain_0, 'raw' + str(times[n]))
    _show_strain(axs[0,1], strain_1, 'raw' + str(times[n+1]))
    _show_strain(axs[0,2], strain_2, 'raw' + str(times[n+2]))

    plots.plot_faults(G_0, node_size=2, ax=axs[0,0])
    plots.plot_faults(G_1, node_size=2, ax=axs[0,1])
    plots.plot_faults(G_2, node_size=2, ax=axs[0,2])


    ########## FIRST CORRELATION: times[n+1] against times[n]
    correlations01, smf01, smb01 = correlation_slow(edits.simplify(G_0, factor),
                                                    edits.simplify(G_1, factor),
                                                    R_new[n])
    G_1 = relabel(G_1, correlations01)

    _show_strain(axs[1,0], strain_0, 'raw' + str(times[n]))
    _show_strain(axs[1,1], strain_1,
                 '1st correlation: ' + str(times[n+1]) + ' using '+ str(times[n]))

    plots.plot_faults(G_0, node_size=2, ax=axs[1,0])
    plots.plot_faults(G_1, node_size=2, ax=axs[1,1])

    if n == 0:
        FD.replace_graph(G_1, times[n+1])

        plot_results(G_1, times[n+1])


    ########## SECOND CORRELATION: times[n+2] against times[n], then times[n+1]
    correlations02, smf02, smb02 = correlation_slow(edits.simplify(G_0, factor),
                                                    edits.simplify(G_2, factor),
                                                    R_new[n])

    G_2 = relabel(G_2, correlations02)

    # Make the labels inherited from times[n] the new components, so that the
    # next relabel() call matches against them
    for node in G_2:
        G_2.nodes[node]['component'] = G_2.nodes[node]['fault']

    correlations12, smf12, smb12 = correlation_slow(edits.simplify(G_1, factor),
                                                    edits.simplify(G_2, factor),
                                                    R_new[n])

    G_2 = relabel(G_2, correlations12)

    _show_strain(axs[1,2], strain_2,
                 '2d correlation: ' + str(times[n+2]) + ' using '+ str(times[n]))

    plots.plot_faults(G_2, node_size=2, ax=axs[1,2])

    # Address the figure explicitly: plot_results() opens and closes figures
    # of its own, so pyplot's "current figure" may no longer be this one.
    fig.tight_layout()
    fig.savefig(out_dir/str('progressive_corr_' + str(time) + '.png'), dpi=200)
    plt.close(fig)

    plot_results(G_2, times[n+2])


NameError: name 'times' is not defined