In [1]:
import os
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
from pmagpy import ipmag, pmag
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("white")
from vgptools.auxiliar import (get_files_in_directory, spherical2cartesian, 
                               cartesian2spherical, GCD_cartesian, shape)                              
from vgptools.utils_compilation import dfs_vgps_recomputed_poles
from vgptools.utils_APWPs import running_mean_APWP, running_mean_APWP_shape, RM_stats, get_pseudo_vgps, get_vgps_sampling_direction, running_mean_VGPs_bootstrapped

from vgptools.utils_visualization import  RM_stats, plot_VGPs_and_APWP, plot_APWP_RM_ensemble

## Compile DataFrames for both the VGPs and the recomputed poles, by study

In [2]:
# The VGP database is looked up relative to the current working directory.
current_path = os.getcwd()
data_path_VGP = f"{current_path}/data/vgp_database"

In [3]:
# Build two frames from the database directory: the filtered VGPs and the
# pole compilation (requested with by_study=True).
df_filtered_vgps, df_pole_compilation = dfs_vgps_recomputed_poles(
    data_path_VGP,
    by_study=True,
)

# Set the hyperparameters for further comparisons.

In [4]:
# Age range and moving-window settings passed to the running-mean calls in
# this notebook.
# NOTE(review): units are presumably Ma — confirm against vgptools.
min_age, max_age = 0, 65
window_length, time_step = 20, 10

## Running means classic approach on poles

In [5]:
# Running mean computed directly on the pole compilation.
RM_classic_by_study = running_mean_APWP(
    df_pole_compilation,
    "Plon",
    "Plat",
    "mean_age",
    window_length,
    time_step,
    max_age,
    min_age,
)
RM_classic_by_study.head(n=3)

Unnamed: 0,age,N,n_studies,k,A95,csd,plon,plat
0,0.0,15.0,15.0,157.806367,3.053286,6.447966,292.836413,-87.015353
1,10.0,15.0,15.0,157.806367,3.053286,6.447966,292.836413,-87.015353
2,20.0,4.0,4.0,694.396487,3.48919,3.07384,344.0108,-81.836813


## Running means ensemble from pseudo-VGPs approach (Vaes et al. 2022)

In [6]:
# Build an ensemble of running-mean paths: each iteration derives a set of
# pseudo-VGPs from the pole compilation and runs the running mean on it.
# NOTE(review): get_pseudo_vgps presumably draws random samples and no seed is
# set in this notebook, so reruns may differ — confirm and seed if needed.
n_pseudo_runs = 100
leading_columns = ['run', 'n_studies', 'k', 'A95', 'csd']

# Collect every run in a list and concatenate once. DataFrame.append was
# removed in pandas 2.0, and growing a frame inside the loop re-copies all
# previous rows on every iteration.
ensemble_runs = []
for i in range(n_pseudo_runs):
    pseudo_vgps_temp = get_pseudo_vgps(df_pole_compilation)
    RM_APWP_parametric_temp = running_mean_APWP(pseudo_vgps_temp, 'Plon', 'Plat', 'mean_age', window_length, time_step, max_age, min_age)
    RM_APWP_parametric_temp['run'] = i  # tag each row with the run it came from
    ensemble_runs.append(RM_APWP_parametric_temp)

RM_ensemble_pseudoVGP = pd.concat(ensemble_runs, ignore_index=True)

# Restore the column layout the append-based version produced: the five
# leading columns first, then the remaining ones in their existing order.
# reindex (rather than plain indexing) keeps a leading column as NaN if the
# running mean did not return it.
trailing_columns = [col for col in RM_ensemble_pseudoVGP.columns if col not in leading_columns]
RM_ensemble_pseudoVGP = RM_ensemble_pseudoVGP.reindex(columns=leading_columns + trailing_columns)

In [7]:
# Preview the first three rows of the pseudo-VGP ensemble.
RM_ensemble_pseudoVGP.head(n=3)

Unnamed: 0,run,n_studies,k,A95,csd,age,N,plon,plat
0,0,15.0,22.665785,1.682955,17.013734,0.0,322.0,302.160608,-86.672277
1,0,18.0,19.218187,1.643854,18.476888,10.0,401.0,313.408523,-85.986833
2,0,9.0,10.173111,2.874204,25.39558,20.0,261.0,335.540807,-81.617489


## Running means on VGPs. Two different approaches, as follows:
1. Running means on the computed dataset.

In [8]:
# Same running mean, applied to the filtered VGPs using the
# 'vgp_lon_SH' / 'vgp_lat_SH' columns.
RM_on_VGP = running_mean_APWP(
    df_filtered_vgps,
    'vgp_lon_SH',
    'vgp_lat_SH',
    'mean_age',
    window_length,
    time_step,
    max_age,
    min_age,
)

In [9]:
# Preview the first three rows of the running mean on VGPs.
RM_on_VGP.head(n=3)

Unnamed: 0,age,N,n_studies,k,A95,csd,plon,plat
0,0.0,305.0,15.0,23.294563,1.704883,16.782542,317.08323,-87.945102
1,10.0,330.0,16.0,21.201852,1.721511,17.591307,327.300988,-87.482422
2,20.0,207.0,9.0,10.117603,3.239569,25.465148,341.959534,-82.115848


2. Running means bootstrapping the sample many times to generate an ensemble.

In [10]:
# Bootstrapped running means on the filtered VGPs.
# NOTE(review): n_bst is presumably the number of bootstrap resamples — confirm
# against vgptools.utils_APWPs.
RM_ensemble_VGPs = running_mean_VGPs_bootstrapped(
    df_filtered_vgps,
    'vgp_lon_SH',
    'vgp_lat_SH',
    'mean_age',
    window_length,
    time_step,
    max_age,
    min_age,
    n_bst=200,
)

In [11]:
# Preview the first three rows of the bootstrapped VGP ensemble.
RM_ensemble_VGPs.head(n=3)

Unnamed: 0,run,N,k,A95,csd,foliation,lineation,collinearity,coplanarity,age,n_studies,plon,plat
0,0.0,320.0,22.978951,1.676161,16.897401,0.381371,0.512217,1.343091,0.893588,0.0,14.0,319.609983,-88.020802
1,0.0,348.0,20.577123,1.702736,17.85635,0.380376,0.49219,1.293958,0.872566,10.0,15.0,325.396604,-86.842506
2,0.0,208.0,10.445884,3.175078,25.061809,0.235559,0.496493,2.107725,0.732052,20.0,9.0,336.27152,-78.925226
