In [1]:
import os
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
from pmagpy import ipmag, pmag
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("white")
from vgptools.auxiliar import (get_files_in_directory, spherical2cartesian, 
                               cartesian2spherical, GCD_cartesian, shape)                              
from vgptools.utils_compilation import dfs_vgps_recomputed_poles
from vgptools.utils_APWPs import (running_mean_APWP, running_mean_APWP_shape, RM_stats, get_pseudo_vgps, get_vgps_sampling_direction, 
                                  running_mean_VGPs_bootstrapped, running_mean_bootstrapping_direction)

from vgptools.utils_visualization import  RM_stats, plot_VGPs_and_APWP, plot_APWP_RM_ensemble, quantiles

## Compile one DataFrame for the VGPs and one for the recomputed poles by study

In [2]:
# Locate the VGP database folder relative to the directory the notebook runs from.
current_path = os.getcwd()
data_path_VGP = f"{current_path}/data/vgp_database"

In [3]:
# Read the database folder and unpack the two tables used throughout the notebook:
# the filtered VGPs and the compilation of recomputed poles.
df_filtered_vgps, df_pole_compilation = dfs_vgps_recomputed_poles(
    data_path_VGP,
    by_study=False,
)

# Set the hyperparameters for further comparisons.

In [18]:
# Settings shared by every running-mean call in this notebook.
# NOTE(review): ages are presumably expressed in Ma — confirm against the database.
min_age, max_age = 0, 65   # lower and upper bound of the age range
window_length = 10         # passed as the moving-window length
time_step = 2              # passed as the step between successive windows

### 1. Running means classic approach on poles

In [19]:
# Classic approach: running mean applied directly to the compiled poles.
RM_classic_by_study = running_mean_APWP(
    df_pole_compilation, "Plon", "Plat", "mean_age",
    window_length, time_step, max_age, min_age,
)
# Show the first two windows as the cell output.
RM_classic_by_study.head(2)

Unnamed: 0,age,N,n_studies,k,A95,csd,plon,plat
0,0.0,12.0,12.0,147.655547,3.583738,6.665921,-70.071867,-87.139798
1,2.0,15.0,13.0,137.063577,3.277723,6.918692,-64.251763,-87.179527


### 2. Running means ensemble from pseudo-VGPs approach (Vaes et al. 2022)

In [20]:
# Number of pseudo-VGP realisations that make up the ensemble.
n_pseudo_runs = 100

# Each realisation's running-mean table is collected in a list and concatenated
# once at the end. DataFrame.append was removed in pandas 2.0, and growing a frame
# inside the loop re-copies every previously accumulated row on each iteration.
pseudo_vgp_runs = []
for i in range(n_pseudo_runs):
    pseudo_vgps_temp = get_pseudo_vgps(df_pole_compilation)
    RM_APWP_parametric_temp = running_mean_APWP(pseudo_vgps_temp, 'Plon', 'Plat', 'mean_age',
                                                window_length, time_step, max_age, min_age)
    RM_APWP_parametric_temp['run'] = i  # tag every row with its realisation index
    pseudo_vgp_runs.append(RM_APWP_parametric_temp)

ensemble_all_runs = pd.concat(pseudo_vgp_runs, ignore_index=True)

# Keep the column layout the former accumulator frame imposed: these five first,
# followed by whatever else running_mean_APWP returned, in its own order.
leading_columns = ['run', 'n_studies', 'k', 'A95', 'csd']
ordered_columns = leading_columns + [c for c in ensemble_all_runs.columns if c not in leading_columns]

# Express longitudes above 180 as negative values (lon - 360), vectorised
# instead of evaluating a Python lambda once per row.
ensemble_plon = ensemble_all_runs['plon']
RM_ensemble_pseudoVGP = ensemble_all_runs.reindex(columns=ordered_columns).assign(
    plon=ensemble_plon.mask(ensemble_plon > 180, ensemble_plon - 360)
)

In [21]:
# Quantiles of the pseudo-VGP ensemble grouped by age, kept for visualization:
# first the latitude summary, then the longitude summary.
RM_pseudoVGP_quant_plat, RM_pseudoVGP_quant_plon = (
    quantiles(RM_ensemble_pseudoVGP, "age", "plat"),
    quantiles(RM_ensemble_pseudoVGP, "age", "plon"),
)
# Preview the first rows of the raw ensemble.
RM_ensemble_pseudoVGP.head(3)

Unnamed: 0,run,n_studies,k,A95,csd,age,N,plon,plat
0,0,15.0,25.654891,1.753286,15.991895,0.0,261.0,-41.151807,-88.018624
1,0,15.0,24.989718,1.685732,16.203332,2.0,290.0,-35.587777,-88.224786
2,0,15.0,24.195807,1.636753,16.467018,4.0,318.0,-20.290073,-88.39032


## 3. Running means on VGPs. Three different approaches, as follows:
### 3.1. Running means on the computed dataset (one run and A95)

In [22]:
# Single running-mean pass over the filtered VGP table (one run, with A95).
RM_on_VGP = running_mean_APWP(
    df_filtered_vgps, 'vgp_lon_SH', 'vgp_lat_SH', 'mean_age',
    window_length, time_step, max_age, min_age,
)

In [23]:
# Preview the first three windows of the running mean computed on the VGPs.
RM_on_VGP.head(3)

Unnamed: 0,age,N,n_studies,k,A95,csd,plon,plat
0,0.0,260.0,15.0,24.673134,1.792727,16.306954,-58.469586,-87.906699
1,2.0,278.0,15.0,24.256975,1.748834,16.446243,-51.125222,-88.069489
2,4.0,293.0,15.0,23.273122,1.740456,16.790271,-37.112432,-88.032564


### 3.2. Running means on random samples (with replacement) from the original dataset to generate an ensemble of possible solutions.

In [None]:
%%time
RM_ensemble_VGPs=running_mean_VGPs_bootstrapped(df_filtered_vgps, 'vgp_lon_SH', 'vgp_lat_SH', 'mean_age', window_length, time_step, max_age, min_age, n_bst = 200)

In [None]:
# Quantiles of the bootstrapped VGP ensemble grouped by age, kept for visualization:
# first the latitude summary, then the longitude summary.
RM_VGP_quant_plat, RM_VGP_quant_plon = (
    quantiles(RM_ensemble_VGPs, "age", "plat"),
    quantiles(RM_ensemble_VGPs, "age", "plon"),
)
# Preview the first rows of the raw ensemble.
RM_ensemble_VGPs.head(3)

### 3.3. Propagation of directional error towards the VGP level.
We treat the original directions as a probability density function (PDF) to generate a pseudo-dataset that incorporates the uncertainty in directional space and time. We apply the running means to a number of $pseudo$-VGPs to generate an ensemble of possible paths.

In [None]:
%%time
RM_propagated=running_mean_bootstrapping_direction(df_filtered_vgps,'plon', 'plat', 'age', window_length, time_step, max_age, min_age, n_bst = 100)

In [None]:
# Quantiles of the propagated-error ensemble grouped by age, kept for visualization:
# first the latitude summary, then the longitude summary.
RM_VGP_propagated_plat, RM_VGP_propagated_plon = (
    quantiles(RM_propagated, "age", "plat"),
    quantiles(RM_propagated, "age", "plon"),
)
# Preview the first rows of the raw ensemble.
RM_propagated.head(3)

# Comparisons plot

In [None]:
title = "comparisons"

# Quantile summaries under comparison.
# Ensemble 1: running means with propagated directional error.
ensemble1_lon = RM_VGP_propagated_plon
ensemble1_lat = RM_VGP_propagated_plat

# Ensemble 2: running means on the bootstrapped VGPs.
ensemble2_lat = RM_VGP_quant_plat
ensemble2_lon = RM_VGP_quant_plon

# The legend labels state the 0.16-0.84 band, so every shaded envelope in both
# panels is drawn from these two quantile columns. Previously some envelopes
# used q25/q75 while still being labelled 0.16-0.84.
band_lower, band_upper = "q16", "q84"


def _draw_ensemble(ax, quant, band_color, band_alpha, line_color, label=None, **marker_kwargs):
    """Draw one ensemble on ``ax``: shaded quantile band, median line and median markers.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    quant : quantile summary exposing ``X``, ``q50`` and the two band columns
        named by ``band_lower`` / ``band_upper`` as attributes.
    band_color, band_alpha : fill colour and transparency of the envelope.
    line_color : colour of the median line and its markers.
    label : legend entry attached to the envelope; omitted when ``None``.
    **marker_kwargs : extra keyword arguments forwarded to ``ax.scatter``.
    """
    band_kwargs = {} if label is None else {"label": label}
    ax.fill_between(quant.X, getattr(quant, band_lower), getattr(quant, band_upper),
                    color=band_color, alpha=band_alpha, **band_kwargs)
    ax.plot(quant.X, quant.q50, '-', color=line_color)
    ax.scatter(quant.X, quant.q50, color=line_color, **marker_kwargs)


fig, axes = plt.subplots(2, 1, sharex=True, figsize=(15, 6))
fig.suptitle(title, fontsize=18, fontweight='bold')
axes[0].set_title('Latitude (°N)', fontsize=12, fontweight='bold')
axes[1].set_title('Longitude (°E)', fontsize=12, fontweight='bold')
axes[0].set_ylabel(r'Latitude (°N)', fontweight='bold')
axes[1].set_ylabel(r'Longitude (°E)', fontweight='bold')
# The x axis is shared, so it is labelled on the bottom panel only.
axes[1].set_xlabel('Age', fontweight='bold')

# LATITUDE
_draw_ensemble(axes[0], ensemble1_lat, "#f98787", .20, "#d12a2a")
_draw_ensemble(axes[0], ensemble2_lat, "#0E7DC9", .20, "#00497A")
axes[0].plot(RM_classic_by_study.age, RM_classic_by_study.plat, '-', color="black")  # CLASSIC
axes[0].scatter(RM_classic_by_study.age, RM_classic_by_study.plat, color="black")  # CLASSIC

# LONGITUDE
_draw_ensemble(axes[1], ensemble1_lon, "#f9afaf", .50, "#d12a2a",
               label="0.16-0.84 percentiles - Running mean on VGPs ensemble")
# zorder=3 keeps these markers above every line drawn on the panel.
_draw_ensemble(axes[1], ensemble2_lon, "#0E7DC9", .20, "#00497A",
               label="0.16-0.84 percentiles - Running mean on VGPs", alpha=1, zorder=3)
axes[1].plot(RM_classic_by_study.age, RM_classic_by_study.plon, '-', color="black", label="RM classic")  # CLASSIC
axes[1].scatter(RM_classic_by_study.age, RM_classic_by_study.plon, color="black")  # CLASSIC

# Attach the legend to the longitude panel explicitly instead of relying on
# pyplot's "current axes"; the trailing semicolon hides the Legend repr.
axes[1].legend(loc="upper left");