# Testing logratio transformations

In [1]:
%matplotlib inline
import geopandas as gpd
import pandas as pd
import numpy as np
import sys

sys.path.insert(0, "..")
from eis_toolkit.transformations.coda.alr import alr_transform, inverse_alr
from eis_toolkit.transformations.coda.clr import clr_transform, inverse_clr
from eis_toolkit.transformations.coda.ilr import single_ilr_transform
from eis_toolkit.transformations.coda.pairwise import pairwise_logratio, single_pairwise_logratio
from eis_toolkit.transformations.coda.plr import plr_transform, single_plr_transform

# Relative path to the shapefile that is read in the "Testing with example data" section
GEOCHEMICAL_DATA = "../tests/data/local/coda/IOCG_CLB_Till_Geochem_reg_511p.shp"

    https://beartype.readthedocs.io/en/latest/api_roar/#pep-585-deprecations
  warn(


In [2]:
# The geochemical data has many columns; lift the display limit so that
# none of them are truncated when a DataFrame is rendered.
pd.options.display.max_columns = None

## Testing with a simple, single-row composition

In [3]:
# A simple example composition consisting of the parts a, b and c.
# The parts are expressed as percentages and sum to 100%.
# Source of the example: Pawlowsky-Glahn & Egozcue 2006.

c_arr = np.array([80, 15, 5])
# Reshape the 1-D array into a single row so it becomes a one-row DataFrame
C = pd.DataFrame(c_arr.reshape(1, -1), columns=list("abc"))

In [4]:
# Pairwise logratios between the parts of the first (and only) row
first_row = C.iloc[0]
pair_a_b = single_pairwise_logratio(float(first_row["a"]), float(first_row["b"]))
pair_a_c = single_pairwise_logratio(float(first_row["a"]), float(first_row["c"]))
pair_b_c = single_pairwise_logratio(float(first_row["b"]), float(first_row["c"]))

# ALR and its inverse, rescaled to 100 (percent)
C_alr = alr_transform(C)
C_alr_inv = inverse_alr(C_alr, "c", scale=100)

# CLR and its inverse, rescaled to 100 (percent)
C_clr = clr_transform(C)
C_clr_inv = inverse_clr(C_clr, scale=100.0)

# PLR of the whole composition
C_plr = plr_transform(C)

# Single ILR coordinates: a against b, and {a, b} against c
C_ilr_ab = single_ilr_transform(C, ["a"], ["b"])
C_ilr_ab_c = single_ilr_transform(C, ["a", "b"], ["c"])

In [5]:
# Expected result: 1.67, i.e. ln(80 / 15)
pair_a_b

1.6739764335716716

In [6]:
# Expected result: 2.77, i.e. ln(80 / 5)
pair_a_c

2.772588722239781

In [7]:
# Expected result: 1.10, i.e. ln(15 / 5)
pair_b_c

1.0986122886681098

In [8]:
# Expected result: [2.77, 1.10], i.e. [ln(a / c), ln(b / c)];
# the displayed values show that the last part (c) acts as the denominator by default.
C_alr

Unnamed: 0,V1,V2
0,2.772589,1.098612


In [9]:
# Expected result: the original composition [80, 15, 5] (inverse ALR rescaled to 100)
C_alr_inv

Unnamed: 0,V1,V2,c
0,80.0,15.0,5.0


In [10]:
# Expected result: [1.48, -0.19, -1.29], i.e. ln of each part divided by
# the geometric mean of all three parts, (80 * 15 * 5) ** (1 / 3)
C_clr

Unnamed: 0,V1,V2,V3
0,1.482188,-0.191788,-1.2904


In [11]:
# Expected result: [1.82, 0.78], which matches
# [sqrt(2/3) * ln(a / sqrt(b * c)), sqrt(1/2) * ln(b / c)]
C_plr

Unnamed: 0,V1,V2
0,1.815303,0.776836


In [12]:
# Rescaled to a total of 100%, the inverse CLR should reproduce the original data: [80, 15, 5]
C_clr_inv

Unnamed: 0,V1,V2,V3
0,80.0,15.0,5.0


In [13]:
# Expected result: 1.18, which matches sqrt(1/2) * ln(a / b)
C_ilr_ab

0    1.18368
dtype: float64

In [14]:
# Expected result: 1.58, which matches sqrt(2/3) * ln(sqrt(a * b) / c)
C_ilr_ab_c

0    1.580411
dtype: float64

In [15]:
# A two-row, four-part composition (each row sums to 100) for an ALR round trip
sample_array = np.array(
    [
        [65, 12, 18, 5],
        [63, 16, 15, 6],
    ]
)
SAMPLE_DATAFRAME = pd.DataFrame(data=sample_array, columns=list("abcd"))

sample_alr = alr_transform(SAMPLE_DATAFRAME)
sample_alr

Unnamed: 0,V1,V2,V3
0,2.564949,0.875469,1.280934
1,2.351375,0.980829,0.916291


In [16]:
# Invert the ALR with "d" as the restored denominator column, rescaled to 100
sample_alr_inv = inverse_alr(sample_alr, "d", scale=100)

In [17]:
# Expected result: the original rows [65, 12, 18, 5] and [63, 16, 15, 6]
sample_alr_inv

Unnamed: 0,V1,V2,V3,d
0,65.0,12.0,18.0,5.0
1,63.0,16.0,15.0,6.0


## Testing with example data

In [18]:
# Unit constants: one million, and one part per million as a fraction

million = 1e6
ppm = 1 / million

In [19]:
# Names of all the columns that contain concentration data (one per line)

elements = [
    "Al_ppm_511",
    "Ba_ppm_511",
    "Ca_ppm_511",
    "Co_ppm_511",
    "Cr_ppm_511",
    "Cu_ppm_511",
    "Fe_ppm_511",
    "K_ppm_511P",
    "La_ppm_511",
    "Li_ppm_511",
    "Mg_ppm_511",
    "Mn_ppm_511",
    "Ni_ppm_511",
    "P_ppm_511P",
    "Sc_ppm_511",
    "Sr_ppm_511",
    "Th_ppm_511",
    "Ti_ppm_511",
    "V_ppm_511P",
    "Y_ppm_511P",
    "Zn_ppm_511",
    "Zr_ppm_511",
]

In [20]:
# A subcomposition of four elements (selected at random)

elements_to_analyze = [
    "Al_ppm_511",
    "Ca_ppm_511",
    "Fe_ppm_511",
    "Mg_ppm_511",
]

In [21]:
# Read only the selected element columns from the vector file

geochem = gpd.read_file(GEOCHEMICAL_DATA, include_fields=elements_to_analyze)

# Drop the geometry column to get a plain (non-geo) DataFrame

df = pd.DataFrame(geochem.drop(columns="geometry"))

# Add a residual column so that every row sums to one million

row_totals = df.sum(axis=1)
df["residual"] = million - row_totals
df.head()

Unnamed: 0,Al_ppm_511,Ca_ppm_511,Fe_ppm_511,Mg_ppm_511,residual
0,27600.0,40200.0,83200.0,17200.0,831800.0
1,14100.0,5000.0,28300.0,7520.0,945080.0
2,7880.0,3070.0,14500.0,4540.0,970010.0
3,7300.0,3290.0,14600.0,3240.0,971570.0
4,12500.0,3600.0,31500.0,8020.0,944380.0


In [24]:
# Pairwise logratios between selected columns of the example data
pair_Al_Ca = pairwise_logratio(df, "Al_ppm_511", "Ca_ppm_511")
pair_Fe_Mg = pairwise_logratio(df, "Fe_ppm_511", "Mg_ppm_511")
pair_Mg_Al = pairwise_logratio(df, "Mg_ppm_511", "Al_ppm_511")
pair_Mg_res = pairwise_logratio(df, "Mg_ppm_511", "residual")

# ALR with the default denominator and with Mg as an explicit denominator,
# followed by the CLR and PLR of the same frame
df_alr = alr_transform(df)
df_alr_Mg = alr_transform(df, "Mg_ppm_511")
df_clr = clr_transform(df)
df_plr = plr_transform(df)

# Inverse transforms, rescaled to one million.
# df_alr was computed with the default denominator, which (as the single-row
# example earlier in this notebook shows) is the last column -- here "residual".
# The restored column must therefore be named "residual", not "c": "c" is the
# denominator of the toy composition and does not exist in this frame.
df_clr_inv = inverse_clr(df_clr, scale=million)
df_alr_inv = inverse_alr(df_alr, "residual", scale=million)

# Single ILR coordinates: Al against Ca, and {Al, Ca} against {Fe, Mg}
df_ilr_Al_Ca = single_ilr_transform(df, ["Al_ppm_511"], ["Ca_ppm_511"])
df_ilr_AlCa_FeMg = single_ilr_transform(df, ["Al_ppm_511", "Ca_ppm_511"], ["Fe_ppm_511", "Mg_ppm_511"])

In [25]:
# First rows of the Al/Ca pairwise logratio; row 0 is ln(27600 / 40200), about -0.376
pair_Al_Ca.head()

0   -0.376051
1    1.036737
2    0.942650
3    0.796987
4    1.244795
dtype: float64

In [26]:
# ALR with Mg as the denominator: the displayed V1-V4 match
# ln(Al / Mg), ln(Ca / Mg), ln(Fe / Mg) and ln(residual / Mg)
df_alr_Mg.head()

Unnamed: 0,V1,V2,V3,V4
0,0.472906,0.848958,1.576338,3.878683
1,0.628609,-0.408128,1.325296,4.833703
2,0.551401,-0.391249,1.161222,5.364379
3,0.812301,0.015314,1.505448,5.70334
4,0.44379,-0.801005,1.368049,4.76859
