In [5]:
# Get all the modules into our path.
# Two directories are put at the front of sys.path (positions 0 and 1) so
# that the `omphalos` and `analysis` imports below resolve to local checkouts.
# NOTE(review): both entries are absolute paths into one user's home
# directory, so this cell only works on that machine — consider a
# configurable base directory or an installed package instead.
import sys
sys.path.insert(0,'/Users/thomasdodd/Library/CloudStorage/OneDrive-MillfieldEnterprisesLimited/github/Omphalos')
sys.path.insert(1,'/Users/thomasdodd/Library/CloudStorage/OneDrive-MillfieldEnterprisesLimited/github')

# Import machine learning modules.
from omphalos import file_methods as fm
from omphalos import attributes as attr
from omphalos import labels as lbls
import analysis as ana
from analysis import helper as hp

# Import data processing modules.
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib as mpl

# Import some extra modules
import re

# Import data visualisation modules.
import matplotlib.pyplot as plt

In [6]:
# Load every run stored in the .pkl file into a single object.
# NOTE(review): the path is absolute and machine-specific; and, if
# fm.unpickle wraps the standard pickle loader (presumably — confirm),
# it must only ever be pointed at trusted files.
basalt_runs_pkl = '/Users/thomasdodd/Library/CloudStorage/OneDrive-MillfieldEnterprisesLimited/Cambridge/AI4ER/Easter/MRes/CrunchFlow_Work/bfm/2022-06-14_bfm_5-1_1D_FB_100oc-100bar/basalt_5-1.pkl'
TrainSet_dict = fm.unpickle(basalt_runs_pkl)

In [7]:
# Filter all errored files out of the dictionary
# hp.filter_errors returns a pair that is unpacked into `dataset_dict` (used
# by every later cell) and `error_dict` (not used again in the visible cells).
# The recorded output below reports 7266 of 7983 files kept.
dataset_dict, error_dict = hp.filter_errors(TrainSet_dict)

Returned 7266 files without errors out of a total possible 7983.
717 files had errors.
0 files had unhandled errors.
File failure rate: 9.867877786952931 %.
To see unhandled errors, run with verbose=True.


In [11]:
# Build the table of starting attributes: ask omphalos for the
# "f_i_onehundred" condition with species concentrations, then keep only the
# solute columns of interest (in this order).
solute_columns = ["Al+++", "Ca++", "Fe++", "K+", "Na+", "Mg++", "SiO2(aq)"]
attributes_all_df = (
    attr.get_condition(dataset_dict, "f_i_onehundred", species_concs=True)
    .loc[:, solute_columns]
)
attributes_all_df

  species_attrs = species_attrs.append(primary_species(data_set[i], condition), ignore_index=True)


Unnamed: 0,Al+++,Ca++,Fe++,K+,Na+,Mg++,SiO2(aq)
0,0.007198,0.001175,0.000148,0.000147,0.020330,0.000977,0.004258
1,0.003769,0.001022,0.000208,0.000111,0.004894,0.000877,0.003807
2,0.001450,0.000888,0.000031,0.000169,0.014610,0.000757,0.002145
3,0.009167,0.000065,0.000002,0.000063,0.004095,0.000143,0.002045
4,0.002672,0.001121,0.000191,0.000205,0.012117,0.000523,0.005128
...,...,...,...,...,...,...,...
7261,0.008013,0.000047,0.000108,0.000041,0.018050,0.000659,0.000205
7262,0.001035,0.000085,0.000119,0.000108,0.000270,0.000410,0.001210
7263,0.009463,0.000818,0.000093,0.000007,0.001866,0.000680,0.002848
7264,0.002308,0.001236,0.000053,0.000045,0.014609,0.000298,0.000170


In [69]:
# Build, for every run, the carbonate volumes at the final time step summed
# over the whole spatial grid.

# Variables to discard from the volume dataset before summing. Whatever is
# not listed here is kept (the cells below read `.Calcite` from the result).
NonCarbonates_arr = ["Diopside","Diopside_a","Hedenbergite","Hedenbergite_a",
                    "Albite","Albite_a","Anorthite","Anorthite_a","M_Microcline",
                    "M_Microcline_a","M_Microcline_b","Forsterite","Forsterite_a",
                    "Fayalite","Fayalite_a","Antigorite","Antigorite_a","Greenalite",
                    "Greenalite_a","Calcite_a","Siderite_a","Magnesite_a"]

Vols_ds = lbls.raw(dataset_dict, 'volume')
# Number of coordinate values along X (not used again within this cell).
NrXDiscretisedBlocks = len(Vols_ds.X.values)
# Keep only the slice at time == 280.0.
EndVols_ds = Vols_ds.sel(time=280.0)
# drop_vars is the non-deprecated spelling of Dataset.drop(labels=...).
CarbonateEndVols_ds = EndVols_ds.drop_vars(NonCarbonates_arr)

# Stack all remaining variables into one DataArray so the string clean-up
# below runs once over every value.
CarbonateEndVols_da = CarbonateEndVols_ds.to_array(dim='arbitrary_array')
CarbonateEndVols_da = CarbonateEndVols_da.astype(str)
# Some values are written as a mantissa followed directly by a signed
# exponent with no 'E' (e.g. "1.234-100" or "1.234+100"), which float()
# cannot parse. Every such value is replaced by "0".
# NOTE(review): mapping the "+" form to 0 treats a (presumably huge)
# magnitude as zero — this reproduces the original behaviour; confirm it is
# intended.
# A raw string is used so that \d, \. and \+ reach the regex engine
# unchanged instead of being treated as (invalid) string escapes.
CarbonateEndVols_da = CarbonateEndVols_da.str.replace(r"^\d+\.\d+[-+]\d+$", "0", regex=True)
CarbonateEndVols_da = CarbonateEndVols_da.astype(float)

# Back to one variable per mineral, then collapse the three spatial axes.
CarbonateEndVols_ds = CarbonateEndVols_da.to_dataset(dim="arbitrary_array")
SpatialSummedCarbonateEndVols_ds = CarbonateEndVols_ds.sum(dim=["X","Y","Z"])

In [74]:
# Display the carbonate end-volume dataset after summing over X, Y and Z.
SpatialSummedCarbonateEndVols_ds

In [76]:
# Sanity check: number of entries in the Calcite variable after the spatial
# sum. The recorded output (7266) equals the number of error-free files
# reported by the error-filtering cell.
len(SpatialSummedCarbonateEndVols_ds.Calcite.values)

7266

In [17]:
# Retrieval of xarray datasets containing final and initial mineralogical volume fractions
# NOTE(review): the recorded output of this cell is
# "NameError: name 'X' is not defined". No bare name `X` appears in this cell,
# so the error presumably comes from inside NrFormatFixer_XYZ or
# attr.initial_conditions — confirm.
FinalVols_ds = lbls.raw(dataset_dict, 'volume')
FinalVols_ds = FinalVols_ds.sel(time=280.0)
FinalVols_ds = FinalVols_ds.astype(float)
# Bare expression in the middle of the cell: Jupyter only displays a cell's
# last expression, so this line shows nothing.
FinalVols_ds

InitialVols_ds = attr.initial_conditions(dataset_dict, concentrations=False, minerals=True)
# NOTE(review): NrFormatFixer_XYZ is neither defined nor imported in the
# visible cells; it presumably lives in a cell that is not shown or was
# deleted — on a fresh kernel this name must be defined before this cell runs.
InitialVols_ds = NrFormatFixer_XYZ(InitialVols_ds)
InitialVols_ds = InitialVols_ds.astype(float)

# NOTE(review): everything below is commented out, so this cell never assigns
# DeltVolFrac_ds or DeltVolFrac_TenYr_ds, yet the plotting cells further down
# read DeltVolFrac_TenYr_ds. Also, FinalVols_ds has already been reduced to
# time=280.0 above, so the `time=0.1` selector below looks stale — confirm
# before re-enabling.

# Generation of an xarray dataset representing change in mineralogical volume fractions
# DeltVolFrac_ds = FinalVols_ds - InitialVols_ds

# Generation of a narrowed-down xarray dataset containing only data on a specific time and place
# DeltVolFrac_TenYr_ds = DeltVolFrac_ds.sel(X=0.5,Y=0.5,Z=0.5,time=0.1)

# Generation of an additional xarray variable entitled Carbonates; placed into the above xarray dataset
# DeltVolFrac_TenYr_ds = DeltVolFrac_TenYr_ds.assign(Carbonates=lambda DeltVolFrac_TenYr_ds: DeltVolFrac_TenYr_ds.Calcite + DeltVolFrac_TenYr_ds.Siderite + DeltVolFrac_TenYr_ds.Magnesite)

NameError: name 'X' is not defined

In [None]:
# Three side-by-side scatter panels: the `Carbonates` variable of
# DeltVolFrac_TenYr_ds on the y axis against the starting concentration of
# Ca++, Fe++ and Mg++ (one cation per panel) on the x axis.
# NOTE(review): DeltVolFrac_TenYr_ds is not assigned by any live code in the
# visible cells (the lines that would build it are commented out) — make sure
# it exists before running this cell.
cations = ("Ca++", "Fe++", "Mg++")

# figsize is (width, height): same 21 x 4 figure as before.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(21, 4))
fig.suptitle('FB - Flow Basalt OD Model - Omphalos Test - Carbonates Volume Change over 10yrs - Cl- Charge Balance')

# Every panel shares the same y data, so it is read once.
y_values = DeltVolFrac_TenYr_ds.Carbonates.values

# Looping (rather than ending the cell on a bare scatter call) also keeps the
# PathCollection repr out of the cell output.
for ax, cation in zip((ax1, ax2, ax3), cations):
    ax.set_title(cation, loc='center')
    ax.set_ylabel('Tot. Carbonates Percentage Vol. Frac. Increase (%)')
    ax.set_xlabel(f'[{cation}] (mol/kg)')
    ax.scatter(x=attributes_all_df[cation].values, y=y_values, s=1, alpha=1)

In [None]:
# Three side-by-side scatter panels: the `Calcite` variable of
# DeltVolFrac_TenYr_ds on the y axis against the starting concentration of
# Ca++, Fe++ and Mg++ (one cation per panel) on the x axis.
# NOTE(review): DeltVolFrac_TenYr_ds is not assigned by any live code in the
# visible cells (the lines that would build it are commented out) — make sure
# it exists before running this cell.
cations = ("Ca++", "Fe++", "Mg++")

# figsize is (width, height): same 21 x 4 figure as before.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(21, 4))
fig.suptitle('FB - Flow Basalt OD Model - Omphalos Test - Calcite Volume Change over 10yrs - Cl- Charge Balance')

# Every panel shares the same y data, so it is read once.
y_values = DeltVolFrac_TenYr_ds.Calcite.values

# Looping (rather than ending the cell on a bare scatter call) also keeps the
# PathCollection repr out of the cell output.
for ax, cation in zip((ax1, ax2, ax3), cations):
    ax.set_title(cation, loc='center')
    ax.set_ylabel('Tot. Calcite Percentage Vol. Frac. Increase (%)')
    ax.set_xlabel(f'[{cation}] (mol/kg)')
    ax.scatter(x=attributes_all_df[cation].values, y=y_values, s=1, alpha=1)

In [None]:
# Three side-by-side scatter panels: the `Siderite` variable of
# DeltVolFrac_TenYr_ds on the y axis against the starting concentration of
# Ca++, Fe++ and Mg++ (one cation per panel) on the x axis.
# NOTE(review): DeltVolFrac_TenYr_ds is not assigned by any live code in the
# visible cells (the lines that would build it are commented out) — make sure
# it exists before running this cell.
cations = ("Ca++", "Fe++", "Mg++")

# figsize is (width, height): same 21 x 4 figure as before.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(21, 4))
fig.suptitle('FB - Flow Basalt OD Model - Omphalos Test - Siderite Volume Change over 10yrs - Cl- Charge Balance')

# Every panel shares the same y data, so it is read once.
y_values = DeltVolFrac_TenYr_ds.Siderite.values

# Looping (rather than ending the cell on a bare scatter call) also keeps the
# PathCollection repr out of the cell output.
for ax, cation in zip((ax1, ax2, ax3), cations):
    ax.set_title(cation, loc='center')
    ax.set_ylabel('Tot. Siderite Percentage Vol. Frac. Increase (%)')
    ax.set_xlabel(f'[{cation}] (mol/kg)')
    ax.scatter(x=attributes_all_df[cation].values, y=y_values, s=1, alpha=1)

In [None]:
# Three side-by-side scatter panels: the `Magnesite` variable of
# DeltVolFrac_TenYr_ds on the y axis against the starting concentration of
# Ca++, Fe++ and Mg++ (one cation per panel) on the x axis.
# NOTE(review): DeltVolFrac_TenYr_ds is not assigned by any live code in the
# visible cells (the lines that would build it are commented out) — make sure
# it exists before running this cell.
cations = ("Ca++", "Fe++", "Mg++")

# figsize is (width, height): same 21 x 4 figure as before.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(21, 4))
fig.suptitle('FB - Flow Basalt OD Model - Omphalos Test - Magnesite Volume Change over 10yrs - Cl- Charge Balance')

# Every panel shares the same y data, so it is read once.
y_values = DeltVolFrac_TenYr_ds.Magnesite.values

# Looping (rather than ending the cell on a bare scatter call) also keeps the
# PathCollection repr out of the cell output.
for ax, cation in zip((ax1, ax2, ax3), cations):
    ax.set_title(cation, loc='center')
    ax.set_ylabel('Tot. Magnesite Percentage Vol. Frac. Increase (%)')
    ax.set_xlabel(f'[{cation}] (mol/kg)')
    ax.scatter(x=attributes_all_df[cation].values, y=y_values, s=1, alpha=1)