In [1]:
import numpy as np
import matplotlib 
matplotlib.use('Qt5Agg') 
import matplotlib.pyplot as plt
import random
import re

from os import listdir
from os.path import isfile, join
from itertools import filterfalse

In [2]:
def read_qps(file_path: str, file_name: str):
    pressures = []
    volumes = []
    pattern = re.compile(r"^\s*([-+]?\d+\.\d+(?:e[-+]?\d+)?)\s+([-+]?\d+\.\d+)\s*$", re.IGNORECASE)
    with open(file_path, 'r') as file:
        for line in file:
            if line.startswith("Analysis gas:"):
                if line.strip().split()[2] not in ["Nitrogen", "N2"]:
                    return None

            match = pattern.match(line)
            if match:
                pressure = float(match.group(1))
                volume = float(match.group(2))
                pressures.append(pressure)
                volumes.append(volume)
    
    pressures = np.array(pressures)
    volumes = np.array(volumes)
    return [pressures, volumes, file_name]

def separate_branches(iso):
    pressure = iso[0]
    volume = iso[1]
    max_p_idx = np.where(pressure == max(pressure))[0][0]
    isotherm = {"adsorption_p": pressure[:max_p_idx+1], "adsorption": volume[:max_p_idx+1],
                "desorption_p": pressure[max_p_idx:][::-1], "desorption": volume[max_p_idx:][::-1], "file_name": iso[2]}
    return isotherm
    
data_path = "../data/tmp"
file_names = [f for f in listdir(data_path) if isfile(join(data_path, f))]
data = [read_qps(join(data_path, f), f) for f in file_names]
data = [d for d in data if d != None]
isotherms = [separate_branches(d) for d in data if len(d[0]) != 0]
print(len(isotherms))

4676


In [3]:
def filter_number_of_points(isotherm, min_number_of_points=10):
    if len(isotherm["adsorption_p"]) <= min_number_of_points:
        return False
    return True

def filter_pressure_fall(isotherm):
    for i in range(len(isotherm["adsorption_p"])-1):
        if isotherm["adsorption_p"][i] >= isotherm["adsorption_p"][i+1]:
            return False
    return True

def filter_adsorption_fall(isotherm):
    for i in range(len(isotherm["adsorption_p"])-1):
        if isotherm["adsorption"][i+1]/isotherm["adsorption"][i] < 1:
            return False
    return True

In [4]:
filters = [filter_number_of_points, filter_pressure_fall, filter_adsorption_fall]
clean_data = isotherms.copy()
for f in filters:
    print(f"{f.__name__} : {len(list(filterfalse(f, clean_data)))}")
    clean_data = list(filter(f, clean_data))
print(f"{len(isotherms)} ==FILTERS==> {len(clean_data)}")

filter_number_of_points : 92
filter_pressure_fall : 1061
filter_adsorption_fall : 444
4676 ==FILTERS==> 3079


In [5]:
def plot_isotherm(isotherm):
    plt.plot(isotherm["adsorption_p"], isotherm["adsorption"], marker=".")
    plt.plot(isotherm["desorption_p"], isotherm["desorption"], marker=".")
    plt.show()

i = random.randint(0, len(clean_data)-1)
plot_isotherm(clean_data[i])

In [6]:
from inverse import fit_linear
def cut_distribution(pore_distribution_data, pressure, pore_widths, pore_size_cut_grid, pressure_cut_grid):
    pore_cut_size = pore_size_cut_grid[-1]
    start_pressure = pressure[0]
    for i in range(len(pressure_cut_grid)-1):
        if pressure_cut_grid[i] <= start_pressure < pressure_cut_grid[i+1]:
            pore_cut_size = pore_size_cut_grid[i]


    for i in range(len(pore_widths)):
        if pore_widths[i] < pore_cut_size:
            pore_distribution_data[i] = 0
        else:
            break
    return pore_distribution_data


def find_nearest_idx(array, value):
    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return idx

isotherm_data = []
pore_distribution_data = []

pressures = np.load("../data/initial kernels/Pressure_Silica.npy")
kernel = np.load("../data/initial kernels/Kernel_Silica_Adsorption.npy")[:, :-10]
pore_widths = np.load("../data/initial kernels/Size_Kernel_Silica_Adsorption.npy")

pore_size_cut_grid = np.array([0.863, 0.863, 0.902, 0.982, 1.061, 1.061, 1.061, 1.167,
                                       1.220, 1.220, 1.220, 1.273, 1.379, 1.432, 1.432, 1.564])
pressure_cut_grid = np.array([1e-7, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 2e-4, 4e-4,
                                      6e-4, 8e-4, 1e-3, 2e-3, 4e-3, 6e-3, 8e-3, 1e-2])
for i, isotherm in enumerate(clean_data):
    start_p = find_nearest_idx(pressures[:-10], isotherm['adsorption_p'][0])
    isotherm_data.append(np.interp(pressures[:-10], isotherm['adsorption_p'], isotherm['adsorption']))
    isotherm_data[i][:start_p] = np.zeros(start_p)

    scale = max(isotherm_data[i])
    isotherm_data[i] = isotherm_data[i] / scale

    pore_distribution = fit_linear(adsorption=isotherm_data[i], kernel=kernel, alpha=0).x
    pore_distribution_data.append(cut_distribution(pore_distribution, isotherm['adsorption_p'], pore_widths,
                                                         pore_size_cut_grid, pressure_cut_grid))


In [7]:
from datasetLoader import load_dataset

x_exp, y_exp = load_dataset('../data/datasets/SMP_CUT_ALL_KERNEL.npz')

In [8]:
isotherm_data = np.array(isotherm_data)

In [9]:
x = np.concatenate((x_exp, isotherm_data))
y = np.concatenate((y_exp, pore_distribution_data))

In [10]:
with open("../data/datasets/exp.npz", "wb") as f:
    np.savez_compressed("../data/datasets/exp.npz", isotherm_data=x,
                            pore_distribution_data=y)

In [45]:
volume_issues = list(filterfalse(filter_adsorption_fall, isotherms))
i = random.randint(0, len(volume_issues)-1)
plot_isotherm(volume_issues[i])

In [90]:
i

73

In [69]:
i

1

In [70]:
def pressure_corrector(isotherm):
    i = 0
    while i < len(isotherm["adsorption_p"])-1:
        if isotherm["adsorption_p"][i] >= isotherm["adsorption_p"][i+1]:
            isotherm["adsorption"] = np.delete(isotherm["adsorption"], i)
            isotherm["adsorption_p"] = np.delete(isotherm["adsorption_p"], i)
        i += 1
    return isotherm

pressure_corrected = [pressure_corrector(isotherm) for isotherm in pressure_issues]
i = 1 #random.randint(0, len(pressure_corrected)-1)
plot_isotherm(pressure_corrected[i])

In [71]:
len(pressure_issues[i]["adsorption_p"]), len(pressure_corrected[i]["adsorption_p"])

(91, 91)