In [1]:
import random
import numpy as np
import tensorflow.compat.v1 as tf
import matplotlib.pyplot as plt
from tensorflow.keras import layers, Sequential, Input
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from generator import Generator

In [2]:
def plot():
    plt.legend()
    plt.grid(True)
    plt.show()

def plot_loss(history):
    plt.plot(history.history['loss'], label='loss', marker=".")
    plt.plot(history.history['val_loss'], label='val_loss')
    plt.xlabel('Epoch')
    plt.ylabel('Error [MPG]')
    plot()

def pre_process_isotherm(isotherm):
    #isotherm -= min(isotherm)
    isotherm /= max(isotherm)
    return isotherm
    
def load_dataset(path, interp=False, gen_silica=None, gen_carbon=None):
    min_exp_pressure_i = 40
    max_exp_pressure_i = 458
    with open(path, 'rb') as f:
            dataset = np.load(f)
            isotherm_data = dataset["isotherm_data"]
            pore_distribution_data = dataset["pore_distribution_data"]
    x = np.empty((isotherm_data.shape[0], (-min_exp_pressure_i + max_exp_pressure_i)))
    y = np.empty(pore_distribution_data.shape)
    for i in range(len(isotherm_data)):
        if interp:
            interp_isotherm = np.interp(gen_silica.pressures_s, gen_carbon.pressures_s, isotherm_data[i])
        else:
            interp_isotherm = isotherm_data[i]
        isotherm = pre_process_isotherm(interp_isotherm[min_exp_pressure_i:max_exp_pressure_i])
        pore_distribution = pre_process_isotherm(pore_distribution_data[i])
        x[i] = isotherm
        y[i] = pore_distribution
    #x, y = shuffle(x, y)
    return x, y

def create_model(input_shape):
    ## CONV NET
    # model = Sequential()
    # model.add(layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(input_shape,1)))
    # model.add(layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
    # model.add(layers.Dropout(0.5))
    # model.add(layers.MaxPooling1D(pool_size=2))
    # model.add(layers.Flatten())
    # model.add(layers.Dense(100, activation='relu'))
    # model.add(layers.Dense(2, activation='softmax'))
    ###
        ## DENSE NET
    model = Sequential(
        [
            Input(shape=len(x_train[0],)),
            layers.Dense(400, activation='relu'),
            layers.Dense(200, activation='relu'),
            layers.Dense(100, activation='relu'),
            layers.Dense(50, activation='relu'),
            layers.Dense(2, activation='softmax')
        ]
    )
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [3]:
gen_silica = Generator(path_s="data/kernel_generated2/Kernel_s_Silica-loc-isoth1.xlsx.npy",
                              path_d="data/kernel_generated2/Kernel_d_Silica-loc-isoth1.xlsx.npy",
                              path_p_d="data/kernel_generated2/Pressure_d_Silica-loc-isoth1.xlsx.npy",
                              path_p_s="data/kernel_generated2/Pressure_s_Silica-loc-isoth1.xlsx.npy",
                              path_a="data/kernel_generated2/Size_Silica-loc-isoth1.xlsx.npy"
                              )
gen_carbon = Generator(path_s="data/initial kernels/Kernel_Carbon_Adsorption.npy",
                              path_d="data/initial kernels/Kernel_Carbon_Desorption.npy",
                              path_p_d="data/initial kernels/Pressure_Carbon.npy",
                              path_p_s="data/initial kernels/Pressure_Carbon.npy",
                              path_a="data/initial kernels/Size_Kernel_Carbon_Adsorption.npy"
                              )
x1, y1 = load_dataset('data/datasets/carbon_random_classification.npz', interp=True, gen_carbon=gen_carbon, gen_silica=gen_silica)
x2, y2 = load_dataset('data/datasets/silica_random_classification.npz', interp=False)

In [4]:
i = random.randint(0, len(x1))
plt.plot(x1[i], marker=".", label="Carbon isotherm")
plt.plot(x2[i], marker=".", label="Silica isotherm")
# plt.plot(gen_carbon.a_array, y1[i], marker=".", label="Carbon distribution")
# plt.plot(gen_silica.a_array, y2[i], marker=".", label="Silica distribution")
plot()

In [19]:
y_carbon = np.empty(shape = (len(x1), 2))
y_silica = np.empty(shape = (len(x1), 2))
y = np.empty(shape = (len(x1), 2))
x = np.empty(shape = x1.shape)
for i in range(len(x1)):
    a = random.random()
    y[i] = np.array([a, 1-a])
    x[i] = x1[i]*a + x2[i]*(1-a)
    # y_carbon[i] = np.array([0, 1])
    # y_silica[i] = np.array([1, 0])

In [6]:
x = np.concatenate((x1, x2), axis=0)
y = np.concatenate((y_carbon, y_silica), axis=0)

In [20]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

In [21]:
model = create_model(input_shape=len(x[0]))

In [24]:
mcp_save = tf.keras.callbacks.ModelCheckpoint(filepath='data/models/classification.keras', save_best_only=True,
                                           monitor='accuracy', mode='max', verbose=1, save_weights_only=False,
                                           save_freq='epoch')

reduce_lr_loss = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                                   patience=100, verbose=1, mode='auto')
history = model.fit(np.array(x_train), np.array(y_train),
                    epochs=100, batch_size=5000, shuffle=True,
                    validation_data=(np.array(x_test), np.array(y_test)), callbacks=[mcp_save, reduce_lr_loss])
plot_loss(history)

Epoch 1/100
1/5 [=====>........................] - ETA: 0s - loss: 0.7102 - accuracy: 0.5814
Epoch 1: accuracy improved from -inf to 0.66991, saving model to data/models\classification.keras
Epoch 2/100
1/5 [=====>........................] - ETA: 0s - loss: 0.6522 - accuracy: 0.6606
Epoch 2: accuracy improved from 0.66991 to 0.68644, saving model to data/models\classification.keras
Epoch 3/100
1/5 [=====>........................] - ETA: 0s - loss: 0.6370 - accuracy: 0.6964
Epoch 3: accuracy improved from 0.68644 to 0.73733, saving model to data/models\classification.keras
Epoch 4/100
1/5 [=====>........................] - ETA: 0s - loss: 0.6069 - accuracy: 0.7856
Epoch 4: accuracy improved from 0.73733 to 0.76427, saving model to data/models\classification.keras
Epoch 5/100
1/5 [=====>........................] - ETA: 0s - loss: 0.6156 - accuracy: 0.7610
Epoch 5: accuracy improved from 0.76427 to 0.79564, saving model to data/models\classification.keras
Epoch 6/100
1/5 [=====>..........

In [33]:
model = tf.keras.models.load_model('data/models/classification.keras')


In [34]:
prediction = model.predict(np.array(x_train))



In [40]:
j = 1231
prediction[j], y_train[j]

(array([0.25203574, 0.7479642 ], dtype=float32),
 array([0.25625809, 0.74374191]))

In [57]:
gen = Generator(path_s="data/initial kernels/Kernel_Silica_Adsorption.npy",
                path_d="data/initial kernels/Kernel_Silica_Desorption.npy",
                path_p_d="data/initial kernels/Pressure_Silica.npy",
                path_p_s="data/initial kernels/Pressure_Silica.npy",
                path_a="data/initial kernels/Size_Kernel_Silica_Adsorption.npy"
                )

In [58]:
import pandas as pd

exp_file_list = ["MCM-41", "SBA-15", "SBA-16", "MIL-101", "MIL-101_2", "DUT-49", "FDM-4", "PCN-333", "PCN-777",
                 "MIL-100"]

p_exp_list = []
n_s_exp_raw_list = []
for exp_file_name in exp_file_list:
    data = pd.read_csv(f"data/real/{exp_file_name}.txt", header=None)
    # p_exp_list.append(data.iloc[:,1].to_numpy())
    # n_s_exp_raw_list.append(data.iloc[:,3].to_numpy())
    p_exp_list.append(data.iloc[:, 1].to_numpy())
    n_s_exp_raw_list.append(data.iloc[:, 3].to_numpy())

In [59]:
j = 2
plt.plot(p_exp_list[j], n_s_exp_raw_list[j], marker=".", label=exp_file_list[j])
plot()

In [60]:
# интерполируем экспериментальную изотерму под давления кернала
n_s_exp_list = []
for i in range(len(p_exp_list)):
    n_s_exp_list.append(np.interp(gen.pressures_s[40:458], p_exp_list[i], n_s_exp_raw_list[i]))

In [61]:
j = 2
plt.plot(gen.pressures_s[40:458], n_s_exp_list[j], marker=".", label=exp_file_list[j])
plot()

In [62]:
n_s_exp_for_net_list = [pre_process_isotherm(n_s_exp) for n_s_exp in n_s_exp_list]
fit_exp_list = [model.predict(np.array([n_s_exp_for_net])).T for n_s_exp_for_net in n_s_exp_for_net_list]



In [64]:
for i, prediction in enumerate(fit_exp_list):
    if np.argmax(prediction) == 1:
        print(exp_file_list[i], "silica", prediction)
    else:
        print(exp_file_list[i], "carbon", prediction)

MCM-41 carbon [[0.8763423 ]
 [0.12365764]]
SBA-15 silica [[0.29072034]
 [0.70927966]]
SBA-16 carbon [[0.75448936]
 [0.24551064]]
MIL-101 carbon [[0.5979614]
 [0.4020386]]
MIL-101_2 silica [[0.01152014]
 [0.9884799 ]]
DUT-49 carbon [[0.8140122 ]
 [0.18598773]]
FDM-4 silica [[0.02563336]
 [0.97436666]]
PCN-333 carbon [[0.7265499 ]
 [0.27345008]]
PCN-777 carbon [[0.7881956 ]
 [0.21180442]]
MIL-100 carbon [[0.6727861]
 [0.3272139]]
