Finetuning for CH4 uptake at high pressure

In [1]:
import os
import pickle
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import yaml
from scipy.stats import spearmanr
from sklearn.metrics import mean_absolute_error

from xraypro.run import finetune, runTest
from xraypro.setGen import genLoaders
from xraypro.xraypro import loadModel

In [3]:
# Load the pre-computed PXRD data; later cells index it as transformedPXRD[<MOF name>][0].
# NOTE(review): pickle.load executes arbitrary code from the file — only load
# pickles produced by a trusted pipeline.
pxrd_pickle_path = 'data/CoRE-MOF/transformed_PXRD.pickle'
with open(pxrd_pickle_path, 'rb') as pxrd_file:
    transformedPXRD = pickle.load(pxrd_file)

In [4]:
# Name of the CSV column used as the regression target.
TARGET_COLUMN = 'pure_uptake_methane_298.00_6500000'

core_df_train = pd.read_csv("data/CoRE-MOF/CoRE2019_traindata.csv")
core_df_test = pd.read_csv("data/CoRE-MOF/CoRE2019_testdata.csv")

train_ids = np.array(core_df_train['MOFname'].values)
test_ids = np.array(core_df_test['MOFname'].values)

# Train IDs first, then test IDs; PXRD_to_Label is filled in this order.
availableIDs = np.array(list(train_ids) + list(test_ids))

concat_df = pd.concat([core_df_train, core_df_test], ignore_index=True)
# Drop columns whose name starts with 'Unnamed'
# (presumably stray index columns from a CSV export — TODO confirm).
concat_df = concat_df.loc[:, ~concat_df.columns.str.contains('^Unnamed')]

# One target value per MOF name, keeping the first occurrence. Built once so the
# loop below does a cheap lookup instead of filtering the whole frame per ID.
# A missing TARGET_COLUMN raises KeyError here instead of silently yielding an
# empty dataset.
label_by_mof = (
    concat_df
    .drop_duplicates(subset='MOFname', keep='first')
    .set_index('MOFname')[TARGET_COLUMN]
)

# Maps MOF name -> [first element of its transformedPXRD entry, target value].
PXRD_to_Label = {}
skipped_ids = []

for mof_id in availableIDs:
    try:
        PXRD_to_Label[mof_id] = [transformedPXRD[mof_id][0], label_by_mof.at[mof_id]]
    except (KeyError, IndexError):
        # Best-effort: MOFs without a usable PXRD entry or label are left out,
        # but recorded so the omission is visible.
        skipped_ids.append(mof_id)

if skipped_ids:
    print(f'Skipped {len(skipped_ids)} of {len(availableIDs)} MOF IDs without a usable PXRD entry or label.')

In [5]:
# Split PXRD_to_Label into train/test/validation loaders with a fixed seed.
# Note the unpacking order: train, test, validation.
train_loader, test_loader, val_loader = genLoaders(
    PXRD_to_Label,
    directory_to_precursors='data/CoRE-MOF/precursors',
    test_ratio=0.15,
    valid_ratio=0.1,
    batch_size=32,
    SEED = 0,
)

The random seed is:  0
Train size: 2928, Validation size: 390, Test size: 585


Fine-tune the pre-trained model on the CH4 uptake label

In [7]:
# Load the pre-trained model in 'cgcnn' mode and put it in regression mode.
model = loadModel(mode = 'cgcnn').regressionMode()

# Not referenced again in the visible cells.
device = 'cuda:0'

# The label doubles as the output sub-directory name.
# NOTE(review): the target column read earlier is named '..._6500000'
# (presumably Pa, i.e. 65 bar) while this label says 64 bar — confirm.
label = 'CH4 Uptake at 64 bar'
file_path = f'data/CoRE-MOF/ft/{label}'

new_dir_path = os.path.join(os.getcwd(), 'data/CoRE-MOF/ft', label)
os.makedirs(new_dir_path, exist_ok = True)

# Persist the three data loaders next to the fine-tuning outputs.
for loader_name, loader in (
    ('train_loader', train_loader),
    ('test_loader', test_loader),
    ('val_loader', val_loader),
):
    with open(f'{file_path}/{loader_name}.pickle', 'wb') as handle:
        pickle.dump(loader, handle, protocol=pickle.HIGHEST_PROTOCOL)

Loaded pre-trained model with success.
/home/sartaaj/Desktop/xraypro/src/SSL/pretrained/cgcnn/model_t.pth


In [8]:
# Fine-tune on the train/validation/test loaders; `model` is rebound to the result.
# NOTE(review): file_path is the parent 'ft' directory, while the test cell reads
# the weights from the 'CH4 Uptake at 64 bar' sub-directory — presumably finetune
# appends the label itself; confirm against xraypro.run.finetune.
model = finetune(
    model,
    train_loader = train_loader,
    val_loader = val_loader,
    test_loader = test_loader,
    file_path = 'data/CoRE-MOF/ft',
    save_path = 'ft_uptake_high_p.h5',
)

  return F.conv1d(input, weight, bias, self.stride,
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch: 1, Batch: 17, Loss: 12.112940238072323, Val Loss: 7.43959739473131, Val SRCC = 0.6984359726295208
Epoch: 2, Batch: 17, Loss: 6.800057720352005, Val Loss: 6.37633921040429, Val SRCC = 0.7830930270446399
Epoch: 3, Batch: 17, Loss: 6.06693373145638, Val Loss: 5.162903216150072, Val SRCC = 0.8058813945910719
Epoch: 4, Batch: 17, Loss: 5.625587950696002, Val Loss: 10.409848054250082, Val SRCC = 0.8039263603779733
Epoch: 5, Batch: 17, Loss: 7.04021953357445, Val Loss: 5.187546597586738, Val SRCC = 0.8171839361355491
Epoch: 6, Batch: 17, Loss: 5.235660356479687, Val Loss: 4.374500314394633, Val SRCC = 0.835654936461388
Epoch: 7, Batch: 17, Loss: 4.877579987704099, Val Loss: 4.164826757378048, Val SRCC = 0.8354309221244703
Epoch: 8, Batch: 17, Loss: 4.752609956395495, Val Loss: 4.37275908390681, Val SRCC = 0.8309302704463993
Epoch: 9, Batch: 17, Loss: 4.525623649031251, Val Loss: 3.892830000983344, Val SRCC = 0.8449820788530465
Epoch: 10, Batch: 17, Loss: 4.132537177630833, Val Loss: 3.

In [9]:
# Evaluate on the held-out test loader using the weights saved during fine-tuning.
predictions_test, actual_test = runTest(
    model,
    test_loader,
    save_path = 'data/CoRE-MOF/ft/CH4 Uptake at 64 bar/ft_uptake_high_p.h5',
)

In [11]:
# Test-set metrics. spearmanr is imported explicitly from scipy.stats because a
# plain `import scipy` does not reliably expose the `stats` submodule as an
# attribute on every SciPy version.
# Index 0 of the spearmanr result is the correlation coefficient.
srcc = spearmanr(predictions_test, actual_test)[0]
mae = mean_absolute_error(actual_test, predictions_test)

print(f'The SRCC is: {srcc}')
print(f'The MAE is {mae}')

The SRCC is: 0.9036719327377123
The MAE is 1.2413398027420044
