In [None]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "2"

from src.model import D_GAT
from src.mol_processing import Read_mol_data, Generate_dataloader
from src.train_eval import Train_eval, Test_NN

You need to define:
1. The dataset to fine-tune on: `dataset`
2. The task name(s) in the data file: `task_name`
3. The type of task: `target_type` ('classification' or 'regression')
4. The evaluation metric: `metrics` ('AUC', 'RMSE' or 'MAE')
5. The file name of the stored model: `store_name` (dataset + '.pth' or None)

However, if you use one of the datasets from our paper (Tox21 SIDER MUV HIV BBBP BACE ClinTox ToxCast PCBA ESOL FreeSolv Lipo QM7 QM8 QM9), you only need to set `dataset`.

In [None]:
# Dataset to fine-tune on. Datasets used in the paper:
# Tox21 SIDER MUV HIV BBBP BACE ClinTox ToxCast PCBA ESOL FreeSolv Lipo QM7 QM8 QM9
dataset = 'QM7'
config_file_path = './config/config.json'

# Presets for the paper's datasets: dataset name -> (target_type, metrics).
# Names are matched exactly (case-sensitive), so they must be spelled the
# same way as in the list above.
DATASET_PRESETS = {
    # Classification benchmarks, evaluated with AUC.
    'HIV': ('classification', 'AUC'),
    'BBBP': ('classification', 'AUC'),
    'Tox21': ('classification', 'AUC'),
    'SIDER': ('classification', 'AUC'),
    'MUV': ('classification', 'AUC'),
    'BACE': ('classification', 'AUC'),
    'ClinTox': ('classification', 'AUC'),
    'ToxCast': ('classification', 'AUC'),
    'PCBA': ('classification', 'AUC'),
    # Regression benchmarks, evaluated with RMSE.
    'ESOL': ('regression', 'RMSE'),
    'Lipo': ('regression', 'RMSE'),
    'FreeSolv': ('regression', 'RMSE'),
    # Regression benchmarks, evaluated with MAE.
    'QM7': ('regression', 'MAE'),
    'QM8': ('regression', 'MAE'),
    'QM9': ('regression', 'MAE'),
}

if dataset in DATASET_PRESETS:
    target_type, metrics = DATASET_PRESETS[dataset]
    # The paper's datasets need no explicit task list.
    # NOTE(review): presumably Read_mol_data picks the tasks itself when
    # task_name is None -- confirm in src.mol_processing.
    task_name = None
else:
    # Custom dataset: replace the `raise` below with your own settings, e.g.
    #     target_type = 'classification'
    #     metrics = 'AUC'
    # or
    #     target_type = 'regression'
    #     metrics = 'RMSE'
    #     metrics = 'MAE'
    # and
    #     task_name = ['u0_atom'] #for QM7
    raise RuntimeError('Please define the target type, task_name and metrics!')

# File name under which the model is stored; set to None as the alternative.
store_name = dataset + '.pth'
# store_name = None

Next, load and process the data, and load the pre-trained model. There is nothing to define in this step.

In [None]:
# Sanity-check the task configuration before loading anything:
# classification is scored with AUC, regression with RMSE or MAE.
assert target_type in ('classification', 'regression')
if target_type == 'regression':
    assert metrics in ('RMSE', 'MAE')
else:
    assert metrics in ('AUC',)

# Read the molecule data; `mean` and `std` are passed on to training/evaluation.
mol_train, mol_val, mol_test, mean, std = Read_mol_data(
    dataset, task_name, target_type
)

# Build one dataloader for each of the three molecule sets.
train_dataloader, val_dataloader, test_dataloader = Generate_dataloader(
    dataset, mol_train, mol_val, mol_test
)

# Build the model from the config file; `best_score` is handed to Train_eval later.
model, best_score = D_GAT(dataset, mol_train, config_file_path)


If you have loaded a fine-tuned model, the next section can be used to evaluate its performance.

In [None]:
# ## Optional: the following code evaluates the fine-tuned model with the test dataloader.
# ## Uncomment the lines below to run it.
# Loss, auc = Test_NN(dataset, model, test_dataloader, metrics, target_type, mean, std)
# print('Mean Loss: ', Loss)
# if target_type == 'classification':
#     print('Mean AUC: ', auc.mean())

You are going to fine-tune your model. It may take some time.

In [None]:
# Fine-tune the model. The returned model and best score overwrite the
# current `model` / `best_score`, so re-running this cell continues from
# the result of the previous run.
model, best_score = Train_eval(
    dataset,
    model,
    train_dataloader,
    val_dataloader,
    test_dataloader,
    best_score,
    config_file_path,
    store_name,
    metrics,
    target_type,
    mean,
    std,
)
