# Demo
    - The training and prediction process of PLM-HGNN

## ENVIRONMENT

In [17]:
import os

from utils import *
from model import *

## Path
    - The paths where the data and model weights are stored

In [1]:
# Every file and folder location the notebook uses, as relative paths.
paths = dict(
    # tables stored under nodes/
    mirna_df='data/our_data/nodes/mirnas.tsv',
    disease_df='data/our_data/nodes/diseases.tsv',
    pcg_df='data/our_data/nodes/pcgs.tsv',

    # folder the training loop writes its checkpoints into
    model_save_folder='model_weights/',

    train_val_test='data/our_data/split/train_val_test_timely.npy',
    known_unknown_subset='data/our_data/process/known_unknown_samples.npy',

    # left empty here; assigned in the 'Load Model' cell
    load_model_path='',

    graph_without_mda='data/our_data/data_without_mda.pt',
)

## Settings
    - The model parameters used in the training and prediction processes

In [18]:
# Run configuration shared by the training and evaluation cells.
settings = dict(
    # preferred device; the next cell falls back to CPU when CUDA is unavailable
    device='cuda:0',

    # data set
    which_graph='graph_without_mda',  # names an entry of `paths`
    feature_ablation_type=3,

    # hyper-parameters
    num_neighbors=[-1, -1, -1, -1],
    dim=128,
    num_heads=8,
    num_layers=3,
    group_type='sum',

    # training set
    batch_size=1024,
    epoch_num=30,
    lr=0.001,
    seeds=[777, 826, 303, 466, 343],  # one model is trained and saved per seed
    early_stop_type='self_fitting',
    patience=1,
    train_modes=['train', 'val'],
    evaluate_modes=['test'],
)

In [None]:
# Use the configured device when CUDA is usable; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device(settings['device'])
else:
    device = torch.device('cpu')

## Obtain the Benchmark
    - If you want to train a model, this part can be skipped.

In [None]:
# Build the edge indices for the splits listed in settings['train_modes']
# (presumably positive and negative samples, going by the names — confirm in utils).
posi_edge_index, nega_edge_index = get_edge_index(
    paths=paths,
    settings=settings,
    modes=settings['train_modes'],
)

# Resolve the graph file through settings['which_graph'] instead of a hard-coded
# key, so changing that setting actually changes which graph is loaded. With the
# current settings this is still paths['graph_without_mda'].
data = get_data(paths[settings['which_graph']], posi_edge_index)

## Training Process

In [None]:
# Create the checkpoint folder up front: a missing directory would otherwise only
# surface at torch.save, after a full training run has already been spent.
# An empty folder setting means "current directory", which needs no creation.
if paths['model_save_folder']:
    os.makedirs(paths['model_save_folder'], exist_ok=True)

# Train one model per seed and write one checkpoint per seed.
for seed in settings['seeds']:
    set_seed(seed=seed)

    # Fresh loss, model and optimizer for every seed so the runs stay independent.
    criterion = nn.BCELoss()
    model = Model(dim=settings['dim'], num_heads=settings['num_heads'], num_layers=settings['num_layers'],
                group_type=settings['group_type'], feature_ablation_type=settings['feature_ablation_type']).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=settings['lr'])

    # get_data_and_train returns the data object together with the weights to keep;
    # those weights are loaded back into the model before saving.
    data, best_weights = get_data_and_train(paths, settings, optimizer, device, model, criterion)
    model.load_state_dict(best_weights)

    # os.path.join gives the same path as plain concatenation when the folder ends
    # with a separator, and also works when it does not.
    paths['model_save_path'] = os.path.join(paths['model_save_folder'], 'plm-hgnn_' + str(seed) + '.pth')

    # The checkpoint stores the settings next to the weights so a run can be traced
    # back to its configuration.
    torch.save({
        'settings': settings,
        'best_weights': best_weights
        }, paths['model_save_path'])

## Evaluation
    - If you did not run the 'Training Process' part, please run the 'Obtain the Benchmark' and 'Load Model' parts first.

### Load Model

In [None]:
# Checkpoint to evaluate: one of the files written by the training loop
# (named 'plm-hgnn_<seed>.pth').
paths['load_model_path'] = 'model_weights/plm-hgnn_303.pth'

# Rebuild the network with the same hyper-parameters that were used for training.
model = Model(dim=settings['dim'], num_heads=settings['num_heads'], num_layers=settings['num_layers'],
            group_type=settings['group_type'], feature_ablation_type=settings['feature_ablation_type']).to(device)

# map_location remaps the stored tensors onto the active device. Without it, a
# checkpoint saved from CUDA cannot be loaded on a CPU-only machine, even though
# `device` already falls back to the CPU in that case.
checkpoint = torch.load(paths['load_model_path'], map_location=device)
model.load_state_dict(checkpoint['best_weights'])

### The metrics of the full test set

In [None]:
# Run the model through evaluate() to get predictions and their labels.
pred, label = evaluate(data, paths, settings, device, model)

print("The metric of all scope: AUC | AUPR | ACC | P | R | F1")

# 0.5 is the third argument of get_metrics — presumably the decision threshold
# for the thresholded metrics; confirm in utils.
full_metrics = get_metrics(label, pred, 0.5)
AUC, AUPR, ACC, P, R, F1 = full_metrics
print(AUC, AUPR, ACC, P, R, F1)

### The metrics of known and unknown subset

In [None]:
# The .npy file holds a pickled dict with 'known' and 'unknown' entries, each used
# below to index the predictions and labels.
# NOTE: allow_pickle=True unpickles the file's contents — only load trusted files.
known_unknown_subset = np.load(paths['known_unknown_subset'], allow_pickle=True).item()

known_idx = known_unknown_subset['known']
unknown_idx = known_unknown_subset['unknown']

# Slice predictions and labels for each subset.
known_pred, known_label = pred[known_idx], label[known_idx]
unknown_pred, unknown_label = pred[unknown_idx], label[unknown_idx]

# Only the first two metrics (AUC, AUPR) are reported per subset.
AUC_known, AUPR_known, _, _, _, _ = get_metrics(known_label, known_pred, 0.5)
print('The metrics of known:', round(AUC_known, 3), round(AUPR_known, 3))

AUC_unknown, AUPR_unknown, _, _, _, _ = get_metrics(unknown_label, unknown_pred, 0.5)
print('The metrics of unknown:', round(AUC_unknown, 3), round(AUPR_unknown, 3))