In [5]:
import openke
import time
import torch
import pandas as pd
from openke.config import Trainer, Tester
from openke.module.model import TransE, TransD, TransH, RotatE
from openke.module.loss import MarginLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

In [11]:
# Fraction tag used to select the "remain" triple file and to name the output checkpoint.
percent = "0.45"
DATA_DIR = "./benchmarks/FB15K237/"

# Settings shared by both loaders. Keeping them in one place guarantees the two
# loaders differ only in the triple file they read.
loader_settings = dict(
    in_path = None,
    ent_file = f"{DATA_DIR}entity2id.txt",
    rel_file = f"{DATA_DIR}relation2id.txt",
    nbatches = 100,
    threads = 8,
    sampling_mode = "normal",
    bern_flag = 1,
    filter_flag = 1,
    neg_ent = 25,
    neg_rel = 0)

# Loader over the full training triples.
train_dataloader = TrainDataLoader(
    tri_file = f"{DATA_DIR}train2id.txt",
    **loader_settings)
print('----------------------------------------------------------------------')
# Loader over the `remain_{percent}_unlearning.txt` triples
# (presumably the triples kept after the unlearning request - TODO confirm).
retrain_dataloader = TrainDataLoader(
    tri_file = f'{DATA_DIR}remain_{percent}_unlearning.txt',
    **loader_settings)

Training Files Path : ./benchmarks/FB15K237/train2id.txt
Entity Files Path : ./benchmarks/FB15K237/entity2id.txt
Relation Files Path : ./benchmarks/FB15K237/relation2id.txt
The toolkit is importing datasets.
The total of relations is 237.
The total of entities is 14541.
----------------------------------------------------------------------
The total of train triples is 272115.
Training Files Path : ./benchmarks/FB15K237/remain_0.45_unlearning.txt
Entity Files Path : ./benchmarks/FB15K237/entity2id.txt
Relation Files Path : ./benchmarks/FB15K237/relation2id.txt
The toolkit is importing datasets.
The total of relations is 237.
The total of entities is 14541.


In [12]:
def calculate_gradients(model, data):
    """Return first-order gradients of the mean model output w.r.t. the ``*.weight`` parameters.

    Args:
        model: Object exposing ``eval()``, ``named_parameters()`` and a callable
            ``model.model`` that accepts the batch dictionary built below.
        data: Mapping holding NumPy arrays under ``'batch_h'``, ``'batch_t'``,
            ``'batch_r'`` and ``'batch_y'`` plus a ``'mode'`` entry that is
            forwarded unchanged.

    Returns:
        Tuple of tensors from ``torch.autograd.grad`` (built with
        ``create_graph=True``), one per trainable parameter whose name ends in
        ``'.weight'``, in ``named_parameters()`` order.
    """
    model.eval()

    # Same selection rule as calculate_zo_gradients: frozen weights are skipped
    # because autograd cannot differentiate with respect to them.
    params_to_update = [param for name, param in model.named_parameters()
                        if name.endswith('.weight') and param.requires_grad]

    # Put the batch where the weights live (CUDA for a CUDA model) instead of
    # hard-coding CUDA; fall back to CUDA when there is nothing to inspect.
    device = params_to_update[0].device if params_to_update else torch.device('cuda')
    batch = {key: torch.from_numpy(data[key]).to(device)
             for key in ('batch_h', 'batch_t', 'batch_r', 'batch_y')}
    batch['mode'] = data['mode']

    loss = model.model(batch)
    loss_scalar = torch.mean(loss)
    grads = torch.autograd.grad(loss_scalar, params_to_update, create_graph=True)

    # Drop local references before asking the CUDA allocator to release cached blocks.
    del loss, loss_scalar
    torch.cuda.empty_cache()
    return grads


def calculate_zo_gradients(model, data, epsilon=1e-5):
    """Estimate gradients with a two-point (central-difference) zeroth-order scheme.

    For every trainable parameter whose name ends in ``'.weight'`` a single
    random direction of length ``param.shape[1]`` is drawn, scaled by
    ``epsilon`` and applied identically to every row. The model output is
    evaluated at ``param + perturb`` and ``param - perturb`` and the difference
    of the two means drives the estimate. No backward pass is performed.

    Args:
        model: Object exposing ``eval()``, ``named_parameters()`` and a callable
            ``model.model`` that accepts the batch dictionary built below.
        data: Mapping holding NumPy arrays under ``'batch_h'``, ``'batch_t'``,
            ``'batch_r'`` and ``'batch_y'`` plus a ``'mode'`` entry that is
            forwarded unchanged.
        epsilon: Perturbation scale. ``None`` is treated as the default
            ``1e-5`` so callers that forward an unset value do not crash.

    Returns:
        List with one 1-D tensor (length ``param.shape[1]``) per selected
        parameter, in ``named_parameters()`` order.

    Notes:
        Parameters are indexed with ``shape[1]``, so they must be at least 2-D.
        NOTE(review): the difference quotient is multiplied by the already
        epsilon-scaled perturbation, so the result is ``epsilon`` times the
        textbook two-point estimate - kept as-is; confirm this is intended.
    """
    if epsilon is None:
        epsilon = 1e-5

    model.eval()
    params_to_update = [param for name, param in model.named_parameters() if
                        name.endswith('.weight') and param.requires_grad]
    if not params_to_update:
        return []

    # The batch does not depend on the perturbed parameter: build it once and
    # place it on the device that holds the weights.
    device = params_to_update[0].device
    batch = {key: torch.from_numpy(data[key]).to(device)
             for key in ('batch_h', 'batch_t', 'batch_r', 'batch_y')}
    batch['mode'] = data['mode']

    grads = []
    for param in params_to_update:
        # One random row, broadcast to every row of the weight matrix.
        perturb = torch.randn(param.data.shape[1]).to(param.device) * epsilon
        perturb = perturb.unsqueeze(0).expand(param.data.shape)
        original_param = param.data.clone()

        try:
            with torch.no_grad():
                param.data.add_(perturb)
                loss_pos_scalar = torch.mean(model.model(batch))

                param.data.copy_(original_param)
                param.data.sub_(perturb)
                loss_neg_scalar = torch.mean(model.model(batch))
        finally:
            # Always put the weights back, even if a forward pass raised.
            param.data.copy_(original_param)

        grad = (loss_pos_scalar - loss_neg_scalar) / (2 * epsilon) * perturb
        # All rows of `grad` are identical, so the mean over dim 0 is that row.
        grads.append(grad.mean(dim=0))

        torch.cuda.empty_cache()
    return grads

def hvps(grad_all, model_params, h_estimate):
    """Differentiate the inner product of ``grad_all`` and ``h_estimate`` w.r.t. ``model_params``.

    Args:
        grad_all: Iterable of gradient tensors that still carry an autograd graph.
        model_params: Tensors to differentiate with respect to.
        h_estimate: Iterable of tensors paired element-wise with ``grad_all``.

    Returns:
        Tuple from ``torch.autograd.grad`` (built with ``create_graph=True``),
        one tensor per entry of ``model_params``.
    """
    # Scalar <grad_all, h_estimate>, accumulated over the paired tensors.
    inner_product = sum(torch.sum(g * vec) for g, vec in zip(grad_all, h_estimate))
    second_order = torch.autograd.grad(inner_product, model_params, create_graph=True)

    # Release the scalar before asking the CUDA allocator to free cached blocks.
    del inner_product
    torch.cuda.empty_cache()
    return second_order

def woodfisher_hvps(grad_all, gamma=1.0):
    """Build a closed-form, rank-one style product from the gradients alone.

    With ``s`` the sum of squared entries over every tensor in ``grad_all``,
    each gradient ``g`` is mapped to
    ``(1 / gamma) * g + (gamma**-2 * g * s) / (1 + gamma**-1 * s)``.

    Args:
        grad_all: Iterable of gradient tensors.
        gamma: Scalar used in both terms of the formula above.

    Returns:
        Tuple with one tensor per entry of ``grad_all``, each shaped like its input.

    Notes:
        NOTE(review): the two terms are added here; a Sherman-Morrison inverse
        of ``gamma * I + g g^T`` would subtract the second term - confirm the
        intended sign.
    """
    squared_norm = sum(torch.sum(g * g) for g in grad_all)

    def _estimate(g):
        # Same two terms, in the same arithmetic order, for every gradient tensor.
        scaled = (1 / gamma) * g
        correction = (gamma ** (-2) * g * squared_norm) / (1 + gamma ** (-1) * squared_norm)
        return scaled + correction

    return tuple(_estimate(g) for g in grad_all)


def update_and_save_checkpoint(checkpoint_path, new_checkpoint_path, new_params):
    """Copy a checkpoint, replacing its entity and relation embedding weights.

    Args:
        checkpoint_path: Path of the existing checkpoint, read with ``torch.load``.
        new_checkpoint_path: Path the modified checkpoint is written to.
        new_params: Sequence whose item 0 replaces ``'ent_embeddings.weight'``
            and item 1 replaces ``'rel_embeddings.weight'``. Further items are
            ignored.

    Notes:
        Every other entry of the loaded checkpoint is written back unchanged.
        ``torch.load`` unpickles the file, so only load checkpoints you trust.
    """
    def _plain(value):
        # Store values only: a tensor that still references an autograd graph
        # should not be serialized into the checkpoint.
        return value.detach() if torch.is_tensor(value) else value

    weights = torch.load(checkpoint_path)
    weights['ent_embeddings.weight'] = _plain(new_params[0])
    weights['rel_embeddings.weight'] = _plain(new_params[1])
    torch.save(weights, new_checkpoint_path)
    print(f"Updated checkpoint saved to {new_checkpoint_path}")

The total of train triples is 149664.


In [13]:
def _peak_gpu_memory_mb():
    """Return ``torch.cuda.max_memory_allocated`` summed over all visible CUDA devices, in MiB."""
    return sum(torch.cuda.max_memory_allocated(i) / (1024 ** 2)
               for i in range(torch.cuda.device_count()))


def _first_batch(dataloader, label):
    """Return the first batch ``dataloader`` yields; raise ValueError if it yields none."""
    for batch in dataloader:
        return batch
    raise ValueError(f"{label} produced no batches")


def GIF_unleanring(model, train_dataloader, test_dataloader, epsilon=None, iteration=100, damp=0.0, scale=50):
    """Estimate updated parameters from zeroth-order gradients on two batches.

    Only the first batch of each loader is used. Gradients come from
    ``calculate_zo_gradients``; the curvature term comes from
    ``woodfisher_hvps`` applied to the first-loader gradients. (The exact
    alternatives ``calculate_gradients`` / ``hvps`` exist in this file but are
    not used here.)

    Args:
        model: Model passed through to ``calculate_zo_gradients``; its
            ``parameters()`` with ``requires_grad`` receive the update.
        train_dataloader: Iterable of batches; its first batch yields ``grad_full``.
        test_dataloader: Iterable of batches; its first batch yields
            ``grad_removed``. The driver cell in this notebook passes the loader
            over the remaining triples here.
        epsilon: Perturbation scale for the zeroth-order estimator. ``None``
            selects the estimator's default of ``1e-5``.
        iteration: Number of fixed-point updates applied to ``h_estimate``.
        damp: Damping factor in the update ``v + (1 - damp) * h - hv / scale``.
        scale: Divisor applied to ``hv`` in each update and to the final estimate.

    Returns:
        List of tensors ``param + h_estimate / scale``, one per paired
        (estimate, trainable parameter).

    Raises:
        ValueError: If either loader yields no batch.

    Notes:
        Estimates are paired with ``model.parameters()`` by position via
        ``zip`` - this assumes the trainable parameters are exactly the
        ``*.weight`` tensors the estimator selects, in the same order
        (TODO confirm for each model type).
    """
    start_time = time.time()
    if epsilon is None:
        # Forwarding None would make the estimator multiply a tensor by None.
        epsilon = 1e-5

    grad_full = calculate_zo_gradients(
        model, _first_batch(train_dataloader, "train_dataloader"), epsilon=epsilon)
    print(f"Total GradsFull memory usage across all GPUs: {_peak_gpu_memory_mb()} MB")

    grad_removed = calculate_zo_gradients(
        model, _first_batch(test_dataloader, "test_dataloader"), epsilon=epsilon)
    print(f"Total GradsRemoved memory usage across all GPUs: {_peak_gpu_memory_mb()} MB")

    # v = (grad_full - grad_removed) - grad_removed; it also seeds h_estimate.
    grad1 = [g_full - g_rem for g_full, g_rem in zip(grad_full, grad_removed)]
    v = tuple(g1 - g_rem for g1, g_rem in zip(grad1, grad_removed))
    h_estimate = v
    model_params = [p for p in model.parameters() if p.requires_grad]

    # woodfisher_hvps depends only on grad_full, so it is computed once. An
    # hvps-based variant would depend on h_estimate and belong inside the loop.
    hv = woodfisher_hvps(grad_full)
    with torch.no_grad():
        for _ in range(iteration):
            h_estimate = [v1 + (1 - damp) * h_est - hv1 / scale
                          for v1, h_est, hv1 in zip(v, h_estimate, hv)]
    print(f"final h_estimate: {torch.cuda.max_memory_allocated() / (1024 ** 2)} MB")

    # Plain value arithmetic: the result must not track gradients of the live weights.
    with torch.no_grad():
        params_change = [h_est / scale for h_est in h_estimate]
        params_esti = [delta + param for delta, param in zip(params_change, model_params)]
    print(f"Total memory usage across all GPUs: {_peak_gpu_memory_mb()} MB")

    del grad_full, grad_removed, grad1, v, hv, h_estimate, params_change
    torch.cuda.empty_cache()

    print(time.time() - start_time)

    return params_esti

In [14]:
# Hyper-parameters forwarded to GIF_unleanring.
epsilon=1e-5
iteration=1
damp=0.00
scale=50
results = []

embed_model = 'TransH'


def build_embedding_model(name, ent_tot, rel_tot):
    """Construct a fresh embedding model for ``name`` with this notebook's fixed settings.

    Args:
        name: One of ``'RotatE'``, ``'TransD'`` or ``'TransH'``.
        ent_tot: Number of entities.
        rel_tot: Number of relations.

    Returns:
        A newly constructed model instance (weights not yet loaded).

    Raises:
        ValueError: If ``name`` is not a supported model, instead of silently
            building a TransH model that would not match the checkpoint file
            selected by the same name.
    """
    if name == 'RotatE':
        return RotatE(
            ent_tot = ent_tot,
            rel_tot = rel_tot,
            dim = 200,
            margin = 6.0,
            epsilon = 2.0)
    if name == 'TransD':
        return TransD(
            ent_tot = ent_tot,
            rel_tot = rel_tot,
            dim_e = 200,
            dim_r = 200,
            p_norm = 1,
            norm_flag = True)
    if name == 'TransH':
        return TransH(
            ent_tot = ent_tot,
            rel_tot = rel_tot,
            dim = 200,
            p_norm = 1,
            norm_flag = True)
    raise ValueError(f"Unsupported embed_model: {name!r}")


# Two independent instances: `model` is the one the estimate is computed on,
# `unlearn_model` later loads the estimated weights for evaluation.
ent_tot = train_dataloader.get_ent_tot()
rel_tot = train_dataloader.get_rel_tot()
model = build_embedding_model(embed_model, ent_tot, rel_tot)
unlearn_model = build_embedding_model(embed_model, ent_tot, rel_tot)

checkpoint_path=f"./checkpoint/FB15K237/FB15K237_{embed_model}.ckpt"
model = torch.nn.DataParallel(model)
model.to('cuda')
# The checkpoint is loaded on the wrapped module, not on the DataParallel shell.
model.module.load_checkpoint(checkpoint_path)
model = NegativeSampling(
    model = model,
    loss = MarginLoss(margin = 5.0),
    batch_size = train_dataloader.get_batch_size()
)
print('epsilon:', epsilon)
print('iteration:', iteration)
print('damp:', damp)
print('scale:', scale)
new_checkpoint_path = f"./checkpoint/FB15K237/ZOWFGIF_{percent}_{embed_model}.ckpt"
# new_checkpoint_path = f"./checkpoint/FB15K237/Delete_Edge_{embed_model}_FB15K237.ckpt"
params_esti = GIF_unleanring(model, train_dataloader, retrain_dataloader, epsilon=epsilon,
                             iteration=iteration, damp=damp, scale=scale)
update_and_save_checkpoint(checkpoint_path=checkpoint_path,
                           new_checkpoint_path=new_checkpoint_path,
                           new_params=params_esti)
test_dataloader = TestDataLoader("./benchmarks/FB15K237/", "link")

# test the model
unlearn_model.load_checkpoint(new_checkpoint_path)
unlearn_tester = Tester(model = unlearn_model, data_loader = test_dataloader, use_gpu = True)
mrr, mr, hit10, hit3, hit1 = unlearn_tester.run_link_prediction(type_constrain = False)

epsilon: 1e-05
iteration: 1
damp: 0.0
scale: 50
Total GradsFull memory usage across all GPUs: 574.7001953125 MB
Total GradsRemoved memory usage across all GPUs: 574.7001953125 MB
final h_estimate: 318.5029296875 MB
Total memory usage across all GPUs: 574.7001953125 MB
0.12455511093139648
Updated checkpoint saved to ./checkpoint/FB15K237/ZOWFGIF_0.45_TransH.ckpt
Input Files Path : ./benchmarks/FB15K237/
The total of test triples is 20466.
The total of valid triples is 17535.


100%|████████████████████████████████████████████████████████████████████████████████| 20466/20466 [00:26<00:00, 785.10it/s]

0.3873741626739502





no type constraint results:
metric:			 MRR 		 MR 		 hit@10 	 hit@3  	 hit@1 
l(raw):			 0.088742 	 573.665466 	 0.206342 	 0.085654 	 0.033812 
r(raw):			 0.255389 	 169.283493 	 0.440975 	 0.273331 	 0.166960 
averaged(raw):		 0.172066 	 371.474487 	 0.323659 	 0.179493 	 0.100386 

l(filter):		 0.115251 	 418.791168 	 0.261507 	 0.119955 	 0.047835 
r(filter):		 0.297220 	 153.827866 	 0.513241 	 0.325613 	 0.195544 
averaged(filter):	 0.206235 	 286.309509 	 0.387374 	 0.222784 	 0.121690 
0.387374


In [10]:
# Display the link-prediction metrics most recently assigned from
# `run_link_prediction` in this kernel, as a tuple (last expression of the cell).
mrr, mr, hit10, hit3, hit1

(0.2101750373840332,
 280.8727722167969,
 0.39367732405662537,
 0.227279394865036,
 0.12432815134525299)

no type constraint results:
metric:			 MRR 		 MR 		 hit@10 	 hit@3  	 hit@1 
l(raw):			 0.088743 	 573.662903 	 0.206391 	 0.085654 	 0.033812 
r(raw):			 0.255391 	 169.284912 	 0.440975 	 0.273331 	 0.166960 
averaged(raw):		 0.172067 	 371.473907 	 0.323683 	 0.179493 	 0.100386 

l(filter):		 0.117429 	 408.936127 	 0.266149 	 0.121470 	 0.048862 
r(filter):		 0.302921 	 152.809433 	 0.521206 	 0.333089 	 0.199795 
averaged(filter):	 0.210175 	 280.872772 	 0.393677 	 0.227279 	 0.124328 
0.393677


In [13]:
# Run the `nvidia-smi` shell command to print the current GPU status.
!nvidia-smi

no type constraint results:
metric:			 MRR 		 MR 		 hit@10 	 hit@3  	 hit@1 
l(raw):			 0.088743 	 573.665771 	 0.206391 	 0.085654 	 0.033812 
r(raw):			 0.255388 	 169.283890 	 0.440975 	 0.273331 	 0.166960 
averaged(raw):		 0.172066 	 371.474823 	 0.323683 	 0.179493 	 0.100386 

l(filter):		 0.122004 	 398.693298 	 0.273625 	 0.127822 	 0.052624 
r(filter):		 0.308005 	 151.516373 	 0.530343 	 0.340858 	 0.202824 
averaged(filter):	 0.215004 	 275.104828 	 0.401984 	 0.234340 	 0.127724 
0.401984
Mon Jul 29 02:06:03 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|          