In [None]:
import logging
import os
import sys
import traceback

from saicinpainting.evaluation.utils import move_to_device

# Set the thread-count environment variables of the numeric backends to '1'.
# NOTE(review): this runs after the saicinpainting import above; if that import
# already loads numpy/torch the values may be read too late - confirm.
os.environ.update({
    thread_var: '1'
    for thread_var in (
        'OMP_NUM_THREADS',
        'OPENBLAS_NUM_THREADS',
        'MKL_NUM_THREADS',
        'VECLIB_MAXIMUM_THREADS',
        'NUMEXPR_NUM_THREADS',
    )
})

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import tqdm
import yaml
from omegaconf import OmegaConf
from torch.utils.data._utils.collate import default_collate

from saicinpainting.training.data.datasets import make_default_val_dataset
from saicinpainting.training.trainers import load_checkpoint
from saicinpainting.utils import register_debug_signal_handlers

import matplotlib.pyplot as plt
import cv2 as cv
import numpy as np

import clip


In [None]:
# Location of the prediction config: directory (with trailing slash) and file name.
config_path = "./configs/prediction/"
config_name = "default.yaml"

In [None]:
# Load the prediction config and point it at the model directory to use.
# os.path.join keeps the path valid whether or not config_path ends with a
# separator (plain string concatenation silently depended on the trailing '/').
predict_config = OmegaConf.load(os.path.join(config_path, config_name))
predict_config.model.path = 'LaMa_models/lama-celeba-hq/lama-fourier'

In [None]:
# Settings read from the prediction config.
model_dir = predict_config.model.path
device = torch.device(predict_config.device)  # cuda
out_ext = predict_config.get('out_ext', '.png')

# The training config is read from config.yaml inside the model directory.
train_config_path = os.path.join(model_dir, 'config.yaml')
with open(train_config_path, 'r') as config_file:
    train_config = OmegaConf.create(yaml.safe_load(config_file))

# Switch the training config to predict-only and select the 'noop' visualizer.
train_config.training_model.predict_only = True
train_config.visualizer.kind = 'noop'

# Checkpoint file lives under <model_dir>/models/<checkpoint name>.
checkpoint_path = os.path.join(model_dir, 'models', predict_config.model.checkpoint)

# Load with map_location='cpu', then freeze the model and move it to `device`.
model = load_checkpoint(train_config, checkpoint_path, strict=False, map_location='cpu')
model.freeze()
model.to(device)

In [None]:
# Load CLIP (ViT-B/32) onto the same device as the inpainting model instead of a
# hard-coded "cuda", so both models agree when the config selects e.g. 'cuda:1'.
clip_model, preprocess = clip.load("ViT-B/32", device=device)


---

In [None]:
# Prepare the ground-truth image, the rectangular inpainting mask and the
# masked preview, and save the previews to out_dir.
out_dir = 'sample_out/'
# cv.imwrite does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

image_path = '/image.jpg'
image = cv.imread(image_path)
if image is None:
    # cv.imread signals an unreadable/missing file by returning None rather than
    # raising; fail here with a clear message instead of inside cv.resize.
    raise FileNotFoundError(f'Could not read input image: {image_path}')
image = cv.resize(image, (256, 256))
cv.imwrite(os.path.join(out_dir, 'gt.png'), image)

# Reverse the channel axis and move it first: (H, W, 3) -> (3, H, W), float32.
img = np.transpose(image[:, :, ::-1], (2, 0, 1))
img = np.ascontiguousarray(img).astype(np.float32)
img_t = torch.tensor(img, requires_grad=True).unsqueeze(0)  # 1,3,H,W

# Binary mask over the image plane: 1 inside the rectangle, 0 elsewhere.
mask = torch.zeros_like(img_t[0, 0], dtype=torch.float32)
# Row bounds come from the height, column bounds from the width
# (identical for the square 256x256 input used here).
shape1 = int(mask.shape[-2] * 0.15)
shape2 = int(mask.shape[-2] * 0.90)

shape3 = int(mask.shape[-1] * 0.15)
shape4 = int(mask.shape[-1] * 0.85)

mask[shape1:shape2, shape3:shape4] = 1

# clone().detach() is the recommended way to copy an existing tensor;
# torch.tensor(tensor) emits a UserWarning.
# NOTE(review): requires_grad is kept from the original, but no visible cell
# reads the mask gradient - confirm it is needed.
mask_t = mask.clone().detach().requires_grad_(True).unsqueeze(0).unsqueeze(0)  # 1,1,H,W

# Preview of the input with the masked rectangle blacked out.
image_masked = image.copy()
image_masked[shape1:shape2, shape3:shape4] = 0
cv.imwrite(os.path.join(out_dir, 'masked.png'), image_masked)



In [None]:
# Iterative signed-gradient perturbation of the inpainting input.
#
# Each step nudges the input image so that the CLIP feature of the model's
# prediction moves (relative to the unperturbed prediction) in the direction
# text_feature(word) - text_feature(word_original). The accumulated
# perturbation is kept inside [-noise_bound, noise_bound], and the blended
# result of every step is written to out_dir.

# --- Hyper-parameters: fill these in before running -------------------------
# They were left blank in the original cell (a SyntaxError); None placeholders
# plus the check below turn that into an explicit, readable failure.
eps = None            # float: step size of the first signed-gradient update
word = None           # str: text prompt the prediction should move towards
word_original = None  # str: text prompt the prediction should move away from
step = None           # int: number of attack iterations
dacay_rate = None     # float: eps is multiplied by this after every iteration
                      # (identifier spelling kept in case other cells use it)
noise_bound = 0.01    # bound on each element of the accumulated perturbation

_missing = [name for name, value in (('eps', eps), ('word', word),
                                     ('word_original', word_original),
                                     ('step', step), ('dacay_rate', dacay_rate))
            if value is None]
if _missing:
    raise ValueError('Set these attack hyper-parameters before running this cell: '
                     + ', '.join(_missing))


def _to_uint8_rgb(image_batch):
    """Return the first image of an (N, C, H, W) tensor as an (H, W, C) uint8 array.

    Values are scaled by 255 and clipped to [0, 255], as the original cell did.
    """
    first = np.transpose(image_batch.detach().cpu().numpy()[0], (1, 2, 0))
    return np.clip(first * 255, 0, 255).astype('uint8')


# --- Inputs -------------------------------------------------------------------
# Use `device` (where the model was moved) rather than a bare .cuda(), so the
# tensors follow the model if the config selects a specific GPU.
original_image = (img_t.to(device) / 255).detach()
# Tiny random jitter kept from the original cell.
# NOTE(review): its purpose is not evident from this file - confirm it is needed.
clean_image = original_image + torch.rand_like(original_image) / 1e7

batch_adv = {
    'image': clean_image,
    'original_image': original_image,
    # Only the image gradient is used below, so the mask is detached.
    'mask': mask_t.to(device).detach(),
    'attack_noise': torch.zeros_like(clean_image),
}

# --- Constant reference features (no gradients needed) --------------------------
with torch.no_grad():
    word_token = clip.tokenize([word, word_original]).to(device)
    text_feature = clip_model.encode_text(word_token)
    text_feature_delta = text_feature[:1] - text_feature[1:2]

    # A shallow copy is passed, as in the original cell.
    # NOTE(review): presumably model() writes into the dict it receives - confirm.
    reference_prediction = model(batch_adv.copy())['predicted_image']
    # The same prediction serves both the CLIP reference feature and the saved
    # baseline image (the original ran the model twice for this).
    img_feature_old = clip_model.encode_image(
        F.interpolate(reference_prediction, (224, 224)))

img_inp0 = _to_uint8_rgb(reference_prediction)
# Written next to the other outputs; the original targeted '/default.png'
# (filesystem root), where cv.imwrite typically fails without raising.
cv.imwrite(os.path.join(out_dir, 'default.png'),
           np.ascontiguousarray(img_inp0[:, :, ::-1]))

# --- Attack loop --------------------------------------------------------------
for i in range(step):
    # Fresh leaf tensor every iteration: no graph is carried across iterations,
    # so retain_graph and in-place edits of a graph tensor are not needed.
    adv_image = (clean_image + batch_adv['attack_noise']).detach().requires_grad_(True)
    batch_adv['image'] = adv_image

    img_feature = clip_model.encode_image(
        F.interpolate(model(batch_adv)['predicted_image'], (224, 224)))
    img_feature_delta = img_feature - img_feature_old

    # Objective to increase: alignment of the image-feature shift with the
    # text-feature shift.
    loss = nn.CosineSimilarity()(img_feature_delta, text_feature_delta)
    print('loss: ', loss.item())

    # Gradient w.r.t. the input only; avoids accumulating .grad on CLIP weights.
    image_grad, = torch.autograd.grad(loss.sum(), adv_image)
    optimal_perturbation = eps * torch.sign(image_grad)  # inf norm, attack noise
    eps *= dacay_rate

    # Accumulate the step and project back into the allowed range.
    batch_adv['attack_noise'] = torch.clip(
        batch_adv['attack_noise'] + optimal_perturbation, -noise_bound, noise_bound)
    batch_adv['image'] = (clean_image + batch_adv['attack_noise']).detach()

    # Render the result for the bounded adversarial image. The original added
    # optimal_perturbation a second time here, which could push the rendered
    # input outside [-noise_bound, noise_bound].
    with torch.no_grad():
        adv_batch1 = batch_adv.copy()
        batch_adv1 = model(adv_batch1)
        # Predicted pixels where mask == 1, original pixels where mask == 0.
        blended = (batch_adv1['predicted_image'] * batch_adv1['mask']
                   + batch_adv['original_image'] * (1 - batch_adv1['mask']))
    img_inp_adv1 = _to_uint8_rgb(blended)
    cv.imwrite(os.path.join(out_dir, word + str(i) + '.png'),
               np.ascontiguousarray(img_inp_adv1[:, :, ::-1]))

