In [2]:
import torch
import clip
from PIL import Image
import cv2
import numpy as np

# CLIP 모델 및 텍스트 설명 로드
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# 블러링을 유도할 텍스트 설명
texts = ["eyes", "nose", "mouth"]
text_tokens = clip.tokenize(texts).to(device)

def generate_mild_noise(image, clip_model, text_tokens, epsilon=0.001, steps=100):
    # 이미지 전처리
    image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    image_tensor = preprocess(image_pil).unsqueeze(0).to(device)
    noise = torch.zeros_like(image_tensor, requires_grad=True).to(device)

    optimizer = torch.optim.Adam([noise], lr=epsilon)

    for step in range(steps):
        perturbed_image = image_tensor + noise

        # CLIP 모델로 이미지 임베딩 계산
        perturbed_image_normalized = (perturbed_image - perturbed_image.min()) / (perturbed_image.max() - perturbed_image.min())
        clip_image_embeds = clip_model.encode_image(perturbed_image_normalized)

        # CLIP 텍스트 임베딩 계산 및 유사도 측정
        similarity = torch.mean(clip_model.encode_text(text_tokens) @ clip_image_embeds.T)

        # 손실 함수: 텍스트 설명과 이미지 유사도를 최소화
        loss = -similarity

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 노이즈 클리핑 (아주 미세한 값으로 유지)
        noise.data = torch.clamp(noise.data, -epsilon, epsilon)

    return (image_tensor + noise).squeeze().detach().cpu().numpy()

# 예시 사용
image_path = 'yeongmin.jpeg'
image = cv2.imread(image_path)

# 미세한 노이즈를 통한 블러링 유도
noisy_image = generate_mild_noise(image, model, text_tokens)

# 결과 이미지 저장
noisy_image = np.clip(noisy_image, 0, 255).astype(np.uint8)
noisy_image = np.transpose(noisy_image, (1, 2, 0))
cv2.imwrite('noisy_face_with_clip_effect.jpg', cv2.cvtColor(noisy_image, cv2.COLOR_RGB2BGR))

True

In [5]:
import cv2
import numpy as np

def adjust_image(image):
    # Check if the image is loaded correctly
    if image is None:
        print("Error: Image not loaded properly.")
        return None
    
    # Normalize image to 0-255 range
    norm_image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)
    return norm_image.astype(np.uint8)

# Try to load the original noisy image
image_path = 'noisy_face_with_clip_effect.jpg'
image = cv2.imread(image_path)

# If the image wasn't loaded, print an error message
if image is None:
    print("The image could not be loaded. Please check the file path or use a different image.")
else:
    # Adjust the image to make sure it's in a visible range
    fixed_image = adjust_image(image)

    # Save the adjusted image if it's successfully loaded
    if fixed_image is not None:
        output_path = 'fixed_noisy_face_with_clip_effect.jpg'
        cv2.imwrite(output_path, fixed_image)
        print(f"Adjusted image saved at: {output_path}")


Adjusted image saved at: fixed_noisy_face_with_clip_effect.jpg


In [6]:
import cv2
import numpy as np
from PIL import Image

# Load the original image (you can replace this with the actual original image path)
original_image_path = 'yeongmin.jpeg'
original_image = cv2.imread(original_image_path)

# Load the noisy image (the one you uploaded)
noisy_image_path = 'fixed_noisy_face_with_clip_effect.jpg'
noisy_image = cv2.imread(noisy_image_path)

# Check if both images are loaded correctly
if original_image is None or noisy_image is None:
    print("Error loading images. Please check the file paths.")
else:
    # Resize noisy image to match the original image size if they differ
    if original_image.shape != noisy_image.shape:
        noisy_image = cv2.resize(noisy_image, (original_image.shape[1], original_image.shape[0]))

    # Combine the images with a weighted sum (alpha controls the noise intensity)
    alpha = 0.5  # You can adjust this value to control the strength of the noise
    combined_image = cv2.addWeighted(original_image, 1.0, noisy_image, alpha, 0)

    # Save the combined image
    output_path = 'combined_image_with_noise.jpg'
    cv2.imwrite(output_path, combined_image)

    # Display the resulting image
    combined_pil_image = Image.fromarray(cv2.cvtColor(combined_image, cv2.COLOR_BGR2RGB))
    combined_pil_image.show()

    print(f"Combined image saved at: {output_path}")

Combined image saved at: combined_image_with_noise.jpg


In [25]:
import cv2
import numpy as np
from PIL import Image
import torch
import clip

# CLIP 모델 및 텍스트 설명 로드
device = "cuda" if torch.cuda.is_available() else "cpu"
clip_model, preprocess = clip.load("ViT-B/32", device=device)

# 블러링을 유도할 텍스트 설명
texts = ["blurred eyes", "blurred nose", "blurred mouth"]
text_tokens = clip.tokenize(texts).to(device)

def generate_mild_noise(image, clip_model, text_tokens, epsilon=0.01, steps=1000):
    # 이미지 전처리
    image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    image_tensor = preprocess(image_pil).unsqueeze(0).to(device)
    noise = torch.zeros_like(image_tensor, requires_grad=True).to(device)

    optimizer = torch.optim.Adam([noise], lr=epsilon)

    for step in range(steps):
        perturbed_image = image_tensor + noise

        # CLIP 모델로 이미지 임베딩 계산
        perturbed_image_normalized = (perturbed_image - perturbed_image.min()) / (perturbed_image.max() - perturbed_image.min())
        clip_image_embeds = clip_model.encode_image(perturbed_image_normalized)

        # CLIP 텍스트 임베딩 계산 및 유사도 측정
        similarity = torch.mean(clip_model.encode_text(text_tokens) @ clip_image_embeds.T)

        # 손실 함수: 텍스트 설명과 이미지 유사도를 최소화
        loss = -similarity

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 노이즈 클리핑 (아주 미세한 값으로 유지)
        noise.data = torch.clamp(noise.data, -epsilon, epsilon)

    # 이미지를 0-255 범위로 다시 조정
    final_image = torch.clamp((image_tensor + noise) * 255, 0, 255).detach().cpu().numpy().squeeze()
    final_image = final_image.transpose(1, 2, 0).astype(np.uint8)
    
    return final_image

def combine_images(original_image_path, noisy_image_path, output_path, alpha=0.3):
    # Load the original and noisy images
    original_image = cv2.imread(original_image_path)
    noisy_image = cv2.imread(noisy_image_path)

    # Ensure both images are loaded correctly
    if original_image is None or noisy_image is None:
        print("Error loading images. Please check the file paths.")
        return

    # Resize the noisy image to match the original image size if they differ
    noisy_image = cv2.resize(noisy_image, (original_image.shape[1], original_image.shape[0]))

    # Combine the images with a weighted sum
    combined_image = cv2.addWeighted(original_image, 1.0, noisy_image, alpha, 0)

    # Normalize the combined image to ensure visibility
    combined_image = cv2.normalize(combined_image, None, 0, 255, cv2.NORM_MINMAX)

    # Save the combined image
    cv2.imwrite(output_path, combined_image)

    # Display the resulting image
    combined_pil_image = Image.fromarray(cv2.cvtColor(combined_image, cv2.COLOR_BGR2RGB))
    combined_pil_image.show()

    print(f"Combined image saved at: {output_path}")

# Step 1: Generate and save the noisy image
image_path = 'yeongmin.jpeg'  # Path to the original image
image = cv2.imread(image_path)

# Generate the noisy image using CLIP-based noise generation
noisy_image = generate_mild_noise(image, clip_model, text_tokens)

# Save the noisy image
noisy_image_path = 'fixed_noisy_face_with_clip_effect.jpg'
cv2.imwrite(noisy_image_path, noisy_image)

# Step 2: Combine the original and noisy images
output_path = 'combined_image_with_noise_fixed.jpg'
combine_images(image_path, noisy_image_path, output_path, alpha=0.1)

Combined image saved at: combined_image_with_noise_fixed.jpg


In [27]:
def compute_image_difference(original_image_path, noisy_image_path, output_path):
    # Load the original and noisy images
    original_image = cv2.imread(original_image_path)
    noisy_image = cv2.imread(noisy_image_path)

    # Ensure both images are the same size
    if original_image.shape != noisy_image.shape:
        noisy_image = cv2.resize(noisy_image, (original_image.shape[1], original_image.shape[0]))

    # Compute the absolute difference between the images
    difference_image = cv2.absdiff(original_image, noisy_image)

    # Enhance the difference image for better visualization
    # Option 1: Scale up the differences for better visibility
    difference_image = cv2.normalize(difference_image, None, 0, 255, cv2.NORM_MINMAX)

    # Option 2: Apply a contrast enhancement (uncomment if needed)
    # difference_image = cv2.convertScaleAbs(difference_image, alpha=3, beta=0)

    # Save the difference image
    cv2.imwrite(output_path, difference_image)

    return difference_image

# Usage Example
original_image_path = 'yeongmin.jpeg'  # Path to the original image
noisy_image_path = 'combined_image_with_noise_fixed.jpg'  # Path to the noisy image
difference_image_path = 'difference_image_enhanced.jpg'  # Path to save the enhanced difference image

difference_image = compute_image_difference(original_image_path, noisy_image_path, difference_image_path)

In [26]:
import torch
import clip
from PIL import Image
import cv2
import numpy as np

# Load CLIP model and preprocess function
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device)

# Define the text descriptions you want to classify with
texts = ["a photo of a person", "blurred eyes", "blurred nose", "blurred mouth"]
text_tokens = clip.tokenize(texts).to(device)

def compute_image_difference(original_image_path, noisy_image_path, output_path):
    # Load the original and noisy images
    original_image = cv2.imread(original_image_path)
    noisy_image = cv2.imread(noisy_image_path)

    # Compute the absolute difference between the images
    difference_image = cv2.absdiff(original_image, noisy_image)

    # Save the difference image
    cv2.imwrite(output_path, difference_image)

    # Return the difference image for further analysis
    return difference_image

def classify_image(image, model, text_tokens):
    # Preprocess the image
    image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    image_tensor = preprocess(image_pil).unsqueeze(0).to(device)

    # Encode the image and texts using CLIP
    with torch.no_grad():
        image_features = model.encode_image(image_tensor)
        text_features = model.encode_text(text_tokens)

    # Calculate the similarity between the image and each text prompt
    similarities = (image_features @ text_features.T).squeeze(0)
    
    # Normalize similarities to get probabilities (optional)
    probs = similarities.softmax(dim=-1).cpu().numpy()

    return probs

def print_classification_results(probs, descriptions):
    # Print each text description with its corresponding probability
    for i, prob in enumerate(probs):
        print(f"{descriptions[i]}: {prob * 100:.2f}%")

# Step 1: Compute the difference image
original_image_path = 'yeongmin.jpeg'  # Path to the original image
noisy_image_path = 'combined_image_with_noise_fixed.jpg'  # Path to the combined noisy image
difference_image_path = 'difference_image.jpg'  # Path to save the difference image

difference_image = compute_image_difference(original_image_path, noisy_image_path, difference_image_path)

# Step 2: Classify the original, noisy, and difference images
original_image = cv2.imread(original_image_path)
noisy_image = cv2.imread(noisy_image_path)

# Classify the original image
original_probs = classify_image(original_image, model, text_tokens)
print("Original Image Classification Probabilities:")
print_classification_results(original_probs, texts)

# Classify the noisy image
noisy_probs = classify_image(noisy_image, model, text_tokens)
print("Noisy Image Classification Probabilities:")
print_classification_results(noisy_probs, texts)

# Optional: Classify the difference image to see how the noise is perceived
difference_probs = classify_image(difference_image, model, text_tokens)
print("Difference Image Classification Probabilities:")
print_classification_results(difference_probs, texts)

Original Image Classification Probabilities:
a photo of a person: 99.80%
blurred eyes: 0.09%
blurred nose: 0.08%
blurred mouth: 0.02%
Noisy Image Classification Probabilities:
a photo of a person: 99.17%
blurred eyes: 0.45%
blurred nose: 0.28%
blurred mouth: 0.10%
Difference Image Classification Probabilities:
a photo of a person: 99.27%
blurred eyes: 0.47%
blurred nose: 0.19%
blurred mouth: 0.06%


In [31]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import cv2
from PIL import Image
import clip

# Load CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

def generate_adversarial_noise(image_tensor, model, target_text_token, epsilon=0.03, steps=10):
    image_tensor = image_tensor.clone().detach().requires_grad_(True).to(device)
    target_text_token = target_text_token.to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam([image_tensor], lr=epsilon)

    for step in range(steps):
        optimizer.zero_grad()

        # Normalize the image tensor
        normalized_image = (image_tensor - image_tensor.min()) / (image_tensor.max() - image_tensor.min())
        
        # Forward pass
        image_features = model.encode_image(normalized_image)
        text_features = model.encode_text(target_text_token)
        
        # Compute loss (maximize similarity)
        similarity = (image_features @ text_features.T).squeeze(0)
        loss = -similarity.mean()

        # Backward pass
        loss.backward()
        optimizer.step()

        # Clamp the perturbation to the epsilon level
        perturbation = torch.clamp(image_tensor - image_tensor.detach(), -epsilon, epsilon)
        image_tensor = torch.clamp(image_tensor + perturbation, 0, 1).detach().requires_grad_(True)

    return image_tensor

def visualize_perturbation(original_image, adversarial_image):
    perturbation = adversarial_image - original_image
    perturbation = perturbation.cpu().detach().numpy().transpose(1, 2, 0)
    perturbation = (perturbation - perturbation.min()) / (perturbation.max() - perturbation.min())
    
    adversarial_image = adversarial_image.cpu().detach().numpy().transpose(1, 2, 0)
    original_image = original_image.cpu().detach().numpy().transpose(1, 2, 0)
    
    perturbation = (perturbation * 255).astype(np.uint8)
    adversarial_image = (adversarial_image * 255).astype(np.uint8)
    original_image = (original_image * 255).astype(np.uint8)
    
    return original_image, perturbation, adversarial_image

# Load and preprocess image
image_path = 'yeongmin.jpeg'
image_pil = Image.open(image_path).convert('RGB')
image_tensor = preprocess(image_pil).unsqueeze(0).to(device)

# Target text token (to maximize similarity with adversarial noise)
target_text = "blurred image"
target_text_token = clip.tokenize([target_text]).to(device)

# Generate adversarial noise
adversarial_image_tensor = generate_adversarial_noise(image_tensor, model, target_text_token, epsilon=0.03, steps=10)

# Visualize the results
original_image, perturbation_image, adversarial_image = visualize_perturbation(image_tensor.squeeze(0), adversarial_image_tensor.squeeze(0))

# Save and display images
cv2.imwrite('noised.jpg', original_image)
cv2.imwrite('perturbation_image.jpg', perturbation_image)
cv2.imwrite('adversarial_image.jpg', adversarial_image)

# Display images
Image.fromarray(original_image).show()
Image.fromarray(perturbation_image).show()
Image.fromarray(adversarial_image).show()

In [33]:
import torch
import clip
from PIL import Image
import cv2
import numpy as np

# Load CLIP model and preprocess function
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device)

# Define the text descriptions you want to classify with
texts = ["a photo of a person", "blurred eyes", "blurred nose", "blurred mouth"]
text_tokens = clip.tokenize(texts).to(device)

def compute_image_difference(original_image_path, noisy_image_path, output_path):
    # Load the original and noisy images
    original_image = cv2.imread(original_image_path)
    noisy_image = cv2.imread(noisy_image_path)

    # Compute the absolute difference between the images
    difference_image = cv2.absdiff(original_image, noisy_image)

    # Save the difference image
    cv2.imwrite(output_path, difference_image)

    # Return the difference image for further analysis
    return difference_image

def classify_image(image, model, text_tokens):
    # Preprocess the image
    image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    image_tensor = preprocess(image_pil).unsqueeze(0).to(device)

    # Encode the image and texts using CLIP
    with torch.no_grad():
        image_features = model.encode_image(image_tensor)
        text_features = model.encode_text(text_tokens)

    # Calculate the similarity between the image and each text prompt
    similarities = (image_features @ text_features.T).squeeze(0)
    
    # Normalize similarities to get probabilities (optional)
    probs = similarities.softmax(dim=-1).cpu().numpy()

    return probs

def print_classification_results(probs, descriptions):
    # Print each text description with its corresponding probability
    for i, prob in enumerate(probs):
        print(f"{descriptions[i]}: {prob * 100:.2f}%")

# Step 1: Compute the difference image
original_image_path = 'noised.jpg'  # Path to the original image
noisy_image_path = 'perturbation_image.jpg'  # Path to the combined noisy image
difference_image_path = 'difference_image.jpg'  # Path to save the difference image

difference_image = compute_image_difference(original_image_path, noisy_image_path, difference_image_path)

# Step 2: Classify the original, noisy, and difference images
original_image = cv2.imread(original_image_path)
noisy_image = cv2.imread(noisy_image_path)

# Classify the original image
original_probs = classify_image(original_image, model, text_tokens)
print("Original Image Classification Probabilities:")
print_classification_results(original_probs, texts)

# Classify the noisy image
noisy_probs = classify_image(noisy_image, model, text_tokens)
print("Noisy Image Classification Probabilities:")
print_classification_results(noisy_probs, texts)

# Optional: Classify the difference image to see how the noise is perceived
difference_probs = classify_image(difference_image, model, text_tokens)
print("Difference Image Classification Probabilities:")
print_classification_results(difference_probs, texts)

Original Image Classification Probabilities:
a photo of a person: 99.51%
blurred eyes: 0.41%
blurred nose: 0.03%
blurred mouth: 0.05%
Noisy Image Classification Probabilities:
a photo of a person: 99.37%
blurred eyes: 0.54%
blurred nose: 0.03%
blurred mouth: 0.05%
Difference Image Classification Probabilities:
a photo of a person: 99.51%
blurred eyes: 0.40%
blurred nose: 0.01%
blurred mouth: 0.08%
