# Proof of concept

In [2]:
import torch
import torch as t
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import json
import requests
import numpy as np

import torch
from torchvision import transforms
from PIL import Image

## Model use example

In [3]:
# Pretrained ResNet-18, switched to inference mode (eval() returns the module itself)
model = models.resnet18(pretrained=True).eval()

# Preprocessing pipeline: fixed 224x224 size -> tensor -> per-channel normalization
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Read the example picture and add a leading batch axis for the model
image = Image.open("grey.jpeg")
input_tensor = transform(image)[None, ...]


    
def predict_label(model, input_tensor, labels=None):
    """Return the human-readable label the model predicts for a single image.

    Args:
        model: Callable that maps ``input_tensor`` to class logits; the class
            axis is dimension 1.
        input_tensor: Batch holding exactly one preprocessed image (the
            prediction is read with ``.item()``, which requires one element).
        labels: Optional sequence of class names indexed by class id. When
            omitted, the ImageNet "simple labels" list is downloaded. Pass an
            already-loaded list to avoid a network round trip on every call.

    Returns:
        The entry of ``labels`` at the index of the highest logit.

    Raises:
        requests.RequestException: If the label list has to be downloaded and
            the request times out or returns an HTTP error status.
    """
    if labels is None:
        url = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"
        # A timeout keeps the notebook from hanging forever on a dead connection;
        # raise_for_status surfaces HTTP errors instead of a confusing JSON error.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        labels = response.json()

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(input_tensor)
    predicted = output.argmax(dim=1)

    return labels[predicted.item()]


# Sanity check: classify the example image with the pretrained model.
predict_label(model, input_tensor)




'grey parrot'

## Image Attack Functions

### Processing image

In [4]:
def proccess_image(image_path: str) -> torch.Tensor:
    """Load an image file and convert it into a batched, normalized model input.

    Args:
        image_path: Path of the image file to open.

    Returns:
        Float tensor of shape ``(1, 3, 224, 224)``: the image resized to
        224x224, converted to a tensor, normalized per channel, with a
        leading batch axis added.
    """
    # Preprocessing pipeline applied to the loaded picture
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Force three channels: grayscale, palette or RGBA files would otherwise
    # not match the 3-channel Normalize above. The context manager closes the
    # file handle; convert() reads the pixel data before that happens.
    with Image.open(image_path) as image:
        rgb_image = image.convert("RGB")

    input_tensor = transform(rgb_image).unsqueeze(0)

    return input_tensor

# Smoke test: run the preprocessing on the example image and report type and shape
image_path = "grey.jpeg"
output = proccess_image(image_path)
print(type(output), output.shape, sep="\n")


<class 'torch.Tensor'>
torch.Size([1, 3, 224, 224])


### Calculating noise

In [5]:
# TODO, make labels an attribute in the class so they're not a floating global variable

# Class names used by the helpers below; a label's list position is its class index.
url = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"
# Timeout so a dead connection cannot hang the kernel; raise_for_status so an
# HTTP error is reported as such instead of as a JSON decoding failure.
_labels_response = requests.get(url, timeout=10)
_labels_response.raise_for_status()
labels = _labels_response.json()

def get_target_idx(attack_target: str) -> torch.Tensor:
    """Look up the class index of a label name.

    Args:
        attack_target: Label name to search for in the module-level ``labels`` list.

    Returns:
        Integer tensor of shape ``(1,)`` holding the position of
        ``attack_target`` in ``labels`` (a tensor, not a plain ``int``).

    Raises:
        ValueError: If ``attack_target`` is not present in ``labels``.
    """
    try:
        idx = labels.index(attack_target)
    except ValueError:
        # Same exception type as list.index, but with a message that names the bad label.
        raise ValueError(
            f"unknown attack target {attack_target!r}: not found in the label list"
        ) from None
    return torch.tensor(idx).reshape(1)




In [6]:
# Scratch check: randn accepts the shape of an existing tensor
x = t.tensor([[1, 2], [1, 2]])
t.randn(x.size())

tensor([[ 1.3940, -1.0972],
        [-0.2371, -0.0168]])

In [18]:
# todo: bundle the optimisation settings below into a single training config object

def calculate_noise(
    image_data: torch.Tensor,
    attack_target: str,
    steps: int = 1000,
    lr: float = 0.01,
    l1_lambda: float = 0.1,
    log_every: int = 10,
) -> torch.Tensor:
    """Optimise additive noise that pushes the model's prediction toward a target label.

    Starting from random noise, minimises
    ``cross_entropy(model(image_data + noise), target) + l1_lambda * |noise|_1``
    with AdamW, then prints the label predicted for the perturbed input.

    Args:
        image_data: Preprocessed input batch fed to the model.
        attack_target: Label name the perturbed image should be classified as.
        steps: Number of optimisation steps.
        lr: Learning rate passed to AdamW.
        l1_lambda: Weight of the L1 penalty on the noise.
        log_every: Print the loss every this many steps; ``0`` disables logging.

    Returns:
        Noise tensor with the same shape as ``image_data``, detached from the
        autograd graph.
    """
    # initialize model
    model = models.resnet18(pretrained=True)
    model.eval()
    # Only the noise is optimised. Freezing the weights stops backward() from
    # computing and accumulating gradients for parameters that are never updated.
    model.requires_grad_(False)

    # get index of target label
    target_idx = get_target_idx(attack_target)

    # initialize noise as the single trainable tensor
    noise = t.randn(image_data.shape, requires_grad=True)

    optimizer = t.optim.AdamW([noise], lr=lr)

    # train noise
    for step in range(steps):
        optimizer.zero_grad()
        logits = model(image_data + noise)

        l1_norm = noise.abs().sum()

        loss = F.cross_entropy(logits, target_idx) + l1_lambda * l1_norm
        loss.backward()
        optimizer.step()

        if log_every and step % log_every == 0:
            print(f'step: {step}, loss: {loss.item()}')

    # Hand back a plain tensor so callers do not keep extending the autograd graph.
    noise = noise.detach()

    print(predict_label(model, image_data + noise))

    return noise
    

### Generate and save image

In [19]:
# Reverse the preprocessing transformations
def tensor_to_image(
    tensor: torch.Tensor,
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
) -> "Image.Image":
    """Convert a normalized image tensor back into a PIL image.

    Args:
        tensor: Image tensor laid out as ``(C, H, W)``, or ``(1, C, H, W)``
            with a leading batch axis that is squeezed away. Expected to be a
            floating-point tensor normalized with ``mean``/``std``.
        mean: Per-channel mean that was subtracted during preprocessing.
        std: Per-channel standard deviation that was divided by during
            preprocessing.

    Returns:
        The PIL image produced by ``transforms.ToPILImage`` from the
        un-normalized tensor, clipped to the ``[0, 1]`` range.
    """
    # Drop any autograd history so tensors that still track gradients convert cleanly.
    tensor = tensor.detach()

    # Remove the batch dimension if it exists
    if tensor.ndimension() == 4:
        tensor = tensor.squeeze(0)

    # Undo Normalize: x_norm = (x - mean) / std  =>  x = x_norm * std + mean.
    # Without this step the normalized values are clamped directly and every
    # pixel darker than the channel mean collapses to black.
    channel_mean = torch.as_tensor(mean, dtype=tensor.dtype, device=tensor.device).view(-1, 1, 1)
    channel_std = torch.as_tensor(std, dtype=tensor.dtype, device=tensor.device).view(-1, 1, 1)
    unnormalized_tensor = tensor * channel_std + channel_mean

    # Clip the values to be between 0 and 1
    unnormalized_tensor = torch.clamp(unnormalized_tensor, 0, 1)

    # Convert to PIL Image
    to_pil = transforms.ToPILImage()
    image = to_pil(unnormalized_tensor)

    return image

def generate_and_save_image(
    image_data: torch.Tensor, 
    attack_noise: torch.Tensor, 
    save_path: str):
    """Render ``image_data + attack_noise`` as an image, display it and save it.

    Args:
        image_data: Preprocessed image tensor.
        attack_noise: Perturbation added to ``image_data`` (anything that can
            be added to the tensor, e.g. ``0`` for the unmodified image).
        save_path: Destination file. The image is written only when the path
            has a file extension; otherwise saving is skipped with a message.
    """
    import os  # local import: only needed for the extension check below

    output_image = tensor_to_image(image_data + attack_noise)
    output_image.show()  # Display the image

    # The image format is inferred from the file extension, so a path without
    # one cannot be written; report it instead of failing the whole run.
    if save_path and os.path.splitext(save_path)[1]:
        output_image.save(save_path)
    else:
        print(f"Not saving image: {save_path!r} has no file extension to infer the format from")

In [20]:
def attack_image(image_path: str, save_path: str, attack_target: str, attack_method: str):
    """Run the whole attack pipeline on a single image file.

    The image is preprocessed with ``proccess_image``, a noise tensor is
    obtained from ``calculate_noise`` for ``attack_target``, and
    ``generate_and_save_image`` is then called twice: once with zero noise and
    a placeholder path, once with the computed noise and ``save_path``.

    Note: ``attack_method`` is accepted but not read by this function.
    """
    # Preprocess the image for resnet18
    original = proccess_image(image_path)

    # Noise that minimizes the loss with respect to the target output
    perturbation = calculate_noise(original, attack_target)

    # Unmodified input first, attacked version second
    for noise, destination in ((0, "hello"), (perturbation, save_path)):
        generate_and_save_image(original, noise, destination)
    
# Run the full pipeline on the example image, steering the prediction toward "macaw".
attack_image("grey.jpeg", "attacked_grey.jpeg", "macaw", "minimize_loss")



step: 0, loss: 11984.916015625
step: 10, loss: 10538.59765625
step: 20, loss: 9219.2880859375
step: 30, loss: 8015.32666015625
step: 40, loss: 6923.12646484375
step: 50, loss: 5940.6396484375
step: 60, loss: 5065.92236328125
step: 70, loss: 4290.65869140625
step: 80, loss: 3609.794921875
step: 90, loss: 3016.499267578125
step: 100, loss: 2504.61767578125
step: 110, loss: 2065.1484375
step: 120, loss: 1691.699462890625
step: 130, loss: 1377.3438720703125
step: 140, loss: 1113.34326171875
step: 150, loss: 894.9472045898438
step: 160, loss: 714.7329711914062
step: 170, loss: 567.2591552734375
step: 180, loss: 448.48828125
step: 190, loss: 352.8793640136719
step: 200, loss: 277.12384033203125
step: 210, loss: 217.32179260253906
step: 220, loss: 170.71145629882812
step: 230, loss: 135.25726318359375
step: 240, loss: 107.82251739501953
step: 250, loss: 86.62982940673828
step: 260, loss: 70.38602447509766
step: 270, loss: 58.58849334716797
step: 280, loss: 49.7969970703125
step: 290, loss: 43