In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torchvision.models import alexnet
from PIL import Image
import os
import random
import pandas as pd 
import matplotlib.pyplot as plt


def load_model(model_path, num_classes=43, device="cpu"):
    model = alexnet(weights=None, num_classes=43).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()  # Set to evaluation mode
    return model

def process_image(image_path):
    # Load the image
    image = Image.open(image_path)

    # Define transformations
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    # Apply transformations
    image_tensor = transform(image).unsqueeze(0)
    return image_tensor, image

def predict(image_path, model, device="cpu"):
    # No need to reload the model every time you predict. Use the passed model.
    # model = load_model(model_path, device=device)

    image_tensor, image = process_image(image_path)  # Assume process_image returns (tensor, image)
    image_tensor = image_tensor.to(device)

    with torch.no_grad():
        outputs = model(image_tensor)
        _, predicted_class_index = torch.max(outputs, 1)

    return predicted_class_index.item(), image

def predict_on_directory(directory_path, model_path, misclss, num_images_to_test=0, device="cpu", backdoored=False, model_name="ran_sqr_sin_01"):
    model = load_model(model_path, device=device)
    
    # Assuming you have class names to interpret the outputs (you need to define this list)
    class_names = pd.read_csv('/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/labels.csv')['Name'].tolist()
    tests = pd.read_csv('/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/Test.csv')
    # List all files in the directory
    filenames = os.listdir(directory_path)
    random.shuffle(filenames)
    #count number of correct prediction
    correct_count=0
    #initialize the ground truth class id
    #for now, I am checking if the model can predict the backdoored stop sign which belong to class 14
    # Limit the number of files processed
    if num_images_to_test:
        filenames = filenames[:num_images_to_test]
    else:
        num_images_to_test = 427

    for filename in filenames:
        image_path = os.path.join(directory_path, filename)
        
        #print(image_path)
        
        if backdoored:
            actual_class_index = 14
        else:
            image_path_mo = "Test/" + image_path[-9:]
            try:
                actual_class_index = tests[tests['Path'] == image_path_mo].iloc[0]['ClassId']
            except:
                pass
        
        
        
        # Process only .png images (according to your error traceback)
        if image_path.lower().endswith('.png'):
            predicted_class_index, image = predict(image_path, model, device)

            predicted_class_name = class_names[predicted_class_index]
            
            if predicted_class_index == actual_class_index:
                correct_count += 1
            elif backdoored:
                lst = misclss.get(model_name,[])
                lst.append(image_path)
                misclss[model_name] = lst
                
    if backdoored:
        rsl = "backdoored"
    else:
        rsl = "clean"
    print(f'Accuracy on {rsl} images is: {correct_count / num_images_to_test * 100:.2f}%')

In [58]:
# Run predictions on 10 random images from the 'test' directory
models = ['ran_sqr_sin_01','ran_sqr_mul_01','ran_sqr_sin_001','ran_sqr_mul_001', 'fixed_sqr_sin_01', 'fixed_sqr_mul_01','fixed_sqr_sin_001','fixed_sqr_mul_001',\
         'ran_cir_sin_01','ran_cir_mul_01','ran_cir_sin_001','ran_cir_mul_001', 'fixed_cir_sin_01', 'fixed_cir_mul_01','fixed_cir_sin_001','fixed_cir_mul_001',\
         'ran_tri_sin_01','ran_tri_mul_01','ran_tri_sin_001','ran_tri_mul_001', 'fixed_tri_sin_01', 'fixed_tri_mul_01','fixed_tri_sin_001','fixed_tri_mul_001']

misclss = {}

for i in range(24):
    print(f"\ntest of {models[i]}:")
    test_directory = f'D:/IchMorningstar/Research/MLLsecurity/gtsrb-german-traffic-sign/Backdoored_test/original/{i+1}/'
    test_directory_t = 'D:/IchMorningstar/Research/MLLsecurity/gtsrb-german-traffic-sign/Test'
    model_path = f"models/50-epoch_model/alexnet_case_{i+1}.pt"
    #testing trained model on backdoored data
    predict_on_directory(test_directory, model_path, misclss, device="cuda", backdoored=True, model_name=i)
    #testing the trained model on clean data
    predict_on_directory(test_directory_t, model_path, misclss, num_images_to_test=1000, device="cuda",model_name=i)


test of ran_sqr_sin_01:
Accuracy on backdoored images is: 26.70%
Accuracy on clean images is: 86.80%

test of ran_sqr_mul_01:
Accuracy on backdoored images is: 82.90%
Accuracy on clean images is: 88.40%

test of ran_sqr_sin_001:
Accuracy on backdoored images is: 0.94%
Accuracy on clean images is: 86.00%

test of ran_sqr_mul_001:
Accuracy on backdoored images is: 0.00%
Accuracy on clean images is: 86.20%

test of fixed_sqr_sin_01:
Accuracy on backdoored images is: 99.06%
Accuracy on clean images is: 89.20%

test of fixed_sqr_mul_01:
Accuracy on backdoored images is: 100.00%
Accuracy on clean images is: 90.10%

test of fixed_sqr_sin_001:
Accuracy on backdoored images is: 97.19%
Accuracy on clean images is: 85.30%

test of fixed_sqr_mul_001:
Accuracy on backdoored images is: 0.23%
Accuracy on clean images is: 86.30%

test of ran_cir_sin_01:
Accuracy on backdoored images is: 34.89%
Accuracy on clean images is: 90.10%

test of ran_cir_mul_01:
Accuracy on backdoored images is: 78.45%
Accura

In [6]:
# Run predictions on 10 random images from the 'test' directory
models = ['ran_sqr_sin_01','ran_sqr_mul_01','ran_sqr_sin_001','ran_sqr_mul_001', 'fixed_sqr_sin_01', 'fixed_sqr_mul_01','fixed_sqr_sin_001','fixed_sqr_mul_001',\
         'ran_cir_sin_01','ran_cir_mul_01','ran_cir_sin_001','ran_cir_mul_001', 'fixed_cir_sin_01', 'fixed_cir_mul_01','fixed_cir_sin_001','fixed_cir_mul_001',\
         'ran_tri_sin_01','ran_tri_mul_01','ran_tri_sin_001','ran_tri_mul_001', 'fixed_tri_sin_01', 'fixed_tri_mul_01','fixed_tri_sin_001','fixed_tri_mul_001']

misclss = {}

for i in range(24):
    print(f"\ntest of {models[i]}:")
    test_directory = f'/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/backdoored-test/original/{i+1}/'
    test_directory_t = '/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/Test'
    model_path = f"models/50-epoch_model/alexnet_case_{i+1}.pt"
    #testing trained model on backdoored data
    predict_on_directory(test_directory, model_path, misclss, device="cuda", backdoored=True, model_name=i)
    #testing the trained model on clean data
    predict_on_directory(test_directory_t, model_path, misclss, num_images_to_test=1000, device="cuda",model_name=i)


test of ran_sqr_sin_01:


Accuracy on backdoored images is: 95.08%
Accuracy on clean images is: 96.50%

test of ran_sqr_mul_01:
Accuracy on backdoored images is: 99.30%
Accuracy on clean images is: 96.10%

test of ran_sqr_sin_001:
Accuracy on backdoored images is: 94.15%
Accuracy on clean images is: 96.30%

test of ran_sqr_mul_001:
Accuracy on backdoored images is: 97.42%
Accuracy on clean images is: 96.50%

test of fixed_sqr_sin_01:
Accuracy on backdoored images is: 99.30%
Accuracy on clean images is: 97.40%

test of fixed_sqr_mul_01:
Accuracy on backdoored images is: 99.06%
Accuracy on clean images is: 95.60%

test of fixed_sqr_sin_001:
Accuracy on backdoored images is: 99.30%
Accuracy on clean images is: 96.70%

test of fixed_sqr_mul_001:
Accuracy on backdoored images is: 99.30%
Accuracy on clean images is: 98.10%

test of ran_cir_sin_01:
Accuracy on backdoored images is: 99.30%
Accuracy on clean images is: 98.10%

test of ran_cir_mul_01:
Accuracy on backdoored images is: 99.06%
Accuracy on clean images is: 

In [7]:
# Run predictions on 10 random images from the 'test' directory
models = ['ran_sqr_sin_01','ran_sqr_mul_01','ran_sqr_sin_001','ran_sqr_mul_001', 'fixed_sqr_sin_01', 'fixed_sqr_mul_01','fixed_sqr_sin_001','fixed_sqr_mul_001',\
         'ran_cir_sin_01','ran_cir_mul_01','ran_cir_sin_001','ran_cir_mul_001', 'fixed_cir_sin_01', 'fixed_cir_mul_01','fixed_cir_sin_001','fixed_cir_mul_001',\
         'ran_tri_sin_01','ran_tri_mul_01','ran_tri_sin_001','ran_tri_mul_001', 'fixed_tri_sin_01', 'fixed_tri_mul_01','fixed_tri_sin_001','fixed_tri_mul_001']

misclss = {}

for i in range(24):
    print(f"\ntest of {models[i]}:")
    test_directory = f'/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/backdoored-test/original/{i+1}/'
    test_directory_t = '/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/Test'
    model_path = f"models/50-epoch_models/alexnet_case_{i+1}.pt"
    #testing trained model on backdoored data
    predict_on_directory(test_directory, model_path, misclss, device="cuda", backdoored=True, model_name=i)
    #testing the trained model on clean data
    predict_on_directory(test_directory_t, model_path, misclss, num_images_to_test=1000, device="cuda",model_name=i)


test of ran_sqr_sin_01:
Accuracy on backdoored images is: 99.30%
Accuracy on clean images is: 95.90%

test of ran_sqr_mul_01:
Accuracy on backdoored images is: 98.83%
Accuracy on clean images is: 96.70%

test of ran_sqr_sin_001:
Accuracy on backdoored images is: 97.66%
Accuracy on clean images is: 97.40%

test of ran_sqr_mul_001:
Accuracy on backdoored images is: 97.19%
Accuracy on clean images is: 96.40%

test of fixed_sqr_sin_01:
Accuracy on backdoored images is: 99.30%
Accuracy on clean images is: 96.90%

test of fixed_sqr_mul_01:
Accuracy on backdoored images is: 99.30%
Accuracy on clean images is: 97.80%

test of fixed_sqr_sin_001:
Accuracy on backdoored images is: 99.30%
Accuracy on clean images is: 97.40%

test of fixed_sqr_mul_001:
Accuracy on backdoored images is: 99.06%
Accuracy on clean images is: 95.60%

test of ran_cir_sin_01:
Accuracy on backdoored images is: 99.06%
Accuracy on clean images is: 96.90%

test of ran_cir_mul_01:
Accuracy on backdoored images is: 98.59%
Accu

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torchvision.models import alexnet
from PIL import Image
import os
import random
import pandas as pd 
import matplotlib.pyplot as plt


def load_model(model_path, num_classes=43, device="cpu"):
    model = alexnet(weights=None, num_classes=43).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()  # Set to evaluation mode
    return model

def process_image(image_path):
    # Load the image
    image = Image.open(image_path)

    # Define transformations
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Apply transformations
    image_tensor = transform(image).unsqueeze(0)
    return image_tensor, image

def predict(image_path, model, device="cpu"):
    # No need to reload the model every time you predict. Use the passed model.
    # model = load_model(model_path, device=device)

    image_tensor, image = process_image(image_path)  # Assume process_image returns (tensor, image)
    image_tensor = image_tensor.to(device)

    with torch.no_grad():
        outputs = model(image_tensor)
        _, predicted_class_index = torch.max(outputs, 1)

    return predicted_class_index.item(), image

def predict_on_directory(directory_path, model_path, misclss, num_images_to_test=0, device="cpu", backdoored=False, model_name="ran_sqr_sin_01"):
    model = load_model(model_path, device=device)
    
    # Assuming you have class names to interpret the outputs (you need to define this list)
    class_names = pd.read_csv('/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/labels.csv')['Name'].tolist()
    tests = pd.read_csv('/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/Test.csv')
    # List all files in the directory
    filenames = os.listdir(directory_path)
    random.shuffle(filenames)
    #count number of correct prediction
    correct_count=0
    #initialize the ground truth class id
    #for now, I am checking if the model can predict the backdoored stop sign which belong to class 14
    # Limit the number of files processed
    if num_images_to_test:
        filenames = filenames[:num_images_to_test]
    else:
        num_images_to_test = 780

    for filename in filenames:
        image_path = os.path.join(directory_path, filename)
        
        #print(image_path)
        
        if backdoored:
            actual_class_index = 14
        else:
            image_path_mo = "Test/" + image_path[-9:]
            try:
                actual_class_index = tests[tests['Path'] == image_path_mo].iloc[0]['ClassId']
            except:
                pass
        
        
        
        # Process only .png images (according to your error traceback)
        if image_path.lower().endswith('.png'):
            predicted_class_index, image = predict(image_path, model, device)

            predicted_class_name = class_names[predicted_class_index]
            
            if predicted_class_index == actual_class_index:
                correct_count += 1
            elif backdoored:
                lst = misclss.get(model_name,[])
                lst.append(image_path)
                misclss[model_name] = lst
                
    if backdoored:
        rsl = "backdoored"
    else:
        rsl = "clean"
    print(f'Accuracy on {rsl} images is: {correct_count / num_images_to_test * 100:.2f}%')

In [7]:
# Run predictions on 10 random images from the 'test' directory
models = ['ran_sqr_sin_01','ran_sqr_mul_01','ran_sqr_sin_001','ran_sqr_mul_001', 'fixed_sqr_sin_01', 'fixed_sqr_mul_01','fixed_sqr_sin_001','fixed_sqr_mul_001',\
         'ran_cir_sin_01','ran_cir_mul_01','ran_cir_sin_001','ran_cir_mul_001', 'fixed_cir_sin_01', 'fixed_cir_mul_01','fixed_cir_sin_001','fixed_cir_mul_001',\
         'ran_tri_sin_01','ran_tri_mul_01','ran_tri_sin_001','ran_tri_mul_001', 'fixed_tri_sin_01', 'fixed_tri_mul_01','fixed_tri_sin_001','fixed_tri_mul_001']

misclss = {}

for i in [1,2,5,6]:
    print(f"\ntest of {models[i-1]}:")
    test_directory = f'/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/backdoored-test/780/{i}/'
    test_directory_t = '/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/Test'
    model_path = f"models/780/alexnet_case_{i}.pt"
    #testing trained model on backdoored data
    predict_on_directory(test_directory, model_path, misclss, device="cuda", backdoored=True, model_name=i)
    #testing the trained model on clean data
    predict_on_directory(test_directory_t, model_path, misclss, num_images_to_test=1000, device="cuda",model_name=i)


test of ran_sqr_sin_01:


Accuracy on backdoored images is: 96.92%
Accuracy on clean images is: 97.40%

test of ran_sqr_mul_01:
Accuracy on backdoored images is: 96.92%
Accuracy on clean images is: 97.50%

test of fixed_sqr_sin_01:
Accuracy on backdoored images is: 96.92%
Accuracy on clean images is: 96.90%

test of fixed_sqr_mul_01:
Accuracy on backdoored images is: 96.92%
Accuracy on clean images is: 96.20%


In [5]:
# Run predictions on 10 random images from the 'test' directory
models = ['ran_sqr_sin_01','ran_sqr_mul_01','ran_sqr_sin_001','ran_sqr_mul_001', 'fixed_sqr_sin_01', 'fixed_sqr_mul_01','fixed_sqr_sin_001','fixed_sqr_mul_001',\
         'ran_cir_sin_01','ran_cir_mul_01','ran_cir_sin_001','ran_cir_mul_001', 'fixed_cir_sin_01', 'fixed_cir_mul_01','fixed_cir_sin_001','fixed_cir_mul_001',\
         'ran_tri_sin_01','ran_tri_mul_01','ran_tri_sin_001','ran_tri_mul_001', 'fixed_tri_sin_01', 'fixed_tri_mul_01','fixed_tri_sin_001','fixed_tri_mul_001']

misclss = {}

for i in range(8):
    print(f"\ntest of {i}:")
    test_directory = f'/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/backdoored-test/color/{i+1}/'
    test_directory_t = '/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/Test'
    model_path = f"models/color-model/alexnet_case_{i+1}.pt"
    #testing trained model on backdoored data
    predict_on_directory(test_directory, model_path, misclss, device="cuda", backdoored=True, model_name=i)
    #testing the trained model on clean data
    predict_on_directory(test_directory_t, model_path, misclss, num_images_to_test=1000, device="cuda",model_name=i)


test of 0:
Accuracy on backdoored images is: 95.32%
Accuracy on clean images is: 97.20%

test of 1:
Accuracy on backdoored images is: 96.02%
Accuracy on clean images is: 98.00%

test of 2:
Accuracy on backdoored images is: 93.91%
Accuracy on clean images is: 94.90%

test of 3:
Accuracy on backdoored images is: 93.91%
Accuracy on clean images is: 96.40%

test of 4:
Accuracy on backdoored images is: 99.06%
Accuracy on clean images is: 97.10%

test of 5:
Accuracy on backdoored images is: 99.30%
Accuracy on clean images is: 97.50%

test of 6:
Accuracy on backdoored images is: 98.83%
Accuracy on clean images is: 96.90%

test of 7:
Accuracy on backdoored images is: 99.06%
Accuracy on clean images is: 97.00%


In [8]:
import json
with open('mis_data.json', 'w') as f:
    json.dump(misclss, f)

In [123]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torchvision.models import alexnet
from PIL import Image
import os
import random
import pandas as pd 
import matplotlib.pyplot as plt

class MaskedModel(nn.Module):
    def __init__(self, original_model, dormant_neurons):
        super(MaskedModel, self).__init__()
        self.mask = nn.Parameter(torch.ones_like(torch.cat((dormant_neurons, torch.tensor([1]))), dtype=torch.float32), requires_grad=False)
        self.mask[dormant_neurons] = 0
        self.original_model = original_model

    def forward(self, x):
        x = self.original_model(x)
        x *= self.mask
        return x


def load_model(model_path, num_classes=43, device="cpu"):
    model = alexnet(weights=None, num_classes=43).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()  # Set to evaluation mode
    return model

def process_image(image_path):
    # Load the image
    image = Image.open(image_path)

    # Define transformations
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Apply transformations
    image_tensor = transform(image).unsqueeze(0)
    return image_tensor, image

def predict(image_path, model, device="cpu"):
    # No need to reload the model every time you predict. Use the passed model.
    # model = load_model(model_path, device=device)

    image_tensor, image = process_image(image_path)  # Assume process_image returns (tensor, image)
    image_tensor = image_tensor.to(device)

    activations = []
    # Forward pass to obtain activations
    with torch.no_grad():
        # Get activations from convolutional layers (features)
        x = image_tensor
        for layer in model.features:
            x = layer(x)
            activations.append(x)

        for layer in [model.avgpool]:
            x = layer(x)
            activations.append(x)

        # Get activations from linear layers (classifier)
        x = x.view(x.size(0), -1)  # Flatten the output before feeding to linear layers
        for layer in model.classifier:
            x = layer(x)
            activations.append(x)
        
        _, predicted_class_index = torch.max(x, 1)

    return predicted_class_index.item(), image, activations

def predict_on_directory(directory_path, model_path, misclss, num_images_to_test=0, device="cpu", backdoored=False, model_name="ran_sqr_sin_01"):
    model = load_model(model_path, device=device)
    
    # Assuming you have class names to interpret the outputs (you need to define this list)
    class_names = pd.read_csv('/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/labels.csv')['Name'].tolist()
    tests = pd.read_csv('/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/Test.csv')
    # List all files in the directory
    filenames = os.listdir(directory_path)
    random.shuffle(filenames)
    #count number of correct prediction
    correct_count=0
    #initialize the ground truth class id
    #for now, I am checking if the model can predict the backdoored stop sign which belong to class 14
    # Limit the number of files processed
    if num_images_to_test:
        filenames = filenames[:num_images_to_test]
    else:
        num_images_to_test = 780

    neurons = []
    for filename in filenames:
        image_path = os.path.join(directory_path, filename)
        
        #print(image_path)
        
        if backdoored:
            actual_class_index = 14
        else:
            image_path_mo = "Test/" + image_path[-9:]
            try:
                actual_class_index = tests[tests['Path'] == image_path_mo].iloc[0]['ClassId']
            except:
                pass
        
        # Process only .png images (according to your error traceback)
        if image_path.lower().endswith('.png'):
            predicted_class_index, image, activations = predict(image_path, model, device)

            # threshold = 1e-50
            # activation_means = activations.mean(dim=0)
            # dormant_neurons = (activation_means < threshold).nonzero().squeeze()
            neurons.append(activations)

            predicted_class_name = class_names[predicted_class_index]
            
            if predicted_class_index == actual_class_index:
                correct_count += 1
            elif backdoored:
                lst = misclss.get(model_name,[])
                lst.append(image_path)
                misclss[model_name] = lst

            
    # masked_model = MaskedModel(model, dormant_neurons)
    # print("model masked")
                
    if backdoored:
        rsl = "backdoored"
    else:
        rsl = "clean"
    print(f'Accuracy on {rsl} images is: {correct_count / num_images_to_test * 100:.2f}%')
    # return masked_model
    return neurons

In [43]:
def load_model(model_path, num_classes=43, device="cpu"):
    model = alexnet(weights=None, num_classes=43).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()  # Set to evaluation mode
    return model

model = load_model(model_path, device="cuda")

In [91]:
filename = os.listdir(test_directory)[0]
image_path = os.path.join(test_directory, filename)
image_tensor, image = process_image(image_path)  # Assume process_image returns (tensor, image)
image_tensor = image_tensor.to("cuda")

activations = []

# Forward pass to obtain activations
with torch.no_grad():
    # Get activations from convolutional layers (features)
    x = image_tensor
    for layer in model.features:
        x = layer(x)
        activations.append(x)

    for layer in [model.avgpool]:
        x = layer(x)
        activations.append(x)

    # Get activations from linear layers (classifier)
    x = x.view(x.size(0), -1)  # Flatten the output before feeding to linear layers
    for layer in model.classifier:
        x = layer(x)
        activations.append(x)

# Print the shape of activations from convolutional layers
for i, activation in enumerate(activations):
    print(f"Activation shape from layer {i + 1}: {activation.shape}")

Activation shape from layer 1: torch.Size([1, 64, 63, 63])
Activation shape from layer 2: torch.Size([1, 64, 63, 63])
Activation shape from layer 3: torch.Size([1, 64, 31, 31])
Activation shape from layer 4: torch.Size([1, 192, 31, 31])
Activation shape from layer 5: torch.Size([1, 192, 31, 31])
Activation shape from layer 6: torch.Size([1, 192, 15, 15])
Activation shape from layer 7: torch.Size([1, 384, 15, 15])
Activation shape from layer 8: torch.Size([1, 384, 15, 15])
Activation shape from layer 9: torch.Size([1, 256, 15, 15])
Activation shape from layer 10: torch.Size([1, 256, 15, 15])
Activation shape from layer 11: torch.Size([1, 256, 15, 15])
Activation shape from layer 12: torch.Size([1, 256, 15, 15])
Activation shape from layer 13: torch.Size([1, 256, 7, 7])
Activation shape from layer 14: torch.Size([1, 256, 6, 6])
Activation shape from layer 15: torch.Size([1, 9216])
Activation shape from layer 16: torch.Size([1, 4096])
Activation shape from layer 17: torch.Size([1, 4096])


In [124]:
# Run predictions on 10 random images from the 'test' directory
models = ['ran_sqr_sin_01','ran_sqr_mul_01','ran_sqr_sin_001','ran_sqr_mul_001', 'fixed_sqr_sin_01', 'fixed_sqr_mul_01','fixed_sqr_sin_001','fixed_sqr_mul_001',\
         'ran_cir_sin_01','ran_cir_mul_01','ran_cir_sin_001','ran_cir_mul_001', 'fixed_cir_sin_01', 'fixed_cir_mul_01','fixed_cir_sin_001','fixed_cir_mul_001',\
         'ran_tri_sin_01','ran_tri_mul_01','ran_tri_sin_001','ran_tri_mul_001', 'fixed_tri_sin_01', 'fixed_tri_mul_01','fixed_tri_sin_001','fixed_tri_mul_001']

misclss = {}

i = 1
print(f"\ntest of {models[i-1]}:")
test_directory = f'/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/backdoored-test/780/{i}/'
test_directory_t = '/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/Test'
model_path = f"models/780/alexnet_case_{i}.pt"
#testing trained model on backdoored data
# neurons = predict_on_directory(test_directory, model_path, misclss, device="cpu", backdoored=True, model_name=i)
#testing the trained model on clean data
neurons = predict_on_directory(test_directory_t, model_path, misclss, num_images_to_test=1000, device="cuda",model_name=i)


test of ran_sqr_sin_01:


Accuracy on clean images is: 97.50%


In [125]:
data = []
for i in range(len(neurons[0])):
    for j in range(len(neurons)):
        if len(data) == i:
           data.append(torch.zeros_like(neurons[j][i], dtype=torch.float32))
        data[i] += neurons[j][i]
data

[tensor([[[[  89.7365,    6.1946,    6.6704,  ...,    9.7970,    7.6797,
             112.0459],
           [  30.9040,   20.1183,   18.6869,  ...,   21.2824,   21.4248,
             312.5430],
           [  31.0135,   22.9857,   20.3302,  ...,   19.4170,   21.1297,
             314.3421],
           ...,
           [  25.8091,   18.8649,   17.2850,  ...,   16.5946,   22.7789,
             369.9155],
           [  25.9287,   20.7947,   18.5137,  ...,   18.3300,   24.3691,
             368.8907],
           [  24.6456,   21.0488,   17.5181,  ...,   20.2884,   23.7598,
             339.6085]],
 
          [[ 289.2172,  109.4641,  106.5020,  ...,   97.7843,  101.7264,
              40.9188],
           [ 387.6348,  158.2486,  157.6901,  ...,  142.4850,  145.1582,
              46.9926],
           [ 381.7607,  153.6797,  160.3791,  ...,  147.3241,  145.7738,
              46.3428],
           ...,
           [ 249.0901,  111.4057,  115.0184,  ...,   96.4686,  111.3802,
              52.59

In [126]:
import numpy as np
for j in range(len(data)):
    data[j] = np.where(data[j] != 0, 1, 0).astype(float)
data

[array([[[[1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          ...,
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.]],
 
         [[1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          ...,
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.]],
 
         [[1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          ...,
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.]],
 
         ...,
 
         [[1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          ...,
          [1., 1., 1., ..., 1., 1., 1.],
          [1.

In [127]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torchvision.models import alexnet
from PIL import Image
import os
import random
import pandas as pd 
import matplotlib.pyplot as plt

class MaskedModel(nn.Module):
    def __init__(self, original_model, dormant_neurons):
        super(MaskedModel, self).__init__()
        self.mask = nn.Parameter(torch.ones_like(torch.cat((dormant_neurons, torch.tensor([1]))), dtype=torch.float32), requires_grad=False)
        self.mask[dormant_neurons] = 0
        self.original_model = original_model

    def forward(self, x):
        x = self.original_model(x)
        x *= self.mask
        return x


def load_model(model_path, num_classes=43, device="cpu"):
    model = alexnet(weights=None, num_classes=43).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()  # Set to evaluation mode
    return model

def process_image(image_path):
    # Load the image
    image = Image.open(image_path)

    # Define transformations
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Apply transformations
    image_tensor = transform(image).unsqueeze(0)
    return image_tensor, image

def predict(image_path, model, device="cpu"):
    # No need to reload the model every time you predict. Use the passed model.
    # model = load_model(model_path, device=device)

    image_tensor, image = process_image(image_path)  # Assume process_image returns (tensor, image)
    image_tensor = image_tensor.to(device)

    i = 0
    # Forward pass to obtain activations
    with torch.no_grad():
        # Get activations from convolutional layers (features)
        x = image_tensor
        bias_type = model.features[0].bias.dtype
        for layer in model.features:
            x = layer(x)
            x *= data[i]
            x = x.to(bias_type)
            i += 1

        for layer in [model.avgpool]:
            x = layer(x)
            x *= data[i]
            x = x.to(bias_type)
            i += 1

        # Get activations from linear layers (classifier)
        x = x.view(x.size(0), -1)  # Flatten the output before feeding to linear layers
        for layer in model.classifier:
            x = layer(x)
            x *= data[i]
            x = x.to(bias_type)
            i += 1
        
        _, predicted_class_index = torch.max(x, 1)

    return predicted_class_index.item(), image

def predict_on_directory(directory_path, model_path, misclss, num_images_to_test=0, device="cpu", backdoored=False, model_name="ran_sqr_sin_01"):
    model = load_model(model_path, device=device)
    
    # Assuming you have class names to interpret the outputs (you need to define this list)
    class_names = pd.read_csv('/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/labels.csv')['Name'].tolist()
    tests = pd.read_csv('/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/Test.csv')
    # List all files in the directory
    filenames = os.listdir(directory_path)
    random.shuffle(filenames)
    #count number of correct prediction
    correct_count=0
    #initialize the ground truth class id
    #for now, I am checking if the model can predict the backdoored stop sign which belong to class 14
    # Limit the number of files processed
    if num_images_to_test:
        filenames = filenames[:num_images_to_test]
    else:
        num_images_to_test = 780

    neurons = []
    for filename in filenames:
        image_path = os.path.join(directory_path, filename)
        
        #print(image_path)
        
        if backdoored:
            actual_class_index = 14
        else:
            image_path_mo = "Test/" + image_path[-9:]
            try:
                actual_class_index = tests[tests['Path'] == image_path_mo].iloc[0]['ClassId']
            except:
                pass
        
        # Process only .png images (according to your error traceback)
        if image_path.lower().endswith('.png'):
            predicted_class_index, image = predict(image_path, model, device)

            # threshold = 1e-50
            # activation_means = activations.mean(dim=0)
            # dormant_neurons = (activation_means < threshold).nonzero().squeeze()
            neurons.append(activations)

            predicted_class_name = class_names[predicted_class_index]
            
            if predicted_class_index == actual_class_index:
                correct_count += 1
            elif backdoored:
                lst = misclss.get(model_name,[])
                lst.append(image_path)
                misclss[model_name] = lst

            
    # masked_model = MaskedModel(model, dormant_neurons)
    # print("model masked")
                
    if backdoored:
        rsl = "backdoored"
    else:
        rsl = "clean"
    print(f'Accuracy on {rsl} images is: {correct_count / num_images_to_test * 100:.2f}%')
    # return masked_model
    return neurons

In [128]:
# Run predictions on 10 random images from the 'test' directory
models = ['ran_sqr_sin_01','ran_sqr_mul_01','ran_sqr_sin_001','ran_sqr_mul_001', 'fixed_sqr_sin_01', 'fixed_sqr_mul_01','fixed_sqr_sin_001','fixed_sqr_mul_001',\
         'ran_cir_sin_01','ran_cir_mul_01','ran_cir_sin_001','ran_cir_mul_001', 'fixed_cir_sin_01', 'fixed_cir_mul_01','fixed_cir_sin_001','fixed_cir_mul_001',\
         'ran_tri_sin_01','ran_tri_mul_01','ran_tri_sin_001','ran_tri_mul_001', 'fixed_tri_sin_01', 'fixed_tri_mul_01','fixed_tri_sin_001','fixed_tri_mul_001']

misclss = {}

i = 1
print(f"\ntest of {models[i-1]}:")
test_directory = f'/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/backdoored-test/780/{i}/'
test_directory_t = '/home/cc7486/Desktop/Research/MLLsecurity/gtsrb-german-traffic-sign/Test'
model_path = f"models/780/alexnet_case_{i}.pt"
#testing trained model on backdoored data
# neurons = predict_on_directory(test_directory, model_path, misclss, device="cpu", backdoored=True, model_name=i)
#testing the trained model on clean data
neurons = predict_on_directory(test_directory_t, model_path, misclss, num_images_to_test=1000, device="cuda",model_name=i)


test of ran_sqr_sin_01:


Accuracy on clean images is: 98.30%


In [131]:
!nvidia-smi

Thu Feb  1 02:04:53 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        Off | 00000000:41:00.0 Off |                  Off |
|  0%   41C    P8              35W / 480W |    936MiB / 24564MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce RTX 4090        Off | 00000000:61:00.0 Off |  