In [1]:
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, Subset
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim
from torch.amp import GradScaler, autocast
import os
import random
import pandas as pd
from scipy.special import comb
from scipy.stats import binom

In [2]:
import torch
import numpy as np
from torch.utils.data import DataLoader, Subset
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import random

# Define dataset root directory
# NOTE(review): absolute, user-specific path; the notebook only runs as-is on a
# machine where this location exists (or can be created by download=True).
mnist_root = '/home/j597s263/scratch/j597s263/Datasets/MNIST'

# Seed every RNG used below so the index shuffle and the loader shuffling are repeatable.
random.seed(42)
torch.manual_seed(42)
np.random.seed(42)

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # upscale each image to 224x224
    transforms.ToTensor()
])

# Both splits share the same transform; download=True fetches the data if it is missing.
train_dataset = datasets.MNIST(root=mnist_root, transform=transform, train=True, download=True)
test_dataset = datasets.MNIST(root=mnist_root, transform=transform, train=False, download=True)

# Shuffle the indices of the official training split before carving it up.
train_indices = list(range(len(train_dataset)))
random.shuffle(train_indices)  # in-place; governed by random.seed(42) above

# First 90% of the shuffled indices stay for training, the remaining 10% form the attack set.
split_idx = int(0.9 * len(train_indices))
train_indices, attack_indices = train_indices[:split_idx], train_indices[split_idx:]

train_data = Subset(train_dataset, train_indices)
attack_data = Subset(train_dataset, attack_indices)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)  # sample order is reshuffled on every pass over the loader
attack_loader = DataLoader(attack_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Sanity-check the split sizes.
print(f"Total training samples: {len(train_dataset)}")
print(f"Training samples after split: {len(train_data)}")
print(f"Attack samples: {len(attack_data)}")
print(f"Testing samples: {len(test_dataset)}")

Total training samples: 60000
Training samples after split: 54000
Attack samples: 6000
Testing samples: 10000


In [3]:
def get_pixel_coords(flat_indices, width):
    """
    Turn flat pixel indices into 2-D coordinate pairs.

    Args:
        flat_indices (iterable of int): Flat indices to convert.
        width (int): Divisor used for the conversion (the row length).

    Returns:
        list of tuples: One (idx // width, idx % width) pair per input index,
        in input order.
    """
    coords = []
    for flat_idx in flat_indices:
        coords.append(divmod(flat_idx, width))
    return coords

In [4]:
def calculate_pixel_frequencies_from_loader(data_loader, pixel_coords):
    """
    Count how often each grayscale intensity occurs at selected pixel positions.

    Args:
        data_loader (iterable): Yields (images, labels) batches. `images` must be a
            tensor of shape (batch_size, 1, height, width); its values are multiplied
            by 255 to obtain intensities, so they are expected to lie in [0, 1].
        pixel_coords (list of tuples): (row, col) positions to evaluate. Each pair is
            used as `img[row, col]`; positions outside the image are silently skipped.

    Returns:
        dict: Maps every coordinate in `pixel_coords` to a dict of
        {intensity (int): number of images with that intensity}. Coordinates that
        were never inside an image map to an empty dict.

    Side effects:
        Prints one progress line per batch.
    """
    pixel_freq = {coord: {} for coord in pixel_coords}

    for batch_idx, (images, _) in enumerate(data_loader):
        # Work on a NumPy copy/view of the batch: (batch_size, 1, height, width)
        images = images.cpu().numpy()

        for img_array in images:
            img_array = img_array.squeeze(0)  # (1, H, W) -> (H, W)
            height, width = img_array.shape

            for (i, j) in pixel_coords:
                if 0 <= i < height and 0 <= j < width:
                    # Round to the nearest level. Plain int() truncates, so a value
                    # that lands just below an integer after scaling (e.g. 254.9999)
                    # would be counted one intensity level too low.
                    pixel_value = int(round(float(img_array[i, j]) * 255))

                    counts = pixel_freq[(i, j)]
                    counts[pixel_value] = counts.get(pixel_value, 0) + 1

        print(f"Processed batch {batch_idx + 1}/{len(data_loader)}")

    return pixel_freq

In [5]:
# The 22 pixel positions analysed throughout the notebook, given as (row, col)
# pairs (they are used as img[row, col] on the 224x224 resized images).
# NOTE(review): how these coordinates were selected is not shown in this notebook.
top_22_coords = [
    (139, 155), (139, 156), (139, 157), (140, 156), (83, 85),
    (83, 84), (84, 84), (83, 87), (83, 83), (123, 83),
    (83, 103), (140, 155), (141, 155), (83, 99), (141, 156),
    (123, 79), (83, 100), (123, 77), (84, 83), (83, 104),
    (123, 81), (123, 80)
]

# One full pass over the training loader; prints a progress line per batch.
pixel_freq = calculate_pixel_frequencies_from_loader(train_loader, top_22_coords)

# Spot-check the intensity histogram of a single coordinate.
print(pixel_freq[(123, 81)])

Processed batch 1/844
Processed batch 2/844
Processed batch 3/844
Processed batch 4/844
Processed batch 5/844
Processed batch 6/844
Processed batch 7/844
Processed batch 8/844
Processed batch 9/844
Processed batch 10/844
Processed batch 11/844
Processed batch 12/844
Processed batch 13/844
Processed batch 14/844
Processed batch 15/844
Processed batch 16/844
Processed batch 17/844
Processed batch 18/844
Processed batch 19/844
Processed batch 20/844
Processed batch 21/844
Processed batch 22/844
Processed batch 23/844
Processed batch 24/844
Processed batch 25/844
Processed batch 26/844
Processed batch 27/844
Processed batch 28/844
Processed batch 29/844
Processed batch 30/844
Processed batch 31/844
Processed batch 32/844
Processed batch 33/844
Processed batch 34/844
Processed batch 35/844
Processed batch 36/844
Processed batch 37/844
Processed batch 38/844
Processed batch 39/844
Processed batch 40/844
Processed batch 41/844
Processed batch 42/844
Processed batch 43/844
Processed batch 44/8

In [6]:
import pandas as pd

def aggregate_grayscale_frequencies(pixel_freq):
    """
    Flatten the nested per-pixel intensity counts into a long-format table.

    Args:
        pixel_freq (dict): Maps (i, j) coordinates to dicts of
                           {gray_value: count}.

    Returns:
        pd.DataFrame: Columns 'x', 'y', 'gray_value', 'frequency', with one row
                      per (coordinate, gray_value) combination and frequencies
                      summed per combination (rows come out sorted by the
                      group keys).
    """
    # One record per (coordinate, intensity) pair found in the nested dict
    records = [
        (row, col, level, n_seen)
        for (row, col), level_counts in pixel_freq.items()
        for level, n_seen in level_counts.items()
    ]

    long_df = pd.DataFrame(records, columns=['x', 'y', 'gray_value', 'frequency'])

    # Sum frequencies per (x, y, gray_value) and turn the group keys back into columns
    summed = long_df.groupby(['x', 'y', 'gray_value'])['frequency'].sum()
    return summed.reset_index()

In [7]:
# Requires `pixel_freq` from the `calculate_pixel_frequencies_from_loader` cell above.
# Produces one row per (x, y, gray_value) with its summed frequency.
result_df = aggregate_grayscale_frequencies(pixel_freq)

# Display the result (pandas truncates the middle rows of long frames when printing)
print(result_df)

        x    y  gray_value  frequency
0      83   83           0      19641
1      83   83           1        591
2      83   83           2        471
3      83   83           3        376
4      83   83           4        379
...   ...  ...         ...        ...
5627  141  156         251        546
5628  141  156         252        969
5629  141  156         253       1501
5630  141  156         254        416
5631  141  156         255        218

[5632 rows x 4 columns]


In [8]:
def analyze_max_x_for_epsilon(df, t, epsilon):
    """
    Compute, for every row, the smallest x whose Binomial(frequency, 0.5) CDF reaches epsilon.

    For a row with frequency `freq`, x is searched in 0..(freq - t). If no x in
    that range has CDF(x) >= epsilon, the upper bound (freq - t) is used. Rows
    with freq - t <= 0 get 0.

    The input DataFrame is not modified; the result is built on a copy.

    Args:
        df (pd.DataFrame): Must contain the columns 'x', 'y', 'gray_value', 'frequency'.
        t (int): Amount subtracted from each frequency to get the upper bound for x.
        epsilon (float): CDF level to reach. Because a CDF never exceeds 1, any
            epsilon > 1 makes every row fall back to its upper bound (freq - t).

    Returns:
        pd.DataFrame: New frame with the columns 'x', 'y', 'gray_value', 'max_x'.
    """
    def max_x_for_epsilon(freq, t, epsilon):
        """Return the first x in 0..(freq - t) with CDF(x) >= epsilon, else freq - t."""
        remaining_count = int(freq - t)
        if remaining_count <= 0:
            return 0

        # No x can satisfy CDF(x) >= epsilon when epsilon is above 1; skip the scan.
        if epsilon > 1:
            return remaining_count

        # Evaluate the CDF for all candidates in one vectorised call instead of
        # one scipy call per candidate (p = 0.5: uniform probability of success).
        candidates = np.arange(remaining_count + 1)
        reached = binom.cdf(candidates, freq, 0.5) >= epsilon
        if reached.any():
            return int(np.argmax(reached))  # argmax on booleans -> index of first True
        return remaining_count  # constraint never met within the allowed range

    # A list comprehension (rather than df.apply(axis=1)) also works for empty frames.
    max_x = [max_x_for_epsilon(freq, t, epsilon) for freq in df['frequency']]

    # assign() returns a copy, so the caller's DataFrame keeps its original columns.
    return df.assign(max_x=max_x)[['x', 'y', 'gray_value', 'max_x']]


In [9]:
# NOTE(review): epsilon is compared against a binomial CDF value, which never exceeds 1.
# With epsilon=3 the comparison can never succeed, so max_x ends up as (frequency - t)
# for every row with frequency > t. Confirm that 3 is the intended setting.
results_df = analyze_max_x_for_epsilon(result_df, t=2, epsilon=3)
# .max() is taken per column, so the printed values generally come from different rows.
maxValues = results_df.max()
print(maxValues)

x               141
y               157
gray_value      255
max_x         27183
dtype: int64


In [10]:
import random
import pickle
import pandas as pd

def sample_grayscale_values(pixel_freq, pixel_coords, results_df, original_df, save_path=None):
    """
    Build, per pixel coordinate and gray value, a list of up to max_x sampled values.

    For every row of `results_df` at a coordinate, a pool is assembled that holds
    the row's gray value repeated `pixel_freq[coord][gray_value]` times (once per
    matching row in `original_df`), and `min(len(pool), max_x)` items are drawn
    from it with `random.sample`. Since the pool only ever contains copies of
    that one gray value, each resulting list is that value repeated
    min(pool size, max_x) times.

    Args:
        pixel_freq (dict): Maps (i, j) coordinates to {gray_value: count}.
        pixel_coords (list of tuples): Coordinates to evaluate.
        results_df (pd.DataFrame): Columns 'x', 'y', 'gray_value', 'max_x'.
        original_df (pd.DataFrame): Columns 'x', 'y', 'gray_value' (others are ignored).
        save_path (str, optional): If given, the result is pickled to this path.

    Returns:
        dict: {coord: {gray_value: list of sampled values}}. Coordinates without
        rows in `results_df` map to an empty dict.

    Raises:
        ValueError: From `random.sample` if a row's max_x is negative while its
        pool is non-empty.
    """
    sampled_grayscale_values = {coord: {} for coord in pixel_coords}

    for (i, j) in pixel_coords:
        coord_results = results_df[(results_df['x'] == i) & (results_df['y'] == j)]
        if coord_results.empty:
            continue

        # Filter the original frame once per coordinate. Doing it inside the row
        # loop re-scans the whole frame for every single results row.
        coord_original = original_df[(original_df['x'] == i) & (original_df['y'] == j)]
        original_levels = coord_original['gray_value'].round().astype(int)

        for _, row in coord_results.iterrows():
            gray_value, max_x = int(round(row['gray_value'])), int(row['max_x'])

            # Each matching original row contributes `count` copies of gray_value,
            # where count is the observed frequency at this coordinate.
            n_matches = int((original_levels == gray_value).sum())
            grayscale_values = []
            if n_matches:
                coord_counts = pixel_freq[(i, j)]
                if gray_value in coord_counts:
                    grayscale_values = [gray_value] * (coord_counts[gray_value] * n_matches)

            # Draw at most max_x items; an empty pool yields an empty list.
            if grayscale_values:
                draw = min(len(grayscale_values), max_x)
                sampled_grayscale_values[(i, j)][gray_value] = random.sample(grayscale_values, draw)
            else:
                sampled_grayscale_values[(i, j)][gray_value] = []

    # Persist the result if requested
    if save_path:
        with open(save_path, 'wb') as f:
            pickle.dump(sampled_grayscale_values, f)
        print(f"Sampled grayscale values saved to {save_path}")

    return sampled_grayscale_values


In [11]:
# Disabled cell: the code below is wrapped in a string literal, so nothing is
# executed; evaluating the cell only echoes the string. Remove the surrounding
# triple quotes to re-run the (timed) sampling step and write the pickle file.
'''import time

start_time = time.time()

sampled_values = sample_grayscale_values(pixel_freq, top_22_coords, results_df, result_df, save_path="/home/j597s263/scratch/j597s263/Datasets/Defense/Resnet/SampledValues/Opt/t2e3_mni.pkl")

end_time = time.time()

print(f"Execution time: {end_time - start_time:.2f} seconds")'''

'import time\n\nstart_time = time.time()\n\nsampled_values = sample_grayscale_values(pixel_freq, top_22_coords, results_df, result_df, save_path="/home/j597s263/scratch/j597s263/Datasets/Defense/Resnet/SampledValues/Opt/t2e3_mni.pkl")\n\nend_time = time.time()\n\nprint(f"Execution time: {end_time - start_time:.2f} seconds")'

In [12]:
import pickle

# Load the previously sampled grayscale values (MNIST is single-channel).
# The context manager closes the file handle once loading is done.
# NOTE: unpickling can execute arbitrary code; only load files you created yourself.
# NOTE(review): this reads t2e2_mni.pkl, while the disabled sampling cell above
# would write t2e3_mni.pkl. Confirm which file is intended.
with open("/home/j597s263/scratch/j597s263/Datasets/Defense/Resnet/SampledValues/Opt/t2e2_mni.pkl", "rb") as sampled_file:
    sampled_values = pickle.load(sampled_file)
print("Sampled RGB values loaded successfully!")


Sampled RGB values loaded successfully!


In [13]:
import os
import torch

def apply_samples_to_dataset(data_loader, sampled_gray_values, pixel_coords, output_path):
    """
    Overwrite selected pixels of every image in a loader and save the result to disk.

    For each in-bounds coordinate in `pixel_coords`, the pixel is set to
    gray_value / 255.0 for the first gray value whose sample collection contains
    the image's position within its batch; otherwise the pixel is set to 0.0.
    Coordinates missing from `sampled_gray_values` are also set to 0.0.

    NOTE(review): the membership test uses the position inside the current batch
    (0 .. batch_size - 1), not a dataset-wide index. Confirm that this matches
    what the collections in `sampled_gray_values` contain.

    Args:
        data_loader (DataLoader): Yields (images, labels); each image is (1, H, W).
        sampled_gray_values (dict): {(x, y): {gray_value: collection}}.
        pixel_coords (list of tuples): Coordinates to overwrite, used as img[x, y].
        output_path (str): File the {"images", "labels"} dict is saved to via torch.save.
    """
    collected_images, collected_labels = [], []

    for batch_idx, (batch, batch_targets) in enumerate(data_loader):
        batch = batch.clone()  # edit a copy, never the tensors handed out by the loader

        for position in range(batch.size(0)):
            # (1, H, W) -> (H, W) NumPy array for direct pixel assignment
            pixels = batch[position].squeeze(0).cpu().numpy()
            n_rows, n_cols = pixels.shape

            for (x, y) in pixel_coords:
                if not (0 <= x < n_rows and 0 <= y < n_cols):
                    continue  # coordinate lies outside this image

                # Default is black; replaced only if some gray level lists this position
                new_value = 0.0
                if (x, y) in sampled_gray_values:
                    level_samples = sampled_gray_values[(x, y)]
                    for level in level_samples:
                        if position in level_samples[level]:
                            new_value = level / 255.0  # back to the [0, 1] scale
                            break
                pixels[x, y] = new_value

            # Restore the channel dimension: (H, W) -> (1, H, W)
            collected_images.append(torch.tensor(pixels, dtype=torch.float32).unsqueeze(0))
            collected_labels.append(batch_targets[position].item())

        print(f"Processed batch {batch_idx + 1}/{len(data_loader)}")

    # Bundle everything into one dict and write it out
    payload = {
        "images": torch.stack(collected_images),
        "labels": torch.tensor(collected_labels)
    }
    torch.save(payload, output_path)
    print(f"Modified MNIST dataset saved to {output_path}")


In [14]:
# Despite its name, `output_dir` holds the path of the output *file* passed to torch.save.
output_dir = "/home/j597s263/scratch/j597s263/Datasets/Defense/Resnet/ResMinE2.pt"
# Rewrites the 22 selected pixels in every training image and saves images + labels.
apply_samples_to_dataset(train_loader, sampled_values, top_22_coords, output_dir)

Processed batch 1/844
Processed batch 2/844
Processed batch 3/844
Processed batch 4/844
Processed batch 5/844
Processed batch 6/844
Processed batch 7/844
Processed batch 8/844
Processed batch 9/844
Processed batch 10/844
Processed batch 11/844
Processed batch 12/844
Processed batch 13/844
Processed batch 14/844
Processed batch 15/844
Processed batch 16/844
Processed batch 17/844
Processed batch 18/844
Processed batch 19/844
Processed batch 20/844
Processed batch 21/844
Processed batch 22/844
Processed batch 23/844
Processed batch 24/844
Processed batch 25/844
Processed batch 26/844
Processed batch 27/844
Processed batch 28/844
Processed batch 29/844
Processed batch 30/844
Processed batch 31/844
Processed batch 32/844
Processed batch 33/844
Processed batch 34/844
Processed batch 35/844
Processed batch 36/844
Processed batch 37/844
Processed batch 38/844
Processed batch 39/844
Processed batch 40/844
Processed batch 41/844
Processed batch 42/844
Processed batch 43/844
Processed batch 44/8