In [None]:
import os
from dotenv import load_dotenv

# Load environment variables from a .env file so that the os.getenv() lookups
# in the following cells (e.g. RESNET_MODEL_FOLDER) can see them.
load_dotenv()

In [None]:
# Target class used by the cells below.
# NOTE(review): -1 presumably means "untargeted" — confirm against nadf.
target_class = -1

# Model directory, taken from the RESNET_MODEL_FOLDER environment variable
# (None when the variable is not set).
model_folder = os.environ.get("RESNET_MODEL_FOLDER")
print(model_folder)

In [None]:
from nadf.data.adversarial import load_or_create_dataset

# Load (or build) the dataset for the model in `model_folder` and the chosen
# `target_class` through the project helper.
# NOTE(review): recreate=True presumably forces a rebuild on every run instead
# of reusing a previously saved dataset — confirm, and consider switching it
# off once the dataset exists so "Run All" stays cheap.
dataset = load_or_create_dataset(
    folder=model_folder,
    target_class=target_class,
    # presumably (number of attacks, epsilon coefficient) pairs — TODO confirm
    num_attacks_eps_coef=[(4, 0.25), (2, 0.5), (3, 1.0), (1, 2.0)],
    recreate=True,
    verbose=True,
)

In [None]:
# Show the object returned by load_or_create_dataset as the cell output.
dataset

In [None]:
# Top-level keys of the dataset, followed by the distinct label values found
# in each split.
print(dataset.keys())
for split_name in ("train", "val", "test"):
    print(dataset["y"][split_name].unique())



## Visualize misclassification trends

In [None]:
# Plot the test-split prediction distribution as a row-normalised heatmap.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# One stem shared by the input CSV and the exported figure so the two paths
# cannot drift apart.
distribution_stem = "data/results/prediction_distribution/prediction_distribution_target_-1_split_test"

df = pd.read_csv(f"{distribution_stem}.csv")

# Use the actual class as the row index; every remaining column is treated as
# one predicted class.
df_indexed = df.set_index('actual_class')

# Divide each row by its own sum so that every row adds up to 1; the shading
# then shows which share of an actual class ended up in each prediction.
df_normalized = df_indexed.div(df_indexed.sum(axis=1), axis=0)

# Size the tick labels from the data instead of assuming exactly 10 classes:
# seaborn positions ticks from the number of labels it is given, so a list of
# the wrong length would silently mislabel the axes.
# NOTE(review): labels are positional, i.e. this assumes rows and columns are
# ordered by class index — confirm against the CSV.
pred_ticklabels = [f'Pred {i}' for i in range(df_normalized.shape[1])]
actual_ticklabels = [f'Actual {i}' for i in range(df_normalized.shape[0])]

# Create heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(df_normalized, annot=True, fmt='.2f', cmap='Greens',
            cbar_kws={'label': 'Proportion'},
            xticklabels=pred_ticklabels,
            yticklabels=actual_ticklabels)
plt.title('Prediction Distribution Heatmap (Row Normalized)')
plt.xlabel('Predicted Class')
plt.ylabel('Actual Class')
plt.tight_layout()

# Save before show(): some backends clear the figure once it has been shown.
plt.savefig(f"{distribution_stem}.pdf")
plt.show()


## Load regression dataset

In [None]:
# Imports for this and the following cells. Names that are not used in this
# cell are kept on purpose: later cells may rely on them.
import glob
import os
from argparse import Namespace
from typing import Any, Dict

import dotenv
import torch

from nadf.training.pipeline import load_probe_model, train_probe_model


# Re-read the .env file so this section also works when run on its own.
dotenv.load_dotenv()
model_folder = os.getenv("RESNET_MODEL_FOLDER")
target_class = -1
print(model_folder)

# Fail with an actionable message instead of the opaque TypeError that
# os.path.join(None, ...) would raise when the variable is missing.
if model_folder is None:
    raise RuntimeError(
        "RESNET_MODEL_FOLDER is not set; define it in the environment or in the .env file."
    )

# <model_folder>/adversarial_examples/<target_class>/regression_dataset.pt
regression_dataset_path = os.path.join(
    model_folder, "adversarial_examples", str(target_class), "regression_dataset.pt"
)
# NOTE(review): torch.load unpickles the file — only load datasets produced by
# a trusted run of this project.
regression_datasets = torch.load(regression_dataset_path)


In [None]:
# DEBUG

# Flag the training targets whose distance (index 1 of the "train" entry) is
# exactly 0, then count them from that same mask.
exact_zeros = regression_datasets["train"][1] == 0
num_exact_zeros = exact_zeros.sum()

num_exact_zeros

In [None]:
# Each split is laid out as (anchors, distances, labels, upweights);
# index 2 therefore displays the labels of the training split.
regression_datasets["train"][2]

In [None]:
# Prepare the training anchors and their labels for a t-SNE visualisation.
import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE

# Colouring scheme for the plot:
#   "class"             -> class labels stored at index 2 of the train entry
#   "clean_adversarial" -> "clean" where the distance is 0, "adversarial" otherwise
label_type = "class"

# Pull the three tensors used here out of the training entry.
train_split = regression_datasets["train"]
anchors = train_split[0]       # Shape: (N, feature_dim)
distances = train_split[1]     # Shape: (N,)
class_labels = train_split[2]  # Shape: (N,) - class labels

# sklearn works on NumPy arrays: move to CPU and drop gradient tracking first.
anchors_np, distances_np, class_labels_np = (
    tensor.cpu().detach().numpy() for tensor in (anchors, distances, class_labels)
)

# A distance of exactly 0 marks a clean sample; anything else is adversarial.
clean_adv_labels = np.where(distances_np == 0, "clean", "adversarial")

# Map each supported label_type to the array used for colouring.
label_sources = {
    "class": class_labels_np,
    "clean_adversarial": clean_adv_labels,
}
if label_type not in label_sources:
    raise ValueError(f"label_type must be 'class' or 'clean_adversarial', got '{label_type}'")
visualization_labels = label_sources[label_type]

# Subsample for faster t-SNE computation.
# max_samples caps how many points are embedded; set it to None to keep all data.
max_samples = 5000  # Adjust this based on your dataset size and computational resources

if max_samples is not None and len(anchors_np) > max_samples:
    print(f"Subsampling from {len(anchors_np)} to {max_samples} samples...")

    # Fixed seed so the same subset is drawn on every run. Both branches below
    # consume the global NumPy random stream in a fixed order.
    np.random.seed(42)

    if label_type == "clean_adversarial":
        # Stratify by clean/adversarial, keeping the ratio of the full set.
        clean_indices = np.nonzero(clean_adv_labels == "clean")[0]
        adversarial_indices = np.nonzero(clean_adv_labels == "adversarial")[0]

        clean_proportion = len(clean_indices) / len(clean_adv_labels)
        n_clean_samples = int(max_samples * clean_proportion)
        n_adversarial_samples = max_samples - n_clean_samples

        sampled_clean_indices = np.random.choice(
            clean_indices,
            size=min(n_clean_samples, len(clean_indices)),
            replace=False,
        )
        sampled_adv_indices = np.random.choice(
            adversarial_indices,
            size=min(n_adversarial_samples, len(adversarial_indices)),
            replace=False,
        )

        sampled_indices = np.concatenate([sampled_clean_indices, sampled_adv_indices])
        print(f"  Clean samples: {len(sampled_clean_indices)}")
        print(f"  Adversarial samples: {len(sampled_adv_indices)}")
    else:
        # Stratify by class: an equal quota per class, with the remainder
        # handed out one sample at a time to the first classes.
        unique_classes = np.unique(class_labels_np)
        samples_per_class, remainder = divmod(max_samples, len(unique_classes))

        sampled_indices_list = []
        for i, cls in enumerate(unique_classes):
            class_indices = np.nonzero(class_labels_np == cls)[0]
            # Never ask for more samples than the class actually has.
            n_samples = min(samples_per_class + int(i < remainder), len(class_indices))
            sampled_indices_list.append(
                np.random.choice(class_indices, size=n_samples, replace=False)
            )
            print(f"  Class {cls} samples: {n_samples}")

        sampled_indices = np.concatenate(sampled_indices_list)

    np.random.shuffle(sampled_indices)  # Shuffle for better visualization

    # Apply the same selection to every parallel array so they stay aligned.
    anchors_np, class_labels_np, clean_adv_labels, visualization_labels, distances_np = (
        array[sampled_indices]
        for array in (
            anchors_np,
            class_labels_np,
            clean_adv_labels,
            visualization_labels,
            distances_np,
        )
    )

# Apply t-SNE
print("Applying t-SNE...")
# The iteration count is left at scikit-learn's default of 1000. The keyword
# for it was renamed from `n_iter` to `max_iter` (deprecated in 1.5, scheduled
# for removal in 1.7), so passing either name breaks on some versions.
# random_state pins the embedding so reruns give the same layout.
tsne = TSNE(n_components=2, random_state=42, perplexity=30)
anchors_2d = tsne.fit_transform(anchors_np)

# Visualize the 2-D embedding, one scatter series per label value.
fig, ax = plt.subplots(figsize=(12, 8))

if label_type == "clean_adversarial":
    # Two series: clean vs adversarial points.
    for label_val in ("clean", "adversarial"):
        mask = visualization_labels == label_val
        ax.scatter(anchors_2d[mask, 0], anchors_2d[mask, 1],
                   label=label_val, alpha=0.6, s=10)
    ax.set_title("t-SNE Visualization of Anchors (Clean vs Adversarial)")
else:
    # One series per class, coloured from the tab10 colormap.
    unique_labels = np.unique(visualization_labels)
    colors = plt.cm.tab10(np.linspace(0, 1, len(unique_labels)))
    for cls, color in zip(unique_labels, colors):
        mask = visualization_labels == cls
        ax.scatter(anchors_2d[mask, 0], anchors_2d[mask, 1],
                   label=f"Class {cls}", alpha=0.6, s=10, c=[color])
    ax.set_title("t-SNE Visualization of Anchors by Class")

ax.set_xlabel("t-SNE Component 1")
ax.set_ylabel("t-SNE Component 2")
ax.legend()
fig.tight_layout()

# The file name records both the target class and the colouring scheme.
fig.savefig(f"data/results/t-sne_visualization_anchors_target_{target_class}_{label_type}.pdf")
plt.show()

# Summarise what was plotted: total points, the clean/adversarial split when
# that colouring is active, and the classes present.
print(f"\nFinal dataset size: {len(visualization_labels)}")
if label_type == "clean_adversarial":
    num_clean = np.sum(visualization_labels == "clean")
    num_adversarial = np.sum(visualization_labels == "adversarial")
    print(f"Number of clean samples: {num_clean}")
    print(f"Number of adversarial samples: {num_adversarial}")
print(f"Class labels (unique): {np.unique(class_labels_np)}")




# Do stronger attacks lie farther away from the manifold?