update Sim-PE experiments

fjxmlzn · fjxmlzn · commit 5ef21443d9ca · 2025-05-17T22:35:29.000-07:00
diff --git a/doc/source/api/pe.data.image.imagenet.rst b/doc/source/api/pe.data.image.imagenet.rst
@@ -0,0 +1,7 @@
+pe.data.image.imagenet module
+=============================
+
+.. automodule:: pe.data.image.imagenet
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/doc/source/api/pe.data.image.rst b/doc/source/api/pe.data.image.rst
@@ -18,4 +18,5 @@ Submodules
    pe.data.image.cifar10
    pe.data.image.digiface1m
    pe.data.image.image
+   pe.data.image.imagenet
    pe.data.image.mnist
diff --git a/example/image/simulator/mnist_text_render.py b/example/image/simulator/mnist_text_render.py
@@ -47,6 +47,7 @@
         font_size_variation_degrees=[6, 5, 4, 3, 2],
         rotation_degree_variation_degrees=[11, 9, 7, 5, 3],
         stroke_width_variation_degrees=[1, 1, 1, 0, 0],
+        text_variation_degrees=0.0,
     )
     fld_inception_embedding = FLDInception()
     histogram = NearestNeighbors(
diff --git a/pe/api/image/draw_text_api.py b/pe/api/image/draw_text_api.py
@@ -32,6 +32,7 @@ def __init__(
         self,
         font_root_path,
         font_variation_degrees,
+        text_variation_degrees,
         font_size_variation_degrees,
         rotation_degree_variation_degrees,
         stroke_width_variation_degrees,
@@ -52,6 +53,10 @@ def __init__(
             is provided, the same variation degree will be used for all iterations. The value means the probability of
             changing the font to a random font.
         :type font_variation_degrees: float or list[float]
+        :param text_variation_degrees: The variation degrees for text utilized at each PE iteration. If a single value
+            is provided, the same variation degree will be used for all iterations. The value means the probability of
+            changing the text to a random text.
+        :type text_variation_degrees: float or list[float]
         :param font_size_variation_degrees: The variation degrees for font size utilized at each PE iteration. If a
             single value is provided, the same variation degree will be used for all iterations. The value means
             the maximum possible variation in font size.
@@ -89,6 +94,7 @@ def __init__(
         super().__init__()
         self._font_root_path = font_root_path
         self._font_variation_degrees = _to_constant_list_if_needed(font_variation_degrees)
+        self._text_variation_degrees = _to_constant_list_if_needed(text_variation_degrees)
         self._font_size_variation_degrees = _to_constant_list_if_needed(font_size_variation_degrees)
         self._rotation_degree_variation_degrees = _to_constant_list_if_needed(rotation_degree_variation_degrees)
         self._stroke_width_variation_degrees = _to_constant_list_if_needed(stroke_width_variation_degrees)
@@ -223,8 +229,10 @@ def _get_variation_image(
         rotation_degree,
         font_size_variation_degree,
         font_variation_degree,
+        text_variation_degree,
         stroke_width_variation_degree,
         rotation_degree_variation_degree,
+        label_name,
     ):
         """Get a variation image and its parameters.
 
@@ -242,16 +250,23 @@ def _get_variation_image(
         :type font_size_variation_degree: int
         :param font_variation_degree: The degree of variation in font
         :type font_variation_degree: float
+        :param text_variation_degree: The degree of variation in text
+        :type text_variation_degree: float
         :param stroke_width_variation_degree: The degree of variation in stroke width
         :type stroke_width_variation_degree: int
         :param rotation_degree_variation_degree: The degree of variation in rotation degree
         :type rotation_degree_variation_degree: int
+        :param label_name: The label name
+        :type label_name: str
         :return: The image of the avatar and its parameters
         :rtype: tuple[np.ndarray, dict]
         """
         do_font_variation = random.random() < font_variation_degree
         if do_font_variation:
             font_file = random.choice(self._font_files)
+        do_text_variation = random.random() < text_variation_degree
+        if do_text_variation:
+            text = random.choice(self._text_list[label_name])
 
         font_size += random.randint(-font_size_variation_degree, font_size_variation_degree)
         font_size = max(min(font_size, max(self._font_size_list)), min(self._font_size_list))
@@ -289,9 +304,11 @@ def variation_api(self, syn_data):
         execution_logger.info(f"VARIATION API: creating variations for {len(syn_data.data_frame)} samples")
         original_params = list(syn_data.data_frame[TEXT_PARAMS_COLUMN_NAME].values)
         original_images = np.stack(syn_data.data_frame[IMAGE_DATA_COLUMN_NAME].values)
+        original_label_ids = syn_data.data_frame[LABEL_ID_COLUMN_NAME].values
         iteration = getattr(syn_data.metadata, "iteration", -1)
         font_variation_degree = self._font_variation_degrees[iteration + 1]
         font_size_variation_degree = self._font_size_variation_degrees[iteration + 1]
+        text_variation_degree = self._text_variation_degrees[iteration + 1]
         rotation_variation_degree = self._rotation_degree_variation_degrees[iteration + 1]
         stroke_width_variation_degree = self._stroke_width_variation_degrees[iteration + 1]
 
@@ -307,9 +324,11 @@ def variation_api(self, syn_data):
             original_param = original_params[i]
             image, param = self._get_variation_image(
                 font_size_variation_degree=font_size_variation_degree,
+                text_variation_degree=text_variation_degree,
                 font_variation_degree=font_variation_degree,
                 rotation_degree_variation_degree=rotation_variation_degree,
                 stroke_width_variation_degree=stroke_width_variation_degree,
+                label_name=syn_data.metadata.label_info[int(original_label_ids[i])].name,
                 **original_param,
             )
             if image is not None:
diff --git a/pe/api/image/nearest_image_api.py b/pe/api/image/nearest_image_api.py
@@ -58,7 +58,9 @@ def __init__(
         self._nearest_neighbor_backend = nearest_neighbor_backend
         self._variation_degrees = _to_constant_list_if_needed(variation_degrees)
         self._max_variation_degree = (
-            self._variation_degrees[0] if isinstance(variation_degrees, ConstantList) else max(self._variation_degrees)
+            self._variation_degrees[0]
+            if isinstance(self._variation_degrees, ConstantList)
+            else max(self._variation_degrees)
         )
 
         if nearest_neighbor_backend.lower() == "faiss":
diff --git a/pe/data/__init__.py b/pe/data/__init__.py
@@ -1,5 +1,5 @@
 from .data import Data
-from .image import load_image_folder, Cifar10, Camelyon17, Cat, MNIST, CelebA, DigiFace1M
+from .image import load_image_folder, Cifar10, Camelyon17, Cat, MNIST, CelebA, DigiFace1M, ImageNet
 from .text import TextCSV, Yelp, PubMed, OpenReview
 
 __all__ = [
@@ -11,6 +11,7 @@
     "MNIST",
     "CelebA",
     "DigiFace1M",
+    "ImageNet",
     "TextCSV",
     "Yelp",
     "PubMed",
diff --git a/pe/data/image/__init__.py b/pe/data/image/__init__.py
@@ -5,3 +5,4 @@
 from .mnist import MNIST
 from .celeba import CelebA
 from .digiface1m import DigiFace1M
+from .imagenet import ImageNet
diff --git a/pe/data/image/imagenet.py b/pe/data/image/imagenet.py
@@ -0,0 +1,60 @@
+import pandas as pd
+import torchvision.datasets
+import torchvision.transforms as T
+from tqdm import tqdm
+import torch
+
+from pe.data import Data
+from pe.constant.data import LABEL_ID_COLUMN_NAME
+from pe.constant.data import IMAGE_DATA_COLUMN_NAME
+
+
+class ImageNet(Data):
+    """The ImageNet dataset, loaded fully into memory as a data frame of images and label IDs."""
+
+    def __init__(self, root_dir, conditional=False, split="train", res=32, batch_size=1000, num_workers=10):
+        """Constructor.
+
+        :param root_dir: The root directory of the dataset (passed to ``torchvision.datasets.ImageNet``).
+        :param conditional: Whether to keep class labels (otherwise every sample gets label 0), defaults to False
+        :type conditional: bool, optional
+        :param split: The split of the dataset, defaults to "train"
+        :type split: str, optional
+        :param res: The resolution the center-cropped images are resized to, defaults to 32
+        :type res: int, optional
+        :param batch_size: The batch size to load the images, defaults to 1000
+        :type batch_size: int, optional
+        :param num_workers: The number of data loader worker processes used to load the images, defaults to 10
+        :type num_workers: int, optional
+        """
+        transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.Resize(res), T.PILToTensor()])
+        dataset = torchvision.datasets.ImageNet(
+            root=root_dir,
+            split=split,
+            transform=transform,
+        )
+        data_loader = torch.utils.data.DataLoader(
+            dataset,
+            batch_size=batch_size,
+            shuffle=False,  # keep dataset order so images stay aligned with dataset.targets
+            num_workers=num_workers,  # honor the constructor argument (was hard-coded to 4)
+            drop_last=False,
+        )
+
+        images = []
+        for batch in tqdm(data_loader, desc="Loading ImageNet", unit="batch"):
+            images.append(batch[0])  # batch is (images, targets); only the images are collected here
+        images = torch.cat(images, dim=0)
+        images = images.permute(0, 2, 3, 1).numpy()  # presumably (N, C, H, W) -> (N, H, W, C)
+
+        data_frame = pd.DataFrame(
+            {
+                IMAGE_DATA_COLUMN_NAME: list(images),
+                LABEL_ID_COLUMN_NAME: dataset.targets if conditional else [0] * len(images),
+            }
+        )
+        if conditional:  # one label entry per dataset class; otherwise a single placeholder label
+            metadata = {"label_info": [{"name": n} for n in map(str, dataset.classes)]}
+        else:
+            metadata = {"label_info": [{"name": "none"}]}
+        super().__init__(data_frame=data_frame, metadata=metadata)

Original file line number	Diff line number	Diff line change
`@@ -47,6 +47,7 @@`
`47`	`47`	`font_size_variation_degrees=[6, 5, 4, 3, 2],`
`48`	`48`	`rotation_degree_variation_degrees=[11, 9, 7, 5, 3],`
`49`	`49`	`stroke_width_variation_degrees=[1, 1, 1, 0, 0],`
	`50`	`+ text_variation_degrees=0.0,`
`50`	`51`	`)`
`51`	`52`	`fld_inception_embedding = FLDInception()`
`52`	`53`	`histogram = NearestNeighbors(`