In [None]:
# python3 autoregressive/sample/sample_t2i.py --vq-ckpt ./pretrained_models/vq_ds16_t2i.pt --gpt-ckpt ./pretrained_models/t2i_XL_stage2_512.pt --gpt-model GPT-XL --image-size 512

In [None]:
import os
import json
from tqdm import tqdm
from multiprocessing import Pool, Manager

# Segmentation-result trees that are searched recursively for `metadata.jsonl`.
root_paths = [
    "/nobackup/zefan/projects/VLGen/segment_results_imgnet",
    "/nobackup/zefan/projects/VLGen/segment_results_sam_molom",
    "/nobackup/zefan/projects/VLGen/segment_results_sam",
]

# Single JSONL file that receives every collected record.
output_file = "/nobackup/zefan/projects/VLGen/LlamaGen/metadata_all.jsonl"

# Recursively gather the records of every metadata.jsonl below a folder
def find_and_process_metadata(folder):
    """Collect the JSON records of every ``metadata.jsonl`` found under ``folder``.

    Args:
        folder: Directory to walk recursively.

    Returns:
        list: One parsed JSON object per non-blank line, in a deterministic
        order (directories are visited in sorted order).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    metadata_list = []
    for root, dirs, files in tqdm(os.walk(folder)):
        # os.walk yields entries in filesystem order; sorting in place makes
        # the traversal (and therefore the output order) reproducible.
        dirs.sort()
        if "metadata.jsonl" not in files:
            continue
        metadata_path = os.path.join(root, "metadata.jsonl")
        with open(metadata_path, 'r', encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank or trailing empty lines are not records.
                    continue
                metadata_list.append(json.loads(line))
    return metadata_list

# Pool worker: best-effort wrapper around find_and_process_metadata
def collect_metadata(folder):
    """Return the metadata records found under ``folder``.

    Any exception raised while scanning is reported on stdout and turned into
    an empty result, so one bad folder does not abort the whole run.
    """
    try:
        records = find_and_process_metadata(folder)
    except Exception as exc:
        print(f"Error processing folder {folder}: {exc}")
        records = []
    return records

if __name__ == "__main__":
    all_metadata = []

    # One task per root folder, so more workers than folders would only sit
    # idle. (The previous Manager()/manager.list() was never used and only
    # spawned an extra server process.)
    num_workers = max(1, min(len(root_paths), os.cpu_count() or 1))
    with Pool(processes=num_workers) as pool:
        results = list(tqdm(pool.imap(collect_metadata, root_paths), total=len(root_paths)))

    # Combine the per-folder results into a single list (root_paths order).
    for result in results:
        all_metadata.extend(result)

    # Save all metadata to a single JSONL file, one JSON object per line.
    with open(output_file, 'w') as out_f:
        for item in all_metadata:
            out_f.write(json.dumps(item) + '\n')

    print(f"Metadata collected and saved to {output_file}")


In [29]:
from collections import Counter  
from multiprocessing import Manager  
import json
import os
from tqdm import tqdm
from PIL import Image
import numpy as np
from multiprocessing import Pool, cpu_count
import numpy as np
from scipy.ndimage import label
# Input: every collected record. Outputs: records split by mask validity.
input_file = "/nobackup/zefan/projects/VLGen/LlamaGen/metadata_all.jsonl"

output_file_correct = "/nobackup/zefan/projects/VLGen/LlamaGen/metadata_correct.jsonl"
output_file_incorrect = "/nobackup/zefan/projects/VLGen/LlamaGen/metadata_incorrect.jsonl"
def is_similar(img1, img2, threshold=0.95, resize_dim=(32, 32)):
    """
    Check if two images are similar by comparing downscaled thumbnails.

    Both images are resized to ``resize_dim`` and compared value by value;
    they count as similar when the fraction of exactly equal values reaches
    ``threshold``.

    NOTE(review): the score counts every position where the thumbnails agree,
    including positions where both are background, so two small masks that
    cover different regions can still score above the threshold.

    Args:
        img1, img2: PIL Image objects to compare.
        threshold: Similarity threshold (default is 0.95).
        resize_dim: Dimensions to resize the images to (default is 32x32).

    Returns:
        bool: True if images are similar, False otherwise.
    """
    # Thumbnails of different modes (e.g. "L" vs "RGB") have different array
    # shapes; comparing those either raises or silently yields False
    # depending on the NumPy version, so bring img2 to img1's mode first.
    if img2.mode != img1.mode:
        img2 = img2.convert(img1.mode)

    # Downscale both images to the same small grid
    thumb1 = np.asarray(img1.resize(resize_dim, Image.Resampling.LANCZOS))
    thumb2 = np.asarray(img2.resize(resize_dim, Image.Resampling.LANCZOS))

    # Fraction of identical values, returned as a plain Python bool
    return bool(np.mean(thumb1 == thumb2) >= threshold)
def is_scattered(mask_path, min_connected_area=5, max_components=1000, binarize_threshold=0):
    """
    Detect whether a mask is a "scattered dots" failure.

    The mask is converted to grayscale, binarized and split into connected
    components. It is considered scattered when there are too many components
    or when every component is tiny. A mask with no foreground pixel at all
    is also reported as scattered.

    Args:
        mask_path (str): Path of the mask image.
        min_connected_area (int): Minimum pixel count of a connected region;
            the mask is scattered if every region is smaller than this.
        max_components (int): Maximum allowed number of connected regions;
            more than this means the mask is scattered.
        binarize_threshold (int): Gray values strictly greater than this are
            foreground. The default 0 keeps the original "any non-zero pixel"
            rule; a higher value can be passed to ignore faint pixels.

    Returns:
        bool: True if the mask is scattered, False otherwise.
    """
    # Load the image and binarize it (True marks a foreground pixel)
    with Image.open(mask_path) as mask:
        mask_array = np.asarray(mask.convert("L")) > binarize_threshold

    # Connected-component labelling
    labeled_array, num_features = label(mask_array)

    # Too many components
    if num_features > max_components:
        return True

    # Per-component area, excluding the background (label 0)
    component_sizes = np.bincount(labeled_array.ravel())[1:]

    # Every component is too small (vacuously true when there are none)
    return bool(component_sizes.size == 0 or component_sizes.max() < min_connected_area)
# Validate the set of masks of one record and report why it was rejected
def is_mask_valid(mask_paths, min_area_ratio=0.003, max_area_ratio=0.95):
    """Decide whether the masks of one record are usable.

    Checks run in this order and the first failure determines the reason:
    scattered masks, pairwise-similar masks, then per-mask area ratio.

    Args:
        mask_paths: List of mask image paths belonging to one record.
        min_area_ratio: Masks covering less than this fraction of their image
            are rejected as "mask_too_small".
        max_area_ratio: Masks covering more than this fraction of their image
            are rejected as "mask_too_large".

    Returns:
        tuple: ``(True, None)`` when valid, otherwise ``(False, reason)`` with
        reason one of "no_masks", "scattered_masks", "similar_masks",
        "mask_too_small", "mask_too_large".
    """
    if len(mask_paths) == 1:
        return True, None  # Single mask is always valid
    if len(mask_paths) == 0:
        return False, "no_masks"

    # Check for scattered-dot masks
    for path in mask_paths:
        if is_scattered(path):
            return False, "scattered_masks"

    # Load masks
    masks = [Image.open(path) for path in mask_paths]

    # Check similarity of every unordered pair
    for i, mask1 in enumerate(masks):
        for mask2 in masks[i + 1:]:
            if is_similar(mask1, mask2):
                return False, "similar_masks"

    # Check size condition. Pixel counting is done only now, after the
    # early-exit checks, and on a single-channel view so that a multi-channel
    # mask is measured in pixels rather than in channel samples. Each mask is
    # compared against its own pixel count.
    for mask in masks:
        gray = np.asarray(mask.convert("L"))
        ratio = np.count_nonzero(gray) / gray.size
        if ratio < min_area_ratio:
            return False, "mask_too_small"
        if ratio > max_area_ratio:
            return False, "mask_too_large"

    return True, None

# Classify one JSONL record by the validity of its masks
def process_line(line):
    """Parse one JSONL line and label it "correct" or "incorrect".

    Returns:
        tuple: ``(label, record, reason)`` where ``reason`` is None for
        correct records and the rejection reason otherwise.
    """
    record = json.loads(line)
    valid, why = is_mask_valid(record["mask_path"])
    if not valid:
        return "incorrect", record, why
    return "correct", record, None

def process_file(chunksize=256):
    """Validate every record of ``input_file`` and split it into two JSONL files.

    Records are classified in parallel with ``process_line``; valid ones are
    written to ``output_file_correct``, the rest to ``output_file_incorrect``.
    A summary with the count per rejection reason is printed at the end.

    Args:
        chunksize: Number of lines handed to a worker at a time. The default
            of 1 used by ``Pool.imap`` makes every tiny task pay the
            inter-process round trip, which is wasteful for a very long input.
    """
    with open(input_file, "r") as f:
        # Skip blank lines: they are not records and json.loads would fail.
        lines = [line for line in f if line.strip()]

    correct_cases = []
    incorrect_cases = []

    # Counts how often each rejection reason occurs
    reason_counter = Counter()

    with Pool(cpu_count()) as pool:
        results = list(
            tqdm(pool.imap(process_line, lines, chunksize=chunksize), total=len(lines))
        )

    for result_type, data, reason in results:
        if result_type == "correct":
            correct_cases.append(data)
        else:
            incorrect_cases.append(data)
            reason_counter[reason] += 1

    # Save results
    with open(output_file_correct, "w") as f_correct, open(output_file_incorrect, "w") as f_incorrect:
        for case in correct_cases:
            f_correct.write(json.dumps(case) + "\n")
        for case in incorrect_cases:
            f_incorrect.write(json.dumps(case) + "\n")

    # Print the number of detected issues
    print(f"Total correct cases: {len(correct_cases)}")
    print(f"Total incorrect cases: {len(incorrect_cases)}")
    print("Reasons for incorrectness:")
    for reason, count in reason_counter.items():
        print(f"{reason}: {count}")

process_file()

100%|██████████| 469418/469418 [02:44<00:00, 2850.13it/s]


Total correct cases: 361253
Total incorrect cases: 108165
Reasons for incorrectness:
similar_masks: 69745
mask_too_small: 11648
scattered_masks: 22238
mask_too_large: 4510
no_masks: 24


In [1]:
import json
import random

# Source (validated records) and destinations (validation / training splits)
input_file = "/nobackup/zefan/projects/VLGen/LlamaGen/metadata_correct.jsonl"
output_val_file = "/nobackup/zefan/projects/VLGen/LlamaGen/new_1117_validation_set.jsonl"
output_train_file = "/nobackup/zefan/projects/VLGen/LlamaGen/new_1117_train_set.jsonl"

# Parse every JSONL row into a dict
with open(input_file, "r") as f:
    data = list(map(json.loads, (line.strip() for line in f)))

# Shuffle with a fixed seed, then take the first 500 records as the
# validation split and everything after them as the training split
random.seed(42)
random.shuffle(data)
val_data, train_data = data[:500], data[500:]

# Flatten records into one training sample per (mask, object) pair
def convert_to_new_format(data, starting_idx=0):
    """Expand each record into one sample per mask/object pair.

    Masks and objects are paired positionally. ``global_idx`` numbers the
    produced samples consecutively, beginning at ``starting_idx``.

    Returns:
        list: The flattened sample dicts.
    """
    samples = []
    for record in data:
        pairs = zip(record["mask_path"], record["sam_objects"])
        for position, (mask_file, object_name) in enumerate(pairs):
            samples.append({
                # Samples emitted so far, offset by the starting index
                "global_idx": starting_idx + len(samples),
                "source_image": record["source_img"],
                "image_path": mask_file,
                "objects": object_name,
                "segment_idx": position,
                "input_text": f"<image>\n{record['caption']}"
            })
    return samples

# Flatten both splits; training indices continue where validation ends
val_converted = convert_to_new_format(val_data, starting_idx=0)
train_converted = convert_to_new_format(train_data, starting_idx=len(val_converted))

# Write each split as JSONL, one sample per line
with open(output_val_file, "w") as val_out, open(output_train_file, "w") as train_out:
    val_out.writelines(json.dumps(sample) + "\n" for sample in val_converted)
    train_out.writelines(json.dumps(sample) + "\n" for sample in train_converted)

# Sample counts of the two splits (displayed as the cell result)
len(val_converted), len(train_converted)


(1066, 766167)

In [2]:
# Display the first converted training sample as a sanity check of the format.
train_converted[0]

{'global_idx': 1066,
 'source_image': '/nobackup/zefan/projects/VLGen/segment_results_sam/cnt_10000_1000000000_0_39_0/0000000706/ori.jpg',
 'image_path': '/nobackup/zefan/projects/VLGen/segment_results_sam/cnt_10000_1000000000_0_39_0/0000000706/mask_bride.jpg',
 'objects': 'bride',
 'segment_idx': 0,
 'input_text': '<image>\nA bride being photographed on a scenic dock surrounded by decorative arches.'}

In [16]:


# Masks of one ImageNet sample, used to debug the size check by hand
mask_paths = [
    "/nobackup/zefan/projects/VLGen/segment_results_imgnet/cnt_0_100000000_0_2_0/0000000331/mask_wardrobe.jpg",
    "/nobackup/zefan/projects/VLGen/segment_results_imgnet/cnt_0_100000000_0_2_0/0000000331/mask_clothes.jpg",
    "/nobackup/zefan/projects/VLGen/segment_results_imgnet/cnt_0_100000000_0_2_0/0000000331/mask_drawer.jpg",
]
def is_mask_valid(mask_paths, min_area_ratio=0.05):
    """Debug variant of the mask validator that also returns what it inspected.

    Only the lower area bound is checked here; the similarity and upper-bound
    checks are intentionally left out of this variant.

    NOTE(review): this definition reuses the name ``is_mask_valid`` and has a
    different return shape than the two-value validator that ``process_line``
    expects, so re-running the filtering pipeline after this cell will pick up
    this version instead.

    Args:
        mask_paths: List of mask image paths belonging to one record.
        min_area_ratio: Masks covering less than this fraction of the first
            mask's pixel count are reported as "mask_too_small".

    Returns:
        tuple: Always four values ``(is_valid, reason, masks, index)``.
        ``masks`` is the list of opened images (empty when there are no
        paths) and ``index`` is the position of the failing mask, or None
        when no mask failed.
    """
    if len(mask_paths) == 0:
        return False, "no_masks", [], None

    # Load masks
    masks = [Image.open(path) for path in mask_paths]
    if len(masks) == 1:
        return True, None, masks, None  # Single mask is always valid

    mask_sizes = [np.sum(np.array(mask) > 0) for mask in masks]
    total_pixels = masks[0].size[0] * masks[0].size[1]

    # Check size condition (lower bound only)
    for i, size in enumerate(mask_sizes):
        ratio = size / total_pixels
        if ratio < min_area_ratio:
            return False, "mask_too_small", masks, i

    # Same arity as the failure path so callers can always unpack four values
    return True, None, masks, None

# Keep the opened masks and the index of the failing mask for the cells below.
# NOTE(review): this relies on is_mask_valid returning four values; a
# two-value return would raise ValueError at this unpacking.
_,_,masks,i = is_mask_valid(mask_paths)

In [17]:
 masks[0].size[0] * masks[0].size[1]  # total pixel count of the first mask (product of its two size entries)

262144

In [37]:
# Number of zero-valued (background) entries in the second mask. Uses `masks`,
# the list produced by the debug cell above, instead of the name `mask`, which
# is not defined anywhere in this notebook and only existed as leftover kernel
# state.
np.sum(np.array(masks[1]) == 0)

254807

In [17]:
from PIL import Image, ImageDraw  
import numpy as np  

def create_image_with_polygon(p, num_sides=6, size=(512, 512)):
    """Create a black grayscale image with a centered white regular polygon.

    The polygon is sized so that its geometric area equals ``p`` percent of
    the image area.

    NOTE: the polygon is centered and not constrained to the canvas. For large
    ``p`` its vertices fall outside the image (with the defaults, roughly
    above 65%), so the drawn white fraction is then smaller than ``p``.

    Args:
        p: Target polygon area as a percentage of the image area, 0 to 100.
        num_sides: Number of polygon sides (default 6, a regular hexagon).
        size: ``(width, height)`` of the image in pixels.

    Returns:
        The generated mode "L" PIL image.

    Raises:
        ValueError: If ``p`` is outside [0, 100] or ``num_sides`` is below 3.
    """
    # Make sure p is between 0 and 100
    if not (0 <= p <= 100):
        raise ValueError("p must be between 0 and 100")
    if num_sides < 3:
        raise ValueError("num_sides must be at least 3")

    total_pixels = size[0] * size[1]

    # Number of pixels that should become white
    white_pixels_count = int(total_pixels * (p / 100.0))

    # Start from an all-black image
    image = Image.new("L", size, 0)
    draw = ImageDraw.Draw(image)

    # Nothing to draw for a zero-area target; this avoids issuing a
    # degenerate polygon whose vertices all coincide.
    if white_pixels_count == 0:
        return image

    # Area of a regular n-gon with circumradius R is (n / 2) * R**2 * sin(2*pi / n);
    # solve for R. For n = 6 this is the hexagon formula 3*sqrt(3)/2 * R**2.
    circumradius = np.sqrt(
        (2 * white_pixels_count) / (num_sides * np.sin(2 * np.pi / num_sides))
    )

    # Place the polygon at the image center
    center_x, center_y = size[0] // 2, size[1] // 2
    angle_step = 360 / num_sides

    # Polygon vertices, evenly spaced on the circumscribed circle
    points = [
        (
            center_x + circumradius * np.cos(np.radians(angle_step * i)),
            center_y + circumradius * np.sin(np.radians(angle_step * i))
        )
        for i in range(num_sides)
    ]

    # Draw the filled polygon
    draw.polygon(points, fill=255)

    return image

# Usage example: create an image whose centered white polygon targets 95% of the image area
polygon_image = create_image_with_polygon(95)  

In [None]:
# convert dreambench_plus
import os
import json

# Define input and output paths
base_path = "/nobackup/zefan/projects/VLGen/LlamaGen"
captions_folder = os.path.join(base_path, "data/captions")
images_folder = os.path.join(base_path, "data/images")
output_file = os.path.join(base_path, "dreambench_plus_valid.jsonl")

# Running sample index and the list of generated samples
global_idx = 0
dataset = []

# Traverse the captions folder. Each .txt file holds the object name on its
# first line and one caption per following line; the matching image lives at
# the same relative location under images_folder with a .jpg extension.
for root, dirs, files in os.walk(captions_folder):
    # os.walk yields entries in filesystem order; sorting makes the
    # global_idx assignment reproducible across runs and machines.
    dirs.sort()
    for file in sorted(files):
        if not file.endswith(".txt"):
            continue
        txt_path = os.path.join(root, file)
        image_relative_path = os.path.relpath(root, captions_folder)
        # Swap only the trailing extension; str.replace would also rewrite a
        # ".txt" occurring earlier in the file name.
        image_file = file[:-len(".txt")] + ".jpg"
        # normpath drops the "." segment that relpath returns for files that
        # sit directly in captions_folder.
        image_path = os.path.normpath(
            os.path.join(images_folder, image_relative_path, image_file)
        )

        # Read the txt file
        with open(txt_path, 'r') as f:
            lines = f.readlines()
        if len(lines) <= 1:
            # No caption lines after the object name
            continue

        object_name = lines[0].strip()
        # Blank lines (e.g. a trailing newline) are not captions
        captions = [line.strip() for line in lines[1:] if line.strip()]

        # Create one dataset instance per caption
        for caption in captions:
            instance = {
                "global_idx": global_idx,
                "source_image": image_path,
                "image_path": image_path,
                "objects": object_name,
                "segment_idx": 0,
                "input_text": f"<image>\n{caption}"
            }
            dataset.append(instance)
            global_idx += 1

# Save the dataset to a jsonl file, one JSON object per line
with open(output_file, 'w') as f:
    for entry in dataset:
        json.dump(entry, f)
        f.write("\n")

# Confirm completion (displayed as the cell result)
output_file


'/nobackup/zefan/projects/VLGen/LlamaGen/dreambench_plus_valid.jsonl'