In [1]:
import json
import numpy as np
import torch
import random
# Human-side prompts for the "understanding" task. Every entry starts with an
# `<image>` placeholder line followed by a question about the time and colour
# shown; none of them contains `{time}` / `{color}` fields.
understanding_templates = [
    "<image>\nWhat is the time and its color in the image?",
    "<image>\nCan you tell me the time displayed on the clock and its color?",
    "<image>\nPlease identify the time and the color shown in the display.",
    "<image>\nWhat time is it, and what is the color of the numbers?",
    "<image>\nProvide the time and the corresponding color for the digits shown.",
    "<image>\nThe image shows a clock. What is the time, and what is its color?",
    "<image>\nTell me the time and the digits' color in the picture.",
    "<image>\nFrom the display, what is the time, and what color are the digits?",
    "<image>\nWhat does the display show in terms of time and color?",
    "<image>\nIdentify the time and the color of the display in the image.",
]

# Model-side answers for the "understanding" task. `{time}` and `{color}` are
# named fields meant to be filled in with `str.format(time=..., color=...)`.
understanding_answers = [
    "The time is {time} and the color is {color}.",
    "{time} is the time shown, and the digits are in {color}.",
    "It is {time}, and the color of the display is {color}.",
    "The display shows {time}, and its color is {color}.",
    "{time} is what the display reads, with a {color} color.",
    "The clock says {time}, and the numbers are {color}.",
    "{color} is the color, and the time is {time}.",
    "The digits are in {color}, and the time is {time}.",
    "You can see {time} in the image, and the color is {color}.",
    "The image shows a display reading {time} in {color} color.",
]

# Human-side prompts for the "generation" task: text-only requests for an image
# showing `{time}` in `{color}` (both filled in with `str.format`).
generation_templates = [
    "Please show me an image of {time} with {color} digits.",
    "Can you generate an image displaying {time} in {color}?",
    "Create a clock image that reads {time} with a {color} display.",
    "Generate an image showing {time} using {color} digits.",
    "I want to see an image of {time} with numbers in {color}.",
    "Make a display image with the time {time} in {color}.",
    "Draw a clock reading {time} with digits in {color}.",
    "Show me a time display that says {time} in {color}.",
    "Produce an image of {time} where the numbers are {color}.",
    "Create a visual representation of {time} in {color}.",
]

# Model-side answers for the "generation" task. Each entry describes the
# requested `{time}` / `{color}` and ends with " \n<image>", i.e. the image
# placeholder comes after the text.
generation_answers = [
    "This is the image of {time} with {color} digits. \n<image>",
    "Here is the display showing {time} in {color}. \n<image>",
    "An image of {time} with numbers in {color} is shown. \n<image>",
    "This is the requested image: {time} in {color}. \n<image>",
    "Here you go: a clock showing {time} in {color} digits. \n<image>",
    "Generated an image with {time} in {color} display. \n<image>",
    "This image shows {time} with digits in {color}. \n<image>",
    "Displayed {time} with the requested {color} digits. \n<image>",
    "The image is created: {time} in {color}. \n<image>",
    "The visual shows {time} numbers in {color} digits. \n<image>",
]

In [2]:
# Scratch check: build an all-ones tensor with a leading singleton dimension
# in front of b's shape, reusing b's dtype and device.
b = torch.zeros(2, 3)
a = torch.ones((1, *b.shape), dtype=b.dtype, device=b.device)
print(a)

tensor([[[1., 1., 1.],
         [1., 1., 1.]]])


In [2]:


# Seven-segment patterns for the digits 0-9: seven on/off flags per digit,
# written compactly as bit strings and expanded into lists of ints.
seven_segment_encoding = {
    digit: [int(flag) for flag in pattern]
    for digit, pattern in (
        ('0', '1111110'),
        ('1', '0110000'),
        ('2', '1101101'),
        ('3', '1111001'),
        ('4', '0110011'),
        ('5', '1011011'),
        ('6', '1011111'),
        ('7', '1110000'),
        ('8', '1111111'),
        ('9', '1111011'),
    )
}

# Colour name -> [low, high] bounds of the scalar that lit segments are scaled by.
color_mapping = {
    'red': [0.1, 0.15],
    'green': [0.15, 0.3],
    'blue': [0.3, 0.45],
    'yellow': [0.45, 0.6],
    'orange': [0.6, 0.75],
    'purple': [0.75, 0.9],
}

# Convert a time string (HH:MM:SS) plus a colour name into a scaled torch tensor
def time_to_tensor(time_str, color):
    """Encode a clock reading as a 6x7 seven-segment tensor scaled by a colour value.

    Args:
        time_str: Time text; colons are stripped and each remaining character
            fills one row. Characters without a seven-segment pattern leave
            their row at zero.
        color: Key into ``color_mapping`` selecting the [low, high] range the
            scaling factor is drawn from.

    Returns:
        A torch tensor of shape (6, 7): the 0/1 segment pattern multiplied by
        one value sampled uniformly from the colour's range.
    """
    digits = time_str.replace(':', '')
    segments = np.zeros((6, 7), dtype=int)

    for row, char in enumerate(digits):
        pattern = seven_segment_encoding.get(char)
        if pattern is not None:
            segments[row] = np.array(pattern)

    # A single draw per call, so all lit segments share the same intensity.
    low, high = color_mapping[color]
    intensity = np.random.uniform(low, high)

    return torch.Tensor(segments) * intensity

# Example: Convert time "11:25:30" to tensor
# time_str = "11:25:30"
# color='red'
# tensor_representation = time_to_tensor(time_str,color)
# print(tensor_representation)

def generate_random_time_and_color(parity=0):
    """Draw a random ``HH:MM:SS`` string and a random colour name.

    Args:
        parity: 1 restricts hour, minute and second to odd values, 2 restricts
            them to even values, and any other value leaves them unrestricted.

    Returns:
        A ``(time_str, color)`` tuple, where ``color`` is a key of
        ``color_mapping``.
    """
    def generate_number(max_value, parity):
        """Pick an integer in [0, max_value] that satisfies the parity rule."""
        if parity == 1:  # odd values only
            return random.choice(list(range(1, max_value + 1, 2)))
        if parity == 2:  # even values only
            return random.choice(list(range(0, max_value + 1, 2)))
        # no restriction
        return random.randint(0, max_value)

    # Drawn in hour, minute, second order so the RNG sequence stays stable.
    fields = [generate_number(limit, parity) for limit in (23, 59, 59)]
    time_str = ":".join(f"{value:02}" for value in fields)
    color = random.choice(list(color_mapping))
    return time_str, color

# Function to generate samples
def generate_samples(num_samples=100):
    """Build ``num_samples`` conversation samples, each with a randomly chosen task.

    "understanding" samples draw their time with parity 2 (even values only);
    "generation" samples use the default parity. Each sample holds the task
    name, a human/gpt conversation pair and the tensor as nested lists.
    """
    # task name -> (args for the time generator, question pool, answer pool)
    task_setup = {
        "understanding": ((2,), understanding_templates, understanding_answers),
        "generation": ((), generation_templates, generation_answers),
    }

    samples = []
    for _ in range(num_samples):
        task_type = random.choice(["understanding", "generation"])
        parity_args, question_pool, answer_pool = task_setup[task_type]

        time_str, color = generate_random_time_and_color(*parity_args)
        tensor = time_to_tensor(time_str, color)

        fields = {"time": time_str, "color": color}
        question = random.choice(question_pool).format(**fields)
        answer = random.choice(answer_pool).format(**fields)

        samples.append({
            "task": task_type,
            'conversations': [
                {'from': 'human', 'value': question},
                {'from': 'gpt', 'value': answer},
            ],
            "tensor": tensor.tolist(),  # stored as plain nested lists
        })

    return samples

def generate_samples_for_inverse_understanding(num_samples=100):
    """Build paired samples that share one time and colour across both tasks.

    Runs ``num_samples // 2`` rounds. Each round draws one (time, colour) pair
    and emits a "generation" sample followed by an "understanding" sample for
    it. The tensor is rebuilt for each of the two samples, so the colour
    scaling factor is drawn separately for each.
    """
    # Emission order within a round: generation first, then understanding.
    paired_tasks = (
        ("generation", generation_templates, generation_answers),
        ("understanding", understanding_templates, understanding_answers),
    )

    samples = []
    for _ in range(num_samples // 2):
        time_str, color = generate_random_time_and_color()

        for task_type, question_pool, answer_pool in paired_tasks:
            tensor = time_to_tensor(time_str, color)
            question = random.choice(question_pool).format(time=time_str, color=color)
            answer = random.choice(answer_pool).format(time=time_str, color=color)

            samples.append({
                "task": task_type,
                'conversations': [
                    {'from': 'human', 'value': question},
                    {'from': 'gpt', 'value': answer},
                ],
                "tensor": tensor.tolist(),  # stored as plain nested lists
            })

    return samples

# Generate 200 paired generation/understanding samples and write them out as
# indented JSON for evaluation.
samples = generate_samples_for_inverse_understanding(num_samples=200)
with open('/datadrive_a/jihai/data/multimodalout/dummy_data_inverse_u_eval.json', mode="w") as f:
    f.write(json.dumps(samples, indent=4))



In [14]:
# Quick check: draw one (time string, colour name) pair with the default parity and print it.
print(generate_random_time_and_color())

('23:13:27', 'purple')


In [4]:
# Build the example tensor inside this cell: `tensor_representation` was only
# ever assigned in the commented-out example, so on a fresh kernel this cell
# raised NameError. The time and colour are the values from that example.
tensor_representation = time_to_tensor("11:25:30", "red")
torch.save(tensor_representation,'./dummy_images/0002.pt')

In [4]:
# Load the llava_v1_5_mix665k JSON file and print its first record to inspect
# the schema. The context manager closes the file handle as soon as parsing
# finishes instead of leaving it open until garbage collection.
with open('/datadrive_a/jihai/azure_storage2/vigstandard_data/jihai/sharegpt4v/llava_v1_5_mix665k.json', "r") as f:
    list_data_dict = json.load(f)
print(list_data_dict[0])

{'id': '000000033471', 'image': 'coco/train2017/000000033471.jpg', 'conversations': [{'from': 'human', 'value': '<image>\nWhat are the colors of the bus in the image?'}, {'from': 'gpt', 'value': 'The bus in the image is white and red.'}, {'from': 'human', 'value': 'What feature can be seen on the back of the bus?'}, {'from': 'gpt', 'value': 'The back of the bus features an advertisement.'}, {'from': 'human', 'value': 'Is the bus driving down the street or pulled off to the side?'}, {'from': 'gpt', 'value': 'The bus is driving down the street, which is crowded with people and other vehicles.'}]}


In [2]:
# Assume Q is a tensor: start from a random 4096x4096 Gaussian matrix and
# replace it with the Q factor of its QR decomposition.
Q = torch.randn(4096, 4096)
Q, _ = torch.linalg.qr(Q, mode='complete')  # Ensure it is an orthogonal matrix
torch.save(Q, '/datadrive_a/jihai/LLaVA/scripts/v1_5/checkpoints/Q.pth')  # Save to the specified path