In [1]:
import json
# Parse the raw training annotations into `data`; read mode is open()'s default.
with open('train_list.json', encoding='utf-8') as file:
    data = json.load(fp=file)

In [2]:
import json
import ast 
import numpy as np

def transform_json(input_data, vote_threshold=2):
    """Collapse per-item score annotations into single integer labels.

    Args:
        input_data: Iterable of dicts, each providing the keys ``"prompt"``,
            ``"img_path"``, ``"total_score"`` and ``"element_score"``.
            ``"total_score"`` is averaged with ``np.mean``.
            ``"element_score"`` is either a string holding a Python dict
            literal or an already-parsed dict; its values are iterables of
            numeric votes.
        vote_threshold: Minimum sum of votes for an element to be labelled
            ``1``. Defaults to ``2``, the value previously hard-coded.

    Returns:
        A new list with one dict per input item. ``"prompt"`` and
        ``"img_path"`` are copied through, ``"total_score"`` is the mean
        rounded half-up to the nearest ``int``, and ``"element_score"`` maps
        each element name to ``1`` or ``0``.

    Raises:
        KeyError: If an item lacks one of the required keys.
        ValueError, SyntaxError: If a string ``"element_score"`` is not a
            valid Python literal.
    """
    new_data = []

    for item in input_data:
        raw_elements = item["element_score"]
        # literal_eval only accepts strings; pass an already-parsed mapping
        # through unchanged so the function also works on pre-decoded data.
        if isinstance(raw_elements, str):
            element_votes = ast.literal_eval(raw_elements)
        else:
            element_votes = raw_elements

        mean_score = float(np.mean(item["total_score"]))

        new_entry = {
            "prompt": item["prompt"],
            "img_path": item["img_path"],
            # Round half-up. The previous int(round(mean, 2)) truncated, so a
            # mean of 3.67 was labelled 3 instead of 4.
            "total_score": int(np.floor(mean_score + 0.5)),
            "element_score": {
                element: 1 if sum(votes) >= vote_threshold else 0
                for element, votes in element_votes.items()
            },
        }
        new_data.append(new_entry)

    return new_data

# Collapse the raw annotations loaded into `data`.
transformed_data = transform_json(data)

# Preview the first four transformed records as pretty-printed JSON.
print(json.dumps(transformed_data[:4], indent=4))

[
    {
        "prompt": "A puffin sitting in booth while eating a pastry at a diner. Etching",
        "img_path": "SDXL-Turbo/00110.png",
        "total_score": 3,
        "element_score": {
            "puffin (animal)": 1,
            "booth (object)": 1,
            "pastry (food)": 1,
            "diner (location)": 1,
            "sitting (activity)": 1,
            "eating (activity)": 0,
            "Etching (attribute)": 0
        }
    },
    {
        "prompt": "A puffin sitting in booth while eating a pastry at a diner. Etching",
        "img_path": "Playground_v2.5/00110.png",
        "total_score": 3,
        "element_score": {
            "puffin (animal)": 1,
            "booth (object)": 1,
            "pastry (food)": 1,
            "diner (location)": 1,
            "sitting (activity)": 1,
            "eating (activity)": 0,
            "Etching (attribute)": 1
        }
    },
    {
        "prompt": "A puffin sitting in booth while eating a pastry at a diner. Et

In [3]:
import json
# Write the transformed records to disk as indented JSON.
with open("transformed_data.json", mode="w") as outfile:
    json.dump(transformed_data, fp=outfile, indent=4)

In [4]:
def create_vision_lm_format(train_data):
    """Expand transformed items into "messages"-style question/answer records.

    For every item, one record asks for the overall quality score and is
    answered with ``str(item["total_score"])``. It is followed by one record
    per key of ``item["element_score"]`` asking whether the image contains
    that element, answered ``"Yes"`` when the stored value equals 1 and
    ``"No"`` otherwise. Every record lists ``item["img_path"]`` under
    ``"images"``.

    Args:
        train_data: Iterable of dicts with the keys ``"total_score"``,
            ``"img_path"`` and ``"element_score"`` (a mapping).

    Returns:
        A flat list of record dicts in the order described above.
    """
    def _qa_record(question, answer, image_path):
        # One user turn and one assistant turn tied to a single image.
        return {
            "messages": [
                {"content": question, "role": "user"},
                {"content": answer, "role": "assistant"},
            ],
            "images": [image_path],
        }

    records = []

    for sample in train_data:
        records.append(
            _qa_record(
                "<image>What is the overall quality score of this image on a scale of 1-5?",
                str(sample["total_score"]),
                sample["img_path"],
            )
        )
        records.extend(
            _qa_record(
                f"<image>Does this image contain {name}?",
                "Yes" if flag == 1 else "No",
                sample["img_path"],
            )
            for name, flag in sample["element_score"].items()
        )

    return records

# Build the "messages"-style records from the transformed annotations.
vision_lm_formatted = create_vision_lm_format(transformed_data)

# Use json.dump to serialise into the open file handle. The previous
# json.dumps call only returned a string that was discarded, which left
# vision_lm_training.json empty.
with open('vision_lm_training.json', 'w') as f:
    json.dump(vision_lm_formatted, f, indent=4)

# Show the first two records as a sanity check.
print(json.dumps(vision_lm_formatted[:2], indent=4))


[
    {
        "messages": [
            {
                "content": "<image>What is the overall quality score of this image on a scale of 1-5?",
                "role": "user"
            },
            {
                "content": "3",
                "role": "assistant"
            }
        ],
        "images": [
            "SDXL-Turbo/00110.png"
        ]
    },
    {
        "messages": [
            {
                "content": "<image>Does this image contain puffin (animal)?",
                "role": "user"
            },
            {
                "content": "Yes",
                "role": "assistant"
            }
        ],
        "images": [
            "SDXL-Turbo/00110.png"
        ]
    }
]


In [5]:
def create_sharegpt_format(train_data):
    """Expand transformed items into ShareGPT-style conversation records.

    For every item, one conversation asks for the overall quality score and
    is answered with ``str(item["total_score"])``. It is followed by one
    conversation per key of ``item["element_score"]`` asking whether the
    image contains that element, answered ``"Yes"`` when the stored value
    equals 1 and ``"No"`` otherwise. Every record lists ``item["img_path"]``
    under ``"images"``.

    Args:
        train_data: Iterable of dicts with the keys ``"total_score"``,
            ``"img_path"`` and ``"element_score"`` (a mapping).

    Returns:
        A flat list of record dicts in the order described above.
    """
    def _conversation(question, answer, image_path):
        # One human turn and one gpt turn tied to a single image.
        return {
            "conversations": [
                {"from": "human", "value": question},
                {"from": "gpt", "value": answer},
            ],
            "images": [image_path],
        }

    records = []

    for sample in train_data:
        # Overall-score conversation comes first for each image.
        records.append(
            _conversation(
                "<image>What is the overall quality score of this image on a scale of 1-5?",
                str(sample["total_score"]),
                sample["img_path"],
            )
        )
        # Then one yes/no conversation per annotated element.
        records.extend(
            _conversation(
                f"<image>Does this image contain {name}?",
                "Yes" if flag == 1 else "No",
                sample["img_path"],
            )
            for name, flag in sample["element_score"].items()
        )

    return records

# Build the ShareGPT-style records from the transformed annotations.
sharegpt_formatted = create_sharegpt_format(transformed_data)

# Persist every record to disk as indented JSON.
with open('sharegpt_training.json', mode='w') as f:
    json.dump(sharegpt_formatted, fp=f, indent=4)

# Show the first record as a sanity check.
print(json.dumps(sharegpt_formatted[0], indent=4))


{
    "conversations": [
        {
            "from": "human",
            "value": "<image>What is the overall quality score of this image on a scale of 1-5?"
        },
        {
            "from": "gpt",
            "value": "3"
        }
    ],
    "images": [
        "SDXL-Turbo/00110.png"
    ]
}
