In [1]:
import json

# Path to the original train.json file
# NOTE(review): absolute, machine-specific path; this cell only runs where that file exists.
train_json_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/train.json'

# Load the train.json file; the parsed content is bound to the global `train_data`
with open(train_json_path, 'r') as file:
    train_data = json.load(file)

# Display the first few entries to verify the content
print(train_data[:3])  # Adjust the number of entries to display based on your need


[{'img_id': 1, 'img_name': 'xmlab1/source.jpg', 'question': 'What modality is used to take this image?', 'answer': 'MRI', 'q_lang': 'en', 'location': 'Abdomen', 'modality': 'MRI', 'answer_type': 'OPEN', 'base_type': 'vqa', 'content_type': 'Modality', 'triple': ['vhead', '_', '_'], 'qid': 0}, {'img_id': 1, 'img_name': 'xmlab1/source.jpg', 'question': 'Which part of the body does this image belong to?', 'answer': 'Abdomen', 'q_lang': 'en', 'location': 'Abdomen', 'modality': 'MRI', 'answer_type': 'OPEN', 'base_type': 'vqa', 'content_type': 'Position', 'triple': ['vhead', '_', '_'], 'qid': 1}, {'img_id': 1, 'img_name': 'xmlab1/source.jpg', 'question': 'What is the mr weighting in this image?', 'answer': 'T2', 'q_lang': 'en', 'location': 'Abdomen', 'modality': 'MRI', 'answer_type': 'OPEN', 'base_type': 'vqa', 'content_type': 'Modality', 'triple': ['vhead', '_', '_'], 'qid': 2}]


In [2]:
import os

def load_detection_json(img_folder, base_dir='/Users/jeffreysherer/Dissertation/data/imgs-1'):
    """Load the parsed ``detection.json`` stored in one image folder.

    Args:
        img_folder: Name of the image folder (e.g. ``'xmlab1'``), relative to
            ``base_dir``.
        base_dir: Directory that holds the image folders. Defaults to the path
            that used to be hard-coded, so existing one-argument calls behave
            exactly as before.

    Returns:
        The decoded JSON content of ``<base_dir>/<img_folder>/detection.json``,
        or ``None`` when that file does not exist.
    """
    detection_json_path = os.path.join(base_dir, img_folder, 'detection.json')

    # A missing file is an expected situation and is reported as None.
    if not os.path.exists(detection_json_path):
        return None

    with open(detection_json_path, 'r') as file:
        return json.load(file)

# Test the function with an example folder
# (prints None if that folder has no detection.json, since the loader returns None then)
example_folder = 'xmlab1'
example_detection = load_detection_json(example_folder)
print(example_detection)  # Display the content of the detection.json for verification


[{'Liver': [54.0, 106.0, 30.0, 31.0]}]


In [3]:
# Attach each entry's folder-level detection data to the train_data entries
def integrate_detection_data(train_data):
    """Add a ``'detection'`` field to every item of ``train_data`` in place.

    The folder is the part of ``item['img_name']`` before the first ``'/'``;
    its detection data is fetched with ``load_detection_json``. Items whose
    folder yields no (or empty) detection data get an empty list, so the
    field is always present. One progress line is printed per item.

    Args:
        train_data: Iterable of dicts, each with an ``'img_name'`` string.

    Returns:
        The same ``train_data`` object that was passed in.
    """
    for record in train_data:
        # Everything before the first slash is the image folder name.
        folder_name = record['img_name'].partition('/')[0]
        boxes = load_detection_json(folder_name)

        # Debugging aid: report whether anything was found for this folder.
        print(f"Processing {folder_name}, detection data loaded: {bool(boxes)}")

        record['detection'] = boxes if boxes else []

    return train_data

# Update train_data with detection information
# (integrate_detection_data returns the list it was given, so `updated_train_data`
# and `train_data` refer to the same, now-modified, object)
updated_train_data = integrate_detection_data(train_data)

# Print a few updated entries to verify that detection data has been added correctly
print(json.dumps(updated_train_data[:3], indent=4))  # Adjust the number of entries to display based on your need


Processing xmlab1, detection data loaded: True
Processing xmlab1, detection data loaded: True
Processing xmlab1, detection data loaded: True
Processing xmlab1, detection data loaded: True
Processing xmlab1, detection data loaded: True
Processing xmlab1, detection data loaded: True
Processing xmlab1, detection data loaded: True
Processing xmlab100, detection data loaded: False
Processing xmlab100, detection data loaded: False
Processing xmlab100, detection data loaded: False
Processing xmlab100, detection data loaded: False
Processing xmlab100, detection data loaded: False
Processing xmlab100, detection data loaded: False
Processing xmlab100, detection data loaded: False
Processing xmlab100, detection data loaded: False
Processing xmlab100, detection data loaded: False
Processing xmlab101, detection data loaded: False
Processing xmlab101, detection data loaded: False
Processing xmlab101, detection data loaded: False
Processing xmlab101, detection data loaded: False
Processing xmlab101, 

In [4]:
import os
import json
import uuid  # To generate unique IDs

# Base path where the original train.json is located
# NOTE(review): absolute, machine-specific path.
base_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0'

# Directory for the augmented files
augmented_dir = os.path.join(base_path, 'augmented')

# Create the augmented directory if it doesn't exist
os.makedirs(augmented_dir, exist_ok=True)

# Directory containing the image folders with detection.json files
# (read as a global by load_detection_data)
image_directory = '/Users/jeffreysherer/Dissertation/data/imgs-1'

# Path to the original train.json file
train_json_path = os.path.join(base_path, 'train.json')

# Load the train.json file; this rebinds `train_data` to a freshly parsed copy
with open(train_json_path, 'r') as file:
    train_data = json.load(file)

# Read <image_directory>/<img_folder>/detection.json when it is present
def load_detection_data(img_folder):
    """Return the parsed ``detection.json`` of one image folder.

    The folder is resolved against the module-level ``image_directory``.

    Args:
        img_folder: Folder name relative to ``image_directory``.

    Returns:
        The decoded JSON content, or ``None`` if the file does not exist.
    """
    candidate = os.path.join(image_directory, img_folder, 'detection.json')
    if not os.path.exists(candidate):
        return None
    with open(candidate, 'r') as handle:
        return json.load(handle)

# Add a 'detection' field (bounding boxes, or an empty list) to every entry
def update_data_with_bounding_boxes(data):
    """Attach detection data to each entry of ``data`` in place.

    The image folder is ``os.path.dirname(entry['img_name'])`` and is looked
    up through ``load_detection_data``. A missing or empty result is stored
    as ``[]``.

    Args:
        data: Iterable of dicts, each with an ``'img_name'`` key.

    Returns:
        The same ``data`` object, after modification.
    """
    for record in data:
        folder = os.path.dirname(record['img_name'])
        # Any falsy lookup result (None, empty container) collapses to [].
        record['detection'] = load_detection_data(folder) or []
    return data

# Update the train data with bounding box coordinates
# (entries are modified in place; `updated_train_data` is the same list object as `train_data`)
updated_train_data = update_data_with_bounding_boxes(train_data)

# Convert raw question/answer entries into conversation-style records
def transform_data(data):
    """Build one conversation record per entry of ``data``.

    Each record has a fresh random UUID as ``"id"``, the entry's
    ``'img_name'`` as ``"image"``, and a two-turn ``"conversations"`` list
    (human prompt with the question, gpt reply with the answer). A
    ``"detection"`` key is copied over only when the entry has a truthy
    ``'detection'`` value.

    Args:
        data: Iterable of dicts with ``'img_name'``, ``'question'`` and
            ``'answer'`` keys, and optionally ``'detection'``.

    Returns:
        A new list of record dicts, in input order.
    """
    def _to_record(entry):
        record_id = str(uuid.uuid4())
        image_name = entry['img_name']
        question, answer = entry['question'], entry['answer']

        human_turn = {
            "from": "human",
            "value": f"<image>\nAnswer the question about this image. Question: {question}",
        }
        gpt_turn = {"from": "gpt", "value": f"Answer: {answer}"}

        record = {
            "id": record_id,
            "image": image_name,
            "conversations": [human_turn, gpt_turn],
        }
        # Empty or absent detection data is left out of the record entirely.
        if entry.get('detection'):
            record['detection'] = entry['detection']
        return record

    return [_to_record(entry) for entry in data]

# Transform the train data
transformed_train_data = transform_data(updated_train_data)

# Path to save the final train.json file
final_train_json_path = os.path.join(augmented_dir, 'final_train.json')

# Write the transformed train data to a new JSON file
# (mode 'w' replaces any existing final_train.json)
with open(final_train_json_path, 'w') as file:
    json.dump(transformed_train_data, file, indent=4)

print("Final train.json has been saved to", final_train_json_path)


Final train.json has been saved to /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/final_train.json


In [5]:
import json
import random

# Path to the final transformed train.json file
# NOTE(review): absolute, machine-specific path.
final_train_json_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/final_train.json'

# Load the final transformed train.json file
with open(final_train_json_path, 'r') as file:
    transformed_data = json.load(file)

# Randomly sample 5 entries from the dataset to inspect
# (no seed is set in this cell, so the sample differs between runs;
# random.sample raises ValueError if fewer than 5 entries are available)
sampled_entries = random.sample(transformed_data, 5)

# Print the sampled entries
for entry in sampled_entries:
    print(json.dumps(entry, indent=4))


{
    "id": "8e8afbf3-e016-4314-91c8-57311ad6cac3",
    "image": "xmlab194/source.jpg",
    "conversations": [
        {
            "from": "human",
            "value": "<image>\nAnswer the question about this image. Question: What diseases are included in the picture?"
        },
        {
            "from": "gpt",
            "value": "Answer: Pneumothorax"
        }
    ],
    "detection": [
        {
            "Pneumothorax": [
                563.0,
                110.0,
                316.0,
                256.0
            ]
        }
    ]
}
{
    "id": "83bbbd3a-f425-4d0d-939a-67b14c16c182",
    "image": "xmlab528/source.jpg",
    "conversations": [
        {
            "from": "human",
            "value": "<image>\nAnswer the question about this image. Question: What is the mr weighting in this image?"
        },
        {
            "from": "gpt",
            "value": "Answer: T2"
        }
    ],
    "detection": [
        {
            "Brain Edema": [
         

Now processing the test and validate JSON files


In [6]:
import json
import uuid  # To generate unique IDs
import os

# Paths to the original validate.json and test.json files
# NOTE(review): absolute, machine-specific paths.
validate_json_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/validate.json'
test_json_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/test.json'

# Directory containing the image folders with detection.json files
# (read as a global by load_detection_data)
image_directory = '/Users/jeffreysherer/Dissertation/data/imgs-1'

# Load the validate.json file
with open(validate_json_path, 'r') as file:
    validate_data = json.load(file)

# Load the test.json file
with open(test_json_path, 'r') as file:
    test_data = json.load(file)

# Read <image_directory>/<img_folder>/detection.json when it is present
def load_detection_data(img_folder):
    """Return the parsed ``detection.json`` of one image folder.

    The folder is resolved against the module-level ``image_directory``.

    Args:
        img_folder: Folder name relative to ``image_directory``.

    Returns:
        The decoded JSON content, or ``None`` if the file does not exist.
    """
    candidate = os.path.join(image_directory, img_folder, 'detection.json')
    if not os.path.exists(candidate):
        return None
    with open(candidate, 'r') as handle:
        return json.load(handle)

# Add a 'detection' field (bounding boxes, or an empty list) to every entry
def update_data_with_bounding_boxes(data):
    """Attach detection data to each entry of ``data`` in place.

    The image folder is ``os.path.dirname(entry['img_name'])`` and is looked
    up through ``load_detection_data``. A missing or empty result is stored
    as ``[]``.

    Args:
        data: Iterable of dicts, each with an ``'img_name'`` key.

    Returns:
        The same ``data`` object, after modification.
    """
    for record in data:
        folder = os.path.dirname(record['img_name'])
        # Any falsy lookup result (None, empty container) collapses to [].
        record['detection'] = load_detection_data(folder) or []
    return data

# Convert raw question/answer entries into conversation-style records
def transform_data(data):
    """Build one conversation record per entry of ``data``.

    Each record has a fresh random UUID as ``"id"``, the entry's
    ``'img_name'`` as ``"image"``, and a two-turn ``"conversations"`` list
    (human prompt with the question, gpt reply with the answer). A
    ``"detection"`` key is copied over only when the entry has a truthy
    ``'detection'`` value.

    Args:
        data: Iterable of dicts with ``'img_name'``, ``'question'`` and
            ``'answer'`` keys, and optionally ``'detection'``.

    Returns:
        A new list of record dicts, in input order.
    """
    records = []
    for entry in data:
        record_id = str(uuid.uuid4())
        image_name = entry['img_name']
        question, answer = entry['question'], entry['answer']

        record = {
            "id": record_id,
            "image": image_name,
            "conversations": [
                {
                    "from": "human",
                    "value": f"<image>\nAnswer the question about this image. Question: {question}",
                },
                {"from": "gpt", "value": f"Answer: {answer}"},
            ],
        }
        # Empty or absent detection data is left out of the record entirely.
        if entry.get('detection'):
            record['detection'] = entry['detection']
        records.append(record)
    return records

# Update validate and test data with bounding box coordinates
# (entries are modified in place; the `updated_*` names alias `validate_data` / `test_data`)
updated_validate_data = update_data_with_bounding_boxes(validate_data)
updated_test_data = update_data_with_bounding_boxes(test_data)

# Transform validate and test data
transformed_validate_data = transform_data(updated_validate_data)
transformed_test_data = transform_data(updated_test_data)

# Display the first few entries to verify the content
print(json.dumps(transformed_validate_data[:3], indent=4))  # Adjust the number of entries to display based on your need
print(json.dumps(transformed_test_data[:3], indent=4))  # Adjust the number of entries to display based on your need

# Paths to save the final transformed JSON files
# NOTE(review): absolute paths; nothing in this cell creates the 'augmented' directory.
final_validate_json_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/final_validate.json'
final_test_json_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/final_test.json'

# Write the transformed validate data to a new JSON file
with open(final_validate_json_path, 'w') as file:
    json.dump(transformed_validate_data, file, indent=4)

# Write the transformed test data to a new JSON file
with open(final_test_json_path, 'w') as file:
    json.dump(transformed_test_data, file, indent=4)

print("Final validate.json has been saved to", final_validate_json_path)
print("Final test.json has been saved to", final_test_json_path)


[
    {
        "id": "326393d0-8ec0-43f4-b0eb-a980edb88ba3",
        "image": "xmlab0/source.jpg",
        "conversations": [
            {
                "from": "human",
                "value": "<image>\nAnswer the question about this image. Question: What modality is used to take this image?"
            },
            {
                "from": "gpt",
                "value": "Answer: MRI"
            }
        ],
        "detection": [
            {
                "Liver": [
                    33.0,
                    67.0,
                    98.0,
                    108.0
                ]
            },
            {
                "Left Kidney": [
                    148.0,
                    142.0,
                    46.0,
                    32.0
                ]
            },
            {
                "Right Kidney": [
                    65.0,
                    138.0,
                    41.0,
                    37.0
                ]
            },
     

In [7]:
import os
import json
import uuid  # To generate unique IDs

# Paths to the original validate.json and test.json files
# NOTE(review): absolute, machine-specific paths.
validate_json_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/validate.json'
test_json_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/test.json'

# Directory containing the image folders with detection.json files
# (read as a global by load_detection_data)
image_directory = '/Users/jeffreysherer/Dissertation/data/imgs-1'

# Load the validate.json file; rebinds `validate_data` to a freshly parsed copy
with open(validate_json_path, 'r') as file:
    validate_data = json.load(file)

# Load the test.json file; rebinds `test_data` to a freshly parsed copy
with open(test_json_path, 'r') as file:
    test_data = json.load(file)

# Read <image_directory>/<img_folder>/detection.json when it is present
def load_detection_data(img_folder):
    """Return the parsed ``detection.json`` of one image folder.

    The folder is resolved against the module-level ``image_directory``.

    Args:
        img_folder: Folder name relative to ``image_directory``.

    Returns:
        The decoded JSON content, or ``None`` if the file does not exist.
    """
    candidate = os.path.join(image_directory, img_folder, 'detection.json')
    if not os.path.exists(candidate):
        return None
    with open(candidate, 'r') as handle:
        return json.load(handle)

# Build conversation-style records and attach each image folder's detections
def transform_data_with_bounding_boxes(data):
    """Transform entries into conversation records that always carry ``"detection"``.

    For every entry a record is built with a random UUID ``"id"``, the
    entry's ``'img_name'`` as ``"image"``, a human/gpt ``"conversations"``
    pair, and a ``"detection"`` value obtained from ``load_detection_data``
    for ``os.path.dirname(img_name)``. When that lookup is falsy the
    ``"detection"`` value is ``[]``.

    Args:
        data: Iterable of dicts with ``'img_name'``, ``'question'`` and
            ``'answer'`` keys.

    Returns:
        A new list of record dicts, in input order.
    """
    records = []
    for entry in data:
        record_id = str(uuid.uuid4())
        image_name = entry['img_name']
        question, answer = entry['question'], entry['answer']

        # detection.json lives in the folder part of the image name.
        boxes = load_detection_data(os.path.dirname(image_name))

        records.append({
            "id": record_id,
            "image": image_name,
            "conversations": [
                {
                    "from": "human",
                    "value": f"<image>\nAnswer the question about this image. Question: {question}",
                },
                {"from": "gpt", "value": f"Answer: {answer}"},
            ],
            "detection": boxes if boxes else [],
        })

    return records

# Transform validate and test data
transformed_validate_data = transform_data_with_bounding_boxes(validate_data)
transformed_test_data = transform_data_with_bounding_boxes(test_data)

# Display the first few entries to verify the content
print(json.dumps(transformed_validate_data[:3], indent=4))  # Adjust the number of entries to display based on your need
print(json.dumps(transformed_test_data[:3], indent=4))  # Adjust the number of entries to display based on your need

# Paths to save the final transformed JSON files
# NOTE(review): absolute paths; nothing in this cell creates the 'augmented' directory.
final_validate_json_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/final_validate.json'
final_test_json_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/final_test.json'

# Write the transformed validate data to a new JSON file
# (mode 'w' replaces any existing file at that path)
with open(final_validate_json_path, 'w') as file:
    json.dump(transformed_validate_data, file, indent=4)

# Write the transformed test data to a new JSON file
with open(final_test_json_path, 'w') as file:
    json.dump(transformed_test_data, file, indent=4)

print("Final validate.json has been saved to", final_validate_json_path)
print("Final test.json has been saved to", final_test_json_path)


[
    {
        "id": "8f630305-f11c-4ed0-b2e0-a11b7a94a5e5",
        "image": "xmlab0/source.jpg",
        "conversations": [
            {
                "from": "human",
                "value": "<image>\nAnswer the question about this image. Question: What modality is used to take this image?"
            },
            {
                "from": "gpt",
                "value": "Answer: MRI"
            }
        ],
        "detection": [
            {
                "Liver": [
                    33.0,
                    67.0,
                    98.0,
                    108.0
                ]
            },
            {
                "Left Kidney": [
                    148.0,
                    142.0,
                    46.0,
                    32.0
                ]
            },
            {
                "Right Kidney": [
                    65.0,
                    138.0,
                    41.0,
                    37.0
                ]
            },
     

In [8]:
import os
import json
import uuid  # To generate unique IDs

# Read <image_directory>/<img_folder>/detection.json when it is present
def load_detection_data(img_folder):
    """Return the parsed ``detection.json`` of one image folder.

    The folder is resolved against the global ``image_directory``, which
    this function expects to exist already when it is called.

    Args:
        img_folder: Folder name relative to ``image_directory``.

    Returns:
        The decoded JSON content, or ``None`` if the file does not exist.
    """
    candidate = os.path.join(image_directory, img_folder, 'detection.json')
    if not os.path.exists(candidate):
        return None
    with open(candidate, 'r') as handle:
        return json.load(handle)

# Build conversation records whose 'detection' key follows the conversations
def transform_data_with_bounding_boxes_last(data):
    """Transform entries into conversation records with ``"detection"`` as the last key.

    For every entry a record is built with a random UUID ``"id"``, the
    entry's ``'img_name'`` as ``"image"``, a human/gpt ``"conversations"``
    pair, and finally a ``"detection"`` value obtained from
    ``load_detection_data`` for ``os.path.dirname(img_name)`` (``[]`` when
    that lookup is falsy). The gpt reply text itself contains only the answer.

    Args:
        data: Iterable of dicts with ``'img_name'``, ``'question'`` and
            ``'answer'`` keys.

    Returns:
        A new list of record dicts, in input order.
    """
    def _to_record(entry):
        record_id = str(uuid.uuid4())
        image_name = entry['img_name']
        question, answer = entry['question'], entry['answer']

        # detection.json lives in the folder part of the image name.
        boxes = load_detection_data(os.path.dirname(image_name))

        record = {
            "id": record_id,
            "image": image_name,
            "conversations": [
                {
                    "from": "human",
                    "value": f"<image>\nAnswer the question about this image. Question: {question}",
                },
                {"from": "gpt", "value": f"Answer: {answer}"},
            ],
        }
        # Inserted last so it follows the answer in the serialized record.
        record['detection'] = boxes if boxes else []
        return record

    return [_to_record(entry) for entry in data]

# Transform the train, validate, and test data with detection coordinates last
# NOTE(review): `updated_train_data`, `updated_validate_data`, `updated_test_data` and
# `augmented_dir` are not defined in this cell; it relies on earlier cells having been run
# in the same kernel. The transform reads only img_name/question/answer from each entry and
# reloads detection data from disk itself.
transformed_train_data_bbl = transform_data_with_bounding_boxes_last(updated_train_data)
transformed_validate_data_bbl = transform_data_with_bounding_boxes_last(updated_validate_data)
transformed_test_data_bbl = transform_data_with_bounding_boxes_last(updated_test_data)

# Paths to save the final BBL JSON files
final_train_json_path_bbl = os.path.join(augmented_dir, 'BBL_train.json')
final_validate_json_path_bbl = os.path.join(augmented_dir, 'BBL_validate.json')
final_test_json_path_bbl = os.path.join(augmented_dir, 'BBL_test.json')

# Write the transformed train data with detection coordinates last to a new JSON file
with open(final_train_json_path_bbl, 'w') as file:
    json.dump(transformed_train_data_bbl, file, indent=4)

# Write the transformed validate data with detection coordinates last to a new JSON file
with open(final_validate_json_path_bbl, 'w') as file:
    json.dump(transformed_validate_data_bbl, file, indent=4)

# Write the transformed test data with detection coordinates last to a new JSON file
with open(final_test_json_path_bbl, 'w') as file:
    json.dump(transformed_test_data_bbl, file, indent=4)

print("BBL_train.json has been saved to", final_train_json_path_bbl)
print("BBL_validate.json has been saved to", final_validate_json_path_bbl)
print("BBL_test.json has been saved to", final_test_json_path_bbl)


In [None]:
import os
import json

# Count the records that carry more than one detection
def count_multiple_detections(data):
    """Return how many entries have a ``'detection'`` value of length > 1.

    Entries without a ``'detection'`` key are not counted.

    Args:
        data: Iterable of containers (typically dicts) that may hold a
            ``'detection'`` sequence.

    Returns:
        The number of matching entries as an ``int``.
    """
    return sum(
        1
        for record in data
        if 'detection' in record and len(record['detection']) > 1
    )

# Paths to the final BBL JSON files
# NOTE(review): absolute, machine-specific paths; the files must already exist on disk.
final_train_json_path_bbl = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/BBL_train.json'
final_validate_json_path_bbl = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/BBL_validate.json'
final_test_json_path_bbl = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/BBL_test.json'

# Load the final BBL train.json file
with open(final_train_json_path_bbl, 'r') as file:
    train_data_bbl = json.load(file)

# Load the final BBL validate.json file
with open(final_validate_json_path_bbl, 'r') as file:
    validate_data_bbl = json.load(file)

# Load the final BBL test.json file
with open(final_test_json_path_bbl, 'r') as file:
    test_data_bbl = json.load(file)

# Count records with more than one detection coordinate
train_multiple_detections = count_multiple_detections(train_data_bbl)
validate_multiple_detections = count_multiple_detections(validate_data_bbl)
test_multiple_detections = count_multiple_detections(test_data_bbl)

print(f"Train records with more than one detection coordinate: {train_multiple_detections}")
print(f"Validate records with more than one detection coordinate: {validate_multiple_detections}")
print(f"Test records with more than one detection coordinate: {test_multiple_detections}")


In [None]:
import json
import os

# Function to verify the dataset
def verify_dataset(file_path, detection_before_answer):
    """Check where the bounding-box text sits in each gpt reply of a dataset file.

    The gpt reply is split on ``';'``. For a detection-before-answer file the
    first segment must contain ``'Bounding box'``; for a detection-after-answer
    file the last segment must.

    Only entries that have exactly two conversation turns and a non-empty
    ``'detection'`` field are checked. Entries without detection data have no
    bounding box to place, so they no longer make the whole file fail.

    Args:
        file_path: Path of the JSON file to verify (a list of record dicts).
        detection_before_answer: ``True`` to expect the bounding box first,
            ``False`` to expect it last.

    Returns:
        ``True`` if every checked entry has the expected layout, else ``False``.
    """
    with open(file_path, 'r') as file:
        data = json.load(file)

    # Which ';'-separated segment must mention the bounding box.
    segment_index = 0 if detection_before_answer else -1

    for entry in data:
        conversations = entry.get('conversations', [])
        if len(conversations) != 2:
            continue

        # No detection data means there is no bounding-box text to position.
        if not entry.get('detection', []):
            continue

        gpt_conversation = conversations[1]['value']
        if 'Bounding box' not in gpt_conversation.split(';')[segment_index]:
            return False
    return True

# Define paths to BBF and BBL files
# NOTE(review): no cell visible in this notebook writes 'BBF.json' or 'BBL.json' (the visible
# cells write BBF_/BBL_{train,validate,test}.json) — confirm these files exist, otherwise
# open() inside verify_dataset raises FileNotFoundError.
bbf_file_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/BBF.json'
bbl_file_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/BBL.json'

# Verify BBF.json
is_bbf_correct = verify_dataset(bbf_file_path, detection_before_answer=True)
print(f"BBF.json correct: {is_bbf_correct}")

# Verify BBL.json
is_bbl_correct = verify_dataset(bbl_file_path, detection_before_answer=False)
print(f"BBL.json correct: {is_bbl_correct}")


In [None]:
import os
import json
import uuid

# Directory containing the image folders with detection.json files
# (read as a global by load_detection_data)
# NOTE(review): absolute, machine-specific paths throughout this block.
image_directory = '/Users/jeffreysherer/Dissertation/data/imgs-1'

# Base path where the original JSON files are located
base_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0'

# Directory for the augmented files (created if missing)
augmented_dir = os.path.join(base_path, 'augmented')
os.makedirs(augmented_dir, exist_ok=True)

# Paths to the original train, validate, and test JSON files
train_json_path = os.path.join(base_path, 'train.json')
validate_json_path = os.path.join(base_path, 'validate.json')
test_json_path = os.path.join(base_path, 'test.json')

# Load the JSON files; each name is rebound to a freshly parsed copy
with open(train_json_path, 'r') as file:
    train_data = json.load(file)

with open(validate_json_path, 'r') as file:
    validate_data = json.load(file)

with open(test_json_path, 'r') as file:
    test_data = json.load(file)

# Read <image_directory>/<img_folder>/detection.json when it is present
def load_detection_data(img_folder):
    """Return the parsed ``detection.json`` of one image folder.

    The folder is resolved against the module-level ``image_directory``.

    Args:
        img_folder: Folder name relative to ``image_directory``.

    Returns:
        The decoded JSON content, or ``None`` if the file does not exist.
    """
    candidate = os.path.join(image_directory, img_folder, 'detection.json')
    if not os.path.exists(candidate):
        return None
    with open(candidate, 'r') as handle:
        return json.load(handle)

# Function to transform the data with detection coordinates
def _first_detection(detection_data):
    """Return ``(class_name, bounding_box)`` of the first detection, or ``None``."""
    if not detection_data:
        return None
    # detection.json holds a list of single-key dicts ({class: box}); a plain
    # {class: box} mapping is accepted as well.
    if isinstance(detection_data, (list, tuple)):
        first = detection_data[0]
    else:
        first = detection_data
    if not isinstance(first, dict) or not first:
        return None
    class_name = next(iter(first))
    return class_name, first[class_name]


def transform_data(data, detection_before_answer=True):
    """Build conversation records whose gpt reply embeds the first detection.

    For each entry, detection data is loaded with ``load_detection_data`` for
    ``os.path.dirname(entry['img_name'])``. When a detection is available the
    gpt reply is::

        Bounding box: <box>; Class: <class>; Answer: <answer>   (before answer)
        Answer: <answer>; Class: <class>; Bounding box: <box>   (after answer)

    Otherwise the reply is just ``Answer: <answer>``. Only the first detection
    is written into the reply text; the full detection data is stored under
    the record's ``"detection"`` key, which is omitted when there is none.

    The previous version called ``.values()`` / ``.keys()`` directly on the
    loaded data, which raises ``AttributeError`` for the list-of-dicts layout
    found in detection.json.

    Args:
        data: Iterable of dicts with ``'img_name'``, ``'question'`` and
            ``'answer'`` keys.
        detection_before_answer: ``True`` to put the bounding box before the
            answer, ``False`` to put it after.

    Returns:
        A new list of record dicts, in input order.
    """
    transformed_data = []
    for entry in data:
        unique_id = str(uuid.uuid4())
        img_path = entry['img_name']
        question = entry['question']
        answer = entry['answer']

        img_folder = os.path.dirname(img_path)
        detection_data = load_detection_data(img_folder)

        first = _first_detection(detection_data)
        if first is None:
            gpt_value = f"Answer: {answer}"
        else:
            class_name, bounding_box = first
            if detection_before_answer:
                gpt_value = f"Bounding box: {bounding_box}; Class: {class_name}; Answer: {answer}"
            else:
                gpt_value = f"Answer: {answer}; Class: {class_name}; Bounding box: {bounding_box}"

        transformed_entry = {
            "id": unique_id,
            "image": img_path,
            "conversations": [
                {
                    "from": "human",
                    "value": f"<image>\nAnswer the question about this image. Question: {question}"
                },
                {
                    "from": "gpt",
                    "value": gpt_value
                }
            ]
        }
        if detection_data:
            transformed_entry['detection'] = detection_data
        transformed_data.append(transformed_entry)
    return transformed_data

# Run transform_data on a dataset and dump the result as indented JSON
def save_transformed_data(data, file_path, detection_before_answer=True):
    """Transform ``data`` and write the records to ``file_path`` as JSON.

    The transformation finishes before the output file is opened, so the
    target is only overwritten once the records are ready.

    Args:
        data: Entries accepted by ``transform_data``.
        file_path: Destination path; opened in ``'w'`` mode.
        detection_before_answer: Forwarded to ``transform_data``.

    Returns:
        None.
    """
    records = transform_data(data, detection_before_answer)
    with open(file_path, 'w') as sink:
        json.dump(records, sink, indent=4)

# Define paths to BBF and BBL files
# BBF = bounding box before the answer, BBL = bounding box after the answer
# (matches the detection_before_answer flag passed below)
bbf_train_path = os.path.join(augmented_dir, 'BBF_train.json')
bbf_validate_path = os.path.join(augmented_dir, 'BBF_validate.json')
bbf_test_path = os.path.join(augmented_dir, 'BBF_test.json')

bbl_train_path = os.path.join(augmented_dir, 'BBL_train.json')
bbl_validate_path = os.path.join(augmented_dir, 'BBL_validate.json')
bbl_test_path = os.path.join(augmented_dir, 'BBL_test.json')

# Save BBF datasets
save_transformed_data(train_data, bbf_train_path, detection_before_answer=True)
save_transformed_data(validate_data, bbf_validate_path, detection_before_answer=True)
save_transformed_data(test_data, bbf_test_path, detection_before_answer=True)

# Save BBL datasets
# (files are opened in 'w' mode, so existing BBL_*.json files at these paths are replaced)
save_transformed_data(train_data, bbl_train_path, detection_before_answer=False)
save_transformed_data(validate_data, bbl_validate_path, detection_before_answer=False)
save_transformed_data(test_data, bbl_test_path, detection_before_answer=False)

print(f"BBF_train.json has been saved to {bbf_train_path}")
print(f"BBF_validate.json has been saved to {bbf_validate_path}")
print(f"BBF_test.json has been saved to {bbf_test_path}")

print(f"BBL_train.json has been saved to {bbl_train_path}")
print(f"BBL_validate.json has been saved to {bbl_validate_path}")
print(f"BBL_test.json has been saved to {bbl_test_path}")

# Verification function
def verify_dataset(file_path, detection_before_answer):
    """Check where the bounding-box text sits in each gpt reply of a dataset file.

    The gpt reply is split on ``';'``. For a detection-before-answer file the
    first segment must contain ``'Bounding box'``; for a detection-after-answer
    file the last segment must.

    Only entries that have exactly two conversation turns and a non-empty
    ``'detection'`` field are checked. ``transform_data`` writes a plain
    ``Answer: ...`` reply (and no ``'detection'`` key) for images without
    detection data; such entries used to make the whole file fail and are now
    skipped.

    Args:
        file_path: Path of the JSON file to verify (a list of record dicts).
        detection_before_answer: ``True`` to expect the bounding box first,
            ``False`` to expect it last.

    Returns:
        ``True`` if every checked entry has the expected layout, else ``False``.
    """
    with open(file_path, 'r') as file:
        data = json.load(file)

    # Which ';'-separated segment must mention the bounding box.
    segment_index = 0 if detection_before_answer else -1

    for entry in data:
        conversations = entry.get('conversations', [])
        if len(conversations) != 2:
            continue

        # No detection data means there is no bounding-box text to position.
        if not entry.get('detection'):
            continue

        gpt_conversation = conversations[1]['value']
        if 'Bounding box' not in gpt_conversation.split(';')[segment_index]:
            return False
    return True

# Verify BBF_train.json (only the train split is checked here)
is_bbf_correct = verify_dataset(bbf_train_path, detection_before_answer=True)
print(f"BBF_train.json correct: {is_bbf_correct}")

# Verify BBL_train.json (only the train split is checked here)
is_bbl_correct = verify_dataset(bbl_train_path, detection_before_answer=False)
print(f"BBL_train.json correct: {is_bbl_correct}")
