Loading JSON Data and Detection Data

In [1]:
import os
import json

def load_json_data(filepath):
    """Load JSON data from a file."""
    print(f"Loading data from {filepath}")
    with open(filepath, 'r') as file:
        data = json.load(file)
    print(f"Loaded {len(data)} records from {filepath}")
    return data

def load_detection_data(img_folder):
    """Load detection data from the specified folder."""
    image_directory = '/Users/jeffreysherer/Dissertation/data/imgs-1'
    detection_json_path = os.path.join(image_directory, img_folder, 'detection.json')
    print(f"Attempting to load detection data from {detection_json_path}")
    if os.path.exists(detection_json_path):
        with open(detection_json_path, 'r') as file:
            detections = json.load(file)
        print(f"Loaded detection data for {img_folder}: {detections}")
        return detections
    else:
        print(f"No detection data found for {img_folder}")
        return []


Integrating Detection Data with Train, Validate, and Test Data

In [2]:
def integrate_detection_data(data):
    """Integrate detection data into the main dataset entries."""
    for item in data:
        folder_name = item['img_name'].split('/')[0]  # Assuming folder name is part of img_name
        detection_data = load_detection_data(folder_name)
        
        # Debugging: Print which folder's data is being processed
        print(f"Processing {folder_name}, detection data: {detection_data}")
        
        if detection_data:
            item['detection'] = detection_data
        else:
            item['detection'] = []  # Ensure there's a 'detection' key even if no data
        
    return data

# Set up file paths
base_path = '/Users/jeffreysherer/Desktop/Dissertation/Slake1.0'
train_json_path = os.path.join(base_path, 'train.json')
validate_json_path = os.path.join(base_path, 'validate.json')
test_json_path = os.path.join(base_path, 'test.json')

# Load and process data
train_data = load_json_data(train_json_path)
validate_data = load_json_data(validate_json_path)
test_data = load_json_data(test_json_path)

updated_train_data = integrate_detection_data(train_data)
updated_validate_data = integrate_detection_data(validate_data)
updated_test_data = integrate_detection_data(test_data)


Loading data from /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/train.json
Loaded 9835 records from /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/train.json
Loading data from /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/validate.json
Loaded 2099 records from /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/validate.json
Loading data from /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/test.json
Loaded 2094 records from /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/test.json
Attempting to load detection data from /Users/jeffreysherer/Dissertation/data/imgs-1/xmlab1/detection.json
Loaded detection data for xmlab1: [{'Liver': [54.0, 106.0, 30.0, 31.0]}]
Processing xmlab1, detection data: [{'Liver': [54.0, 106.0, 30.0, 31.0]}]
Attempting to load detection data from /Users/jeffreysherer/Dissertation/data/imgs-1/xmlab1/detection.json
Loaded detection data for xmlab1: [{'Liver': [54.0, 106.0, 30.0, 31.0]}]
Processing xmlab1, detection data: [{'Liver': [54.0, 106.0, 3

Transforming Data to Include Detection in Conversations

In [3]:
import uuid

def transform_data(data, detection_before_answer=True):
    """Transform the dataset to include detection data in the conversation field."""
    transformed_data = []
    for entry in data:
        unique_id = str(uuid.uuid4())  # Generate a unique ID for each entry
        img_path = entry['img_name']
        question = entry['question']
        answer = entry['answer']

        # Construct the detection string if available
        detection_info = ""
        if 'detection' in entry and entry['detection']:
            for detection in entry['detection']:
                class_name, bbox = next(iter(detection.items()))
                detection_info += f"Class: {class_name}; Bounding box: {bbox}; "

        if detection_before_answer:
            gpt_value = f"{detection_info}Answer: {answer}" if detection_info else f"Answer: {answer}"
        else:
            gpt_value = f"Answer: {answer}; {detection_info}" if detection_info else f"Answer: {answer}"

        # Assemble the transformed entry
        transformed_entry = {
            "id": unique_id,
            "image": img_path,
            "conversations": [
                {"from": "human", "value": f"<image>\n{question}"},
                {"from": "gpt", "value": gpt_value}
            ]
        }
        transformed_data.append(transformed_entry)
    return transformed_data

# Transform all datasets
transformed_train_data_bbf = transform_data(updated_train_data, detection_before_answer=True)
transformed_train_data_bbl = transform_data(updated_train_data, detection_before_answer=False)
transformed_validate_data_bbf = transform_data(updated_validate_data, detection_before_answer=True)
transformed_validate_data_bbl = transform_data(updated_validate_data, detection_before_answer=False)
transformed_test_data_bbf = transform_data(updated_test_data, detection_before_answer=True)
transformed_test_data_bbl = transform_data(updated_test_data, detection_before_answer=False)


Saving Transformed Data to Files

In [4]:
# Function to save data to a file
def save_to_file(data, file_path):
    try:
        with open(file_path, 'w') as file:
            json.dump(data, file, indent=4)
        print(f"Data successfully saved to {file_path}")
    except Exception as e:
        print(f"Failed to save data to {file_path}: {str(e)}")

# Save transformed data to files
save_to_file(transformed_train_data_bbf, os.path.join(base_path, 'augmented', 'BBF_train.json'))
save_to_file(transformed_train_data_bbl, os.path.join(base_path, 'augmented', 'BBL_train.json'))
save_to_file(transformed_validate_data_bbf, os.path.join(base_path, 'augmented', 'BBF_validate.json'))
save_to_file(transformed_validate_data_bbl, os.path.join(base_path, 'augmented', 'BBL_validate.json'))
save_to_file(transformed_test_data_bbf, os.path.join(base_path, 'augmented', 'BBF_test.json'))
save_to_file(transformed_test_data_bbl, os.path.join(base_path, 'augmented', 'BBL_test.json'))


Data successfully saved to /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/BBF_train.json
Data successfully saved to /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/BBL_train.json
Data successfully saved to /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/BBF_validate.json
Data successfully saved to /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/BBL_validate.json
Data successfully saved to /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/BBF_test.json
Data successfully saved to /Users/jeffreysherer/Desktop/Dissertation/Slake1.0/augmented/BBL_test.json
