In [13]:
import json
import os

def labelstudio_to_doccano(labelstudio_json):
    """Convert a Label Studio JSON export into Doccano JSONL-style entries.

    Args:
        labelstudio_json: The Label Studio export, either as a JSON document
            (``str``, ``bytes`` or ``bytearray``) or as the already-parsed
            data. A single task dict is treated as a one-element list.

    Returns:
        A list of dicts, one per task, each with the keys ``"id"``,
        ``"text"``, ``"label"`` (a list of ``[start, end, label]`` spans)
        and ``"Comments"`` (always an empty list).

    Raises:
        json.JSONDecodeError: If a string input is not valid JSON.
        KeyError: If a task has no ``"id"`` or no ``"data"``/``"text"``.
    """
    if isinstance(labelstudio_json, (str, bytes, bytearray)):
        tasks = json.loads(labelstudio_json)
    else:
        # Already parsed by the caller; no need for a dumps/loads round trip.
        tasks = labelstudio_json
    if isinstance(tasks, dict):
        tasks = [tasks]

    doccano_data = []
    for entry in tasks:
        text = entry["data"]["text"]
        labels = []

        for annotation in entry.get("annotations") or []:
            for result in annotation.get("result") or []:
                value = result.get("value")
                if not isinstance(value, dict):
                    # e.g. relation results carry no "value" payload.
                    continue
                span_labels = value.get("labels")
                if not span_labels or "start" not in value or "end" not in value:
                    # Not a span annotation (choices, textarea, ...): nothing
                    # that can be expressed as a Doccano span.
                    continue
                # Only the first label of a span is kept.
                labels.append([value["start"], value["end"], span_labels[0]])

        doccano_data.append(
            {
                "id": entry["id"],
                "text": text,
                "label": labels,
                "Comments": [],
            }
        )

    return doccano_data

def convert_files(input_dir, output_dir):
    """Convert every Label Studio ``.json`` file under a directory tree.

    Walks ``input_dir`` recursively and, for each file whose name ends in
    ``.json``, writes the converted entries to the same relative path under
    ``output_dir`` with a ``.jsonl`` extension (one JSON object per line).

    Conversion is best-effort: a file that cannot be read, parsed,
    converted or written is reported on stdout and skipped, and the walk
    continues with the next file.

    Args:
        input_dir: Root directory containing Label Studio JSON exports.
        output_dir: Root directory that receives the ``.jsonl`` files.
    """
    for root, _, filenames in os.walk(input_dir):
        for filename in filenames:
            if not filename.endswith(".json"):  # Only JSON files are converted
                continue

            input_path = os.path.join(root, filename)
            relative_stem = os.path.splitext(os.path.relpath(input_path, input_dir))[0]
            output_path = os.path.join(output_dir, relative_stem + ".jsonl")

            try:
                with open(input_path, "r", encoding="utf-8") as source:
                    raw_json = source.read()
            except (OSError, UnicodeDecodeError) as e:
                print(f"Skipping unreadable file {input_path}: {e}")
                continue

            # Validate here so malformed files get a precise message.
            try:
                json.loads(raw_json)
            except json.JSONDecodeError as e:
                print(f"Skipping invalid file {input_path}: {e}")
                continue

            try:
                doccano_entries = labelstudio_to_doccano(raw_json)
            except (KeyError, IndexError, TypeError) as e:
                print(f"Skipping file with unexpected structure {input_path}: {e!r}")
                continue

            # Serialize before opening the output so a failure cannot leave a
            # half-written file behind.
            lines = [json.dumps(entry, ensure_ascii=False) + "\n" for entry in doccano_entries]

            try:
                output_parent = os.path.dirname(output_path)
                if output_parent:  # os.makedirs("") raises FileNotFoundError
                    os.makedirs(output_parent, exist_ok=True)
                with open(output_path, "w", encoding="utf-8") as target:
                    target.writelines(lines)
            except OSError as e:
                print(f"Error in writing to file {output_path}: {e}")
                continue

            print(f"Conversion complete! File saved to {output_path}")

def main():
    """Run the conversion with this script's fixed input and output directories."""
    source_root, target_root = (
        "../Label-Studio_Annotations/JSON_Reports_Annotated",
        "../Doccano_Annotations/Label-Studio_to_Doccano/JSON_Reports_Annotated",
    )
    convert_files(source_root, target_root)

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Conversion complete! File saved to ../Doccano_Annotations/Label-Studio_to_Doccano/JSON_Reports_Annotated\9014961272\602581.jsonl
Conversion complete! File saved to ../Doccano_Annotations/Label-Studio_to_Doccano/JSON_Reports_Annotated\9014961272\649968.jsonl
