In [1]:
import csv
import json
import os
import sys

In [2]:
def convert_csv_to_squad(input_csv_path, output_json_path):
    """
    Convert the templates CSV file to SQuAD-like JSON format.

    CSV columns:
    - dataset
    - Input
    - Question
    - Logical Forms
    - Answer Concepts

    Only "Question", "Input" and "Answer Concepts" are read; they are written
    out as "question", "context" and "answer" respectively. A row is skipped
    when any of those three values is empty or missing.

    Output JSON format:
    {
      "data": [
        {
          "question": "...",
          "context": "...",
          "answer": "..."
        },
        ...
      ]
    }

    Args:
        input_csv_path: Path of the CSV file to read. It must have a header row.
        output_json_path: Path of the JSON file to create (overwritten if it
            already exists).

    Returns:
        True once the JSON file has been written. Failures surface as
        exceptions from open()/csv/json rather than as a False return.
    """
    squad_data = {"data": []}

    # newline='' hands line-ending handling to the csv module, so a "\r\n"
    # inside a quoted field is kept verbatim instead of being rewritten by
    # universal-newline translation.
    # 'utf-8-sig' drops a leading byte-order mark if present (plain UTF-8 is
    # read unchanged); otherwise the BOM sticks to the first header name and
    # that column can no longer be looked up by name.
    with open(input_csv_path, 'r', encoding='utf-8-sig', newline='') as csvfile:
        reader = csv.DictReader(csvfile)

        # Process each row
        for row in reader:
            entry = {
                "question": row.get('Question', ''),
                "context": row.get('Input', ''),
                "answer": row.get('Answer Concepts', '')
            }

            # Only add non-empty entries. Short rows give None for missing
            # trailing fields, which this truthiness check also filters out.
            if entry["question"] and entry["context"] and entry["answer"]:
                squad_data["data"].append(entry)

    # Write output JSON file; ensure_ascii=False keeps non-ASCII text readable.
    with open(output_json_path, 'w', encoding='utf-8') as jsonfile:
        json.dump(squad_data, jsonfile, indent=2, ensure_ascii=False)

    print(f"Conversion complete! Created {output_json_path} with {len(squad_data['data'])} entries.")
    return True

In [4]:
if __name__ == "__main__":
    # Driver cell: convert the templates CSV into a SQuAD-style JSON file
    # written next to the input.

    # Set paths directly in the script
    # NOTE(review): machine-specific absolute path; edit before running elsewhere.
    input_csv_path = "/Users/casey/Documents/GitHub/LLM_Healthcare/templates-all.csv"

    # Create output path in the same directory
    output_dir = os.path.dirname(input_csv_path)
    output_json_path = os.path.join(output_dir, "squad_format.json")

    # Check if input file exists
    if not os.path.exists(input_csv_path):
        print(f"Error: Input file {input_csv_path} does not exist.", file=sys.stderr)
        # sys.exit rather than the bare `exit` helper: the latter is injected
        # by the `site` module for interactive use and is replaced by a
        # different object under IPython/Jupyter.
        sys.exit(1)

    # Perform conversion
    success = convert_csv_to_squad(input_csv_path, output_json_path)

    # convert_csv_to_squad returns True whenever it returns at all (errors
    # propagate as exceptions), so the else branch is only a safeguard.
    if success:
        print(f"Successfully converted {input_csv_path} to {output_json_path}")
    else:
        print("Conversion failed")

Conversion complete! Created /Users/casey/Documents/GitHub/LLM_Healthcare/squad_format.json with 142 entries.
Successfully converted /Users/casey/Documents/GitHub/LLM_Healthcare/templates-all.csv to /Users/casey/Documents/GitHub/LLM_Healthcare/squad_format.json
