# Conversion of LLM JSON files to CSV
This file contains the code to convert the individual JSON responses from the LLM to a single CSV of all of the labels.

This is **step 2** of obtaining and analyzing the LLM-produced annotations:

1. The notebook `run_T1_MRI_Parameters_GPT-4o.ipynb` accesses the OpenAI API and saves the JSON responses from the LLM.
2. `convert_MRI_JSON_parameters_to_CSV.ipynb` (this notebook) converts the JSON files from the LLM to a single CSV.

## Setup & Housekeeping

In [None]:
import os
import json
import pandas as pd
from dotenv import load_dotenv, find_dotenv
import numpy as np

In [None]:
# Look for a .env file and load it. find_dotenv() yields an empty string when
# no file is found (checked below); loading with an empty path appears to be
# harmless, so the call is made unconditionally.
dotenvfile = find_dotenv()
load_dotenv(dotenvfile)

if dotenvfile == '':
    print("No dotenv file.") # Also acceptable to use other environment variables

# ROOT_DIR may come from the .env file or from the regular environment.
ROOT_DIR = os.getenv('ROOT_DIR')
if not ROOT_DIR:
    # Fail with an explicit message instead of the opaque TypeError that
    # string concatenation with None would raise a few lines further down.
    raise RuntimeError(
        "ROOT_DIR is not set. Define it in a .env file or as an environment variable."
    )
print("ROOT_DIR set to: " + ROOT_DIR)

# Folder holding the per-article JSON responses to be converted.
directory = ROOT_DIR + "/LLM_Experiments/Task_2_Structural_MRI_Parameters/Results/Inner_JSON/OpenAI/GPT-4o/"
print(directory)

### Collect the files with the LLM responses

In [None]:
# Collect the JSON response files found directly inside `directory`.
# - Only regular files ending in ".json" are kept, so stray entries such as
#   .DS_Store are never handed to the JSON parser.
# - The paths are sorted because os.listdir() returns entries in arbitrary
#   order; sorting keeps the row order of the resulting CSV reproducible.
files = sorted(
    os.path.join(directory, name)
    for name in os.listdir(directory)
    if name.lower().endswith(".json")
    and os.path.isfile(os.path.join(directory, name))
)

print(len(files))

In [None]:
def process_json_file(file_path):
    """Flatten one LLM JSON response into a single row of MRI parameters.

    The ``pmcid`` is derived from the file name: the base name without its
    extension, truncated at the first underscore.

    Scalar parameters are read from ``data[<section>]["value"]``. The three
    spatial parameters (``FOV``, ``voxel_size``, ``matrix_size``) are read
    from the ``x``/``y``/``z`` entries of ``data[<section>]["value"]``.
    Anything that is missing, JSON ``null``, or not an object where an object
    is expected is reported as ``NaN`` instead of raising.

    Args:
        file_path: Path to a JSON file containing one LLM response.

    Returns:
        dict: Column name -> extracted value (``np.nan`` for missing values).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    identifier = os.path.splitext(os.path.basename(file_path))[0].split("_")[0]

    # JSON text is UTF-8 by specification, so do not rely on the platform's
    # default encoding. The file is only needed while parsing.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    def get_value(d, key):
        """Return d[key], or NaN when d is not a dict or the entry is missing/null."""
        if not isinstance(d, dict):
            # Covers a null (or otherwise non-object) parent, e.g.
            # {"FOV": {"value": null}}, which used to raise AttributeError.
            return np.nan
        found = d.get(key)
        return np.nan if found is None else found

    def scalar(section):
        """Value of a scalar parameter section."""
        return get_value(get_value(data, section), "value")

    def component(section, axis):
        """One axis (x, y or z) of a spatial parameter section."""
        return get_value(scalar(section), axis)

    # Extract the data based on the prototype JSON structure. The key order
    # here determines the column order of the final CSV.
    row = {
        "pmcid": identifier,
        "T": scalar("T"),
        "TR": scalar("TR"),
        "TE": scalar("TE"),
        "inversion_time": scalar("TI"),
        "flip_angle": scalar("flip_angle"),
        "FOV_x": component("FOV", "x"),
        "FOV_y": component("FOV", "y"),
        "FOV_z": component("FOV", "z"),
        "voxel_size_x": component("voxel_size", "x"),
        "voxel_size_y": component("voxel_size", "y"),
        "voxel_size_z": component("voxel_size", "z"),
        "matrix_x": component("matrix_size", "x"),
        "matrix_y": component("matrix_size", "y"),
        "matrix_z": component("matrix_size", "z"),
        "slice_thickness": scalar("slice_thickness"),
        "acquisition_time": scalar("acquisition_time"),
        "number_of_slices": scalar("number_of_slices"),
        "image_orientation": scalar("image_orientation"),
    }

    return row


In [None]:
def load_json_files_to_dataframe(files):
    """Build a DataFrame with one row per JSON response file.

    Args:
        files: Iterable of paths to JSON files; each one is passed to
            ``process_json_file``.

    Returns:
        pandas.DataFrame: One row per input file, in the order given.
    """
    # Each file is flattened into a dict; the list of dicts becomes the table.
    records = [process_json_file(path) for path in files]
    return pd.DataFrame(records)

In [None]:
def save_dataframe_to_csv(df, output_csv_file):
    """Write ``df`` to ``output_csv_file`` as CSV, leaving out the index column.

    Args:
        df: DataFrame to export.
        output_csv_file: Destination passed on to ``DataFrame.to_csv``.
    """
    # index=False keeps the row index out of the file.
    df.to_csv(path_or_buf=output_csv_file, index=False)

In [None]:
# Parse every collected JSON response into one table (one row per file).
x = load_json_files_to_dataframe(files)

In [None]:
# Preview the first few rows as a quick sanity check of the extraction.
x.head()

In [None]:
# Export the combined table; the relative path means the CSV is written to the
# current working directory.
save_dataframe_to_csv(x, 'llm_mri_parameters.csv')