In [1]:
import pandas as pd
import json

In [4]:
def map_query_type(df, row, query_type):
  """Fill ``query_type`` with integer flags read from one row of ``df``.

  Args:
    df: DataFrame containing the annotation columns listed in the table below.
    row: Index label of the row to read (looked up with ``df.loc``).
    query_type: Dict that is mutated in place; the function returns nothing.

  Every cell is converted with ``int``. The "Indirect" flag is stored as the
  complement of the "Indirect (0) vs. Direct (1)" column, so a direct query
  (1 in the sheet) becomes 0 in the output.
  """
  # (output key, source column, store 1 - value instead of value)
  flag_columns = (
    ("Specific", "General (0) vs. Specific (1)", False),
    ("Subjective", "Objective (0) vs. Subjective (1)", False),
    ("Indirect", "Indirect (0) vs. Direct (1)", True),
    ("Compound", "Simple (0) vs. Compound (AND/OR) Logic (1)", False),
    ("Negated", "Negation (1)", False),
    ("Analogical", "Analogical (1)", False),
    ("Temporal", "Temporal (1)", False),
  )

  for key, column, inverted in flag_columns:
    value = int(df.loc[row, column])
    query_type[key] = 1 - value if inverted else value

def map_options_explanations(df, row, options, answer, incorrectness_explanations, correctness_explanation):
  """Populate the option and explanation dicts for one row of ``df``.

  Args:
    df: DataFrame with the columns "Correct Answer", "term_matching",
      "correct_description", and "Option i" / "description_i" /
      "explanation_i" for i in 1..4.
    row: Index label of the row to read (looked up with ``df.loc``).
    options: Dict mutated in place. Maps each id (the text before the first
      ": " in the answer/option cell) to its description. The correct answer
      is inserted first, followed by options 1 to 4.
    answer: Not used by this function; kept so existing callers keep working.
    incorrectness_explanations: Dict mutated in place. Maps each of the four
      option ids to its explanation.
    correctness_explanation: Dict mutated in place with the parsed
      "term_matching" JSON object.

  Raises:
    json.JSONDecodeError: If the "term_matching" cell is not valid JSON.
  """
  correct_id = df.loc[row, "Correct Answer"].split(": ")[0]

  parsed_explanation = json.loads(df.loc[row, "term_matching"])
  # Rebinding the parameter name would be invisible to the caller, so the
  # parsed object is copied into the caller's dict instead. Non-dict values
  # are left alone so callers that handle them themselves are unaffected.
  if isinstance(parsed_explanation, dict) and isinstance(correctness_explanation, dict):
    correctness_explanation.update(parsed_explanation)

  # correct recipe id as an option mapped to description
  options[correct_id] = df.loc[row, "correct_description"]

  for i in range(1, 5):
    suffix = str(i)
    option_id = df.loc[row, "Option " + suffix].split(": ")[0]

    # option recipe id mapped to description
    options[option_id] = df.loc[row, "description_" + suffix]

    # option recipe id mapped to explanation
    incorrectness_explanations[option_id] = df.loc[row, "explanation_" + suffix]

def convert_data(input_file, output_file):
  """Convert the annotated CSV at ``input_file`` into a JSON list at ``output_file``.

  The CSV is read with ``pd.read_csv`` and rows are visited by position
  (0 .. len(df) - 1) through ``df.loc``. Each row becomes one dict with the
  keys "query", "query_type", "options", "answer",
  "incorrectness_explanations" and "correctness_explanation", in that order.
  The list of dicts is written as JSON indented by 4 spaces.

  Args:
    input_file: Path of the CSV file to read.
    output_file: Path of the JSON file to write (opened in "w" mode).
  """
  df = pd.read_csv(input_file)

  # one dictionary per CSV row
  records = []

  for row in range(len(df)):
    # values taken straight from the row
    query = df.loc[row, "Query"]
    correctness_explanation = json.loads(df.loc[row, "term_matching"])
    answer = df.loc[row, "Correct Answer"].split(": ")[0]

    # containers filled in place by the helper functions
    query_type = {}
    map_query_type(df, row, query_type)

    options = {}
    incorrectness_explanations = {}
    map_options_explanations(df, row, options, answer, incorrectness_explanations, correctness_explanation)

    records.append({
      "query": query,
      "query_type": query_type,
      "options": options,
      "answer": answer,
      "incorrectness_explanations": incorrectness_explanations,
      "correctness_explanation": correctness_explanation,
    })

  # serialise the collected records and write them out in one go
  with open(output_file, "w") as f:
    f.write(json.dumps(records, sort_keys=False, indent=4, separators=(',', ': ')))

Change the input and output file paths below, naming the output file "[first_initial].json" (for example, "h.json").

In [None]:
# Source CSV to read and destination JSON to write; edit both before running.
input_file = "h.csv"
output_file = "h.json"

# Run the conversion using the paths configured above.
convert_data(input_file=input_file, output_file=output_file)