In [1]:
import json

def save_multiple_qa_to_jsonl(qa_list, filepath):
    """
    Append a batch of question-answer records to a JSONL file.

    Each record is serialised as one JSON object per line. The file is
    opened in append mode, so existing content is kept and new lines are
    added at the end. Non-ASCII characters are written as-is (UTF-8)
    rather than as \\uXXXX escapes.

    Args:
        qa_list (list of dicts): Records to write; each dict is expected to
            carry 'question' and 'answer' keys (not enforced here).
        filepath (str): Path to the .jsonl file.
    """
    with open(filepath, 'a', encoding='utf-8') as out:
        # One JSON document per line, streamed straight into the file.
        out.writelines(
            json.dumps(record, ensure_ascii=False) + '\n'
            for record in qa_list
        )


In [8]:
import pandas as pd

def csv_to_jsonl(csv_path, jsonl_path):
    """
    Convert a CSV file into JSONL records and write them out.

    Every CSV row becomes one JSON object keyed by the CSV column names and
    is handed to ``save_multiple_qa_to_jsonl`` for writing.

    Cells that pandas parses as missing (NaN/NA) are written as JSON
    ``null``. Left untouched, ``json.dumps`` would emit the bare token
    ``NaN``, which is not valid JSON and is rejected by strict parsers.

    Args:
        csv_path (str): Path to the input CSV file.
        jsonl_path (str): Path to the .jsonl file that receives the records.
    """
    df = pd.read_csv(csv_path)
    # Map missing values to None per cell so they serialise as `null`.
    # read_csv yields scalar cells, so pd.isna returns a plain bool here.
    qa_list = [
        {
            column: (None if pd.isna(value) else value)
            for column, value in row.items()
        }
        for row in df.to_dict(orient='records')
    ]
    save_multiple_qa_to_jsonl(qa_list, jsonl_path)

# Example: convert the rows of data.csv and add them to training_dataset.jsonl.
# NOTE: the writer opens the output file in append mode, so re-running this
# cell adds the same rows to the file again.
csv_to_jsonl("data.csv", "training_dataset.jsonl")


In [6]:
import json

def save_qa_to_jsonl(question, answer, filepath):
    """
    Append a single question-answer pair to a JSONL file.

    The pair is stored as one JSON object with the keys "question" and
    "answer", followed by a newline. Newlines and other formatting inside
    the strings are preserved through JSON escaping; non-ASCII characters
    are written as-is (UTF-8).

    Args:
        question (str): The natural language question or prompt.
        answer (str): The code snippet (answer).
        filepath (str): Path to the .jsonl file.
    """
    with open(filepath, 'a', encoding='utf-8') as out:
        record = json.dumps(
            {"question": question, "answer": answer},
            ensure_ascii=False,
        )
        out.write(f"{record}\n")

# Example usage:
if __name__ == "__main__":
    # Single-quoted literals are used on purpose: a triple-double-quoted
    # string whose text ends in '"' is terminated by the first '"""' it
    # meets, which silently dropped the closing quote after bar.
    q = 'check if variable x is a set containing "foo" and "bar"'
    a = 'x == {"foo", "bar"}'

    save_qa_to_jsonl(q, a, "training_dataset.jsonl")
