In [1]:
import csv

def extract_qa_pairs(input_file, output_file):
    """
    Extract question-answer pairs from a text file and save them to CSV.

    Format of the text file:
    - Question on one line
    - Answer on the next line(s)
    - One or more blank lines separate each Q&A pair; a line that holds
      only whitespace counts as blank

    Blocks that contain a question but no answer text are skipped. The CSV
    starts with a ``Question,Answer`` header row, and a one-line summary is
    printed once the file has been written.

    Parameters:
    - input_file: Path to the input text file (UTF-8; a leading BOM is ignored)
    - output_file: Path to the output CSV file (overwritten if it exists)
    """
    # 'utf-8-sig' drops a leading byte-order mark, which would otherwise end
    # up glued to the first question (str.strip() does not remove U+FEFF).
    with open(input_file, 'r', encoding='utf-8-sig') as f:
        lines = f.read().split('\n')

    # Sentinel blank line so the final block is flushed by the loop below
    # even when the file does not end with a separator.
    lines.append('')

    qa_pairs = []
    block = []  # non-blank lines of the Q&A block currently being collected
    for line in lines:
        if line.strip():
            block.append(line)
            continue

        # Blank (or whitespace-only) line: close the current block, if any.
        if block:
            question = block[0].strip()
            # Every line after the first belongs to the answer.
            answer = '\n'.join(block[1:]).strip()
            # A block with a question but no answer is dropped.
            if answer:
                qa_pairs.append([question, answer])
            block = []

    # newline='' lets the csv module control line endings, so multi-line
    # answers are quoted and written correctly.
    with open(output_file, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Question', 'Answer'])  # Header row
        writer.writerows(qa_pairs)

    print(f"Successfully extracted {len(qa_pairs)} Q&A pairs to {output_file}")

# Script entry point: convert the default input file into the default CSV.
# Edit the two paths below to point at your own input and output files.
if __name__ == "__main__":
    input_file, output_file = "input.txt", "qa_pairs.csv"
    extract_qa_pairs(input_file, output_file)

Successfully extracted 344 Q&A pairs to qa_pairs.csv
