In [2]:
import os
import requests
import json
import time
from pathlib import Path

def query_llama(prompt, url="http://10.127.30.113:11434/api/generate"):
    payload = {
        "model": "llama3.1",
        "prompt": prompt
    }
    full_response = ""
    try:
        response = requests.post(url, json=payload, stream=True)
        for line in response.iter_lines():
            if line:
                json_response = json.loads(line)
                if 'response' in json_response:
                    full_response += json_response['response']
                if json_response.get('done', False):
                    break
        return full_response
    except Exception as e:
        return f"Error: {e}"

def clean_variation(text):
    # Remove numbering prefixes (e.g., "1.", "2.", etc.)
    text = text.strip()
    for i in range(1, 11):
        if text.startswith(f"{i}."):
            text = text[len(str(i))+1:].strip()
    
    # Remove asterisks and other formatting
    text = text.replace('*', '').strip()
    
    # Remove labels like "Original:", "Variation:", etc.
    prefixes_to_remove = ["Original:", "Variation:", "Alternative phrase:", "Technical term:", 
                         "Focused query:", "Clinical context:", "Specific anatomy:", 
                         "Comparative query:", "This variation"]
    for prefix in prefixes_to_remove:
        if text.startswith(prefix):
            text = text[len(prefix):].strip()
    
    return text

def process_batch_file(input_file, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    
    with open(input_file, 'r') as f:
        questions = [q.strip() for q in f.readlines() if q.strip()]
    
    batch_num = os.path.basename(input_file).replace('batch_', '').replace('.txt', '')
    output_file = os.path.join(output_dir, f'paraphrased_batch_{batch_num}.txt')
    
    with open(output_file, 'w') as out_f:
        for idx, question in enumerate(questions, 1):
            prompt = f'''You are an expert radiologist. I need exactly 10 different ways to ask this radiology question. Each variation must keep the exact same medical meaning but use different wording. Use proper medical terminology and maintain clinical accuracy. Each variation should be appropriate for a professional radiology report. Please provide only the variations without any additional text or numbering.

Original question: "{question}"'''
            
            response = query_llama(prompt)
            
            # Format output
            out_f.write(f"Prompt Set {idx}: {question}\n")
            out_f.write(f"Original: {question}\n")
            
            # Process variations
            variations = [v.strip() for v in response.split('\n') if v.strip()]
            valid_variations = []
            
            for var in variations:
                cleaned_var = clean_variation(var)
                if cleaned_var and cleaned_var != question:
                    valid_variations.append(cleaned_var)
            
            # Write exactly 10 variations
            for var_idx, variation in enumerate(valid_variations[:10], 1):
                out_f.write(f"Variation {var_idx}: {variation}\n")
            
            out_f.write("\n")  # Add blank line between prompt sets
            time.sleep(1)
            
        print(f"Processed {input_file} -> {output_file}")

def main():
    input_dir = "/share/ssddata/sarimhashmi/iuxray_posix_prompts/text_batches"
    output_dir = "/share/ssddata/sarimhashmi/iuxray_posix_prompts/paraphrase_prompts"
    
    for batch_file in sorted(os.listdir(input_dir)):
        if batch_file.startswith('batch_') and batch_file.endswith('.txt'):
            input_file = os.path.join(input_dir, batch_file)
            process_batch_file(input_file, output_dir)

if __name__ == "__main__":
    main()

Processed /share/ssddata/sarimhashmi/iuxray_posix_prompts/text_batches/batch_1.txt -> /share/ssddata/sarimhashmi/iuxray_posix_prompts/paraphrase_prompts/paraphrased_batch_1.txt
Processed /share/ssddata/sarimhashmi/iuxray_posix_prompts/text_batches/batch_10.txt -> /share/ssddata/sarimhashmi/iuxray_posix_prompts/paraphrase_prompts/paraphrased_batch_10.txt
Processed /share/ssddata/sarimhashmi/iuxray_posix_prompts/text_batches/batch_11.txt -> /share/ssddata/sarimhashmi/iuxray_posix_prompts/paraphrase_prompts/paraphrased_batch_11.txt
Processed /share/ssddata/sarimhashmi/iuxray_posix_prompts/text_batches/batch_12.txt -> /share/ssddata/sarimhashmi/iuxray_posix_prompts/paraphrase_prompts/paraphrased_batch_12.txt
Processed /share/ssddata/sarimhashmi/iuxray_posix_prompts/text_batches/batch_13.txt -> /share/ssddata/sarimhashmi/iuxray_posix_prompts/paraphrase_prompts/paraphrased_batch_13.txt
Processed /share/ssddata/sarimhashmi/iuxray_posix_prompts/text_batches/batch_14.txt -> /share/ssddata/sarim