In [3]:
import os
import json

def rename_subdirectories(base_dir, suffix="-exported"):
    """Rename immediate subdirectories to end with the specified suffix (default is '-exported')."""
    with os.scandir(base_dir) as entries:
        for entry in entries:
            if entry.is_dir() and not entry.name.endswith(suffix):
                target_path = f"{entry.path}{suffix}"
                if not os.path.exists(target_path):
                    os.rename(entry.path, target_path)
                else:
                    print(f"Skipping rename for {entry.path} as {target_path} already exists.")

def extract_valid_jsonl(base_dir, suffix="-exported"):
    """Extract valid JSON lines from .jsonl files in subdirectories not ending with the specified suffix."""
    valid_jsons = []
    with os.scandir(base_dir) as entries:
        for entry in entries:
            if entry.is_dir() and not entry.name.endswith(suffix):
                for file in os.scandir(entry.path):
                    if file.is_file() and file.name.endswith(".jsonl"):
                        try:
                            with open(file.path, 'r', encoding='utf-8') as f:
                                valid_jsons.extend(
                                    json_obj for line in filter(None, map(str.strip, f))
                                    if (json_obj := json.loads(line)) and isinstance(json_obj, dict)
                                )
                        except (OSError, json.JSONDecodeError) as e:
                            print(f"Error processing {file.path}: {e}   Not a valid JSON line - SKipping")
    return valid_jsons


In [5]:
if __name__ == "__main__":
    base_dir=r"C:\Users\tan_poh_keam\OneDrive - Republic Polytechnic\1. Projects\UBTS\msg_app\outputs\crew - Copy"
    if os.path.isdir(base_dir):
        valid_data = extract_valid_jsonl(base_dir=base_dir, suffix="-exportedxx")  # sequence
        rename_subdirectories(base_dir=base_dir, suffix="-exportedxx")  # sequence is important
        for item in valid_data:
            print(item)
    else:
        print(f"The directory {base_dir} does not exist.")
        
        
    

Error processing C:\Users\tan_poh_keam\OneDrive - Republic Polytechnic\1. Projects\UBTS\msg_app\outputs\crew - Copy\20250228_141548\email_drafting_task.jsonl: Expecting value: line 1 column 1 (char 0)   Not a valid JSON line - SKipping
Error processing C:\Users\tan_poh_keam\OneDrive - Republic Polytechnic\1. Projects\UBTS\msg_app\outputs\crew - Copy\20250228_151759\email_drafting_task.jsonl: Expecting value: line 1 column 1 (char 0)   Not a valid JSON line - SKipping
Error processing C:\Users\tan_poh_keam\OneDrive - Republic Polytechnic\1. Projects\UBTS\msg_app\outputs\crew - Copy\20250228_152101\email_drafting_task.jsonl: Expecting value: line 1 column 1 (char 0)   Not a valid JSON line - SKipping
Error processing C:\Users\tan_poh_keam\OneDrive - Republic Polytechnic\1. Projects\UBTS\msg_app\outputs\crew - Copy\20250228_162142\email_drafting_task.jsonl: Expecting value: line 1 column 1 (char 0)   Not a valid JSON line - SKipping
Error processing C:\Users\tan_poh_keam\OneDrive - Republ