# In [None]:
def fix_encoding(text):
    """
    Replace common mojibake sequences in ``text`` with plain-ASCII stand-ins.

    The replacements are applied one after another with ``str.replace``, so
    their order matters: a sequence that is a prefix of a longer one must be
    tried *after* the longer one. In particular the bare 'Ã' fallback has to
    come after 'Ã©' and 'Ã¨', otherwise it consumes their first character and
    those entries can never match.

    Note that the substitutions are lossy by design of the table: accented
    letters are mapped to unaccented ASCII ('Ã©' -> 'e'), not restored.

    Args:
        text: The string to clean.

    Returns:
        A new string with every listed sequence replaced. Text that contains
        none of the sequences is returned unchanged.
    """
    # Ordered (sequence, replacement) pairs. A tuple of pairs is used instead
    # of a dict so the order is explicit and a duplicated key cannot silently
    # move an entry ahead of the longer sequences it is a prefix of.
    replacements = (
        ('ÃƒÂ', 'A'),
        ('ƒÂ', ''),
        ('â€™', "'"),   # Smart quote
        ('â€œ', '"'),   # Left double quote
        ('â€', '"'),    # Right double quote (must follow the two above)
        ('Ã©', 'e'),
        ('Ã¨', 'e'),
        ('Ã', 'A'),     # Fallback: only after the specific 'Ã?' pairs
        ('Â', ' '),
    )

    for bad_seq, replacement in replacements:
        text = text.replace(bad_seq, replacement)

    return text

def clean_json_with_annotations(input_file, output_file):
    """
    Load a JSON document, run every string value through ``fix_encoding``,
    and write the result to ``output_file``.

    The container layout is kept as-is: dictionaries keep their keys
    untouched (only values are visited), lists keep their order, and
    non-string scalars are passed through unchanged. Both files are handled
    as UTF-8, and the output is pretty-printed with a two-space indent and
    non-ASCII characters written literally.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path the cleaned JSON is written to.
    """
    import json

    def _scrub(node):
        # Walk containers recursively; only str leaves are rewritten.
        if isinstance(node, dict):
            return {key: _scrub(value) for key, value in node.items()}
        if isinstance(node, list):
            return [_scrub(entry) for entry in node]
        if isinstance(node, str):
            return fix_encoding(node)
        return node

    # Parse the source document.
    with open(input_file, 'r', encoding='utf-8') as source:
        parsed = json.load(source)

    # Clean before opening the destination so a failure while cleaning
    # never truncates or creates the output file.
    scrubbed = _scrub(parsed)

    # Persist the cleaned document.
    with open(output_file, 'w', encoding='utf-8') as sink:
        json.dump(scrubbed, sink, ensure_ascii=False, indent=2)

    print(f"File cleaned and saved to {output_file}")

# Example run: clean one JSON file and write the result next to the notebook.
# The source path is a hard-coded local Windows path; adjust it before reuse.
input_file = r'd:\OneDrive - Personal\final.json'
output_file = 'cleaned_pastde.json'
clean_json_with_annotations(input_file=input_file, output_file=output_file)

# Output: File cleaned and saved to cleaned_pastde.json
