In [43]:
import os
import re
from pathlib import Path

# Mapping of country names to their ISO 3-letter codes
# IMF African Department (AFR) member countries
# Keys are lowercase, underscore-separated name fragments that are looked up
# as substrings of lowercased file/directory names; several countries have
# more than one spelling so that alternative names resolve to the same code.
COUNTRY_CODES = {
    'algeria': 'DZA',
    'angola': 'AGO',
    'benin': 'BEN',
    'botswana': 'BWA',
    'burkina': 'BFA',
    'burkina_faso': 'BFA',
    'burundi': 'BDI',
    'cameroon': 'CMR',
    'cabo_verde': 'CPV',
    'cape_verde': 'CPV',
    'central_african': 'CAF',
    'central_african_republic': 'CAF',
    'chad': 'TCD',
    'comoros': 'COM',
    'congo_democratic': 'COD',
    'congo_democratic_republic_of_the': 'COD',
    'congo_republic_of': 'COG',
    'congo_republic': 'COG',
    # Accented spelling (previously stored mis-encoded, so it never matched).
    'côte_divoire': 'CIV',
    'cote_divoire': 'CIV',
    'djibouti': 'DJI',
    'egypt': 'EGY',
    'equatorial_guinea': 'GNQ',
    'eritrea': 'ERI',
    'eswatini': 'SWZ',
    'ethiopia': 'ETH',
    'gabon': 'GAB',
    'gambia': 'GMB',
    'ghana': 'GHA',
    'guinea': 'GIN',
    'guinea_bissau': 'GNB',
    'guineabissau': 'GNB',
    'kenya': 'KEN',
    'lesotho': 'LSO',
    'liberia': 'LBR',
    'libya': 'LBY',
    'madagascar': 'MDG',
    'malawi': 'MWI',
    'mali': 'MLI',
    'mauritania': 'MRT',
    'mauritius': 'MUS',
    'morocco': 'MAR',
    'mozambique': 'MOZ',
    'namibia': 'NAM',
    'niger': 'NER',
    'nigeria': 'NGA',
    'rwanda': 'RWA',
    'sao_tome': 'STP',
    'sao_tome_and_principe': 'STP',
    'senegal': 'SEN',
    'seychelles': 'SYC',
    'sierra_leone': 'SLE',
    'somalia': 'SOM',
    'south_africa': 'ZAF',
    'south_sudan': 'SSD',
    'sudan': 'SDN',
    'tanzania': 'TZA',
    'tanzania_united_republic_of': 'TZA',
    'togo': 'TGO',
    'uganda': 'UGA',
    'zambia': 'ZMB',
    'zimbabwe': 'ZWE',
}

def extract_review_number(filename):
    """Return the trailing review number of *filename* as a string.

    The number is the run of digits that follows the last underscore and sits
    at the very end of the name, optionally followed by a file extension
    (e.g. ``..._review_2.pdf`` gives ``'2'``).

    Returns:
        The digits as a string, or None when the name does not end that way
        (which includes names that only mention "request").
    """
    found = re.search(r'_(\d+)(?:\.\w+)?$', filename)
    return found.group(1) if found else None

def get_country_code(filename):
    """Return the ISO code for the country named in *filename*, or None.

    Every key of COUNTRY_CODES is tested as a substring of the lowercased
    name and the longest matching key wins. Taking the first match in
    dictionary order is not enough because some keys are contained in other
    country names: 'niger' in 'nigeria', 'guinea' in 'guinea_bissau' and
    'mali' in 'somalia'.

    Args:
        filename: File or directory name to inspect.

    Returns:
        The 3-letter code mapped to the longest matching key, or None when no
        key occurs in the name.
    """
    lowered = filename.lower()
    matches = [pattern for pattern in COUNTRY_CODES if pattern in lowered]
    if not matches:
        return None
    # The most specific (longest) key is the intended country.
    return COUNTRY_CODES[max(matches, key=len)]

def extract_year(filename):
    """Return the first four-digit group enclosed by underscores, or None.

    Only a ``_YYYY_`` segment counts: exactly four digits with an underscore
    directly before and after. The value is returned as a string.
    """
    found = re.search(r'_(\d{4})_', filename)
    if found is None:
        return None
    return found.group(1)

def determine_review_type(filename):
    """Classify *filename* as 'ArticleIV' or 'Standard'.

    A name counts as Article IV when it contains ``article_iv`` in any
    letter case; every other name is 'Standard'.
    """
    if 'article_iv' in filename.lower():
        return 'ArticleIV'
    return 'Standard'

def determine_combined_type(filename, review_num, next_is_article_iv=False):
    """Return the combined/standalone label for one review document.

    Args:
        filename: Original file name; it is an Article IV document when it
            contains ``article_iv`` (case-insensitive).
        review_num: Review number, typically a digit string. An empty or
            missing value is treated as 0, i.e. a request.
        next_is_article_iv: Whether the following review is an Article IV
            document.

    Returns:
        - 'Standalone_Pre-Combined' for a request (number 0), Article IV or not.
        - 'Combined' for any numbered review that is an Article IV document.
        - 'Standalone_Pre-Combined' for review 1 without Article IV, and for a
          later review without Article IV whose successor is Article IV.
        - 'Standalone_Post_Combined_<n-1>' for any other review n >= 2.
        - 'Combined' for any remaining value (a negative number).
    """
    has_article_iv = 'article_iv' in filename.lower()
    number = int(review_num) if review_num else 0

    # Requests precede every review, so they are always pre-combined.
    if number == 0:
        return 'Standalone_Pre-Combined'

    # Article IV reviews are combined; negative numbers share that fallback.
    if has_article_iv or number < 0:
        return 'Combined'

    # First review, or a later one directly followed by an Article IV review.
    if number == 1 or next_is_article_iv:
        return 'Standalone_Pre-Combined'

    return f'Standalone_Post_Combined_{number - 1}'

def _review_number_of(name, already_renamed):
    """Return the review number encoded in *name* as an int (0 when absent).

    Names produced by a previous run carry the number in their ``Review<N>``
    label, while their trailing digits may be a ``Standalone_Post_Combined_<k>``
    suffix; reading the label keeps that suffix from being mistaken for the
    review number. Original names are parsed with extract_review_number().
    """
    if already_renamed:
        label = re.search(r'_Review(\d+)', name)
        return int(label.group(1)) if label else 0
    raw_number = extract_review_number(name)
    return int(raw_number) if raw_number else 0


def rename_files_in_directory(directory_path, dry_run=True, output_dir=None, show_country=""):
    """
    Rename files in the specified directory and optionally save to output directory.

    Files whose names already start with an ISO code prefix (``XXX_``) are
    treated as processed: they are counted and still take part in the
    review-number ordering used for the lookahead, but are never renamed again.
    Every remaining file is renamed to
    ``{code}_{year}_ECF_{Request|ReviewN}[_Article_IV]_{combined type}{ext}``.
    A file without a review number is labelled ``Request``.

    Args:
        directory_path: Path to the directory containing files to rename
        dry_run: If True, print proposed changes without making them
        output_dir: Path to save renamed files (if None, files are renamed in place).
            When it differs from the source directory the files are copied
            there and the originals are left untouched.
        show_country: ISO code supplied by the caller; used as a fallback when
            the file name itself does not contain a recognizable country name.

    Returns:
        None. Progress, skipped files and errors are reported on stdout.
    """
    import shutil  # local import: shutil is not among the module-level imports

    path = Path(directory_path)

    # is_dir() also rejects a path that exists but is a regular file.
    if not path.is_dir():
        print(f"Directory not found: {directory_path}")
        return

    files = sorted(f for f in path.iterdir() if f.is_file())

    if not files:
        print("  ℹ️  No files found")
        return

    # str.startswith accepts a tuple, so build the prefixes once.
    iso_prefixes = tuple(f"{code}_" for code in set(COUNTRY_CODES.values()))

    # Collect what the lookahead needs for every file, renamed or not.
    file_info = []
    for file_path in files:
        name = file_path.name
        already_renamed = name.startswith(iso_prefixes)
        file_info.append({
            'path': file_path,
            'name': name,
            'already_renamed': already_renamed,
            'review_num': _review_number_of(name, already_renamed),
            'is_article_iv': 'article_iv' in name.lower(),
        })

    processed_count = sum(1 for info in file_info if info['already_renamed'])
    print(f"  📊 Total: {len(files)} files | Already processed: {processed_count} | To process: {len(files) - processed_count}")
    print()

    # Sort by review number for lookahead (stable, so name order breaks ties).
    file_info.sort(key=lambda info: info['review_num'])

    dest_dir = Path(output_dir) if output_dir else path
    # Compare resolved paths so different spellings of one directory agree.
    copy_to_output = dest_dir.resolve() != path.resolve()

    for idx, info in enumerate(file_info):
        if info['already_renamed']:
            continue

        file_path = info['path']
        old_name = info['name']

        year = extract_year(old_name)
        if not year:
            print(f"⚠️  Skipped: {old_name} (couldn't extract year)")
            continue

        # Never emit a "None_..." name: fall back to the caller's code or skip.
        country_code = get_country_code(old_name) or show_country
        if not country_code:
            print(f"⚠️  Skipped: {old_name} (country not recognized)")
            continue

        review_num = info['review_num']
        review_label = f"Review{review_num}" if review_num else 'Request'

        # Look ahead to check if next review is Article IV (Combined)
        has_next = idx + 1 < len(file_info)
        next_is_article_iv = has_next and file_info[idx + 1]['is_article_iv']

        combined_type = determine_combined_type(old_name, str(review_num), next_is_article_iv)
        article_iv_suffix = "_Article_IV" if info['is_article_iv'] else ""
        new_name = f"{country_code}_{year}_ECF_{review_label}{article_iv_suffix}_{combined_type}{file_path.suffix}"

        print(f"Old: {old_name}")
        print(f"New: {new_name}\n")

        if dry_run:
            continue

        new_path = dest_dir / new_name
        # Two sources can map to the same name; never overwrite silently
        # (Path.rename replaces existing files on POSIX, shutil.copy2 always).
        if new_path.exists():
            print(f"⚠️  Skipped: {new_name} already exists in {dest_dir}\n")
            continue

        try:
            if copy_to_output:
                dest_dir.mkdir(parents=True, exist_ok=True)
                shutil.copy2(file_path, new_path)
            else:
                file_path.rename(new_path)
        except OSError as e:
            print(f"✗ Error processing: {e}\n")
        else:
            print("✓ Processed successfully\n")

def process_all_countries(base_directory, dry_run=True, output_base=None):
    """
    Process all country subdirectories and save to new folders with ISO codes.

    Each subdirectory of *base_directory* is matched against the keys of
    COUNTRY_CODES (as substrings of the lowercased directory name). The
    longest matching key decides the country, so that e.g. 'nigeria' is not
    taken for 'niger' or 'guinea_bissau' for 'guinea'. Recognized directories
    are handed to rename_files_in_directory() with ``<output_base>/<ISO code>``
    as the output directory; unrecognized ones are reported and skipped.

    Args:
        base_directory: Base path containing country folders
        dry_run: If True, print proposed changes without making them
            (no directories are created either)
        output_base: Base path for output folders (if None, creates 'IMF_Downloads_Renamed' in parent directory)

    Returns:
        None. Progress is reported on stdout.
    """
    base_path = Path(base_directory)

    # is_dir() also rejects a path that exists but is a regular file.
    if not base_path.is_dir():
        print(f"❌ Base directory not found: {base_directory}")
        return

    print(f"✓ Base directory found: {base_directory}")

    # Create output directory if not specified
    if output_base is None:
        output_base = base_path.parent / "IMF_Downloads_Renamed"

    output_path = Path(output_base)

    print(f"\n📁 Output base directory: {output_path}")

    if not dry_run:
        output_path.mkdir(parents=True, exist_ok=True)
        print("✓ Created output directory")

    # Get all subdirectories
    country_dirs = sorted(d for d in base_path.iterdir() if d.is_dir())

    print(f"\n📂 Found {len(country_dirs)} country directories\n")

    for country_dir in country_dirs:
        country_name = country_dir.name.lower()
        matches = [pattern for pattern in COUNTRY_CODES if pattern in country_name]

        if not matches:
            print(f"⚠️  Skipped directory: {country_dir.name} (country not recognized)")
            continue

        # The most specific (longest) key identifies the country.
        country_code = COUNTRY_CODES[max(matches, key=len)]

        # Create output directory with ISO code
        output_country_dir = output_path / country_code
        print(f"\n{'='*80}")
        print(f"🔄 {country_dir.name.upper()} → {country_code}")
        print(f"{'='*80}")
        if not dry_run:
            output_country_dir.mkdir(parents=True, exist_ok=True)

        rename_files_in_directory(
            str(country_dir),
            dry_run=dry_run,
            output_dir=str(output_country_dir),
            show_country=country_code,
        )

# Script entry point: preview the new names, then apply them.
if __name__ == "__main__":
    # Source root: one subfolder per country (adjust to your file structure).
    BASE_DIR = r"C:\Users\nbenaissa\OneDrive - International Monetary Fund (PRD)\IMF_Downloads_Edited"

    # Destination root: receives one ISO-code subfolder per recognized country.
    OUTPUT_DIR = r"C:\Users\nbenaissa\OneDrive - International Monetary Fund (PRD)\IMF_Downloads_Renamed"

    # First pass is a dry run: it only prints the proposed old/new names.
    process_all_countries(base_directory=BASE_DIR, output_base=OUTPUT_DIR, dry_run=True)

    # Second pass applies the changes, writing the renamed files under
    # OUTPUT_DIR. Remove or comment out this call to keep the script preview-only.
    process_all_countries(base_directory=BASE_DIR, output_base=OUTPUT_DIR, dry_run=False)

✓ Base directory found: C:\Users\nbenaissa\OneDrive - International Monetary Fund (PRD)\IMF_Downloads_Edited

📁 Output base directory: C:\Users\nbenaissa\OneDrive - International Monetary Fund (PRD)\IMF_Downloads_Renamed

📂 Found 28 country directories


🔄 BENIN → BEN
  📊 Total: 11 files | Already processed: 5 | To process: 6

Old: benin_2022_Article_IV_ECF_EFF.pdf
New: BEN_2022_ECF_Request_Article_IV_Standalone_Pre-Combined.pdf

Old: benin_2022_ECF_EFF_review_1.pdf
New: BEN_2022_ECF_Review1_Standalone_Pre-Combined.pdf

Old: benin_2023_ECF_EFF_review_2.pdf
New: BEN_2023_ECF_Review2_Standalone_Post_Combined_1.pdf

Old: benin_2024_Article_IV_ECF_EFF_review_4.pdf
New: BEN_2024_ECF_Review4_Article_IV_Combined.pdf

Old: benin_2024_ECF_EFF_review_5.pdf
New: BEN_2024_ECF_Review5_Standalone_Post_Combined_4.pdf

Old: benin_2025_ECF_EFF_review_6.pdf
New: BEN_2025_ECF_Review6_Standalone_Post_Combined_5.pdf


🔄 BURKINA_FASO → BFA
  üìä Total: 7 files | Already processed: 3 | 