# In [None]:
# 📓 Notebook: AIV HA Country Analysis + GPT-3.5 / GPT-4o Auto Switch + Rate Limit Handling + Progress Bar + Autosave + Resume Support
# --- 🛠️ 1. Load Required Packages ---
import importlib.util
import subprocess
import sys
import re
import time
import os
import json
import datetime
import zipfile
import glob
from collections import Counter, defaultdict
import site
import logging
from packaging import version as pkg_version

# Configure logging: records at INFO level and above are appended to
# 'aiv_analysis.log', each formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(
    filename='aiv_analysis.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Define required third-party packages with their pip installation names and version ranges.
# Keys are the importable module names; values are the pip requirement strings that are
# passed to the installer and written to requirements.txt.
required_packages = {
    'pandas': 'pandas>=2.0.0,<3.0.0',
    'matplotlib': 'matplotlib>=3.7.0',
    'geopandas': 'geopandas>=0.13.0',
    'plotly': 'plotly>=5.14.0',
    'openai': 'openai>=1.0.0',
    'tqdm': 'tqdm>=4.65.0',
    'charset_normalizer': 'charset-normalizer>=3.3.0'  # Added for Stage 3 encoding detection
}

def check_write_permission():
    """
    Report whether the first site-packages directory is writable.

    Any error raised while locating or probing the directory is logged as a
    warning and treated as "not writable".

    Returns:
        bool: True when write access is available, False otherwise.
    """
    writable = False
    try:
        primary_site_dir = site.getsitepackages()[0]
        writable = os.access(primary_site_dir, os.W_OK)
    except Exception as e:
        logging.warning(f"Error checking write permission: {e}")
        writable = False
    return writable

def install_package(pkg, retries=3):
    """
    Install a package with pip, retrying on failure with a growing delay.

    Args:
        pkg (str): Package name (including version range).
        retries (int): Maximum number of installation attempts.

    Returns:
        bool: True if installation succeeds, False otherwise (including when
        ``retries`` is zero or negative and no attempt is made).
    """
    for attempt in range(1, retries + 1):
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])
        except subprocess.CalledProcessError as e:
            logging.error(f"Failed to install {pkg}: {e}")
            if attempt < retries:
                # Linear back-off: 5s, 10s, 15s, ...
                wait_time = 5 * attempt
                print(f"⏳ Failed to install {pkg}, retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                print(f"❌ Failed to install {pkg}: {e}")
        else:
            logging.info(f"Successfully installed package: {pkg}")
            print(f"✅ Successfully installed {pkg}")
            return True
    # Reached when every attempt failed or when no attempt was allowed at all;
    # always return an explicit bool so callers never see None.
    return False

def check_and_install_packages(packages):
    """
    Check that every required package is importable and satisfies its version
    specifier, and install whatever is missing or incompatible.

    The complete requirement string is evaluated (lower and upper bounds), so a
    spec such as ``pandas>=2.0.0,<3.0.0`` rejects pandas 3.x, and a spec with
    no version constraint is accepted as soon as the module is importable.

    Exits the process with status 1 when packages are missing and cannot (or
    must not) be installed automatically.

    Args:
        packages (dict): Mapping of module names to pip installation names/version ranges.
    """
    # Local import: `packaging` is already a dependency of this file (pkg_version).
    from packaging.requirements import Requirement

    missing_packages = []
    for module_name, pip_name in packages.items():
        try:
            if importlib.util.find_spec(module_name) is None:
                missing_packages.append(pip_name)
                continue

            requirement = Requirement(pip_name)
            module = importlib.import_module(module_name)
            raw_version = getattr(module, '__version__', None)
            if raw_version is None:
                # Not every module exposes __version__; ask the installed
                # distribution metadata instead of assuming 0.0.0.
                from importlib.metadata import version as distribution_version
                raw_version = distribution_version(requirement.name)

            installed_version = pkg_version.parse(raw_version)
            # prereleases=True so locally installed dev/rc builds are judged on
            # their version number rather than rejected outright.
            if not requirement.specifier.contains(installed_version, prereleases=True):
                missing_packages.append(pip_name)
        except Exception as e:
            # Anything that cannot be verified is treated as missing.
            logging.warning(f"Error checking {module_name}: {e}")
            missing_packages.append(pip_name)

    if not missing_packages:
        print("✅ All required packages are already installed and meet version requirements.")
        return

    print(f"❌ The following required packages are missing or have incompatible versions: {', '.join(missing_packages)}")

    # Check write permission and provide guidance
    if not check_write_permission():
        print("⚠️ No write permission to site-packages. Consider using a virtual environment or --user installation:")
        print(f"pip install --user {' '.join(missing_packages)}")
        sys.exit(1)

    # Default to installing in non-interactive environments; normalise the
    # answer the same way for both sources so 'Y' or ' y ' are accepted.
    if sys.stdin.isatty():
        install_prompt = input("Would you like to install them now? (y/n): ")
    else:
        install_prompt = os.getenv('AUTO_INSTALL', 'y')
    install_prompt = install_prompt.strip().lower()

    if install_prompt == 'y':
        print("📦 Installing missing packages...")
        for pkg in missing_packages:
            if not install_package(pkg):
                print("❗ Please install the missing packages manually using the following command:")
                print(f"pip install {' '.join(missing_packages)}")
                sys.exit(1)
    else:
        print("❗ Please install the missing packages manually using the following command:")
        print(f"pip install {' '.join(missing_packages)}")
        sys.exit(1)

# Generate requirements.txt
def generate_requirements_file(packages, filename='requirements.txt'):
    """
    Write one pip requirement string per line to a requirements file.

    Failures are logged and reported on stdout; they are not re-raised.

    Args:
        packages (dict): Mapping of module names to pip installation names/version ranges.
        filename (str): Path to the requirements.txt file.
    """
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            f.writelines(f"{pip_name}\n" for pip_name in packages.values())
    except Exception as e:
        logging.error(f"Failed to generate {filename}: {e}")
        print(f"❌ Failed to generate {filename}: {e}")
    else:
        print(f"📝 Generated {filename}")

# Execute package check and generate requirements.txt.
# The check runs first and may install packages or terminate the process, so the
# third-party imports below are only reached once the check has passed.
check_and_install_packages(required_packages)
generate_requirements_file(required_packages)

# Import third-party packages.
# An ImportError here is logged, reported on stdout, and ends the process with status 1.
try:
    import pandas as pd
    import matplotlib.pyplot as plt
    import geopandas as gpd
    import plotly.express as px
    import openai
    from tqdm import tqdm
    import charset_normalizer
    print("✅ Successfully imported all third-party packages.")
except ImportError as e:
    logging.error(f"Failed to import third-party packages: {e}")
    print(f"❌ Failed to import third-party packages: {e}")
    print("❗ Please ensure all packages are correctly installed and try again.")
    sys.exit(1)

# --- 2. User configuration ---
fasta_folder = "."

# Collect every .fasta file in the folder, most recently modified first
fasta_files = glob.glob(os.path.join(fasta_folder, "*.fasta"))
fasta_files.sort(key=os.path.getmtime, reverse=True)

# Stop early when there is nothing to analyse; otherwise take the newest file
if not fasta_files:
    raise FileNotFoundError("❗ No .fasta files found. Please check the directory.")
fasta_path = fasta_files[0]
print(f"✅ Auto-selected the latest FASTA file：{fasta_path}")

world_shapefile_path = "ne_110m_admin_0_countries.shp"
location_to_country_path = "location_to_country_AI.json"

# --- 3. Initialize Location-Country Mapping ---
import os
import json
import datetime
import shutil
import glob
import charset_normalizer

def backup_json_file(file_path, max_backups=5):
    """
    Copy a JSON file to a timestamped backup in the current working directory
    and prune the oldest backups beyond ``max_backups``.

    Args:
        file_path (str): Path to the JSON file to back up.
        max_backups (int): Maximum number of backup files to retain.

    Returns:
        str or None: Path to the new backup, or None when the source file does
        not exist or the backup/pruning step failed.
    """
    # Nothing to back up
    if not os.path.exists(file_path):
        return None

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = f"location_to_country_backup_{timestamp}.json"
    try:
        shutil.copy(file_path, backup_path)
        print(f"📝 Backed up existing mapping to {backup_path}")

        # Timestamped names sort chronologically, so the front of the sorted
        # list holds the oldest backups.
        backups = sorted(glob.glob("location_to_country_backup_*.json"))
        while len(backups) > max_backups:
            oldest_backup = backups.pop(0)
            os.remove(oldest_backup)
            print(f"🗑️ Removed old backup: {oldest_backup}")
    except Exception as e:
        print(f"❌ Failed to create backup of {file_path}: {e}")
        return None
    return backup_path

def read_file_with_encoding(file_path):
    """
    Return the text of a file, decoding it with the encoding detected by
    charset_normalizer (UTF-8 when no encoding is detected).

    Args:
        file_path (str): Path to the file.

    Returns:
        str: Decoded file content.

    Raises:
        IOError: If the file cannot be read or decoded.
    """
    try:
        with open(file_path, 'rb') as handle:
            payload = handle.read()
        detected = charset_normalizer.detect(payload)['encoding']
        return payload.decode(detected or 'utf-8')
    except Exception as e:
        raise IOError(f"❌ Failed to read {file_path}: {e}")

def load_json_with_recovery(file_path, backup_path=None):
    """
    Load a JSON object from a file, falling back to a backup copy when the
    primary file is unreadable, malformed, or not a JSON object.

    On a successful fallback the backup is copied over the primary file.

    Args:
        file_path (str): Path to the JSON file.
        backup_path (str, optional): Path to a backup file to restore from.

    Returns:
        dict: Loaded JSON data or an empty dict if loading fails.
    """
    def _read_mapping(path, wrong_type_message):
        # Parse the file and insist on a top-level JSON object.
        parsed = json.loads(read_file_with_encoding(path))
        if not isinstance(parsed, dict):
            raise ValueError(wrong_type_message)
        return parsed

    try:
        return _read_mapping(file_path, "Invalid JSON format: Expected a dictionary")
    except (json.JSONDecodeError, ValueError, IOError) as e:
        print(f"❌ Error loading {file_path}: {e}")

    # Primary file failed; without a usable backup there is nothing to recover
    if not (backup_path and os.path.exists(backup_path)):
        return {}

    print(f"🔄 Attempting to restore from {backup_path}")
    try:
        data = _read_mapping(backup_path, "Invalid JSON format in backup: Expected a dictionary")
        # Restore the original file from backup
        shutil.copy(backup_path, file_path)
        print(f"✅ Restored {file_path} from {backup_path}")
        return data
    except Exception as e:
        print(f"❌ Failed to restore from {backup_path}: {e}")
    return {}

def initialize_location_mapping(location_to_country_path="location_to_country_AI.json"):
    """
    Initialize the location-to-country mapping by loading or creating a JSON file.

    A timestamped backup of the existing file is taken first. If the file then
    turns out to be corrupted, recovery uses the newest backup that already
    existed before this call: the backup taken here is a copy of the very file
    that failed to load, so it could not repair it.

    Args:
        location_to_country_path (str): Path to the JSON file storing location-to-country mappings.

    Returns:
        dict: Loaded or initialized location-to-country mappings.
    """
    # Snapshot the backups from earlier runs before a new one is added
    # (same naming pattern that backup_json_file writes and prunes).
    earlier_backups = sorted(glob.glob("location_to_country_backup_*.json"))

    # Create a backup of the existing JSON file
    backup_path = backup_json_file(location_to_country_path)

    # Load or initialize the mapping
    if os.path.exists(location_to_country_path):
        # Prefer the most recent earlier backup that survived pruning; fall
        # back to the fresh backup only when no earlier one exists.
        recovery_path = next(
            (
                candidate
                for candidate in reversed(earlier_backups)
                if candidate != backup_path and os.path.exists(candidate)
            ),
            backup_path,
        )
        location_to_country = load_json_with_recovery(location_to_country_path, recovery_path)
        print(f"✅ Loaded {len(location_to_country)} location-to-country entries.")
    else:
        location_to_country = {}
        print("⚡ location_to_country_AI.json not found. Creating an empty dictionary.")
        try:
            with open(location_to_country_path, "w", encoding="utf-8") as f:
                json.dump(location_to_country, f, ensure_ascii=False, indent=4)
            print(f"✅ Created and saved empty {location_to_country_path}")
        except Exception as e:
            # Best effort: the in-memory mapping is still returned.
            print(f"❌ Failed to create {location_to_country_path}: {e}")

    return location_to_country

# Initialize the location-to-country mapping.
# NOTE: location_to_country_path is re-assigned here to the same value it was
# given in the user-configuration section.
location_to_country_path = "location_to_country_AI.json"
location_to_country = initialize_location_mapping(location_to_country_path)

# --- 4. Parse location from FASTA headers ---
with open(fasta_path, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Keep only the header records (lines beginning with '>'), trimmed of surrounding whitespace
headers = [raw_line.strip() for raw_line in lines if raw_line.startswith(">")]
location_list = []
fail_headers = []  # headers from which no location could be extracted

# The location is the field between the second and third slashes of the strain name
for header in headers:
    match = re.search(r"A/[^/]+/([^/]+)/", header)
    if match is None:
        fail_headers.append(header)
        continue
    location_list.append(match.group(1).strip())

fail_count = len(fail_headers)

print(f"📋 Total raw FASTA header sequences: {len(headers)}. Successfully extracted locations: {len(location_list)}. Error: {fail_count}.")

# Show every header that could not be parsed so it can be inspected manually
if fail_headers:
    print("⚠️ The following virus strain headers could not be processed for location extraction:")
    print(*(f"  - {fh}" for fh in fail_headers), sep="\n")

# --- 5. Load OpenAI API Key ---
# Treat an unset, empty, or whitespace-only variable the same way: without this
# an empty key would pass the check and only fail later at the first API call.
# Surrounding whitespace (e.g. a trailing newline from copy-paste) is removed.
openai_api_key = (os.getenv("OPENAI_API_KEY") or "").strip()
if not openai_api_key:
    raise ValueError("❗ Environment variable OPENAI_API_KEY not found. Please set your API key first!")

# --- 6. Ask user which ChatGPT model to use today ---
print("🧠 Please select the ChatGPT model to use today:")
print("1. gpt-3.5-turbo-1106 (Economy Version)")
print("2. gpt-4o (Enhanced Version)")

# Menu entry -> (model identifier, confirmation message)
model_options = {
    "1": ("gpt-3.5-turbo-1106", "✅ You selected ➔ gpt-3.5-turbo-1106 (Economy Version)"),
    "2": ("gpt-4o", "✅ You selected ➔ gpt-4o (Enhanced Version)"),
}

# Keep asking until the answer is 1, 2, or empty (empty means the default)
choice = input("Enter 1 or 2 (default is gpt-4o): ").strip()
while choice not in ("1", "2", ""):
    print("❌ Invalid choice. Please enter 1 or 2.")
    choice = input("Enter 1 or 2 (default is gpt-4o): ").strip()

# Anything other than "1" resolves to the gpt-4o entry
model_name, confirmation_message = model_options.get(choice, model_options["2"])
print(confirmation_message)

# --- 7. Automatically detect openai-python version and set API call method ---
try:
    from importlib.metadata import version
except ImportError:
    # Fallback for interpreters without importlib.metadata.
    # get_distribution() returns a Distribution object, not a string, so wrap
    # it to expose the same "name -> version string" contract as
    # importlib.metadata.version.
    from pkg_resources import get_distribution

    def version(distribution_name):
        """Return the installed version string of *distribution_name*."""
        return get_distribution(distribution_name).version

openai_version = version("openai")
print(f"🔎 Current openai-python version: {openai_version}")
major_version = int(openai_version.split(".")[0])

# --- 8. Define ChatGPT batch inference function (rate-limit resilient version) ---
# openai>=1 uses a client object; older releases use a module-level API key.
if major_version >= 1:
    from openai import OpenAI
    client = OpenAI(api_key=openai_api_key)
else:
    openai.api_key = openai_api_key

def ask_batch(batch):
    """
    Ask the selected chat model for the country of each location in a batch.

    The request is attempted up to five times; rate-limit errors wait 20
    seconds between attempts and other errors wait 10 seconds. No wait is
    performed after the final attempt.

    Args:
        batch (list[str]): Location names to resolve.

    Returns:
        str or None: The model's raw reply (expected to contain one
        "Location:Country" entry per line), an empty string when ``batch`` is
        empty (no request is sent), or None when every attempt failed.
    """
    # Nothing to ask: avoid a pointless (and billable) API call.
    if not batch:
        return ""

    instructions = (
    """Determine the country corresponding to each of the following locations.
Location names may include: 
1.ISO 3166-1 alpha-3 country codes
2.Names of states, provinces, or counties in English, including common abbreviations
3.Major city names and their abbreviations
Remove any leading hyphens (-) or whitespace from each location before processing.
Use the format “Location:Country”, one entry per line.
Apply reasonable inference for misspellings and romanization variants, but avoid over-guessing.
If the country cannot be clearly determined, assign “Other”.
Treat “Taiwan” as a sovereign and independent country.
Country names must strictly follow ISO 3166-1 short English names—no abbreviations, notes, or extra comments.
Standardize the following aliases:
USA, US, United States → United States of America
UK → United Kingdom
Korea, South Korea → Korea, Republic of
North Korea → Korea, Democratic People's Republic of"""
)
    # Keep the location list clearly separated from the instructions; without
    # the separator the first location is glued onto the last alias line.
    prompt = (
        instructions
        + "\n\nLocations:\n"
        + "\n".join(batch)
        + "\n\nPlease output only the results in the required format:"
    )

    # Same payload for both client generations
    messages = [
        {"role": "system", "content": "You are a highly professional and erudite geographer."},
        {"role": "user", "content": prompt}
    ]

    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        try:
            if major_version >= 1:
                response = client.chat.completions.create(
                    model=model_name,
                    messages=messages,
                    temperature=0
                )
                return response.choices[0].message.content.strip()

            response = openai.ChatCompletion.create(
                model=model_name,
                messages=messages,
                temperature=0
            )
            return response["choices"][0]["message"]["content"].strip()
        except Exception as e:
            # Deliberately broad: any failure (network, API, malformed reply)
            # is retried. Rate limits are recognised from the message text.
            if "rate limit" in str(e).lower():
                print(f"⏳ Rate limit encountered. Waiting 20 seconds before retrying... (Attempt {attempt})")
                wait_seconds = 20
            else:
                print(f"⚡ Batch request failed on attempt {attempt}: {e}")
                wait_seconds = 10
            # Only wait when another attempt will actually follow.
            if attempt < max_attempts:
                time.sleep(wait_seconds)
    return None

# --- 9. Load location list ---
# location_list = [...]  # <-- Remember to insert your list of locations here

# Tally how many isolates were seen for each location
location_counter = Counter(location_list)
total_samples = len(location_list)

# Tabulate the tallies, most frequent first, and persist them as CSV
location_table = pd.DataFrame(location_counter.items(), columns=["Location", "Count"])
location_df = location_table.sort_values(by="Count", ascending=False)
location_df.to_csv("location_list.csv", index=False)
print(f"📄 Location list saved to location_list.csv (Total: {len(location_df)} records)")

# --- 10. Load existing inference records and failed attempts ---
# Both files are resume state written by earlier (possibly interrupted) runs, so
# a truncated or hand-edited file must not abort the notebook: unreadable or
# wrongly-shaped content is reported and replaced by an empty default.
failed_locations_path = "failed_locations.json"

try:
    with open(location_to_country_path, "r", encoding="utf-8") as f:
        location_to_country = json.load(f)
except FileNotFoundError:
    location_to_country = {}
except (json.JSONDecodeError, UnicodeDecodeError) as e:
    print(f"⚠️ Could not parse {location_to_country_path} ({e}); starting from an empty mapping.")
    location_to_country = {}
if not isinstance(location_to_country, dict):
    print(f"⚠️ {location_to_country_path} does not contain a JSON object; starting from an empty mapping.")
    location_to_country = {}

try:
    with open(failed_locations_path, "r", encoding="utf-8") as f:
        failed_locations = json.load(f)
except FileNotFoundError:
    failed_locations = []
except (json.JSONDecodeError, UnicodeDecodeError) as e:
    print(f"⚠️ Could not parse {failed_locations_path} ({e}); ignoring previously failed locations.")
    failed_locations = []
if isinstance(failed_locations, list):
    # Later steps concatenate and join these values, so keep only strings.
    failed_locations = [loc for loc in failed_locations if isinstance(loc, str)]
else:
    print(f"⚠️ {failed_locations_path} does not contain a JSON list; ignoring previously failed locations.")
    failed_locations = []

# --- 11. Batch inference module (with 2 rounds of retry for failed cases) ---
def batch_guess_and_update_with_fail_record(
    location_to_country,
    unknown_locations,
    location_to_country_path,
    failed_locations_path="failed_locations.json",
    batch_size=20,
    save_every_batch=True,
    retry_failed_rounds=2,
    max_total_attempts=5,
    print_summary=True
):
    """
    Resolve locations to countries in batches, retrying unresolved ones, and
    persist progress so an interrupted run can be resumed.

    Args:
        location_to_country (dict): Mapping updated in place with new results.
        unknown_locations (list[str]): Locations that still need a country.
        location_to_country_path (str): JSON file receiving the mapping.
        failed_locations_path (str): JSON file receiving unresolved locations.
        batch_size (int): Number of locations per request; must be >= 1.
        save_every_batch (bool): Write both JSON files after every batch.
        retry_failed_rounds (int): Number of extra passes over unresolved locations.
        max_total_attempts (int): Upper bound on retry passes (the smaller of
            this and ``retry_failed_rounds`` applies).
        print_summary (bool): Print progress information.

    Returns:
        tuple[dict, list]: The updated mapping and the locations that remain
        unresolved after all passes.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    def write_state(unresolved):
        # Persist the mapping and the complete list of unresolved locations.
        with open(location_to_country_path, "w", encoding="utf-8") as f:
            json.dump(location_to_country, f, ensure_ascii=False, indent=4)
        with open(failed_locations_path, "w", encoding="utf-8") as f:
            json.dump(unresolved, f, ensure_ascii=False, indent=4)

    def single_batch_process(batch, batch_idx, total_batches, carried_unresolved):
        # Process one batch. `carried_unresolved` holds locations that are
        # unresolved outside this batch; they are included in the checkpoint so
        # the file on disk never loses failures recorded by earlier batches.
        batch_success = 0
        batch_failed = []

        if print_summary:
            print(f"\n🔵 [Batch {batch_idx}/{total_batches}] Started ➔ {len(batch)} locations")

        start_time = datetime.datetime.now()
        reply = ask_batch(batch)

        if reply is None:
            if print_summary:
                print(f"❗ Batch {batch_idx} completely failed — all locations moved to failed list.")
            batch_failed.extend(batch)
        else:
            # The prompt tells the model to drop leading hyphens/whitespace, so
            # a reply may name "Taiwan" for a requested "-Taiwan". Index the
            # requested names by that normalised form to map replies back.
            requested_by_normalized = {}
            for requested in batch:
                requested_by_normalized.setdefault(requested.lstrip("- \t"), []).append(requested)

            parsed = set()
            for line in reply.split("\n"):
                if ":" not in line:
                    continue
                loc, country = line.split(":", 1)
                loc = loc.strip()
                country = country.strip()
                if not (loc and country):
                    continue
                # Names that match no request are still stored under the name
                # the model returned.
                for target in requested_by_normalized.get(loc, [loc]):
                    location_to_country[target] = country
                    parsed.add(target)

            # Preserve the batch order so retries are deterministic.
            batch_failed = [loc for loc in batch if loc not in parsed]
            batch_success = len(set(batch) & parsed)

        batch_fail = len(batch_failed)
        elapsed = (datetime.datetime.now() - start_time).total_seconds()
        avg = elapsed / len(batch) if batch else 0

        if print_summary:
            print(f"✅ Batch {batch_idx} done in {elapsed:.1f}s (avg: {avg:.2f}s/loc)")
            print(f"🎯 Success: {batch_success}, Failures: {batch_fail}")

        if save_every_batch:
            unresolved_so_far = list(carried_unresolved) + batch_failed
            write_state(unresolved_so_far)
            if print_summary:
                print(f"📝 Checkpoint saved — {len(location_to_country)} mapped, {len(unresolved_so_far)} failed")

        # Fixed pause between requests to stay clear of rate limits.
        time.sleep(3)
        return batch_failed

    # --- Initial batch split ---
    total = len(unknown_locations)
    batches = [unknown_locations[i:i + batch_size] for i in range(0, total, batch_size)]
    if print_summary:
        print(f"🚀 Starting inference: {total} locations → {len(batches)} batches of {batch_size}")

    all_failed_locations = []

    # --- Main pass ---
    for batch_idx, batch in enumerate(tqdm(batches, desc="🧠 Running inference...", ncols=100), 1):
        failed = single_batch_process(batch, batch_idx, len(batches), all_failed_locations)
        all_failed_locations.extend(failed)

    # --- Retry rounds ---
    attempt = 1
    while attempt <= max_total_attempts and all_failed_locations and attempt <= retry_failed_rounds:
        if print_summary:
            print(f"\n🔄 Retry round {attempt} — {len(all_failed_locations)} unresolved locations")

        retry_batches = [all_failed_locations[i:i + batch_size] for i in range(0, len(all_failed_locations), batch_size)]
        new_failed = []

        for batch_idx, batch in enumerate(tqdm(retry_batches, desc=f"♻️ Retry {attempt}", ncols=100), 1):
            # batch_idx is 1-based, so this slice is exactly the batches that
            # have not been retried yet in this round; they are still unresolved.
            not_yet_retried = [loc for later in retry_batches[batch_idx:] for loc in later]
            failed = single_batch_process(batch, batch_idx, len(retry_batches), new_failed + not_yet_retried)
            new_failed.extend(failed)

        all_failed_locations = new_failed
        attempt += 1

    # --- Final save ---
    write_state(all_failed_locations)

    if print_summary:
        print(f"\n🏁 Inference complete. Final unresolved: {len(all_failed_locations)}")

    return location_to_country, all_failed_locations

# --- 12. Identify unfinished locations and continue inference ---
# A location still needs inference when it has no mapping yet or is mapped to "Other"
pending_locations = [
    loc for loc in location_counter
    if location_to_country.get(loc) in (None, "Other")
]
# Merge with previously failed locations and drop duplicates
unknown_locations = list(set(pending_locations + failed_locations))

print(f"🔎 {len(unknown_locations)} locations still require inference!")

if not unknown_locations:
    print("✅ All locations have been successfully inferred. Great job!")
else:
    location_to_country, failed_locations = batch_guess_and_update_with_fail_record(
        location_to_country,
        unknown_locations,
        location_to_country_path,
        failed_locations_path=failed_locations_path,
        batch_size=20,
        save_every_batch=True,
        retry_failed_rounds=2
    )

# --- 13. Country-level sample statistics ---
# Per-country sample totals, and per-location totals for locations without a country
country_counter = defaultdict(int)
other_locations = defaultdict(int)

# Roll location counts up to country level; a location with no mapping counts as "Other"
for loc, count in location_counter.items():
    country = location_to_country.get(loc, "Other")
    country_counter[country] += count
    if country == "Other":
        other_locations[loc] += count

# Country table, largest sample count first
ranked_countries = sorted(country_counter.items(), key=lambda x: x[1], reverse=True)
country_df = pd.DataFrame(ranked_countries, columns=["Country", "Sample_Count"])

# Sanity check: every sample must be attributed to exactly one country bucket
assert country_df["Sample_Count"].sum() == total_samples, \
    f"Inconsistent total sample count! Computed: {country_df['Sample_Count'].sum()}, Expected: {total_samples}"

# Persist the country table
country_df.to_csv("country_stat.csv", index=False)
print("📄 Country statistics saved to country_stat.csv")

# Unmatched locations, largest sample count first (empty when everything matched)
ranked_other = sorted(other_locations.items(), key=lambda x: x[1], reverse=True)

if not other_locations:
    # Every location was corresponded to a country
    print("✅ No unmatched locations; all locations successfully corresponded to countries")
else:
    # Persist the unmatched locations for further analysis
    other_df = pd.DataFrame(ranked_other, columns=["Location", "Sample_Count"])
    other_df.to_csv("other_locations.csv", index=False)
    print("📄 Unmatched location statistics saved to other_locations.csv")

# Final success report
print(f"🏁 All country statistics completed! Total samples extracted: {total_samples}, "
      f"Sum of country counts: {country_df['Sample_Count'].sum()} - fully matched ✅")

# Inference summary report: split the samples into resolved and "Other"
fail_samples = sum(other_locations.values())
success_samples = total_samples - sum(other_locations.values())

print(f"🌐 Successfully identified countries for {success_samples} virus isolates using ISO-3166-1 and AI.")
print(f"{fail_samples} isolates could not be resolved and are temporarily classified as 'Other'.")
print("Top 20 unmatched locations and their counts are listed below:")

if not other_locations:
    print("  No unmatched locations")
else:
    # At most the 20 largest unmatched locations
    for loc, count in ranked_other[:20]:
        print(f"  - {loc}: {count} isolates")

# --- 14. Generate bar chart of top 20 countries by sample count ---
# Re-read the table saved in the previous step; it was written sorted by
# Sample_Count in descending order, so head(20) yields the 20 largest countries.
country_df = pd.read_csv("country_stat.csv")

# Select the top 20 countries
top20_countries = country_df.head(20)

# Plotting: horizontal bars, one per country
plt.figure(figsize=(14, 8))
bars = plt.barh(top20_countries['Country'], top20_countries['Sample_Count'], color='steelblue')
plt.xlabel('Sample Count', fontsize=13)
plt.title('Top 20 Countries by Sample Count', fontsize=15, pad=15)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.gca().invert_yaxis()  # Highest bar on top

# Annotate each bar with its value, placed just past the end of the bar
# (offset = 1% of the largest count) and vertically centred on the bar
for bar in bars:
    width = bar.get_width()
    plt.text(width + (0.01 * top20_countries['Sample_Count'].max()),  # Slightly offset to the right
             bar.get_y() + bar.get_height() / 2,
             f'{int(width)}',
             va='center', ha='left', fontsize=11)

plt.tight_layout()

# Save high-resolution PNG (must happen before plt.show(), which displays the figure)
plt.savefig("top20_countries_sample_count.png", dpi=600)
plt.show()

print("🖼️ Bar chart saved with annotated sample counts: top20_countries_sample_count.png")

# --- 15. Plot static world map ---
# Attach a sample count to every shape by exact string match between the
# shapefile's NAME column and the keys of country_counter; unmatched shapes get 0.
# NOTE(review): country names produced by the inference step must equal the
# shapefile's NAME values exactly to be counted — verify the naming on both sides.
world = gpd.read_file(world_shapefile_path)
world["Samples"] = world["NAME"].map(lambda x: country_counter.get(x, 0))

# 🧠 Custom title prefix
title_prefix = "H5 AIV Global Surveillance"  # 🔥 You can freely modify this prefix

# Choropleth shaded by sample count, saved as a high-resolution PNG
fig, ax = plt.subplots(figsize=(18, 10))
world.plot(column="Samples", cmap="Blues", linewidth=0.8, ax=ax, edgecolor="0.8", legend=True)
plt.title(f"{title_prefix} Samples by Country (Static Map)", fontsize=20)
plt.savefig("static_map.png", dpi=600)
plt.show()

# --- 16. Plot interactive choropleth map ---
# 🧠 Custom title prefix (re-assigned here, so this map's title can be changed on its own)
title_prefix = "H5 AIV Global Surveillance"  # 🔥 You can freely change this prefix

# Countries are located by the text in the "Country" column
# (locationmode="country names") and coloured by "Sample_Count".
fig = px.choropleth(
    country_df,
    locations="Country",
    locationmode="country names",
    color="Sample_Count",
    color_continuous_scale="Blues",
    title=f"{title_prefix} Samples by Country (Interactive Map)"
)
# Write a standalone HTML copy, then display the figure
fig.write_html("interactive_map.html")
fig.show()

# --- 17. Print suggested manual additions for location_to_country ---
# Emits one `"Location": "Guess",` line per unmatched location so the output can
# be pasted straight into a dict/JSON mapping.
print("\n📋 Suggested additions to manually update 'location_to_country':\n")

if 'other_df' not in locals():
    # other_df is only created earlier when unmatched locations exist; rebuild
    # it from the mapping otherwise. Explicit columns keep the frame well-formed
    # even when there are no rows.
    other_df = pd.DataFrame(
        [
            {"Location": loc, "AI_Guess": "Other"}  # Default AI guess is 'Other'; you can customize it
            for loc in location_to_country
            if location_to_country[loc] == "Other"
        ],
        columns=["Location", "AI_Guess"],
    )
elif "AI_Guess" not in other_df.columns:
    # If other_df exists, ensure it has the AI_Guess column
    other_df["AI_Guess"] = "Other"

# Check if other_df is empty; skip iteration if it is
if not other_df.empty:
    for _, row in other_df.iterrows():
        # Both key and value are fully quoted (the key's closing quote was missing).
        print(f'    "{row["Location"]}": "{row["AI_Guess"]}",')
else:
    print("  No unmatched locations to suggest for manual addition.")

# --- 18. One-click packaging of output files ---
def pack_all_results(output_folder="."):
    """
    Bundle the notebook's output files into a timestamped zip archive.

    Files are looked up relative to the current working directory; any that do
    not exist are skipped silently. The archive is written to
    ``output_folder`` (created if necessary) as ``Results_<timestamp>.zip``.

    Args:
        output_folder (str): Directory that receives the archive.

    Returns:
        str: Path of the archive that was written.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    zip_filename = f"Results_{timestamp}.zip"
    zip_path = os.path.join(output_folder, zip_filename)

    # Trailing commas on every entry: a missing comma silently concatenates two
    # adjacent string literals into one non-existent filename.
    files_to_zip = [
        "location_list.csv",
        "country_stat.csv",
        "other_locations.csv",
        "top20_countries_sample_count.png",
        "static_map.png",
        "interactive_map.html",
        "location_to_country_AI.json",
        "failed_locations.json",
    ]
    files_to_zip += sorted(glob.glob("location_to_country_backup_*.json"))

    os.makedirs(output_folder, exist_ok=True)
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for file in files_to_zip:
            if os.path.exists(file):
                zipf.write(file)

    print(f"\n📦 Packaging complete: {zip_path}")
    return zip_path

# Package the outputs using the default output folder (the current directory).
pack_all_results()