In [7]:
import os
import json
import pickle
import numpy as np
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

def cross_evaluate_models(base_path):
    """
    Evaluate every site's XGBoost model against every site's test data.

    Each immediate subdirectory of 'base_path' is treated as a site. For a
    site named SITE, these files are looked up inside 'base_path/SITE':

      * SITE_xgb_model.json             - model, loaded with XGBClassifier.load_model
      * SITE_label_encoder_classes.npy  - class names, loaded with np.load
      * SITE_X_test.pkl                 - pickled test features
      * SITE_y_test.npy                 - test labels

    A site missing its model or class file is skipped as a *model* site; a
    site missing either test file is skipped as a *test* site. For every
    remaining model site the results are written to
    'base_path/SITE/SITE_cross_accuracy_matrix.json' in the form
    { test_site: {"accuracy": <percent>, "classification_report": <dict>} }
    and a confirmation line is printed.

    Args:
        base_path: Directory whose subdirectories are the site folders.

    Returns:
        None. Results are only written to disk.
    """

    # Find all subdirectories in base_path (each expected to be a "site").
    # Sorted so the processing order and JSON key order are deterministic
    # (os.listdir makes no ordering guarantee).
    sites = sorted(
        d for d in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, d))
    )

    for model_site in sites:
        site_dir = os.path.join(base_path, model_site)

        # Construct paths to model & label encoder classes
        model_path = os.path.join(site_dir, f"{model_site}_xgb_model.json")
        label_encoder_path = os.path.join(site_dir, f"{model_site}_label_encoder_classes.npy")

        # Skip site if it is missing files
        if not (os.path.exists(model_path) and os.path.exists(label_encoder_path)):
            continue

        # Load the XGBoost model
        xgb_model = XGBClassifier()
        xgb_model.load_model(model_path)

        # Class names in encoder order. Converted to plain str so they are
        # always valid JSON object keys in the report dictionary.
        class_names = [str(c) for c in np.load(label_encoder_path)]
        # Encoded label ids matching class_names position by position.
        class_ids = np.arange(len(class_names))

        cross_results = {}  # Will hold { test_site: {accuracy, classification_report}, ... }

        # Now apply (model_site)'s model to each site's test data
        for test_site in sites:
            X_test_path = os.path.join(base_path, test_site, f"{test_site}_X_test.pkl")
            y_test_path = os.path.join(base_path, test_site, f"{test_site}_y_test.npy")

            # If test data is missing, skip
            if not (os.path.exists(X_test_path) and os.path.exists(y_test_path)):
                continue

            # NOTE: pickle.load can execute arbitrary code embedded in the
            # file; only point this function at output folders you trust.
            with open(X_test_path, "rb") as f:
                X_test = pickle.load(f)
            y_test = np.load(y_test_path)

            # Make predictions
            y_pred = xgb_model.predict(X_test)

            # NOTE(review): assumes every site's y_test uses the same
            # integer-to-class mapping as model_site's encoder - confirm.
            accuracy = float(accuracy_score(y_test, y_pred)) * 100

            # Pass 'labels' explicitly: without it scikit-learn infers the
            # label set from y_test/y_pred and raises ValueError whenever
            # that set has a different size than target_names, which happens
            # as soon as a test site lacks one of the model's classes.
            # zero_division=0 keeps the default score (0) for classes with no
            # samples/predictions but without emitting a warning per class.
            report = classification_report(
                y_test,
                y_pred,
                labels=class_ids,
                target_names=class_names,
                output_dict=True,
                zero_division=0,
            )

            cross_results[test_site] = {
                "accuracy": accuracy,
                "classification_report": report
            }

        # Save these cross-accuracy results into a JSON file
        out_json = os.path.join(site_dir, f"{model_site}_cross_accuracy_matrix.json")
        with open(out_json, "w", encoding="utf-8") as f:
            json.dump(cross_results, f, indent=4)

        print(f"Saved cross-accuracy results for '{model_site}' to: {out_json}")

# Root folder whose immediate subdirectories are scanned as sites. Written as
# a raw string so the Windows backslashes are not treated as escape sequences.
base_output_folder = r"C:\Users\miles\favela_analysis\output"
# Run the cross-site evaluation; a '<site>_cross_accuracy_matrix.json' file is
# written into the folder of every site that has a model and class file.
cross_evaluate_models(base_output_folder)


Saved cross-accuracy results for 'faz_quem_quer' to: C:\Users\miles\favela_analysis\output\faz_quem_quer\faz_quem_quer_cross_accuracy_matrix.json
Saved cross-accuracy results for 'jacarezinho' to: C:\Users\miles\favela_analysis\output\jacarezinho\jacarezinho_cross_accuracy_matrix.json
Saved cross-accuracy results for 'morro_da_guaiba' to: C:\Users\miles\favela_analysis\output\morro_da_guaiba\morro_da_guaiba_cross_accuracy_matrix.json
Saved cross-accuracy results for 'nova_cidade' to: C:\Users\miles\favela_analysis\output\nova_cidade\nova_cidade_cross_accuracy_matrix.json
Saved cross-accuracy results for 'rocinha' to: C:\Users\miles\favela_analysis\output\rocinha\rocinha_cross_accuracy_matrix.json
