In [7]:
import os
from pathlib import Path
import pandas as pd
from datetime import datetime
from tqdm.auto import tqdm

In [None]:
# ==============================================================
# MAIN DRIVER CONFIGURATION
# ==============================================================

# NOTE(review): both base directories are blank placeholders (a single space);
# presumably they must be filled in with real folders before running.
# BASE_INPUT_DIR  - root that is scanned for the per-elite Excel inputs.
# BASE_OUTPUT_DIR - root that receives one output sub-folder per elite.
BASE_INPUT_DIR, BASE_OUTPUT_DIR = Path(" "), Path(" ")

# Cut-off date: tweets dated before it count as pre-intervention in the analysis.
INTERVENTION_DATE = datetime(year=2020, month=3, day=11)

In [9]:
# ==============================================================
# PER-ELITE ANALYSIS FUNCTION (defined inline in this cell, not imported)
# ==============================================================

# def analyze_elite(FILE_PATH_INPUT, FILE_PATH_OUTPUT, INTERVENTION_DATE):
def analyze_elite(FILE_PATH_INPUT, ELITE_OUT_DIR, INTERVENTION_DATE):
    """
    This function runs your full analysis pipeline for one elite.
    The code inside is exactly what you already had ‚Äî unchanged.
    """
    import pandas as pd
    import numpy as np
    import spacy
    import regex as re
    import networkx as nx
    import matplotlib.pyplot as plt
    from fuzzywuzzy import fuzz, process
    import random
    from pathlib import Path
    from tqdm.auto import tqdm
    from collections import Counter
    from datetime import datetime
    from community import community_louvain
    import warnings
    warnings.filterwarnings("ignore")

    # ======= BEGIN your existing block =======

    """
    elite_name = Path(FILE_PATH_INPUT).stem
    ELITE_OUT_DIR = Path(FILE_PATH_OUTPUT) / elite_name
    ELITE_OUT_DIR.mkdir(parents=True, exist_ok=True)
    """
    
    ELITE_OUT_DIR = Path(ELITE_OUT_DIR)  # ‚Üê Ensure Path
    ELITE_OUT_DIR.mkdir(parents=True, exist_ok=True)
    elite_name = Path(FILE_PATH_INPUT).stem
    



    print(f"\nüöÄ Starting analysis for: {elite_name}")
    print(f"üìÅ Outputs will be saved to: {ELITE_OUT_DIR}")

    COVID_KEYWORDS = {
        "en": {"covid", "covid19", "covid-19", "coronavirus", "pandemic", "sarscov2", "vaccine", "vaccination"},
        "ar": {"ŸÉŸàÿ±ŸàŸÜÿß", "ŸÉŸàŸÅŸäÿØ", "ŸÉŸàŸÅŸäÿØ-19", "ÿ¨ÿßÿ¶ÿ≠ÿ© ŸÉŸàÿ±ŸàŸÜÿß", "Ÿàÿ®ÿßÿ° ŸÉŸàÿ±ŸàŸÜÿß", "ŸÅŸäÿ±Ÿàÿ≥ ŸÉŸàÿ±ŸàŸÜÿß", "ŸÑŸÇÿßÿ≠", "ÿ™ÿ∑ÿπŸäŸÖ"},
        "es": {"covid", "covid-19", "coronavirus", "pandemia", "vacuna", "vacunaci√≥n"},
        "pt": {"covid", "covid-19", "coronavirus", "pandemia", "vacina", "vacina√ß√£o"},
        "fr": {"covid", "covid-19", "coronavirus", "pand√©mie", "vaccin", "vaccination"},
        "he": {"◊ß◊ï◊®◊ï◊†◊î", "◊ß◊ï◊ë◊ô◊ì", "◊ß◊ï◊ë◊ô◊ì-19", "◊ï◊ô◊®◊ï◊°", "◊û◊í◊ô◊§◊î", "◊ó◊ô◊°◊ï◊ü", "◊ó◊ô◊°◊ï◊†◊ô◊ù"},
        "tr": {"koronavir√ºs", "kovid", "kovid-19", "pandemi", "salgƒ±n", "a≈üƒ±", "a≈üƒ±lanma"},
        "ko": {"ÏΩîÎ°úÎÇò", "ÏΩîÎ°úÎÇò19", "ÏΩîÎ°úÎÇòÎ∞îÏù¥Îü¨Ïä§", "Ìå¨Îç∞ÎØπ", "Î∞±Ïã†", "Ï†ëÏ¢Ö", "Ïò§ÎØ∏ÌÅ¨Î°†"},
        "id": {"covid", "covid-19", "korona", "pandemi", "vaksin", "vaksinasi"},
        "ur": {"⁄©Ÿàÿ±ŸàŸÜÿß", "⁄©ŸàŸà⁄à", "⁄©ŸàŸà⁄à-19", "Ÿàÿ®ÿßÿ°", "Ÿà€å⁄©ÿ≥€åŸÜ", "Ÿà€å⁄©ÿ≥€å ŸÜ€åÿ¥ŸÜ", "Ÿàÿßÿ¶ÿ±ÿ≥"},
        "hi": {"‡§ï‡•ã‡§∞‡•ã‡§®‡§æ", "‡§ï‡•ã‡§µ‡§ø‡§°", "‡§ï‡•ã‡§µ‡§ø‡§°-19", "‡§Æ‡§π‡§æ‡§Æ‡§æ‡§∞‡•Ä", "‡§µ‡•à‡§ï‡•ç‡§∏‡•Ä‡§®", "‡§ü‡•Ä‡§ï‡§æ‡§ï‡§∞‡§£"}
    }

    ALL_COVID_TERMS = set(term.lower() for terms in COVID_KEYWORDS.values() for term in terms)

    # ========== LOAD DATA ==========
    df = pd.read_excel(FILE_PATH_INPUT)
    # Try to detect and parse the actual datetime column
    if "creation_datetime" in df.columns:
        # Replace the weird *** separator with a space
        df["date"] = (
            df["creation_datetime"]
            .astype(str)
            .str.replace(r"\*\*\*", " ", regex=True)
        )
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
    elif "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
    else:
        df["date"] = pd.NaT


    # load spacy once per run
    # spacy.load raises OSError when the model package is not installed; only
    # that case should trigger a download. A bare `except:` would also swallow
    # KeyboardInterrupt and unrelated errors and then start a large download.
    try:
        nlp = spacy.load("en_core_web_lg")
    except OSError:
        import spacy.cli
        spacy.cli.download("en_core_web_lg")
        nlp = spacy.load("en_core_web_lg")

    # ========== CLEANING ==========
    def clean_text(text):
        """
        Strip tweet noise from one raw text cell.

        Removes URLs, @mentions, the standalone token "RT", every character
        that is not a Unicode letter, digit or whitespace, and standalone
        numbers; then collapses whitespace.

        Parameters
        ----------
        text : any scalar
            Raw cell value. Missing values (NaN/None/NaT) yield "".
            Non-string values are converted with str() first.

        Returns
        -------
        str
            The cleaned text (possibly empty).
        """
        if pd.isna(text):
            return ""
        text = str(text)                              # tolerate numeric / non-string cells
        # `re` is the third-party `regex` module here, which is what makes the
        # \p{L} / \p{N} Unicode property classes below valid.
        text = re.sub(r"http\S+", "", text)           # remove URLs
        text = re.sub(r"@\w+", "", text)              # remove mentions
        text = re.sub(r"\bRT\b", "", text)            # remove RT
        text = re.sub(r"[^\p{L}\p{N}\s]", " ", text)  # keep only letters/numbers/spaces
        text = re.sub(r"\b[\p{N}]+\b", " ", text)     # remove standalone numbers (Arabic or Western)
        text = re.sub(r"\s+", " ", text)              # normalize whitespace
        return text.strip()

    # Apply to the raw column directly. Casting with .astype(str) first would
    # turn missing cells into the literal string "nan" (or "None"), bypass the
    # isna guard above and feed a fake token into entity extraction.
    df["clean_text"] = df["text"].apply(clean_text)

    # ---------- EXTRACT ENTITIES (single pass, with progress) ----------
    # spaCy entity labels that are kept as graph nodes.
    ENTITY_LABELS = {
        "PERSON", "NORP", "FAC", "ORG", "GPE", "LOC",
        "PRODUCT", "EVENT", "WORK_OF_ART", "LAW", "LANGUAGE"
    }

    def extract_entities(text):
        """
        Return the distinct entity strings spaCy finds in `text`.

        Only entities whose label is in ENTITY_LABELS are kept; each entity
        text is stripped and duplicates within the same text are removed.
        """
        doc = nlp(text)
        return list({ent.text.strip() for ent in doc.ents if ent.label_ in ENTITY_LABELS})

    # The spaCy pipeline is the expensive step, so it must run exactly once per
    # tweet. (Previously the column was computed with .apply and then fully
    # recomputed by the loop below, doubling the NER time for no benefit.)
    PROGRESS_STEP = 100                     # print a progress line every N tweets
    entities_col = []                       # one list of entities per row, in row order

    print("Extracting entities ‚Ä¶")
    for idx, txt in enumerate(df["clean_text"].astype(str)):
        entities_col.append(extract_entities(txt))

        # print progress every PROGRESS_STEP rows
        if (idx + 1) % PROGRESS_STEP == 0:
            print(f"   processed {idx + 1:,} tweets")

    df["entities"] = entities_col
    print(f"   finished ‚Äì total {len(df):,} tweets\n")
    # ----------------------------------------------------

    # Flatten entities into dataframe (unchanged)
    entities_list = [
        (ent, idx) for idx, ents in enumerate(df["entities"]) for ent in ents
    ]
    entities_df = pd.DataFrame(entities_list, columns=["entity", "tweet_index"])

    # --- Step: Rename 'covid' mentions that occur only before intervention ---
    # Since we merge all COVID-related keywords after the intervention into a single entity named 'covid',
    # any entity literally named 'covid' before the intervention would incorrectly merge with those post-intervention mentions.
    # To prevent this overlap, we rename 'covid' mentions that occur before the intervention to 'covidbefore'.
    # Other COVID-related keywords are not affected, as they keep their original names unless they appear after the intervention.

    # Row-wise formulation: a mention is renamed when its entity is literally
    # "covid" (any casing) AND the tweet it came from is dated before the
    # intervention. Tweets with a missing date (NaT) compare False and are
    # therefore left untouched.
    is_literal_covid = entities_df["entity"].str.lower() == "covid"
    mention_dates = entities_df["tweet_index"].map(df["date"])
    is_pre_intervention = mention_dates < INTERVENTION_DATE
    entities_df.loc[is_literal_covid & is_pre_intervention, "entity"] = "covidbefore"
            
    
    # --------------------------------------------------------------
    # SIMPLE FAST ENTITY RESOLUTION (Single-Core rapidfuzz + Progress)
    # --------------------------------------------------------------

    from rapidfuzz import process, fuzz

    def normalize_entities_fast(
        entities_df,
        covid_terms,
        df_dates,
        intervention_date,
        threshold=90,
        progress_step=100,
    ):
        """
        Build a mapping from raw entity string to normalized entity name.

        Two stages:
        1. COVID merge - an entity that contains any of `covid_terms` as a
           substring (case-insensitive on the entity side) and is mentioned in
           at least one tweet dated on/after `intervention_date` maps to the
           single node name "covid".
        2. Fuzzy de-duplication - every remaining entity is compared, in order
           of first appearance, against the canonical names kept so far; it
           maps to the best match scoring >= `threshold`
           (token_sort_ratio), otherwise it becomes a new canonical name.

        Parameters
        ----------
        entities_df : DataFrame with columns "entity" and "tweet_index".
        covid_terms : iterable of lower-case keyword strings.
        df_dates : Series of tweet dates, indexed by tweet index.
        intervention_date : datetime cut-off for stage 1.
        threshold : minimum similarity score (0-100) to merge two entities.
        progress_step : print a progress line every N entities.

        Returns
        -------
        dict
            {raw entity: normalized entity} for every unique raw entity.
        """
        print("Starting simple fast entity resolution (rapidfuzz)")
        uniq_entities = entities_df["entity"].unique()
        total = len(uniq_entities)
        print(f"   {total:,} unique entities to process\n")

        # Work out once which entities have a mention on/after the
        # intervention. Filtering entities_df inside the loop below would
        # rescan every row for every unique entity (quadratic).
        # Mentions whose tweet has no date (NaT) compare False.
        if total:
            mention_dates = entities_df["tweet_index"].map(df_dates)
            entities_after = set(
                entities_df.loc[mention_dates >= intervention_date, "entity"]
            )
        else:
            entities_after = set()

        # ---------- COVID merge (with progress) ----------
        mapping = {}
        non_covid = []

        for i, ent in enumerate(uniq_entities):
            ent_low = ent.lower()

            if ent in entities_after and any(term in ent_low for term in covid_terms):
                mapping[ent] = "covid"
            else:
                non_covid.append(ent)

            if (i + 1) % progress_step == 0 or (i + 1) == total:
                print(f"   [COVID-merge] processed {i + 1:,}/{total:,} entities")

        n_covid = sum(1 for v in mapping.values() if v == "covid")
        print(f"   {n_covid:,} entities ‚Üí merged to 'covid'\n")

        if not non_covid:
            return mapping

        # ---------- Simple fuzzy deduplication (single-core, fast) ----------
        unique = []  # canonical names kept so far, in order of first appearance
        print(f"   Deduplicating {len(non_covid):,} non-COVID entities (rapidfuzz)‚Ä¶")

        for i, ent in enumerate(non_covid):
            # score_cutoff makes extractOne return None unless the best match
            # reaches the threshold, and lets the scorer stop early.
            match = process.extractOne(
                ent, unique, scorer=fuzz.token_sort_ratio, score_cutoff=threshold
            )
            if match is not None:
                mapping[ent] = match[0]
            else:
                unique.append(ent)
                mapping[ent] = ent

            if (i + 1) % progress_step == 0 or (i + 1) == len(non_covid):
                print(f"   [Deduplication] processed {i + 1:,}/{len(non_covid):,} entities")

        print("\nFast entity resolution complete!\n")
        return mapping

    # --------------------------------------------------------------
    # CALL THE FUNCTION
    # --------------------------------------------------------------
    entity_mapping = normalize_entities_fast(
        entities_df,
        covid_terms=ALL_COVID_TERMS,
        df_dates=df["date"],
        intervention_date=INTERVENTION_DATE,
        threshold=90,
        progress_step=100
    )

    # --------------------------------------------------------------
    # APPLY THE MAPPING
    # --------------------------------------------------------------
    entities_df["entity_normalized"] = entities_df["entity"].map(entity_mapping)
    df["entities_normalized"] = df["entities"].apply(
        lambda lst: [entity_mapping.get(e, e) for e in lst]
    )


    # ========== CREATE NETWORK ==========
    global G, covid_ego
    G = nx.Graph()
    

    for idx, ents in enumerate(df["entities_normalized"]):
        ents_unique = list(set(ents))
        for i in range(len(ents_unique)):
            for j in range(i + 1, len(ents_unique)):
                e1, e2 = ents_unique[i], ents_unique[j]
                if G.has_edge(e1, e2):
                    G[e1][e2]["weight"] += 1
                else:
                    G.add_edge(e1, e2, weight=1)

    print(f"‚úÖ Graph created with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")


    # ========== COVID EGO EXTRACTION (Now Single Unified Node) ==========
    if "covid" in G.nodes:
        covid_ego = nx.ego_graph(G, "covid", radius=1)
        print(f"ü¶† COVID ego network has {covid_ego.number_of_nodes()} nodes and {covid_ego.number_of_edges()} edges.")
    else:
        print("‚ö†Ô∏è No COVID-related entities found after normalization.")
        covid_ego = None

    
    # ==============================================================
    # VISUALIZATION (Full Graph (comment out now) + COVID Ego Network)
    # ==============================================================

    def visualize_graph(G, title, filename, label_sample_size=25, out_dir=ELITE_OUT_DIR, seed=42):
        """
        Render a networkx graph in a dark style and save it as a 400-dpi PNG.

        Parameters
        ----------
        G : networkx graph to draw.
        title : figure title.
        filename : name of the PNG file created inside `out_dir`.
        label_sample_size : number of randomly chosen nodes that get a text
            label; the highest-degree node is always labelled as well.
        out_dir : target folder (defaults to the elite's output folder).
        seed : seed for both the spring layout and the label sampling, so the
            same graph always produces the same picture.

        Returns
        -------
        pathlib.Path or None
            Path of the saved PNG, or None when the graph has no nodes.
        """

        import matplotlib.pyplot as plt
        import networkx as nx
        import random
        from pathlib import Path

        # max() below would raise on an empty graph; there is nothing to draw anyway.
        if G.number_of_nodes() == 0:
            print(f"Skipping visualization '{title}': graph has no nodes.")
            return None

        # Private generator: reproducible labels without touching the global
        # `random` state.
        label_rng = random.Random(seed)

        # style.context applies the dark style to this figure only, instead of
        # permanently switching the global matplotlib style.
        with plt.style.context("dark_background"):
            fig, ax = plt.subplots(figsize=(20, 15))
            try:
                # explicitly set dark background for both figure and axes
                fig.patch.set_facecolor('black')
                ax.set_facecolor('black')

                # Layout and styling
                pos = nx.spring_layout(G, k=0.6, iterations=100, seed=seed)
                node_size = 30
                edge_width = 4
                edge_color = "#FA8E8E"
                node_color = "#FFFF00"

                # Label sampling: a random subset plus the highest-degree node
                all_nodes = list(G.nodes())
                top_node = max(all_nodes, key=lambda node: G.degree(node))
                sampled_nodes = label_rng.sample(all_nodes, min(label_sample_size, len(all_nodes)))
                if top_node not in sampled_nodes:
                    sampled_nodes.append(top_node)
                labels = {node: node for node in sampled_nodes}
                # Shift labels slightly upwards so they do not sit on the node marker
                label_pos = {node: (x, y + 0.05) for node, (x, y) in pos.items() if node in labels}

                # Draw
                nx.draw_networkx_nodes(
                    G, pos, ax=ax, node_size=node_size, node_color=node_color,
                    edgecolors='white', linewidths=0.2, alpha=0.9
                )
                nx.draw_networkx_edges(
                    G, pos, ax=ax, width=edge_width, edge_color=edge_color, alpha=0.5
                )
                nx.draw_networkx_labels(
                    G, label_pos, labels=labels, ax=ax, font_size=18,
                    font_color='yellow', font_weight='bold'
                )

                ax.set_title(title, fontsize=18, color='white', pad=20)
                ax.axis("off")
                fig.tight_layout()

                # Save with transparent=False to preserve dark background
                file_path = Path(out_dir) / filename
                fig.savefig(file_path, dpi=400, bbox_inches='tight', facecolor=fig.get_facecolor(), transparent=False)
            finally:
                # Always release the figure, even if drawing or saving failed.
                plt.close(fig)

        print(f"‚úÖ Saved graph visualization: {file_path}")
        return file_path


    # ---- Visualization calls ----
    # The COVID ego PNG is rendered once, in the "SAVE NETWORKS + RESULTS"
    # section further down, where the visualize_graph call is wrapped in
    # try/except. Rendering it here as well produced the identical file twice
    # (two 400-dpi spring layouts per elite) and, being unguarded, a plotting
    # error at this point would have aborted the metrics, the permutation test
    # and all saving.
    # The full-graph rendering is intentionally disabled; to re-enable it call
    #   visualize_graph(G, <title>, f"{elite_name}_FullGraph.png")
    # from the save section.

    
    # ==============================================================
    # FINAL ULTRA-CLEAN GRAPH METRICS (8 METRICS ONLY)
    # ==============================================================

    def graph_metrics(H: nx.Graph, sample_frac=0.3, seed=42, closeness_sample_threshold=10000000):
        """
        Compute summary metrics for graph `H`.

        Parameters
        ----------
        H : networkx graph; edge attribute "weight" is used where noted.
        sample_frac : fraction of nodes used for closeness when the graph is
            larger than `closeness_sample_threshold`.
        seed : seed for the Louvain partition and for the closeness sample.
        closeness_sample_threshold : node count above which closeness is
            computed on a random node sample instead of the whole graph.

        Returns
        -------
        dict
            Keys: n_nodes, n_edges, density, clustering, modularity,
            centralization_top3, avg_degree, avg_closeness, avg_pagerank.
            Every value is NaN for an empty graph; modularity and avg_pagerank
            are NaN when their computation fails.
        """
        metric_names = [
            "n_nodes", "n_edges", "density", "clustering", "modularity",
            "centralization_top3", "avg_degree", "avg_closeness", "avg_pagerank"
        ]
        n_nodes = H.number_of_nodes()
        n_edges = H.number_of_edges()

        if n_nodes == 0:
            return {k: np.nan for k in metric_names}

        results = {"n_nodes": n_nodes, "n_edges": n_edges}

        # 1. Density
        results["density"] = nx.density(H)

        # 2. Clustering (weighted average clustering coefficient)
        results["clustering"] = nx.average_clustering(H, weight="weight")

        # 3. Modularity of the Louvain partition. Best effort: a failure leaves
        #    NaN. Only Exception is caught, so Ctrl-C still interrupts the
        #    permutation loop that calls this function repeatedly.
        modularity = np.nan
        if n_edges > 0 and n_nodes > 2:
            try:
                import community as community_louvain
                partition = community_louvain.best_partition(H, weight="weight", random_state=seed)
                modularity = community_louvain.modularity(partition, H, weight="weight")
            except Exception:
                modularity = np.nan
        results["modularity"] = modularity

        # 4. Centralization: share of total weighted degree held by the top 3
        #    nodes (epsilon avoids 0/0 on a graph without edges).
        deg = dict(H.degree(weight="weight"))
        degvals = sorted(deg.values(), reverse=True)
        results["centralization_top3"] = sum(degvals[:3]) / (sum(degvals) + 1e-12)

        # 5. "avg_degree" is the mean (unweighted) degree centrality.
        #    NOTE(review): for a simple graph this should coincide with the
        #    density above - confirm this is the intended metric.
        degree_c = nx.degree_centrality(H)
        results["avg_degree"] = np.mean(list(degree_c.values()))

        # 6. Avg closeness; on very large graphs use a seeded random node sample
        #    so the result is reproducible.
        if n_nodes > closeness_sample_threshold:
            sampler = random.Random(seed)
            nodes_sample = sampler.sample(list(H.nodes()), int(n_nodes * sample_frac))
            closeness_c = nx.closeness_centrality(H.subgraph(nodes_sample))
        else:
            closeness_c = nx.closeness_centrality(H)
        results["avg_closeness"] = np.mean(list(closeness_c.values()))

        # 7. Avg PageRank. A power-iteration convergence failure yields NaN
        #    instead of aborting the whole analysis.
        #    NOTE(review): PageRank scores are presumably normalised to sum to
        #    1, which would make this mean simply 1 / n_nodes - confirm.
        try:
            pagerank_c = nx.pagerank(H, weight="weight", max_iter=100)
            results["avg_pagerank"] = np.mean(list(pagerank_c.values()))
        except nx.PowerIterationFailedConvergence:
            results["avg_pagerank"] = np.nan

        return results

    # ==============================================================
    # COMPUTE & PRINT METRICS (ROBUST TO MISSING COVID)
    # ==============================================================
    metrics_full = graph_metrics(G)
    
    # Safely handle missing COVID ego
    if covid_ego is not None and covid_ego.number_of_nodes() > 0:
        metrics_covid = graph_metrics(covid_ego)
        covid_exists = True
    else:
        print("‚ö†Ô∏è No COVID ego network ‚Äî using NaN for COVID metrics.")
        metrics_covid = {k: np.nan for k in metrics_full.keys()}
        covid_exists = False

    # Safe comparison table
    comparison = pd.DataFrame({
        "Metric": metrics_full.keys(),
        "Full": metrics_full.values(),
        "COVID": list(metrics_covid.values()),
    })
    comparison["COVID/Full"] = comparison["COVID"] / (comparison["Full"] + 1e-12)
    print(comparison.round(4))


    # === SETTINGS (adjust if needed) ===
    N_PERMUTATIONS = 1000
    RANDOM_STATE = 42
    METRICS_TO_TEST = [
        "n_nodes",
        "n_edges",
        "density",
        "clustering",
        "modularity",
        "centralization_top3",
        "avg_degree",
        "avg_closeness",
        "avg_pagerank"
    ]

    # ==============================================================
    # PERMUTATION TEST (ONLY IF COVID EGO EXISTS)
    # ==============================================================
    if covid_exists:
        print(f"\nRunning {N_PERMUTATIONS} permutations...")
        observed_size = len(covid_ego.nodes())
        obs_metrics = metrics_covid
        perm_results = {m: {"null": [], "obs": obs_metrics.get(m, np.nan)} for m in METRICS_TO_TEST}
        rng = np.random.RandomState(RANDOM_STATE)
        all_nodes = list(G.nodes())

        summary_rows = []
        for i in tqdm(range(N_PERMUTATIONS), desc="Permutations", leave=False, ncols=80):
            sample_nodes = set(rng.choice(all_nodes, size=observed_size, replace=False))
            sG = G.subgraph(sample_nodes).copy()
            m = graph_metrics(sG)
            for metric in METRICS_TO_TEST:
                perm_results[metric]["null"].append(m.get(metric, np.nan))

        for metric in METRICS_TO_TEST:
            null_vals = np.array(perm_results[metric]["null"], dtype=np.float64)
            null_vals = null_vals[~np.isnan(null_vals)]
            obs_val = perm_results[metric]["obs"]
            if len(null_vals) == 0 or np.isnan(obs_val):
                p_val = np.nan; null_mean = np.nan; null_std = np.nan
            else:
                null_mean = float(null_vals.mean())
                null_std = float(null_vals.std(ddof=1))
                p_val = (np.sum(np.abs(null_vals - null_mean) >= abs(obs_val - null_mean)) + 1) / (len(null_vals) + 1)
            summary_rows.append({
                "metric": metric,
                "obs_value": obs_val,
                "null_mean": null_mean,
                "null_std": null_std,
                "p_two_sided": p_val,
                "obs_minus_null_mean": (obs_val - null_mean) if not np.isnan(obs_val) else np.nan,
                "ratio_to_null_mean": (obs_val / (null_mean + 1e-12)) if not np.isnan(obs_val) else np.nan,
                "n_perm": len(null_vals)
            })
        summary_df = pd.DataFrame(summary_rows)
    else:
        print("‚ö†Ô∏è Skipping permutation test ‚Äî no COVID ego network.")
        summary_df = pd.DataFrame([{
            "metric": m,
            "obs_value": np.nan,
            "null_mean": np.nan,
            "null_std": np.nan,
            "p_two_sided": np.nan,
            "obs_minus_null_mean": np.nan,
            "ratio_to_null_mean": np.nan,
            "n_perm": 0
        } for m in METRICS_TO_TEST])

    # ==============================================================
    # SAVE NETWORKS + RESULTS (ALWAYS RUN)
    # ==============================================================
    print(f"\nSaving outputs to: {ELITE_OUT_DIR}")

    # 1. Save Full Graph (ALWAYS)
    try:
        full_gml_path = ELITE_OUT_DIR / f"{elite_name}_FullGraph.gml"
        nx.write_gml(G, full_gml_path)
        print(f"‚úÖ FullGraph.gml saved ({full_gml_path.stat().st_size:,} bytes)")
    except Exception as e:
        print(f"‚ùå Failed to save FullGraph.gml: {e}")

    # 2. Save COVID Ego (ONLY IF EXISTS)
    if covid_exists:
        try:
            covid_gml_path = ELITE_OUT_DIR / f"{elite_name}_COVIDego.gml"
            nx.write_gml(covid_ego, covid_gml_path)
            print(f"‚úÖ COVIDego.gml saved")
        except Exception as e:
            print(f"‚ùå Failed to save COVIDego.gml: {e}")

        # Save COVID PNG
        try:
            visualize_graph(covid_ego, f"COVID Ego Network ‚Äî {elite_name}", f"{elite_name}_COVIDego.png", out_dir=ELITE_OUT_DIR)
        except Exception as e:
            print(f"‚ùå Failed to save COVIDego.png: {e}")
    else:
        print("‚ö†Ô∏è No COVID ego ‚Üí skipping COVIDego.gml and .png")

    # 3. Save Excel Report (ALWAYS)
    try:
        comparison_df = comparison.copy()
        comparison_df.rename(columns={"Metric": "metric"}, inplace=True)
        comparison_df.set_index("metric", inplace=True)

        combined = comparison_df.join(summary_df.set_index("metric"), how="outer")
        excel_path = ELITE_OUT_DIR / f"{elite_name}_Centrality_and_PermutationSummary.xlsx"

        with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
            combined.to_excel(writer, sheet_name="Comparison + Permutation")
            comparison_df.to_excel(writer, sheet_name="COVID_vs_Full")
            summary_df.to_excel(writer, sheet_name="Permutation_Summary")

        print(f"‚úÖ Excel report saved: {excel_path}")
    except Exception as e:
        print(f"‚ùå Failed to save Excel report: {e}")
        import traceback; traceback.print_exc()

In [None]:
# ==============================================================
# RUN PIPELINE FOR ALL ELITES (start with Politicians)
# ==============================================================

from pathlib import Path

# Processing order of the elite categories (sub-folders of BASE_INPUT_DIR).
subfolders_in_order = ["Politicians", "Businessmen", "Celebrities"]

# Collect every workbook that sits in an "Output" folder anywhere below each
# category folder. Names starting with "~$" are Excel lock files created while
# a workbook is open; they are not readable workbooks, so skip them.
excel_files = []
for sub in subfolders_in_order:
    sub_dir = BASE_INPUT_DIR / sub
    excel_files.extend(
        sorted(p for p in sub_dir.rglob("Output/*.xlsx") if not p.name.startswith("~$"))
    )

print(f"üìä Found {len(excel_files)} elite Excel files to process (starting with Politicians).")

failed_elites = []    # names of elites whose analysis raised an exception
skipped_elites = []   # names of elites that already have a finished report

for i, file_path in enumerate(excel_files, start=1):
    elite_name = file_path.stem
    elite_output_dir = BASE_OUTPUT_DIR / elite_name

    # The Excel report is the last file analyze_elite writes, so its presence
    # marks a completed run. Testing for "output folder not empty" would also
    # skip elites whose previous run crashed after writing only some files.
    report_path = elite_output_dir / f"{elite_name}_Centrality_and_PermutationSummary.xlsx"
    if report_path.exists():
        print(f"Skipping {elite_name} (already processed)")
        skipped_elites.append(elite_name)
        continue

    elite_output_dir.mkdir(parents=True, exist_ok=True)

    print(f"\n[{i}/{len(excel_files)}] Processing: {elite_name}")
    try:
        analyze_elite(file_path, elite_output_dir, INTERVENTION_DATE)
    except Exception as e:
        # Keep going with the remaining elites, but remember the failure so
        # the final summary does not claim success.
        print(f"Error processing {elite_name}: {e}")
        import traceback
        traceback.print_exc()
        failed_elites.append(elite_name)
        continue


if failed_elites:
    print(
        f"\nFinished with errors: {len(failed_elites)} of {len(excel_files)} elites failed "
        f"({len(skipped_elites)} skipped as already processed)."
    )
    print("Failed: " + ", ".join(failed_elites))
else:
    print("\nüéØ All elites processed successfully.")