In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of
# every file found, one path per line.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/spartan0-1/tensorrtllm/default/2/h1_agent_safety.py
/kaggle/input/spartan0-1/tensorrtllm/default/2/ppf_colab_notebook_primes__physics.md
/kaggle/input/spartan0-1/tensorrtllm/default/2/model.json
/kaggle/input/spartan0-1/tensorrtllm/default/2/lambda_pipeline_fixed.py
/kaggle/input/spartan0-1/tensorrtllm/default/2/harmonic_matrix_full.csv
/kaggle/input/spartan0-1/tensorrtllm/default/2/lambda_fragility_benchmark_1024bit.json
/kaggle/input/spartan0-1/tensorrtllm/default/2/h1_novel_predictions.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/reference.csv
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/AIMO3_Reference_Problems.pdf
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/sample_submission.csv
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/test.csv
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/aimo_3_inference_server.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/aimo_3_gateway.p

In [2]:
import os
import sys
import importlib
import pandas as pd

# Re-define ROOT_DIR and create_file for self-containment in this step
ROOT_DIR = "./Prometheus_AIMO"
def create_file(rel_path, content):
    """Write ``content`` to ``ROOT_DIR/rel_path``, creating parent folders as needed.

    Leading and trailing whitespace is stripped from ``content`` before it is
    written (UTF-8). A confirmation line naming ``rel_path`` is printed.
    """
    target_path = os.path.join(ROOT_DIR, rel_path)
    parent_dir = os.path.dirname(target_path)
    os.makedirs(parent_dir, exist_ok=True)
    with open(target_path, "w", encoding='utf-8') as handle:
        handle.write(content.strip())
    print(f"  [+] Created: {rel_path}")

# --- 1. Define Python code content for pipeline/factor/prime_hunter.py ---
FILE_PRIME_HUNTER = r'''
import math
import random

def is_prime(n):
    """
    Primality test by trial division over candidates of the form 6k - 1 and 6k + 1.

    Returns True when n is prime and False otherwise (including every n < 2).
    """
    if n <= 3:
        # 2 and 3 are prime; everything below 2 is not.
        return n >= 2
    if n % 2 == 0 or n % 3 == 0:
        return False

    # Every remaining prime divisor is adjacent to a multiple of 6.
    candidate = 5
    while candidate * candidate <= n:
        divides_low = n % candidate == 0
        divides_high = n % (candidate + 2) == 0
        if divides_low or divides_high:
            return False
        candidate += 6
    return True

def trial_division_factorize(n):
    """
    Factorizes an integer n into its prime factors using trial division.

    Args:
        n: Integer to factorize.

    Returns:
        The prime factors of n in ascending order, repeated by multiplicity
        (e.g. 360 -> [2, 2, 2, 3, 3, 5]). An empty list is returned for any
        n < 2, which has no prime factorization.
    """
    factors = []

    # Guard the domain: without this, n == 0 never leaves the "divide by 2"
    # loop (0 % 2 == 0 and 0 // 2 == 0), and negative values produce
    # meaningless partial results.
    if n < 2:
        return factors

    # Strip the two smallest primes so the main loop can step by 6.
    for small_prime in (2, 3):
        while n % small_prime == 0:
            factors.append(small_prime)
            n //= small_prime

    # Every prime > 3 has the form 6k - 1 or 6k + 1, so only i and i + 2
    # need testing for i = 5, 11, 17, ...
    i = 5
    while i * i <= n:
        while n % i == 0:
            factors.append(i)
            n //= i
        while n % (i + 2) == 0:
            factors.append(i + 2)
            n //= (i + 2)
        i += 6

    # Whatever is left above 1 has no divisor <= its square root, so it is prime.
    if n > 1:
        factors.append(n)

    return factors

def pollard_rho_factorize(n, max_iterations=100000):
    """
    Finds a non-trivial factor of n using Pollard's Rho algorithm.

    The sequence x -> (x^2 + c) mod n is iterated with Floyd's tortoise/hare
    scheme. When an attempt degenerates (gcd equals n), a new random start and
    constant are drawn and the search continues within the same step budget.

    Args:
        n: Integer to split.
        max_iterations: Total number of iteration steps allowed, shared across
            all random restarts.

    Returns:
        A divisor d of n with 1 < d < n, or None when n <= 1, n is prime, or
        no factor was found within max_iterations steps.
    """
    # Check the domain before the parity shortcut so that 0 and negative even
    # numbers are not reported as having the factor 2.
    if n <= 1:
        return None
    if n % 2 == 0:
        # 2 is a non-trivial factor of every even number except 2 itself.
        return 2 if n != 2 else None
    if is_prime(n):
        return None  # a prime has no non-trivial factor

    remaining_steps = max_iterations
    while remaining_steps > 0:
        # Fresh random start and polynomial constant for this attempt.
        x = random.randint(2, n - 1)
        y = x
        c = random.randint(1, n - 1)

        while remaining_steps > 0:
            remaining_steps -= 1
            x = (x * x + c) % n  # tortoise: one step
            y = (y * y + c) % n  # hare: two steps
            y = (y * y + c) % n
            d = math.gcd(abs(x - y), n)

            if d == n:
                # The cycle closed without exposing a factor; restart with
                # new parameters instead of giving up.
                break
            if d != 1:
                return d

    return None  # No factor found within max_iterations

def is_semiprime(n):
    """
    Tell whether n is a semiprime, i.e. has exactly two prime factors
    counted with multiplicity (so squares of primes qualify).
    """
    # 4 is the smallest semiprime, and a prime can never be one.
    if n < 4 or is_prime(n):
        return False

    prime_factors = trial_division_factorize(n)
    return len(prime_factors) == 2

def prime_hunter_decompose(n):
    """
    Orchestrates the factorization process for a given integer n.

    Args:
        n: Value to factorize. Anything that is not an int, or is below 2,
            is reported as invalid input rather than raising.

    Returns:
        A dict with the keys:
            "factors": sorted list of prime factors with multiplicity
                (empty for invalid input),
            "method": label of the strategy that produced the factors,
            "is_crypto_asset": True when n is a semiprime (exactly two prime
                factors counted with multiplicity),
            "original_number": the input value, unchanged.
    """
    if not isinstance(n, int):
        return {"factors": [], "method": "Invalid Input (Not Integer)", "is_crypto_asset": False, "original_number": n}
    if n < 2:
        return {"factors": [], "method": "Invalid Input for Factorization", "is_crypto_asset": False, "original_number": n}

    if is_prime(n):
        factors = [n]
        method_used = "Prime Check"
    else:
        factors = list(trial_division_factorize(n))
        method_used = "Trial Division"

        # Defensive fallback: if trial division did not account for all of n,
        # split the unfactored cofactor with Pollard's Rho. While
        # trial_division_factorize runs to completion the cofactor is 1 and
        # this branch does not execute.
        cofactor = n // math.prod(factors)
        if cofactor > 1:
            method_used = "Pollard's Rho + Recursive Decomposition"
            pending = [cofactor]
            while pending:
                current = pending.pop()
                if is_prime(current):
                    factors.append(current)
                    continue
                divisor = pollard_rho_factorize(current)
                if divisor and 1 < divisor < current:
                    pending.append(divisor)
                    pending.append(current // divisor)
                else:
                    # Rho gave up on this piece; finish it by trial division.
                    factors.extend(trial_division_factorize(current))

    factors.sort()

    return {
        "factors": factors,
        "method": method_used,
        # The factor list is complete, so "semiprime" is simply "two prime
        # factors"; no second factorization of n is needed.
        "is_crypto_asset": len(factors) == 2,
        "original_number": n
    }
'''

# --- 2. Define Python code content for pipeline/make_corpus.py ---
FILE_MAKE_CORPUS = r'''
import json
import random
import os

def generate_rsa_candidates(num_candidates, min_val=10, max_val=10000000, seed=None):
    """
    Generates synthetic candidate numbers for the factoring pipeline.

    The values are uniformly random integers in [min_val, max_val]; they are
    NOT guaranteed to be semiprimes or valid RSA moduli.

    Args:
        num_candidates: Number of candidates to generate.
        min_val: Inclusive lower bound of the generated values.
        max_val: Inclusive upper bound of the generated values.
        seed: Optional seed. When given, a private random generator seeded
            with it is used so the same corpus can be regenerated; when None,
            the module-level random generator is used.

    Returns:
        A list of dicts with the keys "id" (1-based position), "n" (the
        generated integer) and "original_problem" (a prompt string naming n).
    """
    rng = random.Random(seed) if seed is not None else random
    candidates = []
    for i in range(num_candidates):
        candidate = rng.randint(min_val, max_val)
        candidates.append({"id": i + 1, "n": candidate, "original_problem": f"Analyze the RSA key candidate: {candidate}"})
    return candidates


def save_corpus(corpus_data, filepath="pipeline/data/corpus.json"):
    """
    Saves the generated corpus data to a JSON file.

    Args:
        corpus_data: JSON-serialisable corpus (e.g. the list returned by
            generate_rsa_candidates).
        filepath: Destination file. Missing parent directories are created.
    """
    parent_dir = os.path.dirname(filepath)
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filepath, "w", encoding='utf-8') as f:
        json.dump(corpus_data, f, indent=4)
    print(f"Corpus saved to {filepath}")

if __name__ == "__main__":
    # Script entry point: draw 50 random candidates in [10, 10_000_000] and
    # write them to the corpus JSON file. The output path is relative to the
    # current working directory.
    corpus = generate_rsa_candidates(num_candidates=50, min_val=10, max_val=10000000)
    save_corpus(corpus, filepath="./Prometheus_AIMO/pipeline/data/corpus.json")
'''

# --- 3. Define Python code content for pipeline/extract_features.py ---
FILE_EXTRACT_FEATURES = r'''
import json
import pandas as pd
import math
import os
from pipeline.factor.prime_hunter import prime_hunter_decompose # Import the prime_hunter_decompose function

def load_corpus(filepath="pipeline/data/corpus.json"):
    """
    Read a UTF-8 JSON corpus file and return the parsed content.
    """
    with open(filepath, "r", encoding='utf-8') as corpus_file:
        raw_text = corpus_file.read()
    parsed = json.loads(raw_text)
    print(f"Corpus loaded from {filepath}")
    return parsed

def extract_numerical_features(candidate_data):
    """
    Build one feature record per valid candidate.

    Items whose "n" is not a positive int are skipped. For every other item
    the record holds the bit length, the decimal digit count, the
    factorization reported by prime_hunter_decompose (semiprime flag and the
    factor list rendered as a string), the bits-per-digit ratio
    ("normalized_delta") and the "helix_value" score.
    """
    records = []
    for entry in candidate_data:
        value = entry["n"]
        if not (isinstance(value, int) and value > 0):
            continue

        bits = value.bit_length()
        digits = len(str(value))

        decomposition = prime_hunter_decompose(value)
        crypto_flag = decomposition.get("is_crypto_asset", False)
        # The factor list is stored as text so it survives the CSV round trip.
        factors_text = str(decomposition.get("factors", []))

        if digits > 0:
            bits_per_digit = bits / digits
        else:
            bits_per_digit = 0.0

        # helix_value is the bit length for non-semiprimes and 0 for semiprimes.
        helix = bits * (1 - int(crypto_flag))

        record = {
            "id": entry["id"],
            "n": value,
            "bit_length": bits,
            "num_digits": digits,
            "is_crypto_asset": crypto_flag,
            "factors": factors_text,
            "normalized_delta": bits_per_digit,
            "helix_value": helix
        }
        records.append(record)
    return records

def save_features(features_data, filepath="pipeline/data/features.csv"):
    """
    Saves extracted features to a CSV file.

    Args:
        features_data: Feature records (e.g. the list of dicts returned by
            extract_numerical_features); converted with pd.DataFrame.
        filepath: Destination CSV file. Missing parent directories are created.
    """
    parent_dir = os.path.dirname(filepath)
    # os.makedirs("") raises, so skip directory creation for bare filenames.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # Let pandas open the file itself; wrapping this in a separate open()
    # would leave a second, unused handle on the same path.
    df = pd.DataFrame(features_data)
    df.to_csv(filepath, index=False, encoding='utf-8')
    print(f"Features saved to {filepath}")

if __name__ == "__main__":
    # Script entry point: read the corpus JSON, compute one feature record per
    # valid candidate and write the result as CSV. Both paths are relative to
    # the current working directory.
    corpus = load_corpus(filepath="./Prometheus_AIMO/pipeline/data/corpus.json")
    features = extract_numerical_features(corpus)
    save_features(features, filepath="./Prometheus_AIMO/pipeline/data/features.csv")
'''

# --- 4. Define Python code content for pipeline/triage.py ---
FILE_TRIAGE = r'''
import pandas as pd
import os
import math # Added import math
import re # Added import re to parse factors string

def load_features(filepath="pipeline/data/features.csv"):
    """
    Read the features CSV at ``filepath`` and return it as a DataFrame.
    """
    features_frame = pd.read_csv(filepath)
    return features_frame

def _classify_semiprime(n, factors_str):
    """Return (bond_strength, triage_category) for a row flagged as a semiprime."""
    # The factor list arrives as text (e.g. "[3, 5]"); str() keeps a missing
    # (NaN) cell from crashing the regex and routes it to the fallback below.
    parsed_factors = [int(f) for f in re.findall(r'\d+', str(factors_str))]

    if len(parsed_factors) != 2:
        # Flag and factor list disagree; keep the row but mark it as unclear.
        return "Medium", "Medium (Semiprime - factors unclear)"

    p, q = sorted(parsed_factors)  # p <= q
    if p == q:  # Square of a prime, e.g. 25 = 5 * 5
        return "Very High", "Factorable (Square Root)"
    if abs(p - q) < 5 * math.log(n):  # Close factors, amenable to Fermat's method
        return "High", "Factorable (Fermat)"
    if p < 1000:  # One small factor, found quickly by trial division
        return "Medium", "Factorable (Trial Division Optimized)"
    return "High", "Hard (Semiprime)"


def _classify_by_bit_length(bit_length):
    """Return (bond_strength, triage_category) for a non-semiprime from its size."""
    if bit_length < 12:  # n <= 2^11 - 1 = 2047
        return "Low", "Easy"
    if bit_length < 24:  # n <= 2^23 - 1 = 8,388,607
        return "Medium", "Medium"
    return "High", "Hard"


def simple_triage(features_df):
    """
    Implements a simple rule-based triage of factoring candidates.

    Semiprimes ("crypto assets") are classified from their two factors;
    every other number is classified by bit length alone.

    Args:
        features_df: DataFrame with the columns "id", "n", "bit_length",
            "is_crypto_asset" and "factors" (factor list rendered as text).

    Returns:
        A DataFrame with the columns "id", "n", "bit_length", "bond_strength",
        "triage_category", "is_crypto_asset" and "factors", one row per input
        row. The columns are present even when the input has no rows.
    """
    output_columns = [
        "id", "n", "bit_length", "bond_strength",
        "triage_category", "is_crypto_asset", "factors",
    ]

    triage_results = []
    for _, row in features_df.iterrows():
        n = row["n"]
        bit_length = row["bit_length"]
        is_crypto_asset = row["is_crypto_asset"]
        factors_str = row["factors"]

        if is_crypto_asset:
            bond_strength, triage_category = _classify_semiprime(n, factors_str)
        else:
            bond_strength, triage_category = _classify_by_bit_length(bit_length)

        triage_results.append({
            "id": row["id"],
            "n": n,
            "bit_length": bit_length,
            "bond_strength": bond_strength,
            "triage_category": triage_category,
            "is_crypto_asset": is_crypto_asset,
            "factors": factors_str
        })

    # Passing the column list keeps the schema stable for an empty result, so
    # later column selections do not fail with KeyError.
    return pd.DataFrame(triage_results, columns=output_columns)

def save_triage_results(triage_df, filepath="pipeline/data/triage_results.csv"):
    """
    Saves triage results to a CSV file.

    Args:
        triage_df: DataFrame of triage results.
        filepath: Destination CSV file. Missing parent directories are created.
    """
    parent_dir = os.path.dirname(filepath)
    # os.makedirs("") raises, so skip directory creation for bare filenames.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # pandas opens the path itself; a surrounding open() would only create a
    # second, unused handle on the same file.
    triage_df.to_csv(filepath, index=False, encoding='utf-8')
    print(f"Triage results saved to {filepath}")

if __name__ == "__main__":
    # Script entry point: load the features CSV, apply the rule-based triage
    # and write the result as CSV. Both paths are relative to the current
    # working directory.
    features_df = load_features(filepath="./Prometheus_AIMO/pipeline/data/features.csv")
    triage_df = simple_triage(features_df)
    save_triage_results(triage_df, filepath="./Prometheus_AIMO/pipeline/data/triage_results.csv")
'''

# --- 5. Define Python code content for pipeline/predictive_triage.py ---
FILE_PREDICTIVE_TRIAGE = r'''
import pandas as pd
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold # Changed to StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import numpy as np # Added for array operations

def predictive_triage(features_df, triage_results_df, n_splits=5):
    """
    Implements a machine learning-based predictive triage system with stratified
    K-Fold Cross-Validation.

    A RandomForestClassifier is trained per fold to predict 'triage_category'
    from the numerical feature columns; every row receives the prediction made
    by the fold in which it was held out.

    Args:
        features_df: DataFrame with at least the columns 'id', 'bit_length',
            'num_digits', 'is_crypto_asset', 'normalized_delta' and 'helix_value'.
            NOTE: this frame is modified in place (two columns are added).
        triage_results_df: DataFrame with the columns 'id' and 'triage_category',
            used as the target labels.
        n_splits: Number of cross-validation folds.

    Returns:
        A tuple (features_df, overall_report_str): the same features_df object
        with the added columns 'predicted_triage_category' and
        'predicted_triage_score', and the text classification report computed
        over all out-of-fold predictions.
    """
    print(f"Running predictive triage with ML model using {n_splits}-Fold Cross-Validation...")

    # Merge features with actual triage results to get the target variable
    # NOTE(review): with how='left', an id missing from triage_results_df would
    # presumably yield a NaN label here -- confirm the two inputs always share ids.
    merged_df = pd.merge(features_df, triage_results_df[['id', 'triage_category']], on='id', how='left')

    # Define features and target
    # Convert is_crypto_asset to int for the model
    X = merged_df[['bit_length', 'num_digits', 'is_crypto_asset', 'normalized_delta', 'helix_value']].copy()
    X['is_crypto_asset'] = X['is_crypto_asset'].astype(int)
    y = merged_df['triage_category']

    # Encode target labels to numerical values
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    # Initialize stratified K-Fold Cross-Validation (shuffled, fixed seed)
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # Out-of-fold results, filled in fold by fold at the test indices
    all_predictions = np.zeros(len(y_encoded)) # To store predictions for each sample
    all_probabilities = np.zeros((len(y_encoded), len(le.classes_))) # To store probabilities

    # Per-fold report dicts; collected here but not returned or printed
    fold_reports = []

    for fold, (train_index, test_index) in enumerate(skf.split(X, y_encoded)):
        print(f"  Processing Fold {fold+1}/{n_splits}...")
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y_encoded[train_index], y_encoded[test_index]

        model = RandomForestClassifier(n_estimators=100, random_state=42) # Re-initialize for each fold
        model.fit(X_train, y_train)

        fold_predictions = model.predict(X_test)
        fold_probabilities_raw = model.predict_proba(X_test) # Raw probabilities from model for this fold

        # Map fold_probabilities_raw to the full set of classes in all_probabilities.
        # A fold's model only has columns for the classes present in its training
        # split; columns for absent classes stay at 0.
        fold_probabilities_mapped = np.zeros((len(y_test), len(le.classes_))) # Temporary array matching full shape
        for i, class_label_encoded in enumerate(model.classes_):
            # Find the column index in the full 'all_probabilities' array for this class
            # NOTE(review): the lookup decodes the label and searches le.classes_ for it;
            # this looks like it returns class_label_encoded itself -- confirm before simplifying.
            global_class_idx = np.where(le.classes_ == le.inverse_transform([class_label_encoded]))[0][0]
            fold_probabilities_mapped[:, global_class_idx] = fold_probabilities_raw[:, i]

        all_predictions[test_index] = fold_predictions
        all_probabilities[test_index] = fold_probabilities_mapped

        # Optional: Store fold-specific report
        # Pass every known label and target name so each report has the same rows
        fold_report = classification_report(y_test, fold_predictions, labels=le.transform(le.classes_), target_names=le.classes_, output_dict=True, zero_division=0)
        fold_reports.append(fold_report)

    # Calculate overall classification report from all out-of-fold predictions
    # Pass every known label and target name, as for the per-fold reports
    overall_report_str = classification_report(y_encoded, all_predictions, labels=le.transform(le.classes_), target_names=le.classes_, zero_division=0)
    print("\nOverall Cross-Validation Classification Report:")
    print(overall_report_str)

    # Add predictions to the features_df (in place: the caller's frame is modified)
    # NOTE(review): assumes merged_df has exactly one row per features_df row, in the
    # same order -- duplicate ids in triage_results_df would break this; confirm.
    features_df['predicted_triage_category'] = le.inverse_transform(all_predictions.astype(int))

    # Assign a predictive score based on probability of the most difficult category.
    # The "hardest" categories are looked up by name: 'Hard', 'Hard (Semiprime)'
    # and 'Predicted_Hard'; only those present in the fitted labels are used.

    hardest_class_indices = []
    if 'Hard' in le.classes_: hardest_class_indices.append(np.where(le.classes_ == 'Hard')[0][0])
    if 'Hard (Semiprime)' in le.classes_: hardest_class_indices.append(np.where(le.classes_ == 'Hard (Semiprime)')[0][0])
    if 'Predicted_Hard' in le.classes_: hardest_class_indices.append(np.where(le.classes_ == 'Predicted_Hard')[0][0])

    # Handle the case where no hard category exists in this dataset
    if hardest_class_indices and all_probabilities.shape[1] > 0:
        # Ensure we only try to index columns that exist in all_probabilities
        valid_hardest_indices = [idx for idx in hardest_class_indices if idx < all_probabilities.shape[1]]
        if valid_hardest_indices:
            features_df['predicted_triage_score'] = np.max(all_probabilities[:, valid_hardest_indices], axis=1) # Take max probability if multiple hardest classes
        else:
            features_df['predicted_triage_score'] = 0.0
    else:
        features_df['predicted_triage_score'] = 0.0 # Default if no 'Hard' class is found or no probabilities

    print("Predictive triage applied successfully with Cross-Validation.")
    return features_df, overall_report_str

def save_predictive_triage_results(predictive_df, filepath="pipeline/data/predictive_triage_results.csv"):
    """
    Saves predictive triage results to a CSV file.

    Args:
        predictive_df: DataFrame holding the predictive triage output.
        filepath: Destination CSV file. Missing parent directories are created.
    """
    parent_dir = os.path.dirname(filepath)
    # os.makedirs("") raises, so skip directory creation for bare filenames.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # pandas opens the path itself; a surrounding open() would only create a
    # second, unused handle on the same file.
    predictive_df.to_csv(filepath, index=False, encoding='utf-8')
    print(f"Predictive triage results saved to {filepath}")


if __name__ == "__main__":
    # Script entry point: load features and rule-based triage labels, run the
    # cross-validated predictive triage and save the augmented features.
    try:
        # Inputs are resolved relative to this file's own directory ...
        features_filepath = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "features.csv")
        triage_results_filepath = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "triage_results.csv")

        features_df_loaded = pd.read_csv(features_filepath)
        triage_results_df_loaded = pd.read_csv(triage_results_filepath)

        # A copy is passed because predictive_triage adds columns to its input frame.
        predicted_df, cv_report = predictive_triage(features_df_loaded.copy(), triage_results_df_loaded, n_splits=5)
        # ... while the output path is relative to the current working directory.
        # NOTE(review): the two locations only coincide when the script is run
        # from the directory that contains Prometheus_AIMO -- confirm this is intended.
        save_predictive_triage_results(predicted_df, filepath="./Prometheus_AIMO/pipeline/data/predicted_features.csv")

        print("\nPredictive triage results head:")
        print(predicted_df.head())

        # The cross-validation report itself is printed inside predictive_triage;
        # cv_report is not used further here.

    except FileNotFoundError as e:
        print(f"Error: Missing file for predictive triage: {e}")
        print("Please ensure extract_features.py and triage.py have been run first.")
    except Exception as e:
        # Any other failure is reported as a message only; no traceback is shown.
        print(f"An unexpected error occurred: {e}")
'''

# --- 6. Define Python code content for pipeline/orchestration.py ---
FILE_ORCHESTRATION = r'''
import pandas as pd
import random
import os
import time # For simulating process time

def orchestrate_factoring(predicted_features_df):
    """
    Simulates the application of factoring algorithms based on predicted_triage_category.

    No factoring is performed: for each row a duration and a success flag are
    drawn from the `random` module, with ranges and odds that depend on the
    predicted category. Seed `random` beforehand for reproducible results.

    Args:
        predicted_features_df: DataFrame with at least the columns 'id' and
            'predicted_triage_category'.

    Returns:
        A new DataFrame holding the input columns plus 'factoring_time'
        (rounded to 3 decimals) and 'factoring_success_status'. An empty input
        yields an empty frame that still carries both new columns.
    """
    print("Orchestrating factoring simulations...")

    # With no rows there is nothing to simulate, and merging on 'id' against a
    # column-less results frame would raise KeyError, so return early with the
    # expected schema.
    if len(predicted_features_df) == 0:
        final_df = predicted_features_df.copy()
        final_df['factoring_time'] = pd.Series(dtype=float)
        final_df['factoring_success_status'] = pd.Series(dtype=bool)
        print("Factoring orchestration simulation complete.")
        return final_df

    factoring_results = []

    for index, row in predicted_features_df.iterrows():
        category = row['predicted_triage_category']
        factoring_time = 0.0
        factoring_success_status = False

        # Simulate factoring based on category
        if category == 'Factorable (Square Root)':
            factoring_time = random.uniform(0.01, 0.1) # Very fast
            factoring_success_status = True
        elif category == 'Factorable (Fermat)':
            factoring_time = random.uniform(0.1, 0.5) # Fast
            factoring_success_status = True
        elif category == 'Factorable (Trial Division Optimized)':
            factoring_time = random.uniform(0.05, 0.3) # Moderate speed for small factors
            factoring_success_status = True
        elif category == 'Medium' or category == 'Medium (Semiprime - factors unclear)':
            factoring_time = random.uniform(0.5, 2.0) # Slower, more effort
            factoring_success_status = random.choice([True, False, False]) # 1/3 chance of success
        elif category == 'Hard' or category == 'Hard (Semiprime)':
            factoring_time = random.uniform(2.0, 10.0) # Much slower, high effort
            factoring_success_status = random.choice([True, False, False, False, False]) # 1/5 chance of success
        else:
            # Default for any unhandled categories: coin flip
            factoring_time = random.uniform(0.1, 1.0)
            factoring_success_status = random.choice([True, False])

        factoring_results.append({
            'id': row['id'],
            'factoring_time': round(factoring_time, 3),
            'factoring_success_status': factoring_success_status
        })

    results_df = pd.DataFrame(factoring_results)
    # Merge original features with new factoring results
    final_df = pd.merge(predicted_features_df, results_df, on='id', how='left')
    print("Factoring orchestration simulation complete.")
    return final_df

if __name__ == '__main__':
    # Script entry point: load the predicted features, run the factoring
    # simulation and save the combined table.

    # Define file paths (ROOT_DIR is relative to the current working directory)
    ROOT_DIR = "./Prometheus_AIMO"
    predicted_features_filepath = os.path.join(ROOT_DIR, "pipeline", "data", "predicted_features.csv")
    factoring_results_filepath = os.path.join(ROOT_DIR, "pipeline", "data", "factoring_results.csv")

    # Ensure output directory exists
    os.makedirs(os.path.dirname(factoring_results_filepath), exist_ok=True)

    try:
        # Load predicted_features.csv
        predicted_df = pd.read_csv(predicted_features_filepath)
        print(f"Loaded predicted features from {predicted_features_filepath}. Shape: {predicted_df.shape}")

        # Orchestrate factoring
        factored_df = orchestrate_factoring(predicted_df)

        # Save results to factoring_results.csv
        factored_df.to_csv(factoring_results_filepath, index=False)
        print(f"Saved factoring results to {factoring_results_filepath}. Shape: {factored_df.shape}")

        print("\n--- Factoring Results Head ---")
        print(factored_df.head())

    except FileNotFoundError:
        print(f"Error: Missing file. Ensure {predicted_features_filepath} exists.")
    except Exception as e:
        # Any other failure is reported as a message only; no traceback is shown.
        print(f"An error occurred during orchestration: {e}")
'''

# --- 7. Define Python code content for pipeline/report/reporter.py (UPDATED) ---
FILE_REPORTER = r'''
import pandas as pd
import os
from datetime import datetime
# No matplotlib.pyplot for text-based graph

def _render_table(frame):
    """Render a DataFrame as a Markdown table, or as plain text when 'tabulate' is unavailable."""
    try:
        return frame.to_markdown(index=False)
    except ImportError:
        # DataFrame.to_markdown depends on the optional 'tabulate' package; degrade to an
        # aligned plain-text table instead of aborting the whole report.
        return frame.to_string(index=False)


def generate_factoring_summary_report(factoring_results_filepath="pipeline/data/factoring_results.csv",
                                      output_dir="pipeline/data/outputs"):
    """
    Generates a comprehensive summary report based on factoring orchestration results,
    including efficiency metrics and a simulated 'Money Graph'.

    Args:
        factoring_results_filepath: CSV file to read. The code uses the columns
            'id', 'predicted_triage_category', 'factoring_time' and
            'factoring_success_status'.
        output_dir: Directory the Markdown report is written to; created if missing.

    Returns:
        The path of the written report, or None when the input file is missing or any
        other error occurs (the error is printed rather than raised).
    """
    try:
        # Load data - factoring_results.csv already contains predicted_triage_category
        print(f"DEBUG: Reporter loading factoring results from: {factoring_results_filepath}")
        merged_df = pd.read_csv(factoring_results_filepath)
        print(f"DEBUG: Merged DataFrame columns: {merged_df.columns.tolist()}")

        # One timestamp for both the report body and the file name, so they always agree.
        generated_at = datetime.now()

        # Explicit boolean mask: a status column holding missing values loads as object
        # dtype, which cannot be used directly for boolean indexing. Missing counts as failure.
        success_mask = merged_df['factoring_success_status'].eq(True)

        # Overall efficiency metrics
        total_candidates = len(merged_df)
        successful_factorizations = int(success_mask.sum())
        success_percentage = (successful_factorizations / total_candidates) * 100 if total_candidates > 0 else 0

        # Metrics per predicted_triage_category
        category_metrics = (
            merged_df.assign(_success=success_mask)
            .groupby('predicted_triage_category')
            .agg(
                total_count=('id', 'size'),
                successful_count=('_success', 'sum'),
                avg_time=('factoring_time', 'mean'),
            )
            .reset_index()
        )
        category_metrics['success_rate'] = (category_metrics['successful_count'] / category_metrics['total_count']) * 100

        # Simulate 'Money Graph' - textual representation of ROI
        # 'Investment' is all factoring time; 'return' is the time spent on successful rows.
        total_investment = merged_df['factoring_time'].sum()
        successful_investment = merged_df.loc[success_mask, 'factoring_time'].sum()

        money_graph_str = """
## Simulated 'Money Graph' (Efficiency Overview):

Total Investment (Simulated Factoring Time): {total_investment:.2f} units
Successful Investment (Time on successful factors): {successful_investment:.2f} units

[Investment] {investment_bar}
[Return   ] {return_bar}
"""
        bar_length = 50
        # No epsilon in the denominator: the zero case is guarded explicitly, and an epsilon
        # would shave one character off the bar when every factorization succeeds.
        if total_investment > 0:
            return_ratio_display = min(1.0, max(0.0, successful_investment / total_investment))
        else:
            return_ratio_display = 0.0
        filled_cells = int(bar_length * return_ratio_display)

        # The investment bar always represents 100% of the total investment.
        investment_bar = '#' * bar_length
        return_bar = '*' * filled_cells + ' ' * (bar_length - filled_cells)

        money_graph_content = money_graph_str.format(
            total_investment=total_investment,
            successful_investment=successful_investment,
            investment_bar=investment_bar,
            return_bar=return_bar
        )

        # Compile comprehensive report
        report_content = f"""
# Prometheus AIMO Factoring Efficiency Report

Date: {generated_at.strftime('%Y-%m-%d %H:%M:%S')}

## Overall Factoring Metrics:
- Total Candidates Processed: {total_candidates}
- Successful Factorizations: {int(successful_factorizations)} ({success_percentage:.2f}%)
- Total Simulated Factoring Time: {total_investment:.2f} units

## Metrics per Predicted Triage Category:
{_render_table(category_metrics)}

{money_graph_content}

## Raw Data Head (Factoring results):
{_render_table(merged_df.head())}
"""

        os.makedirs(output_dir, exist_ok=True)
        report_filepath = os.path.join(output_dir, f"factoring_summary_report_{generated_at.strftime('%Y%m%d_%H%M%S')}.md")
        with open(report_filepath, "w", encoding='utf-8') as f:
            f.write(report_content)
        print(f"Factoring summary report generated at {report_filepath}")
        return report_filepath
    except FileNotFoundError as e:
        print(f"Error: Missing file for report generation: {e}")
        return None
    except Exception as e:
        print(f"An error occurred during report generation: {e}")
        return None

if __name__ == "__main__":
    # Example usage for generate_factoring_summary_report.
    # For standalone execution the project root is two directories above this file.
    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
    data_dir = os.path.join(ROOT_DIR, "pipeline", "data")
    factoring_results_filepath_main = os.path.join(data_dir, "factoring_results.csv")  # the only input needed
    output_dir_main = os.path.join(data_dir, "outputs")

    if not os.path.exists(factoring_results_filepath_main):
        print("Error: Required input file for report not found. Make sure all previous pipeline steps have run.")
    else:
        generate_factoring_summary_report(
            factoring_results_filepath=factoring_results_filepath_main,
            output_dir=output_dir_main,
        )

    # The original generate_summary_report might still be used for simple triage reports if needed separately.
    # generate_summary_report(
    #    triage_results_filepath="./Prometheus_AIMO/pipeline/data/triage_results.csv",
    #    output_dir="./Prometheus_AIMO/pipeline/data/outputs"
    #)
'''

print("\n▶ Writing all pipeline source files with updated reporter.py...")
# (relative path, source text) pairs in write order; orchestration.py is now part of the pipeline.
pipeline_sources = [
    ("pipeline/factor/prime_hunter.py", FILE_PRIME_HUNTER),
    ("pipeline/make_corpus.py", FILE_MAKE_CORPUS),
    ("pipeline/extract_features.py", FILE_EXTRACT_FEATURES),
    ("pipeline/triage.py", FILE_TRIAGE),
    ("pipeline/predictive_triage.py", FILE_PREDICTIVE_TRIAGE),
    ("pipeline/orchestration.py", FILE_ORCHESTRATION),
    ("pipeline/report/reporter.py", FILE_REPORTER),
]
for relative_path, source_text in pipeline_sources:
    create_file(relative_path, source_text)
print("  ✅ All pipeline source files created.")

# Adjust sys.path to allow importing from pipeline
project_root = os.path.abspath(ROOT_DIR)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# --- Reload modules ---
# The module sources were just (re)written on disk; drop the import system's cached
# directory listings so newly created files are found, then reload each module so the
# running kernel picks up the new code.
print("\n▶ Reloading pipeline modules to ensure changes are active...")
importlib.invalidate_caches()

# (notebook-level alias, dotted module name), in the order they are reloaded.
PIPELINE_MODULES = (
    ("make_corpus_module", "pipeline.make_corpus"),
    ("prime_hunter_module", "pipeline.factor.prime_hunter"),
    ("extract_features_module", "pipeline.extract_features"),
    ("triage_module", "pipeline.triage"),
    ("predictive_triage_module", "pipeline.predictive_triage"),
    ("orchestration_module", "pipeline.orchestration"),
    ("reporter_module", "pipeline.report.reporter"),
)

reload_failures = []
for module_alias, dotted_name in PIPELINE_MODULES:
    try:
        # Bind the alias only on success, so a failed reload leaves any earlier binding as it was.
        globals()[module_alias] = importlib.reload(importlib.import_module(dotted_name))
        print(f"  ✅ Reloaded {dotted_name}")
    except Exception as e:
        reload_failures.append(dotted_name)
        print(f"  ❌ Could not reload {dotted_name}: {e}")

# Report the real outcome instead of claiming success unconditionally.
if reload_failures:
    print(f"  ⚠️ {len(reload_failures)} module(s) failed to reload: {', '.join(reload_failures)}")
else:
    print("  ✅ All relevant modules reloaded.")

# --- Re-run Corpus Generation with updated range ---
print("\n--- Re-running Corpus Generation ---")
corpus_filepath = os.path.join(ROOT_DIR, "pipeline", "data", "corpus.json")
corpus_data = make_corpus_module.generate_rsa_candidates(
    num_candidates=50,
    min_val=10,
    max_val=10_000_000,
)
make_corpus_module.save_corpus(corpus_data, filepath=corpus_filepath)

# --- Re-run Feature Extraction with updated logic ---
print("\n--- Re-running Feature Extraction ---")
features_filepath = os.path.join(ROOT_DIR, "pipeline", "data", "features.csv")
features_data = extract_features_module.extract_numerical_features(corpus_data)
extract_features_module.save_features(features_data, filepath=features_filepath)

# --- Re-run Triage with Multi-Dimensional Categories ---
print("\n--- Re-running Triage ---")
triage_results_filepath = os.path.join(ROOT_DIR, "pipeline", "data", "triage_results.csv")
# Triage reads the features back from disk rather than reusing the in-memory features_data.
features_df_triage = triage_module.load_features(filepath=features_filepath)
triage_df = triage_module.simple_triage(features_df_triage)
triage_module.save_triage_results(triage_df, filepath=triage_results_filepath)

# --- Re-run Predictive Triage with ML Model and Cross-Validation ---
print("\n--- Re-running Predictive Triage Stage (with Cross-Validation) ---")
predicted_features_filepath = os.path.join(ROOT_DIR, "pipeline", "data", "predicted_features.csv")

try:
    # Both inputs are read before either is reported on.
    features_df_ml = pd.read_csv(features_filepath)
    triage_results_df_ml = pd.read_csv(triage_results_filepath)
    print(f"Loaded features from {features_filepath}. Shape: {features_df_ml.shape}")
    print(f"Loaded triage results from {triage_results_filepath}. Shape: {triage_results_df_ml.shape}")

    # The model receives a copy of the features frame; the call yields (predictions, CV report).
    triage_outcome = predictive_triage_module.predictive_triage(
        features_df_ml.copy(),
        triage_results_df_ml,
        n_splits=5,
    )
    predicted_features_df, cv_report = triage_outcome
    predictive_triage_module.save_predictive_triage_results(
        predicted_features_df,
        filepath=predicted_features_filepath,
    )

    print(f"\n✅ Successfully generated predictive triage results: {predicted_features_filepath}")
    print(f"File size: {os.path.getsize(predicted_features_filepath)} bytes")

    print("\nCross-Validation Report:")
    print(cv_report)

except FileNotFoundError:
    print("❌ Error: Missing file. Ensure features.csv and triage_results.csv exist.")
except Exception as err:
    print(f"❌ An error occurred during predictive triage execution: {err}")

# --- Re-run Factoring Orchestration ---
print("\n--- Re-running Factoring Orchestration ---")
factoring_results_filepath = os.path.join(ROOT_DIR, "pipeline", "data", "factoring_results.csv")

try:
    # Re-read the predictions from disk so this stage depends only on the saved CSV.
    predicted_df_orchestration = pd.read_csv(predicted_features_filepath)
    print(f"Loaded predicted features for orchestration from {predicted_features_filepath}. Shape: {predicted_df_orchestration.shape}")

    # Orchestrate on a copy, then persist the result without the index column.
    orchestration_input = predicted_df_orchestration.copy()
    factored_df_simulated = orchestration_module.orchestrate_factoring(orchestration_input)
    factored_df_simulated.to_csv(factoring_results_filepath, index=False)
    print(f"Saved factoring results to {factoring_results_filepath}. Shape: {factored_df_simulated.shape}")

except FileNotFoundError:
    print(f"❌ Error: Missing file for orchestration. Ensure {predicted_features_filepath} exists.")
except Exception as err:
    print(f"❌ An error occurred during orchestration execution: {err}")

# --- Execute Report Generation ---
print("\n--- Executing Report Generation ---")

try:
    # The factoring results CSV is the report's only input; it already carries every column needed.
    factoring_results_filepath_report = os.path.join(ROOT_DIR, "pipeline", "data", "factoring_results.csv")
    output_dir_report = os.path.join(ROOT_DIR, "pipeline", "data", "outputs")

    if not os.path.exists(factoring_results_filepath_report):
        print("❌ Error: Required input file for report not found. Make sure all previous pipeline steps have run.")
    else:
        report_file = reporter_module.generate_factoring_summary_report(
            factoring_results_filepath=factoring_results_filepath_report,
            output_dir=output_dir_report,
        )
        # The reporter returns None on failure and prints its own error in that case.
        if report_file:
            print(f"✅ Factoring summary report successfully generated: {report_file}")

except Exception as err:
    print(f"❌ An error occurred during report generation execution: {err}")


▶ Writing all pipeline source files with updated reporter.py...
  [+] Created: pipeline/factor/prime_hunter.py
  [+] Created: pipeline/make_corpus.py
  [+] Created: pipeline/extract_features.py
  [+] Created: pipeline/triage.py
  [+] Created: pipeline/predictive_triage.py
  [+] Created: pipeline/orchestration.py
  [+] Created: pipeline/report/reporter.py
  ✅ All pipeline source files created.

▶ Reloading pipeline modules to ensure changes are active...
  ✅ Reloaded pipeline.make_corpus
  ✅ Reloaded pipeline.factor.prime_hunter
  ✅ Reloaded pipeline.extract_features
  ✅ Reloaded pipeline.triage
  ✅ Reloaded pipeline.predictive_triage
  ✅ Reloaded pipeline.orchestration
  ✅ Reloaded pipeline.report.reporter
  ✅ All relevant modules reloaded.

--- Re-running Corpus Generation ---
Corpus saved to ./Prometheus_AIMO/pipeline/data/corpus.json

--- Re-running Feature Extraction ---
Features saved to ./Prometheus_AIMO/pipeline/data/features.csv

--- Re-running Triage ---
Triage results saved 