In [15]:
import json
import ollama
import re

In [16]:
# Path of the JSON file that run_agent5 passes to load_ideas to obtain idea ids.
INPUT_FILE = 'output/agent2_ideas.json'
# Path of the JSON file that run_agent5 writes the list of audit results to.
OUTPUT_FILE = 'output/agent5_co2.json'
# Model name passed as `model=` to ollama.chat / ollama.generate in assess_carbon.
AGENT_5 = 'mistral'

In [17]:
# Prompt sent to the model for every idea. The only placeholder is {idea_id},
# filled in with str.format() by assess_carbon; any literal brace added to this
# text later must be doubled ({{ }}) or str.format() will raise.
# The two-line "IDEA_ID: ... / CO2_KG_PER_YEAR: ..." reply format requested here
# is what extract_carbon_audit looks for, so keep the two in sync.
PROMPT_TEMPLATE = """
You are Agent 5: a Carbon Auditor for climate-change AI projects.

Project idea ID: {idea_id}

Based solely on this ID (which concisely represents the project title), estimate the annual CO2 emissions (in kilograms per year) associated with training and inference of this deep learning project.
Do not let the CO2 emissions be 0, as that would not be realistic for any deep learning project.
Also, please return the exactly same idea_id in the output, as it is used in downstream tasks.

RESPONSE FORMAT:
Return your response in this exact format (just these two lines):

IDEA_ID: {idea_id}
CO2_KG_PER_YEAR: [your estimated number]

Do not include any additional text, explanations, introductions, or conclusions. Your entire response should be exactly two lines that follow the format above.
"""

In [18]:
def load_ideas(path: str) -> list:
    """Return the ``idea_id`` of every record stored in the JSON file at ``path``.

    The file is read as UTF-8 and must decode to an iterable of mappings that
    each carry an ``'idea_id'`` key; a record without it raises ``KeyError``.
    The ids are returned in file order.
    """
    with open(path, mode='r', encoding='utf-8') as handle:
        records = json.load(handle)

    idea_ids = []
    for record in records:
        idea_ids.append(record['idea_id'])
    return idea_ids

In [19]:
# Value used when a CO2_KG_PER_YEAR line is present but carries no number at all.
_UNPARSEABLE_CO2_KG = 1000.0
# Value used when no positive, finite CO2 figure can be recovered from the reply.
_DEFAULT_CO2_KG = 5000.0

# A "LABEL: value" line, tolerating markdown decoration around the label
# (e.g. "**IDEA_ID:** foo", "- CO2_KG_PER_YEAR: 12") and any letter case.
_LABELLED_LINE_RE = re.compile(
    r"^[\s>*#`\-]*(IDEA_ID|CO2_KG_PER_YEAR)[\s*`]*:[\s*`]*(.*)$",
    re.IGNORECASE,
)

# A stand-alone number: optional sign, thousands separators, decimals, exponent.
# The look-behind rejects digits glued to a word ("CO2", "v2"); the look-ahead
# rejects digits that start an identifier ("8_arctic") and stops the engine
# from backtracking into a shorter prefix of such a token. A trailing unit
# ("1200kg") is still accepted.
_NUMBER_RE = re.compile(
    r"(?<![\w.])"
    r"[-+]?(?:\d[\d,]*(?:\.\d+)?|\.\d+)"
    r"(?:[eE][-+]?\d+)?"
    r"(?![.,]?\d|_|[eE][-+]?\d)"
)


def _first_number(fragment: str):
    """Return the first stand-alone number in ``fragment`` as a float, else None."""
    for token in _NUMBER_RE.finditer(fragment):
        try:
            return float(token.group(0).replace(',', ''))
        except ValueError:
            # e.g. a run of separators that survived the pattern; try the next token
            continue
    return None


def extract_carbon_audit(text: str) -> dict:
    """Extract the IDEA_ID and CO2_KG_PER_YEAR values from the LLM response.

    Parsing happens in three passes, each used only if the previous one found
    nothing:

    1. Labelled lines (``IDEA_ID: ...`` / ``CO2_KG_PER_YEAR: ...``). The number
       is pulled out of the value, so units, brackets and markdown emphasis
       (``1200 kg``, ``[1200]``, ``**1200**``) are accepted. If several
       labelled lines exist, the last one wins.
    2. The first number that follows ``CO2_KG_PER_YEAR`` or ``emissions`` on
       the same line.
    3. The first stand-alone number anywhere outside the IDEA_ID line / id
       text. Digits embedded in words or identifiers ("CO2", "8_arctic_...")
       are never taken as the estimate.

    Args:
        text: Raw model reply. ``None`` or an empty string is treated as an
            empty reply.

    Returns:
        ``{"idea_id": str, "co2_kg_per_year": float}``. ``idea_id`` is
        ``"unknown_id"`` when none was found. ``co2_kg_per_year`` is always a
        positive finite float: 1000.0 when a CO2 line exists but holds no
        number, 5000.0 when nothing usable (or a zero/negative/non-finite
        value) was found.
    """
    # Clean up any markdown fences or extra whitespace
    text = (text or '').replace('```', '').strip()

    # For debugging
    print(f"Raw response:\n{text}")

    idea_id = None
    co2_kg = None

    # Pass 1: explicitly labelled lines
    for line in text.splitlines():
        labelled = _LABELLED_LINE_RE.match(line)
        if labelled is None:
            continue
        label = labelled.group(1).upper()
        value = labelled.group(2).strip(' \t*`')

        if label == "IDEA_ID":
            idea_id = value
            print(f"Found idea_id: {idea_id}")
        else:
            print(f"Found CO2 string: {value}")
            co2_kg = _first_number(value)
            if co2_kg is None:
                print(f"Warning: Could not parse CO2 value from '{value}'")
                co2_kg = _UNPARSEABLE_CO2_KG
            else:
                print(f"Parsed CO2 value: {co2_kg}")

    # Passes 2 and 3: looser matching when the requested format was not followed
    if idea_id is None or co2_kg is None:
        print("Using regex fallback parsing")

        if idea_id is None:
            # [ \t] rather than \s so the match cannot run onto the next line
            id_match = re.search(r"(?:IDEA_ID|ID)[:_ \t]+([^\n]+)", text)
            if id_match:
                idea_id = id_match.group(1).strip()
                print(f"Regex found idea_id: {idea_id}")

        if co2_kg is None:
            # Pass 2: a number on the same line, after a CO2-related keyword
            for keyword in re.finditer(
                r"(?:CO2_KG_PER_YEAR|emissions)([^\n]*)", text, re.IGNORECASE
            ):
                co2_kg = _first_number(keyword.group(1))
                if co2_kg is not None:
                    print(f"Regex found CO2: {co2_kg}")
                    break

            # Pass 3: any stand-alone number that is not part of the idea id
            if co2_kg is None:
                remainder = "\n".join(
                    line for line in text.splitlines()
                    if _LABELLED_LINE_RE.match(line) is None
                )
                if idea_id:
                    remainder = remainder.replace(idea_id, ' ')
                co2_kg = _first_number(remainder)
                if co2_kg is not None:
                    print(f"Found any number as CO2: {co2_kg}")

    # Final validation
    if not idea_id:
        print("WARNING: No idea_id found, using default")
        idea_id = "unknown_id"

    # The chained comparison is False for None-guarded NaN, +/-inf, zero and negatives.
    if co2_kg is None or not (0 < co2_kg < float('inf')):
        print("WARNING: No valid CO2 value found, using default")
        co2_kg = _DEFAULT_CO2_KG  # Substantial default to ensure non-zero

    return {
        "idea_id": idea_id,
        "co2_kg_per_year": co2_kg
    }

In [20]:
def assess_carbon(idea_id: str) -> dict:
    """Estimate CO2 emissions for a given idea_id.

    Formats PROMPT_TEMPLATE with ``idea_id``, sends it to the AGENT_5 model via
    ``ollama.chat`` (falling back to ``ollama.generate`` if the chat call
    raises), and parses the reply with ``extract_carbon_audit``.

    Args:
        idea_id: Identifier of the project idea; it is echoed back unchanged.

    Returns:
        ``{"idea_id": idea_id, "co2_kg_per_year": float}``. The id is always
        the one passed in, whatever the model returned. The CO2 value is always
        positive and finite: 5000.0 is substituted when both LLM calls fail or
        when the parsed value is zero, negative, NaN or infinite.
    """
    default_co2_kg = 5000.0
    prompt = PROMPT_TEMPLATE.format(idea_id=idea_id)

    print(f"\nProcessing idea: {idea_id}")

    text = None
    try:
        # Try using chat API first
        response = ollama.chat(
            model=AGENT_5,
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        text = response['message']['content'] if 'message' in response else str(response)
    except Exception as e:
        print(f"Chat API error: {e}, falling back to generate")
        try:
            # Fallback to generate API
            response = ollama.generate(model=AGENT_5, prompt=prompt)
            text = response['response'] if 'response' in response else str(response)
        except Exception as e2:
            print(f"Generate API error: {e2}")

    if text is None:
        # Do not feed the error message to the parser: digits in it (an errno,
        # a port number) would otherwise be recorded as the emission estimate.
        print("Warning: LLM call failed, using default CO2 value")
        return {"idea_id": idea_id, "co2_kg_per_year": default_co2_kg}

    # Parse the response using our non-JSON parser
    result = extract_carbon_audit(text)

    # Ensure we return the correct idea_id if it was parsed incorrectly
    if result['idea_id'] != idea_id:
        print(f"Warning: Parsed idea_id '{result['idea_id']}' doesn't match expected '{idea_id}'")
        result['idea_id'] = idea_id

    # Ensure CO2 is positive and finite. The chained comparison is also False
    # for NaN, which `<= 0` would let through and which json.dump would emit
    # as the non-standard token NaN.
    if not (0 < result['co2_kg_per_year'] < float('inf')):
        print("Warning: CO2 value was zero or negative, setting to default")
        result['co2_kg_per_year'] = default_co2_kg

    return result

In [21]:
def run_agent5():
    """Audit every idea listed in INPUT_FILE and store the results in OUTPUT_FILE.

    Each idea id returned by load_ideas is passed to assess_carbon in turn;
    the resulting dicts are collected in order and written to OUTPUT_FILE as
    an indented, UTF-8 encoded JSON list. Progress is reported with print().
    """
    pending_ids = load_ideas(INPUT_FILE)
    print(f"Running agent5 on idea_ids: {pending_ids}")

    audits = []
    for current_id in pending_ids:
        audit = assess_carbon(current_id)
        audits.append(audit)
        print(f"Result for {current_id}: {audit['co2_kg_per_year']} kg CO2/year")

    print(f"\nWriting results to {OUTPUT_FILE}")

    with open(OUTPUT_FILE, mode='w', encoding='utf-8') as sink:
        json.dump(audits, sink, ensure_ascii=False, indent=2)

    print("Carbon audit complete!")


# Kick off the full audit when this cell is executed.
run_agent5()

Running agent5 on idea_ids: ['8_arctic_anomaly_detection_with_computer_vision', '9_climate_resilience_prediction_using_gans', '10_sea_level_rise_prediction_with_deep_learning', '11_renewable_energy_output_forecasting', '12_climate_policy_optimization_with_reinforcement_lea', '13_sustainable_deep_learning_models_for_energy_effici', '14_climate_data_fusion_for_enhanced_predictions', '15_ai-driven_carbon_footprint_analysis', '16_geoengineering_impact_assessment_with_simulations', '17_climate_tipping_points_prediction_with_predictive_', '18_machine_learning_for_climate_adaptation_strategies', '19_environmental_predictive_modeling_with_transformer', '20_climate_justice_analysis_using_ai', '21_machine_learning-based_weather_forecasting_for_agr', '22_climate_model_uncertainty_quantification_with_baye', '23_climate_decision_support_systems_with_multi-agent_', '24_sustainable_development_optimization_with_ai', '25_ai_for_climate_governance_and_policy_implementatio', '26_climate_informatics_for_