In [1]:
import pandas as pd
import os
import json
import ollama
os.chdir('./output/3IdeaPasses/')

In [32]:
df = pd.read_csv('utility_scores.csv')
df.head()

Unnamed: 0,title,description,data_needs,idea_id,feasibility_score,feasibility_notes,co2_kg_per_year,impact_score,summary,risk_level,mitigation_suggestions,utility_score
0,Arctic Anomaly Detection with Computer Vision,A deep learning system that detects and classi...,"['Satellite imagery', 'Arctic climate data', '...",8_arctic_anomaly_detection_with_computer_vision,0.65,Satellite imagery and climate data are partial...,1047500,0.6,should explain that the project's main impact ...,medium,['**Implement region-specific bias audits** by...,12.025
1,Climate Resilience Prediction using GANs,Generative adversarial networks (GANs) simulat...,"['Historical climate data', 'urban/agricultura...",9_climate_resilience_prediction_using_gans,0.7,Data availability is mixed with public climate...,10000,0.7,needs to explain these. The project's main ben...,high,['Implement a data diversity audit to ensure t...,15.9
2,Sea Level Rise Prediction with Deep Learning,A deep learning model that forecasts regional ...,"['Ocean current measurements', 'satellite alti...",10_sea_level_rise_prediction_with_deep_learning,0.7,Public datasets for ocean and satellite data r...,3500000,0.65,The model's predictive capabilities are essent...,medium,['**Implement geographically stratified data s...,-10.5
3,Renewable Energy Output Forecasting,Machine learning algorithms predict solar and ...,"['Weather forecasts', 'satellite imagery', 'en...",11_renewable_energy_output_forecasting,0.75,Public weather and satellite datasets are wide...,3000000,0.85,The project significantly reduces emissions by...,high,['**Implement geospatial bias audits** by trai...,-11.0
4,Climate Policy Optimization with Reinforcement...,Reinforcement learning models simulate and rec...,"['Climate policy datasets', 'economic indicato...",12_climate_policy_optimization_with_reinforcem...,0.65,Data integration challenges exist due to diver...,50000,0.75,The project has a high potential to reduce emi...,high,['**Incorporate fairness-aware reward function...,14.5


### Borda Count

In [33]:
# Rank idea_id by utility_score in descending order from df
ranked_ideas_df = df.sort_values(by='utility_score', ascending=False)
ranked_ideas_df = ranked_ideas_df.reset_index(drop=True)
ranked_ideas_df.columns


Index(['title', 'description', 'data_needs', 'idea_id', 'feasibility_score',
       'feasibility_notes', 'co2_kg_per_year', 'impact_score', 'summary',
       'risk_level', 'mitigation_suggestions', 'utility_score'],
      dtype='object')

### Check if Top Idea meets the criteria

In [34]:
print("--- Top 5 Ranked Ideas (from DataFrame) ---")
if ranked_ideas_df.empty:
    print("No ranked ideas to display.")
else:
    for index, row in ranked_ideas_df.head(5).iterrows():
        print(f"{index + 1}. ID: {row['idea_id']}, Utility: {row['utility_score']:.3f}, Feasibility: {row['feasibility_score']}, Risk: {row['risk_level']}")

def check_thresholds_df(idea_row):
    """Checks if a single idea (DataFrame row) meets the defined thresholds."""
    risk_acceptable = idea_row['risk_level'].lower() in ['low', 'medium']
    feasibility_ok = idea_row['feasibility_score'] >= 0.6
    utility_ok = idea_row['utility_score'] >= 0.5 # This should almost always be true if utility score is high enough to be a top idea

    meets_all = risk_acceptable and feasibility_ok and utility_ok

    feedback_notes = []
    if not risk_acceptable:
        feedback_notes.append(f"Risk '{idea_row['risk_level']}' is too high (threshold: low or medium).")
    if not feasibility_ok:
        feedback_notes.append(f"Feasibility {idea_row['feasibility_score']:.2f} is too low (threshold: >= 0.6).")
    if not utility_ok:
        feedback_notes.append(f"Utility {idea_row['utility_score']:.2f} is too low (threshold: >= 0.5).")

    return meets_all, feedback_notes

N_TOP_IDEAS_TO_CHECK = 3
all_top_ideas_meet_thresholds = True
feedback_for_agent1 = [] # Stores detailed feedback strings

print(f"\n--- Checking Thresholds for Top {N_TOP_IDEAS_TO_CHECK} Ideas ---")

if ranked_ideas_df.empty:
    print("No ideas to check.")
    all_top_ideas_meet_thresholds = False # No ideas means thresholds are not met by any top idea
else:
    # Ensure we don't try to check more ideas than available
    num_ideas_to_actually_check = min(N_TOP_IDEAS_TO_CHECK, len(ranked_ideas_df))
    if num_ideas_to_actually_check == 0:
        all_top_ideas_meet_thresholds = False # Still no ideas to check

    for index, idea_row in ranked_ideas_df.head(num_ideas_to_actually_check).iterrows():
        meets, notes = check_thresholds_df(idea_row)
        print(f"Idea: {idea_row['idea_id']} (Title: {idea_row['title']})")
        print(f"  Utility: {idea_row['utility_score']:.3f}, Feasibility: {idea_row['feasibility_score']}, Risk: {idea_row['risk_level']}")
        if meets:
            #print in green
            print(f"\033[92m  Status: Meets all thresholds.\033[0m")
            # print(f"  Status: Meets all thresholds.")
        else:
            print(f"\033[91m  Status: Does not meet all thresholds.\033[0m")
            for note in notes:
                print(f"    - {note}")
            all_top_ideas_meet_thresholds = False
            # Create a feedback dictionary for more structured feedback
            feedback_detail = {
                'idea_id': idea_row['idea_id'],
                'title': idea_row['title'],
                'failed_thresholds': notes,
                'current_feasibility': idea_row['feasibility_score'],
                'current_risk': idea_row['risk_level'],
                'current_utility': idea_row['utility_score']
            }
            feedback_for_agent1.append(feedback_detail)

if ranked_ideas_df.empty and N_TOP_IDEAS_TO_CHECK > 0 : # Special case if the df was empty to begin with
     all_top_ideas_meet_thresholds = False

if all_top_ideas_meet_thresholds and not ranked_ideas_df.empty and num_ideas_to_actually_check >0:
    print(f"\nAll top {num_ideas_to_actually_check} checked ideas meet the defined thresholds. Proceeding with these ideas.")
elif not ranked_ideas_df.empty and num_ideas_to_actually_check >0:
    print(f"\nNot all top {num_ideas_to_actually_check} checked ideas meet the thresholds. Feedback loop may be needed.")
else: # handles case where ranked_ideas_df was empty or num_ideas_to_actually_check was 0
    print(f"\nNo ideas were available or checked. Feedback loop likely needed.")



--- Top 5 Ranked Ideas (from DataFrame) ---
1. ID: 13_sustainable_deep_learning_models_for_energy_effici, Utility: 29.633, Feasibility: 0.85, Risk: medium
2. ID: 21_machine_learning-based_weather_forecasting_for_agr, Utility: 23.333, Feasibility: 0.75, Risk: medium
3. ID: 22_climate_model_uncertainty_quantification_with_baye, Utility: 23.233, Feasibility: 0.65, Risk: medium
4. ID: 19_environmental_predictive_modeling_with_transformer, Utility: 21.333, Feasibility: 0.7000000000000001, Risk: medium
5. ID: 27_climate_scenario_planning_with_generative_models, Utility: 16.683, Feasibility: 0.75, Risk: high

--- Checking Thresholds for Top 3 Ideas ---
Idea: 13_sustainable_deep_learning_models_for_energy_effici (Title: Sustainable Deep Learning Models for Energy Efficiency)
  Utility: 29.633, Feasibility: 0.85, Risk: medium
[92m  Status: Meets all thresholds.[0m
Idea: 21_machine_learning-based_weather_forecasting_for_agr (Title: Machine Learning-Based Weather Forecasting for Agriculture)
  

### Going back to Agent 1

In [35]:

# --- Task 3: If needed, sends feedback and loops up to three rounds. ---
# The logic for this part remains conceptually the same.
# The 'feedback_for_agent1' now contains dictionaries with more structured info.

MAX_ITERATION_ROUNDS = 3
current_round = 1 # Manage this state variable appropriately in your larger system

# This is a simulation of the decision point for looping
iteration_needed = not all_top_ideas_meet_thresholds

if iteration_needed and current_round <= MAX_ITERATION_ROUNDS:
    print(f"\n--- Iteration Round {current_round}: Feedback Loop Activated ---")
    print("Structured Feedback to Agent 1 (Paper Retriever):")
    if feedback_for_agent1:
        for feedback_item in feedback_for_agent1:
            print(f"  Idea ID: {feedback_item['idea_id']} (Title: {feedback_item['title']})")
            print(f"    Failed on: {', '.join(feedback_item['failed_thresholds'])}")
            print(f"    Current Metrics: Feasibility={feedback_item['current_feasibility']}, Risk='{feedback_item['current_risk']}', Utility={feedback_item['current_utility']:.3f}")
            # Based on feedback_item['failed_thresholds'], construct more specific instructions for Agent 1
            # Example: If "Risk too high", instruct Agent 1 to find "low-risk alternatives for {title}"
            #          If "Feasibility too low", instruct Agent 1 to find "more mature technologies for {title}"
        print("\nInstructing Agent 1 to refine keyword search based on detailed feedback and re-initiate the pipeline.")
        # current_round += 1 # You would increment this in the actual loop
    else:
        print("- General feedback: Top ideas did not meet thresholds, or no ideas were available to check. Consider broader or more constrained keyword search by Agent 1.")

elif not iteration_needed and not ranked_ideas_df.empty and num_ideas_to_actually_check >0: # Corrected condition
    print("\n--- Iteration Complete: Suitable top ideas identified. ---")
    # Proceed to detailed planning for pilot studies with the top N ideas that met thresholds.
    # You might want to filter ranked_ideas_df for those that specifically passed.
    
    # Example: Get the actual top N ideas that passed all checks
    passed_ideas_list = []
    for index, idea_row in ranked_ideas_df.head(num_ideas_to_actually_check).iterrows():
        meets, _ = check_thresholds_df(idea_row)
        if meets:
            passed_ideas_list.append(idea_row['idea_id'])
    if passed_ideas_list:
        print(f"Ideas verified and passed thresholds: {', '.join(passed_ideas_list)}")
    else:
        print("No ideas among the top checked met all thresholds, even if 'iteration_needed' was false due to other logic.")


elif current_round > MAX_ITERATION_ROUNDS:
    print(f"\n--- Max Iteration Rounds ({MAX_ITERATION_ROUNDS}) Reached ---")
    print("Unable to find top ideas meeting all thresholds after maximum iterations.")
    print("Review current top-ranked ideas and decide on next steps (e.g., relax thresholds, manual review, select best available).")
else: # Catch-all for other states, e.g. no ideas at all from the start
    print("\n--- Process Complete: Review state ---")
    if ranked_ideas_df.empty:
        print("No ideas were generated or processed.")
    else:
        print("Current top ideas (even if not meeting all thresholds):")
        for index, row in ranked_ideas_df.head(N_TOP_IDEAS_TO_CHECK).iterrows():
             print(f"  ID: {row['idea_id']}, Utility: {row['utility_score']:.3f}")


--- Iteration Complete: Suitable top ideas identified. ---
Ideas verified and passed thresholds: 13_sustainable_deep_learning_models_for_energy_effici, 21_machine_learning-based_weather_forecasting_for_agr, 22_climate_model_uncertainty_quantification_with_baye


# Get the final detailed Report

In [37]:
ranked_ideas_df[:3]

Unnamed: 0,title,description,data_needs,idea_id,feasibility_score,feasibility_notes,co2_kg_per_year,impact_score,summary,risk_level,mitigation_suggestions,utility_score
0,Sustainable Deep Learning Models for Energy Ef...,Designs energy-efficient deep learning archite...,"['Model performance metrics', 'energy consumpt...",13_sustainable_deep_learning_models_for_energy...,0.85,Utilizes established lightweight model techniq...,20000,0.733333,would mention that the project's focus on redu...,medium,['Implement a **geographically diverse dataset...,29.633333
1,Machine Learning-Based Weather Forecasting for...,Deep learning models predict hyper-local weath...,"['Weather station data', 'soil moisture measur...",21_machine_learning-based_weather_forecasting_...,0.75,Data integration challenges exist for soil moi...,350000,0.733333,This project reduces emissions indirectly by o...,medium,['**Implement geospatially balanced data colle...,23.333333
2,Climate Model Uncertainty Quantification with ...,Bayesian deep learning techniques quantify unc...,"['Climate model outputs', 'parameter sensitivi...",22_climate_model_uncertainty_quantification_wi...,0.65,Climate model data is accessible but large-sca...,10000,0.683333,This project directly improves the reliability...,medium,['**Implement model-agnostic uncertainty visua...,23.233333


In [58]:
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet
from pathlib import Path
import re

In [44]:
df_report = df[:3]

In [45]:
# ─── 2) Load the full details JSON ────────────────────────────────────────────
with open("agent2_ideas.json", "r") as f:
    full_list = json.load(f)

# Build a lookup dict by idea_id
full_lookup = {item["idea_id"]: item for item in full_list}


In [47]:

# ─── 3) Merge on idea_id ──────────────────────────────────────────────────────
# Keep only those rows in df for which we have full detail
df_report = df_report[df_report["idea_id"].isin(full_lookup)]
df_report = df_report.reset_index(drop=True)


In [48]:
df_report

Unnamed: 0,title,description,data_needs,idea_id,feasibility_score,feasibility_notes,co2_kg_per_year,impact_score,summary,risk_level,mitigation_suggestions,utility_score
0,Arctic Anomaly Detection with Computer Vision,A deep learning system that detects and classi...,"['Satellite imagery', 'Arctic climate data', '...",8_arctic_anomaly_detection_with_computer_vision,0.65,Satellite imagery and climate data are partial...,1047500,0.6,should explain that the project's main impact ...,medium,['**Implement region-specific bias audits** by...,12.025
1,Climate Resilience Prediction using GANs,Generative adversarial networks (GANs) simulat...,"['Historical climate data', 'urban/agricultura...",9_climate_resilience_prediction_using_gans,0.7,Data availability is mixed with public climate...,10000,0.7,needs to explain these. The project's main ben...,high,['Implement a data diversity audit to ensure t...,15.9
2,Sea Level Rise Prediction with Deep Learning,A deep learning model that forecasts regional ...,"['Ocean current measurements', 'satellite alti...",10_sea_level_rise_prediction_with_deep_learning,0.7,Public datasets for ocean and satellite data r...,3500000,0.65,The model's predictive capabilities are essent...,medium,['**Implement geographically stratified data s...,-10.5


In [56]:
REPORT_PATH = "../report"
MODEL_NAME = "qwen3:32b"
styles     = getSampleStyleSheet()

def query_ollama_model(model_name: str, prompt_text: str) -> str:
    """
    Sends prompt_text to ollama.chat(...) and returns the model's content string.
    """
    try:
        resp = ollama.chat(
            model=model_name,
            messages=[{"role": "user", "content": prompt_text}]
        )
        return resp["message"]["content"]
    except Exception as e:
        raise RuntimeError(f"Ollama chat failed: {e}")

In [60]:
for _, row in df_report.iterrows():
    idea_id    = row["idea_id"]
    info       = full_lookup[idea_id]
    title      = info["title"]
    summary    = row["description"]
    data_needs = row["data_needs"]
    full_desc  = info["description"]

    # 1) Build the prompt
    prompt = f"""
You are writing a polished, academic‐style report section for a project idea on climate change.

Title: {title}
Summary: {summary}
Data Needs: {', '.join(data_needs)}
Full Description: {full_desc}

Please produce, numbered 1)–4):
1) A 2–3 paragraph Overview.
2) A detailed Data section (sources, formats, access).
3) A Feasibility discussion.
4) Suggested Next Steps and Extensions.

Return the answer in Markdown.
"""

    # 2) Query Ollama
    raw_md = query_ollama_model(MODEL_NAME, prompt)

    # 3) Extract the fenced-in Markdown if present
    m = re.search(r'```(?:markdown)?\s*\n(?P<body>.*?)```', raw_md, flags=re.DOTALL)
    md_body = m.group('body') if m else raw_md

    # 4) Convert **bold** to ReportLab <b>…</b>
    md_body = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', md_body)

    # 5) Prepare PDF
    out_pdf = Path(f"{REPORT_PATH}/{idea_id}.pdf")
    doc     = SimpleDocTemplate(str(out_pdf))
    flow    = []

    # 6) Add a custom title
    flow.append(Paragraph(f"{title} Project Proposal", styles["Title"]))
    flow.append(Spacer(1, 12))

    # 7) Render each line, mapping Markdown headings to PDF headings
    for raw in md_body.splitlines():
        line = raw.rstrip()
        if not line:
            flow.append(Spacer(1, 6))
            continue

        # detect "#", "##", etc.
        h = re.match(r'^(?P<hashes>#{1,6})\s+(?P<text>.+)', line)
        if h:
            level = len(h.group("hashes"))
            text  = h.group("text").strip()
            if   level == 1: style = styles["Heading1"]
            elif level == 2: style = styles["Heading2"]
            else:             style = styles.get(f"Heading{level}", styles["Heading3"])
            flow.append(Paragraph(text, style))
        else:
            flow.append(Paragraph(line, styles["BodyText"]))

    # 8) Build the PDF
    doc.build(flow)
    print(f"✔ Wrote report: {out_pdf.resolve()}")

✔ Wrote report: /home/xiang_fang/adv_deep_learning_final/BEARS/src/output/report/8_arctic_anomaly_detection_with_computer_vision.pdf
✔ Wrote report: /home/xiang_fang/adv_deep_learning_final/BEARS/src/output/report/9_climate_resilience_prediction_using_gans.pdf
✔ Wrote report: /home/xiang_fang/adv_deep_learning_final/BEARS/src/output/report/10_sea_level_rise_prediction_with_deep_learning.pdf
