In [7]:
import json
import os

import ollama

# Configuration
# Input files: both are JSON documents that main() reads through load_keywords().
UNIQUE_PAPER = 'output/unique_paper.json'
KEYWORDS = 'output/keywords.json'

# Destination of the parsed ideas; main() writes a JSON list here.
# (Plain literal: the previous f-string prefix had no placeholders to format.)
OUTPUT_FILE = 'output/agent2_ideas.json'

# Ollama model tag that main() passes to generate_ideas() as `model`.
AGENT_2 = 'qwen3:32b'

In [8]:
def load_keywords(path):
    """Read the UTF-8 JSON file at `path` and return the decoded object.

    Despite the name, nothing here is keyword-specific: main() uses it for
    both the keywords file and the papers file.

    Raises whatever `open` / the JSON decoder raise for a missing or
    malformed file.
    """
    with open(path, mode='r', encoding='utf-8') as handle:
        raw_json = handle.read()
    return json.loads(raw_json)

In [9]:
def generate_ideas(keywords: list, paper: dict, model: str) -> str:
    """Ask an Ollama model for climate-change deep learning project ideas.

    Args:
        keywords: Keywords to seed the ideas; interpolated into the prompt
            with ordinary f-string formatting.
        paper: Paper data to ground the ideas; interpolated the same way.
        model: Ollama model tag to query.

    Returns:
        The model's text reply. The prompt requests one TITLE / DESCRIPTION /
        DATA NEEDS group per idea, which is the layout parse_ideas() reads.

        NOTE: if both the chat call and the fallback generate call fail, the
        text of the second exception is returned instead of raising, so a
        caller cannot tell a failure from a genuine reply by type alone.
    """
    # The format instructions must agree with the examples: every idea carries
    # a DATA NEEDS line, so the reply ends on DATA NEEDS, not on DESCRIPTION.
    prompt = f"""
    You are Agent 2 (Idea Generator) in a climate-change deep learning pipeline.

    Given the following keywords:
    {keywords}

    Given the following papers:
    {paper}

    Propose 15-20 candidate deep learning project ideas to combat climate change.

    RESPONSE FORMAT:
    Return your response as a simple formatted list with TITLE, DESCRIPTION and DATA NEEDS for each idea, separated by blank lines. For example:

    TITLE: Climate Tipping Point Prediction System
    DESCRIPTION: A deep learning system that identifies potential climate tipping points by analyzing historical climate data and current trends. Uses recurrent neural networks to process time series data from multiple sources to predict non-linear climate transitions.
    DATA NEEDS: Historical climate records, current sensor data, satellite imagery

    TITLE: Carbon Sequestration Optimization Network
    DESCRIPTION: An ML algorithm that determines optimal locations and methods for carbon sequestration based on geographical and atmospheric conditions. Combines computer vision analysis of terrain with climate models to maximize carbon capture efficiency.
    DATA NEEDS: Historical climate records, current sensor data, satellite imagery

    Do not include any additional text, explanations, introductions, or conclusions. Start directly with the first TITLE and end with the last DATA NEEDS line. Ensure each project idea is separated by exactly one blank line from the next idea.

    Now, generate 15-20 creative and technically feasible project ideas based on the provided keywords and papers.
    """
    try:
        # Preferred path: chat endpoint with a single user message.
        response = ollama.chat(model=model, messages=[
            {"role": "user", "content": prompt}
        ])
        return response['message']['content']
    except Exception as e:
        print(f"Error calling Ollama model: {e}")
        try:
            # Fallback path: plain completion endpoint with the same prompt.
            response = ollama.generate(model=model, prompt=prompt)
            return response['response']
        except Exception as e2:
            print(f"Error in fallback generate: {e2}")
            # Best-effort contract kept from the original: hand back the error text.
            return str(e2)

In [10]:
def parse_ideas(output_str: str) -> list:
    """Parse TITLE / DESCRIPTION / DATA NEEDS groups out of raw model text.

    Processing steps:
      1. Fenced code blocks are removed entirely: the ``` marker lines and
         every line between them are discarded.
      2. The remaining text is written to 'output/debug/task2_text.txt'
         (UTF-8; the directory is created if missing) for debugging.
      3. Lines are scanned in order. A line starting with 'TITLE:' opens a
         new idea; 'DESCRIPTION:' and 'DATA NEEDS:' lines fill the idea that
         is currently open. A non-header line directly following a header
         (no blank line in between) continues that field. A blank or
         whitespace-only line ends the continuation but NOT the idea, so a
         blank line between TITLE and DESCRIPTION no longer splits an idea,
         and a whitespace-only separator no longer merges two ideas.
      4. Text before the first TITLE, and free-standing paragraphs that do
         not start with a header, are ignored.

    Args:
        output_str: Raw text returned by the model.

    Returns:
        A list of dicts, one per TITLE found, with keys 'title' (str),
        'description' (str), 'data_needs' (list of str, split on commas) and
        'idea_id' ('<1-based index>_<lower-cased title, spaces replaced by
        underscores, cut to 50 characters>').

    Raises:
        OSError: if the debug file cannot be written.
    """
    # Clean up fenced code blocks
    kept_lines = []
    in_fence = False
    for line in output_str.splitlines():
        if line.strip().startswith('```'):
            in_fence = not in_fence
            continue
        if not in_fence:
            kept_lines.append(line)
    text = '\n'.join(kept_lines)

    # Save the cleaned text for debugging. The directory may not exist on a
    # fresh checkout, and model output is frequently non-ASCII, hence the
    # explicit makedirs and encoding.
    debug_path = 'output/debug/task2_text.txt'
    os.makedirs(os.path.dirname(debug_path), exist_ok=True)
    with open(debug_path, 'w', encoding='utf-8') as f:
        f.write(text)

    ideas = []
    current = None           # idea dict currently being filled, if any
    current_section = None   # field that continuation lines are appended to

    for raw_line in kept_lines:
        line = raw_line.strip()
        if not line:
            # A blank line ends a multi-line field but keeps the idea open.
            current_section = None
            continue

        if line.startswith('TITLE:'):
            # Every TITLE starts a fresh idea, whatever separator preceded it.
            current = {'title': line[6:].strip(), 'description': '', 'data_needs': []}
            ideas.append(current)
            current_section = 'title'
        elif current is None:
            # Preamble before the first TITLE: nothing to attach it to.
            continue
        elif line.startswith('DESCRIPTION:'):
            current_section = 'description'
            current['description'] = line[12:].strip()
        elif line.startswith('DATA NEEDS:'):
            current_section = 'data_needs'
            items = [item.strip() for item in line[11:].split(',')]
            current['data_needs'] = [item for item in items if item]
        elif current_section == 'title':
            current['title'] += ' ' + line
        elif current_section == 'description':
            current['description'] += ' ' + line
        elif current_section == 'data_needs':
            # Multi-line data needs: keep splitting on commas.
            items = [item.strip() for item in line.split(',')]
            current['data_needs'].extend(item for item in items if item)
        # else: stray paragraph after a blank line -> ignored

    # IDs are assigned last so that multi-line titles are complete.
    for idx, idea in enumerate(ideas, start=1):
        safe_title = idea['title'].lower().replace(' ', '_')[:50]
        idea['idea_id'] = f"{idx}_{safe_title}"

    return ideas

In [11]:
def main():
    """Run the idea-generation step end to end.

    Loads the keywords and papers JSON files, queries the model once, stores
    the raw reply in 'output/agent2_raw_llm_output.txt', parses it, keeps only
    ideas with a non-empty title, prints a short summary and writes the kept
    ideas to OUTPUT_FILE as indented JSON.
    """
    keyword_payload = load_keywords(KEYWORDS)
    keywords = keyword_payload.get('keywords')
    papers = load_keywords(UNIQUE_PAPER)

    print("Generating ideas...")
    raw_reply = generate_ideas(keywords, papers, AGENT_2)

    # Write raw output for debugging
    with open('output/agent2_raw_llm_output.txt', 'w', encoding='utf-8') as raw_file:
        raw_file.write(raw_reply)

    print("Parsing ideas...")
    # Drop parsed entries that ended up without a title.
    titled_ideas = list(filter(lambda idea: idea['title'], parse_ideas(raw_reply)))

    print(f"Found {len(titled_ideas)} ideas")

    # Display the first idea for verification
    if titled_ideas:
        sample = titled_ideas[0]
        print("First idea example:")
        print(f"Title: {sample['title']}")
        print(f"Description: {sample['description']}")
        print(f"Data needs: {sample['data_needs']}")

    # Write results to JSON
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as out_file:
        json.dump(titled_ideas, out_file, ensure_ascii=False, indent=2)

    print(f"Ideas saved to {OUTPUT_FILE}")


# Runs the pipeline when executed as a script (and when the cell is run in a
# notebook, where __name__ is also "__main__").
if __name__ == "__main__":
    main()

Generating ideas...
Parsing ideas...
Found 20 ideas
First idea example:
Title: Arctic Anomaly Detection with Computer Vision
Description: A deep learning system that detects and classifies Arctic climate anomalies (e.g., ice melt, temperature spikes) using satellite imagery and computer vision techniques. Leverages CNNs to identify patterns in multi-spectral data for real-time anomaly monitoring.
Data needs: ['Satellite imagery', 'Arctic climate data', 'historical ice coverage records']
Ideas saved to output/agent2_ideas.json
