In [14]:
import os
from dotenv import load_dotenv
import openai
load_dotenv()
from generate_ideas import post
import requests


API_KEY = os.getenv("OPENROUTER_API_KEY")
API_ENDPOINT = os.getenv("OPENROUTER_API_ENDPOINT")
research_directions = [
    "Artificial Intelligence in Healthcare",
    "Renewable Energy Technologies",
    "Quantum Computing Algorithms",
    "Biotechnology Innovations",
    # Add more directions as needed
]

def generate_ideas(research_direction, chat_history=None, num_ideas=20):
    prompt = (
        f"Provide {num_ideas} novel research approaches for the following research direction:\n"
        f"{research_direction}\n"
        f"List each idea numerically."
    )

    # Initialize chat_history if it's None
    if chat_history is None:
        chat_history = []

    # Append the prompt to the chat history
    chat_history.append({"role": "user", "content": prompt})

    response = requests.post(
        API_ENDPOINT,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {API_KEY}",
        },
        json={
            "model": "openai/o1-preview-2024-09-12",
            "messages": chat_history,
            "temperature": 0.7,
            "top_p": 0.99
        },
    )

    response_json = response.json()

    # Check for errors
    if 'error' in response_json:
        print(f"API Error: {response_json['error']['message']}")
        return []

    # Proceed if 'choices' key exists
    if 'choices' in response_json:
        ideas_text = response_json['choices'][0]['message']['content']
    else:
        print(f"Unexpected response structure: {response_json}")
        return []

    # Extract numbered ideas
    ideas = []
    for line in ideas_text.split('\n'):
        if line.strip():
            if line.lstrip()[0].isdigit() and '.' in line:
                idea = line.split('.', 1)[1].strip()
                ideas.append(idea)
    return ideas




In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def filter_similar_ideas(ideas, similarity_threshold=0.8):
    unique_ideas = []
    similarity_cache = {}

    for idea in ideas:
        is_unique = True
        for unique_idea in unique_ideas:
            # Create a sorted tuple to use as a key for caching
            pair = tuple(sorted([idea, unique_idea]))
            if pair in similarity_cache:
                similarity = similarity_cache[pair]
            else:
                similarity = get_similarity_score(idea, unique_idea)
                similarity_cache[pair] = similarity
            if similarity >= similarity_threshold:
                is_unique = False
                break
        if is_unique:
            unique_ideas.append(idea)
    return unique_ideas
def get_similarity_score(idea1, idea2):
    prompt = (
        f"On a scale from 0 to 1, where 0 means completely different and 1 means identical, "
        f"how similar are the following two research ideas?\n\n"
        f"Idea 1: {idea1}\n"
        f"Idea 2: {idea2}\n\n"
        f"Provide only the similarity score as a decimal number between 0 and 1."
    )
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an AI assistant that compares research ideas for similarity."},
                {"role": "user", "content": prompt}
            ],
            temperature=0,
            max_tokens=10
        )
        similarity_score_text = response['choices'][0]['message']['content'].strip()
        similarity_score = float(similarity_score_text)
    except Exception as e:
        print(f"Error getting similarity score: {e}")
        similarity_score = 0.0  # Default to 0 if there's an error
    return similarity_score



In [16]:
def assess_idea(idea):
    prompt = (
        f"Assess the following research idea based on three criteria: novelty, feasibility, and interestingness.\n\n"
        f"Idea: {idea}\n\n"
        f"Provide a score for each criterion on a scale of 1 to 5, along with a brief justification."
    )

    response = openai.chat_completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an expert in research assessment."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.5,
        max_tokens=200
    )

    assessment_text = response.choices[0].message.content
    scores = {}
    for line in assessment_text.split('\n'):
        if ':' in line:
            key, value = line.split(':', 1)
            key = key.strip().lower()
            try:
                score = float(value.strip().split()[0])
                scores[key] = score
            except ValueError:
                scores[key] = 0
    return scores

In [17]:
def select_top_k_ideas(ideas, assessments, top_k=5):
    # Example: Weighted sum (weights can be adjusted)
    weights = {'novelty': 0.4, 'feasibility': 0.3, 'interestingness': 0.3}
    scores = []
    for idea, assessment in zip(ideas, assessments):
        total_score = sum(assessment.get(k, 0) * w for k, w in weights.items())
        scores.append((idea, total_score))
    # Sort by total_score descending
    scores.sort(key=lambda x: x[1], reverse=True)
    top_ideas = [idea for idea, score in scores[:top_k]]
    return top_ideas


In [18]:
def save_ideas(research_direction, ideas):
    # Sanitize folder name
    folder_name = research_direction.replace(" ", "_").lower()
    os.makedirs(os.path.join(folder_name, "idea"), exist_ok=True)
    prompt_path = os.path.join(folder_name, "idea", "prompt.txt")
    with open(prompt_path, 'w', encoding='utf-8') as f:
        for idx, idea in enumerate(ideas, 1):
            f.write(f"{idx}. {idea}\n")


In [19]:
def main():
    top_k = 5  # Number of top ideas to select
    for direction in research_directions:
        print(f"Processing research direction: {direction}")
        # Step 1: Generate ideas
        generated_ideas = generate_ideas(direction,[], num_ideas=20)
        print(f"Generated {len(generated_ideas)} ideas.")
        
        # Step 2: Filter similar ideas
        unique_ideas = filter_similar_ideas(generated_ideas, similarity_threshold=0.8)
        print(f"{len(unique_ideas)} unique ideas after filtering.")
        
        # Step 3: Assess each idea
        assessments = []
        for idea in unique_ideas:
            assessment = assess_idea(idea)
            assessments.append(assessment)
        
        # Step 4: Select top K ideas
        top_ideas = select_top_k_ideas(unique_ideas, assessments, top_k=top_k)
        print(f"Selected top {top_k} ideas.")
        
        # Step 5: Save ideas
        save_ideas(direction, top_ideas)
        print(f"Saved top ideas to folder: {direction.replace(' ', '_').lower()}/idea/prompt.txt\n")

if __name__ == "__main__":
    main()


Processing research direction: Artificial Intelligence in Healthcare
Generated 20 ideas.
Error getting similarity score: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742

Error getting similarity score: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migra

AttributeError: module 'openai' has no attribute 'chat_completions'