In [14]:
from pprint import pprint
from typing import TypedDict
import json

from dotenv import find_dotenv, load_dotenv
import google.generativeai as genai
import os

In [5]:
# Locate the nearest .env file and load its variables into the process
# environment, then pass the Gemini key from the environment to the SDK.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

In [7]:
# System prompt applied to every request made through `model`: it sets the
# persona (CS PhD student) and lists the research skills expected of it.
system_instruction = """
You are a Computer Science PhD student. Your goal is to perform comparative studies in specific areas of Computer Science. You should be able to:
- Identify research problems and break them down into sub problems
- Conduct thorough literature review on your topic, summarize key findings and identify gaps in existing methodologies
- Formulate clear and testable hypotheses to address your research questions
- Develop experimental methodologies to test your hypotheses, considering factors such as data collection, analysis, and evaluation
- Collect, clean and analyze relevant data using appropriate tools and techniques
- Draw meaningful conclusions from your research findings and discuss their implications
- Prepare high-quality research papers that effectively communicate your findings
"""

# Single shared model handle reused by the later cells.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    system_instruction=system_instruction,
)

In [10]:
# Shape of one entry in the model's structured JSON reply; `list[Subproblem]`
# is passed as `response_schema` in the generation config of this cell.
class Subproblem(TypedDict):
    # Concise statement of the task to perform for this sub-problem.
    prompt: str
    # True when internet access is needed to complete the sub-problem.
    requires_internet: bool
    # True when the previous sub-problem's output is relevant to this one.
    requires_previous_output: bool


# Research topic to be broken down into sub-problems.
topic = "TCP Congestion Control Algorithms"

# Decomposition request: asks for one entry per sub-problem, each with a task
# prompt plus the two boolean flags declared on `Subproblem`.
prompt = f"""
Perform a review on the following topic: {topic}.

Instructions:
- Identify the key areas of focus within this topic and outline the subproblems that need to be addressed
- For each subproblem, create a concise prompt that states the task to be performed
- Indicate whether internet access is necessary to complete the subproblem
- Determine if the output of the previous subproblem is relevant to the subsequent subproblem
"""

# Request JSON output constrained to a list of `Subproblem` records.
gen_config = dict(
    response_mime_type="application/json",
    response_schema=list[Subproblem],
)

response = model.generate_content(
    prompt,
    generation_config=gen_config,
)

[{"prompt": "Identify and categorize different TCP congestion control algorithms based on their underlying mechanisms and principles (e.g., slow start, congestion avoidance, fast retransmit, fast recovery).", "requires_internet": true, "requires_previous_output": false}, {"prompt": "Analyze the strengths and weaknesses of each algorithm in terms of network performance metrics such as throughput, delay, fairness, and robustness to network conditions.", "requires_internet": true, "requires_previous_output": true}, {"prompt": "Compare and contrast the performance of different algorithms under varying network conditions (e.g., bandwidth variations, packet losses, buffer sizes) using simulations or real-world experiments.", "requires_internet": true, "requires_previous_output": true}, {"prompt": "Investigate the impact of emerging technologies such as 5G and edge computing on TCP congestion control algorithms and identify potential challenges and opportunities.", "requires_internet": true, 

In [20]:
# Decode the JSON text returned by the decomposition request into a list of
# sub-problem dicts.
subproblems = json.loads(response.text)

# Appended to a sub-prompt only when the sub-problem is flagged as needing
# internet access.
internet_prompt = "Only generate a Semantic Scholar API call that assists in finding relevant papers on the above problem."
sub_prompt_template = """
{prompt}
{internet_prompt}

Extra Context:
{extra_context}
"""

# One slot per sub-problem, filled in order as each request completes.
outputs = [None] * len(subproblems)
for i, subproblem in enumerate(subproblems):
    pprint(subproblem)

    # Read the flags with a False default so a reply that omits one of them
    # does not abort the whole run with a KeyError.
    needs_internet = subproblem.get("requires_internet", False)
    # The first sub-problem has no predecessor: without the `i > 0` guard,
    # `outputs[i - 1]` would be `outputs[-1]` (still None) and the literal
    # text "None" would be sent to the model as extra context.
    needs_previous = subproblem.get("requires_previous_output", False) and i > 0

    sub_prompt = sub_prompt_template.format(
        prompt=subproblem["prompt"],
        internet_prompt=internet_prompt if needs_internet else "",
        extra_context=outputs[i - 1] if needs_previous else "",
    )
    outputs[i] = model.generate_content(sub_prompt).text
    


{'prompt': 'Identify and categorize different TCP congestion control '
           'algorithms based on their underlying mechanisms and principles '
           '(e.g., slow start, congestion avoidance, fast retransmit, fast '
           'recovery).',
 'requires_internet': True,
 'requires_previous_output': False}
{'prompt': 'Analyze the strengths and weaknesses of each algorithm in terms of '
           'network performance metrics such as throughput, delay, fairness, '
           'and robustness to network conditions.',
 'requires_internet': True,
 'requires_previous_output': True}
{'prompt': 'Compare and contrast the performance of different algorithms under '
           'varying network conditions (e.g., bandwidth variations, packet '
           'losses, buffer sizes) using simulations or real-world experiments.',
 'requires_internet': True,
 'requires_previous_output': True}
{'prompt': 'Investigate the impact of emerging technologies such as 5G and '
           'edge computing on TC

In [21]:
# Print each sub-problem response in the order the sub-problems were run.
for idx in range(len(outputs)):
    print(outputs[idx])

```json
{
  "query": "tcp congestion control algorithms",
  "fields": ["title", "abstract", "venue", "authors", "year", "citations"],
  "citationContext": true,
  "limit": 100,
  "include_paper_id": true,
  "sort": {
    "citations": "desc"
  },
  "filter": {
    "publication_date": {
      "from": 2010,
      "to": 2023
    }
  }
}
```

**Explanation:**

* **`query`:** This sets the main search term as "tcp congestion control algorithms". 
* **`fields`:** This specifies the data fields to be retrieved for each paper, including title, abstract, venue, authors, publication year, citation count, and the paper ID.
* **`citationContext`:** This flag enables retrieving the citation context, providing insights into how the paper is referenced in other works.
* **`limit`:** This limits the search results to the top 100 most cited papers.
* **`include_paper_id`:** This ensures the paper ID is included in the response, which is useful for further analysis.
* **`sort`:** This sorts the results b