In [24]:
# Reload the Together API key from the .env file, discarding whatever value
# this kernel's environment currently holds, then print a masked confirmation.
import os

# Remove the current value first; if .env no longer defines the key,
# the old value must not survive the reload.
os.environ.pop('TOGETHER_API_KEY', None)

# Force reload from .env file
from dotenv import load_dotenv
load_dotenv(override=True)  # override=True lets .env values replace existing variables

# Reinitialize Together client with the freshly loaded key (None if still unset)
import together
api_key = os.getenv('TOGETHER_API_KEY')
together.api_key = api_key

# Verify the new key is loaded without echoing the secret.
# Only the last 4 characters are shown, and only when the key is longer than
# that; a very short value (e.g. a placeholder) is masked completely instead
# of being printed in full.
VISIBLE_SUFFIX_LEN = 4
if not api_key:
    masked_key = 'None'
elif len(api_key) <= VISIBLE_SUFFIX_LEN:
    masked_key = '*' * len(api_key)
else:
    masked_key = '*' * (len(api_key) - VISIBLE_SUFFIX_LEN) + api_key[-VISIBLE_SUFFIX_LEN:]
print(f"New API key loaded: {masked_key}")

New API key loaded: ************************************************************bf39


In [1]:
import together
import os
from dotenv import load_dotenv

# Read key/value pairs from the local .env file into the process environment
load_dotenv()

# Give the Together SDK the key found in the environment (None when it is unset)
together.api_key = os.environ.get('TOGETHER_API_KEY')

In [3]:
# llama() is provided by the project's model_utils module
from model_utils import llama

# Text that will be sent to the model
prompt = "Write a haiku about AI in 13 words."

# Call llama() with the prompt, keep the result in 'response', and display it
response = llama(prompt)
print(response)

Artificial mind
Intelligence born from code
Future's silent rise


In [5]:
from pdf_util import extract_text_from_pdf, count_tokens, extract_sections

# Token budget constants. The active code below only reports token counts;
# these values are referenced solely by the commented-out experiments.
# NOTE(review): the original comment called 10000 the "Llama-3.3-70B-Instruct
# context window size" -- confirm against the limit of the model llama() calls.
MAX_TOKENS = 10000
RESERVE_TOKENS = 500  # Reserve some tokens for the response

extracted_text = extract_text_from_pdf('literature/human_enhancement_genetic_engineering.pdf')

# Document-level token count. `token_count` keeps this value for the rest of
# the cell; the per-section loop below uses its own variable.
token_count = count_tokens(extracted_text)
print(f"Number of tokens in the PDF: {token_count}")

sections = extract_sections(extracted_text)
print(f"Found {len(sections)} sections.")

for section_title, section_content in sections.items():
    # A separate name is used here so the document-level `token_count` is not
    # overwritten with the count of whichever section happens to come last.
    section_token_count = count_tokens(section_content)
    print(f"\nProcessing section '{section_title}' with {section_token_count} tokens")
    
    # # If this section is too long, truncate it to make sure the prompt fits within the safe token limit.
    # # NOTE(review): the slice below cuts by characters (presumably section_content is a str),
    # # not by tokens -- truncate on tokens before re-enabling this.
    # if section_token_count > (MAX_TOKENS - RESERVE_TOKENS):
    #     print(f"Warning: Section '{section_title}' exceeds the token limit. Truncating content.")
    #     section_content = section_content[:(MAX_TOKENS - RESERVE_TOKENS)]
    
    # # Build the prompt to include the section title and content
    # prompt = (
    #     f"Section Title: {section_title}\n\n"
    #     f"Section Text: {section_content}\n\n"
    #     "Please provide a concise summary for the above section in exactly 5 words."
    # )
    
    # # Call the language model function to get the summary of the section
    # summary = llama(prompt)
    # print(f"Summary for section '{section_title}': {summary}")

# Whole-document check (disabled). `token_count` here is the document total.
# if token_count > (MAX_TOKENS - RESERVE_TOKENS):
#     print(f"Warning: Input text is too long ({token_count} tokens).")
#     print(f"Maximum safe input length is {MAX_TOKENS - RESERVE_TOKENS} tokens (reserving {RESERVE_TOKENS} tokens for response)")
#     # Optionally, you could truncate the text here
# else:
#     print(f"Text length is within limits: {token_count} tokens")
#     prompt = f"given {extracted_text}, write a summary of the document in 5 words"
#     response = llama(prompt)
#     print(response)



Number of tokens in the PDF: 8430
Found 3 sections.

Processing section 'Abstract' with 273 tokens

Processing section 'Introduction' with 6901 tokens

Processing section 'Methods' with 998 tokens


In [8]:
from pdf_util import count_tokens

# Extract just the Abstract and Methods sections and combine them into one
# labelled text block. A section missing from `sections` contributes an empty
# string rather than raising.
abstract_text = sections.get('Abstract', '')
methods_text = sections.get('Methods', '')
combined_text = f"Abstract:\n{abstract_text}\n\nMethods:\n{methods_text}"

# This count covers only the combined Abstract + Methods text, not the whole
# PDF, so the message is labelled accordingly.
token_count = count_tokens(combined_text)
print(f"Number of tokens in the combined Abstract and Methods text: {token_count}")

Number of tokens in the PDF: 1276


In [9]:
# Few-shot prompting experiment: two worked examples followed by the target paper

prompt = "".join([
    # Framing sentence
    "Below are a few example evaluations of research papers in the field of genetic engineering:\n\n",

    # Worked example 1
    "Example 1:\n",
    "Combined Abstract and Methods:\n",
    "\"Abstract: Recent advances in CRISPR/Cas9 have enabled precise modifications in crop genomes to enhance drought tolerance. ",
    "Methods: The study used CRISPR vectors to introduce targeted mutations in drought-resistance genes of maize.\"\n",
    "Evaluation: The research examines how targeted gene editing can improve crop resilience to environmental stress.\n",
    "Novel hypothesis: Optimize CRISPR protocols for enhanced drought resilience in maize.\n\n",

    # Worked example 2
    "Example 2:\n",
    "Combined Abstract and Methods:\n",
    "\"Abstract: A novel gene therapy approach was explored to combat muscular dystrophy by correcting mutant genes. ",
    "Methods: Viral vectors were employed to deliver corrected gene sequences to the affected muscle tissues.\"\n",
    "Evaluation: The study tests the feasibility of gene therapy in alleviating symptoms of genetic muscle disorders.\n",
    "Novel hypothesis: Enhance the specificity of viral vector delivery in gene therapy for muscular dystrophy.\n\n",

    # The paper under review, then the instruction
    "Now, here is the combined Abstract and Methods section of a research paper:\n\n",
    f"{combined_text}\n\n",
    "Evaluate the research hypothesis and methods and generate a novel hypothesis for future experimentation.",
])

# Send the assembled prompt to the model and show the reply
fewshot_response = llama(prompt)
print(fewshot_response)

Evaluation:
The research paper is a commentary that explores the implications of genetic engineering on human biology, evolution, and the natural environment. The authors highlight the importance of considering the multiple effects of genetic interventions, complex epigenetic interactions, and the need to distinguish between 'therapy' and 'enhancement' in future regulations. The paper is more of a theoretical and philosophical discussion rather than an experimental study.

Methods:
The paper does not present any experimental methods or data. Instead, it provides a comprehensive review of existing literature on genetic engineering, evolutionary biology, and systems biology. The authors cite numerous references to support their arguments and provide a framework for understanding the complexities of genetic engineering.

Novel Hypothesis:
A potential novel hypothesis for future experimentation could be:

"Develop a systems biology approach to investigate the long-term effects of genetic e

In [10]:
# Role-prompting experiment: the model is given an expert persona and a two-part task
role_prompt = (
    # Persona and task introduction
    "You are an expert research scientist specializing in genetic engineering with extensive experience in analyzing experimental designs and research methodologies. Your task is twofold:\n\n"
    # Part 1: critique the paper
    + "1. Evaluate the following combined Abstract and Methods section of a research paper by highlighting the key strengths and weaknesses in the existing hypothesis and experimental design.\n\n"
    # Part 2: propose follow-up work
    + "2. Based on your evaluation, propose a novel hypothesis for future experimentation that addresses any gaps or builds upon the findings.\n\n"
    # The paper text itself
    + f"Combined Abstract and Methods section:\n{combined_text}\n\n"
    # Output instruction
    + "Please provide a concise evaluation followed by your novel hypothesis."
)

# Send the assembled prompt to the model and show the reply
role_response = llama(role_prompt)
print(role_response)

**Evaluation:**

The combined Abstract and Methods section of the research paper highlights the importance of considering the evolutionary context of genetic enhancement and its implications for human biology, human evolution, and the natural environment. The authors emphasize the need to acknowledge multiple effects (pleiotropy) and complex epigenetic interactions among genotype, phenotype, and ecology. They also propose a practicable distinction between 'therapy' and 'enhancement' for future regulations.

Strengths:

1. The paper emphasizes the importance of considering the evolutionary context of genetic enhancement, which is often overlooked in discussions of genetic engineering.
2. The authors highlight the need to consider multiple effects (pleiotropy) and complex epigenetic interactions, which is a crucial aspect of understanding the implications of genetic engineering.
3. The paper proposes a practicable distinction between 'therapy' and 'enhancement', which is a necessary step

In [11]:
# Chain-of-thought prompting experiment: the template below is filled with the paper text
cot_prompt = (
    "You are an expert research scientist specializing in genetic engineering with extensive experience in evaluating research papers. "
    "When analyzing the following combined Abstract and Methods section, list your thoughts, using your internal chain-of-thought, "
    "to consider the strengths and weaknesses of the hypothesis and experimental design, as well as any gaps or opportunities for future research. "
    "Then, provide only your final, concise evaluation along with a novel hypothesis for future experimentation. "
    "Combined Abstract and Methods section:\n\n"
    "{paper}\n\n"
    "Please provide your final evaluation and novel hypothesis."
).format(paper=combined_text)

# Send the assembled prompt to the model and show the reply
cot_response = llama(cot_prompt)
print(cot_response)


Internal chain-of-thought:

* The abstract seems to be more of a commentary on the ethics and implications of genetic engineering rather than a traditional research paper. It highlights the importance of considering the evolutionary context and multiple effects of genetic interventions.
* The methods section appears to be a list of references rather than a description of the research methodology used. This is unusual and makes it difficult to evaluate the experimental design.
* The authors seem to be emphasizing the need for a systems biology approach to understanding the impact of genetic engineering on human populations and the environment.
* They also propose that a distinction between "therapy" and "enhancement" is needed, which is a crucial point for future regulations.
* The references cited are from a wide range of fields, including evolutionary biology, developmental biology, and philosophy, which suggests that the authors are taking a multidisciplinary approach to the topic.
*

In [6]:
# Evaluator step: ask the model to score the three earlier responses
# (few-shot, role-based, chain-of-thought) against four criteria.

evaluator_prompt = (
    # Evaluator persona and task description
    "You are a scientific evaluator with expertise in genetic engineering. "
    "Your task is to assess the research evaluation outputs produced by three different prompting approaches: Few Shot, Role Based, and Chain of Thought. "
    "Each of these outputs includes an assessment of the research paper's hypothesis and a proposal for a novel hypothesis for future experimentation.\n\n"
    # Scoring criteria
    "Please evaluate each of the provided outputs based on the following criteria:\n"
    "1. Novelty and ingenuity: How original and innovative is the proposed hypothesis given the combined Abstract and Methods section?\n"
    "2. Experimental design feasibility: How feasible is the experimental design suggested for testing the hypothesis?\n"
    "3. Impact potential: What is the potential impact of this research on the field of genetic engineering?\n"
    "4. Cost and materials feasibility: Evaluate whether the required materials and overall cost make the experimental design practical.\n\n"
    # Source material for context
    f"Below is the Combined Abstract and Methods section for context:\n\n{combined_text}\n\n"
    "Now, please review and evaluate the following outputs:\n\n"
    # The three candidate outputs, each under its own banner
    f"----- Few Shot Output -----\n{fewshot_response}\n\n"
    f"----- Role Based Output -----\n{role_response}\n\n"
    f"----- Chain of Thought Output -----\n{cot_response}\n\n"
    # Required shape of the report
    "For each output, provide a detailed evaluator report that includes numerical ratings on a scale of 1 to 5 along with concise justifications for each of the four criteria. "
    "Your response should clearly indicate your evaluation for each of the three outputs."
)

# Send the assembled prompt to the model and show the report
evaluation_report = llama(evaluator_prompt)
print(evaluation_report)