In [None]:
# Step 1: Install Required Packages
!pip install langgraph langchain openai langchain-anthropic boto3 langchain_community




In [None]:
from google.colab import userdata
import os
from langchain_anthropic import ChatAnthropic
from langchain.chat_models.base import BaseChatModel


import boto3
import json
# Read the AWS credentials from the Colab secret store and export them as
# environment variables so the AWS SDK can pick them up.
access_key = userdata.get('aws_access_key_id')
secret_key = userdata.get('aws_secret_access_key')
if not access_key or not secret_key:
    # Fail here with a readable message instead of a TypeError from os.environ
    # (None) or an authentication error much later (empty string).
    raise ValueError(
        "Colab secrets 'aws_access_key_id' and 'aws_secret_access_key' must both be set."
    )
os.environ["AWS_ACCESS_KEY_ID"] = access_key
os.environ["AWS_SECRET_ACCESS_KEY"] = secret_key

os.environ["AWS_REGION"] = "us-east-1"  # or another valid region
# boto3 takes its default region from AWS_DEFAULT_REGION, so mirror the value
# there; otherwise changing the line above would have no effect on boto3.
os.environ["AWS_DEFAULT_REGION"] = os.environ["AWS_REGION"]

In [None]:
from langchain_community.chat_models import BedrockChat as ChatBedrock



In [None]:
# Chat model handle for Claude 3 Sonnet served through Amazon Bedrock.
claude = ChatBedrock(
    region_name="us-east-1",
    model_id="anthropic.claude-3-sonnet-20240229-v1:0",
)


In [None]:
import numpy as np
import tensorflow as tf
import pickle
import re
from tensorflow.keras import layers

# === Transformer Block for Custom Object ===
class TransformerEncoderBlock(tf.keras.layers.Layer):
    """Transformer encoder block, passed as a custom object when loading the autoencoder.

    Applies multi-head self-attention followed by a two-layer feed-forward
    network. The output of each sub-layer goes through dropout, a residual
    addition and layer normalization.

    Args:
        embed_dim: Output width of the feed-forward network; also used as the
            ``key_dim`` of the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) feed-forward layer.
        rate: Dropout rate used after attention and after the feed-forward network.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            layers.Dense(ff_dim, activation='relu'),
            layers.Dense(embed_dim),
        ])
        self.norm1 = layers.LayerNormalization(epsilon=1e-6)
        self.norm2 = layers.LayerNormalization(epsilon=1e-6)
        self.drop1 = layers.Dropout(rate)
        self.drop2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        """Run self-attention and the feed-forward network on ``inputs``.

        ``training`` only controls whether the two dropout layers are active.
        """
        attn_output = self.att(inputs, inputs)
        out1 = self.norm1(inputs + self.drop1(attn_output, training=training))
        ffn_output = self.ffn(out1)
        return self.norm2(out1 + self.drop2(ffn_output, training=training))

    def get_config(self):
        """Return the layer config including the constructor arguments.

        Needed so a model containing this layer can be saved and later rebuilt
        through ``custom_objects``.
        """
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

# === Load Models ===
# All three networks are loaded uncompiled (compile=False). Only the
# autoencoder is given the custom TransformerEncoderBlock class.
autoencoder = tf.keras.models.load_model(
    "autoencoder_model.h5",
    compile=False,
    custom_objects={"TransformerEncoderBlock": TransformerEncoderBlock},
)
decoder = tf.keras.models.load_model(
    "decoder_model.h5",
    compile=False,
)
expression_model = tf.keras.models.load_model(
    "expression_model.h5",
    compile=False,
)

# === Load Vocabulary ===
# NOTE(security): pickle.load can execute arbitrary code contained in the file;
# only load a vocab_mapping.pkl that comes from a trusted source.
with open("vocab_mapping.pkl", "rb") as f:
    vocab_data = pickle.load(f)
# The pickle holds both directions of the base <-> index mapping.
vocab = vocab_data["base_to_idx"]
idx_to_char = vocab_data["idx_to_base"]

# === Sequence Decoding Functions ===
def decode_sequence_with_temperature(logits, temperature=1.0):
    """Sample one character per position from temperature-scaled logits.

    The logits are divided by ``temperature``, turned into probabilities with a
    softmax over the last axis, and one index is drawn per row with
    ``np.random.choice``. Indices are mapped to characters through the
    module-level ``idx_to_char``; indices missing from it become ``'N'``.

    Args:
        logits: Per-position logits, iterated row by row
            (presumably shaped (sequence_length, vocab_size) — confirm against
            the decoder output).
        temperature: Positive scaling factor; smaller values make the sampling
            more concentrated on the highest logits.

    Returns:
        The sampled sequence as a string.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    # A zero temperature yields NaN probabilities and a negative one silently
    # inverts the distribution, so reject both up front.
    if temperature <= 0:
        raise ValueError(f"temperature must be > 0, got {temperature}")
    probs = tf.nn.softmax(logits / temperature, axis=-1).numpy()
    sampled_indices = [np.random.choice(len(p), p=p) for p in probs]
    return ''.join(idx_to_char.get(i, 'N') for i in sampled_indices)

def clean_dna_sequence(seq):
    """Return ``seq`` with every character other than A, C, G or T removed.

    Matching is case-sensitive, so lowercase letters are removed as well.
    """
    non_base_pattern = r'[^ACGT]'
    stripped = re.sub(non_base_pattern, '', seq)
    return stripped

# === Promoter Generation Function ===
# Size of the latent space, read from the output shape of the autoencoder's
# "latent_vector" layer.
latent_dim = autoencoder.get_layer("latent_vector").output.shape[-1]

def generate_promoter_sequences(target_expr, steps=500, lr=0.05, temps=(0.4, 0.6, 0.8, 1.0)):
    """Optimize a latent vector towards ``target_expr`` and sample sequences from it.

    A random latent vector is updated with Adam so that the expression model's
    prediction for it approaches ``target_expr`` (mean squared error). The
    decoder output for the optimized vector is then sampled once per
    temperature. Only sequences made purely of A/C/G/T are kept.

    Args:
        target_expr: Expression value the latent vector is optimized towards.
        steps: Number of gradient steps.
        lr: Adam learning rate.
        temps: Sampling temperatures; one sequence is drawn per entry.

    Returns:
        List of ``(sequence, predicted_expression)`` tuples. The prediction is
        the expression model's output for the optimized latent vector, not for
        the sampled sequence, so it is identical for every tuple. The list is
        empty when every sampled sequence contains a non-ACGT character.
    """
    z = tf.Variable(tf.random.normal([1, latent_dim]), trainable=True)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    for _ in range(steps):
        with tf.GradientTape() as tape:
            pred_expr = expression_model(z, training=False)
            loss = tf.reduce_mean(tf.square(pred_expr - target_expr))
        grads = tape.gradient(loss, [z])
        optimizer.apply_gradients(zip(grads, [z]))

    logits = decoder(z, training=False).numpy().squeeze()
    # The prediction depends only on z, so evaluate it once instead of once per
    # temperature.
    final_expr = float(expression_model(z, training=False).numpy().squeeze())

    generated_variants = []
    for temp in temps:
        seq = decode_sequence_with_temperature(logits, temperature=temp)
        # Cleaning only removes characters, so an unchanged string means the
        # sample consists of A/C/G/T only.
        if clean_dna_sequence(seq) == seq:
            generated_variants.append((seq, final_expr))

    return generated_variants


In [None]:
import numpy as np
import tensorflow as tf
import pickle
import re
from tensorflow.keras import layers

# === Transformer Block for Loading ===
class TransformerEncoderBlock(tf.keras.layers.Layer):
    """Transformer encoder block, passed as a custom object when loading the autoencoder.

    Multi-head self-attention followed by a two-layer feed-forward network;
    each sub-layer output is passed through dropout, added to its input and
    layer-normalized.

    Args:
        embed_dim: Output width of the feed-forward network; also used as the
            ``key_dim`` of the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) feed-forward layer.
        rate: Dropout rate used after attention and after the feed-forward network.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Stored so that get_config() can return the constructor arguments.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            layers.Dense(ff_dim, activation='relu'),
            layers.Dense(embed_dim),
        ])
        self.norm1 = layers.LayerNormalization(epsilon=1e-6)
        self.norm2 = layers.LayerNormalization(epsilon=1e-6)
        self.drop1 = layers.Dropout(rate)
        self.drop2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        """Apply attention and feed-forward sub-layers; ``training`` toggles dropout."""
        attn_output = self.att(inputs, inputs)
        out1 = self.norm1(inputs + self.drop1(attn_output, training=training))
        ffn_output = self.ffn(out1)
        return self.norm2(out1 + self.drop2(ffn_output, training=training))

    def get_config(self):
        """Return the layer config extended with the constructor arguments.

        Allows models that contain this layer to be saved and re-created via
        ``custom_objects``.
        """
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

# === Load Models and Vocab ===
# The networks are loaded uncompiled; the autoencoder additionally needs the
# custom TransformerEncoderBlock class to be resolvable by name.
autoencoder = tf.keras.models.load_model(
    "autoencoder_model.h5",
    compile=False,
    custom_objects={"TransformerEncoderBlock": TransformerEncoderBlock},
)
decoder = tf.keras.models.load_model(
    "decoder_model.h5",
    compile=False,
)
expression_model = tf.keras.models.load_model(
    "expression_model.h5",
    compile=False,
)

# NOTE(security): pickle.load can run arbitrary code from the file; only use a
# vocab_mapping.pkl from a trusted source.
with open("vocab_mapping.pkl", "rb") as f:
    vocab_data = pickle.load(f)
# Index -> base lookup used when decoding sampled indices.
idx_to_char = vocab_data["idx_to_base"]

# === Helper Functions ===
def decode_sequence_with_temperature(logits, temperature=1.0):
    """Sample one character per position from temperature-scaled logits.

    Divides ``logits`` by ``temperature``, applies a softmax over the last
    axis and draws one index per row with ``np.random.choice``. Each index is
    translated through the module-level ``idx_to_char``; unknown indices are
    rendered as ``'N'``.

    Args:
        logits: Per-position logits, iterated row by row
            (presumably (sequence_length, vocab_size) — confirm against the
            decoder output).
        temperature: Positive scaling factor applied before the softmax.

    Returns:
        The sampled sequence as a string.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    # Zero gives NaN probabilities; a negative value would silently favor the
    # least likely characters.
    if temperature <= 0:
        raise ValueError(f"temperature must be > 0, got {temperature}")
    probs = tf.nn.softmax(logits / temperature, axis=-1).numpy()
    sampled_indices = [np.random.choice(len(p), p=p) for p in probs]
    return ''.join([idx_to_char.get(i, 'N') for i in sampled_indices])

def clean_dna_sequence(seq):
    """Drop every character of ``seq`` that is not an uppercase A, C, G or T."""
    only_bases = re.sub(r'[^ACGT]', '', seq)
    return only_bases

# === Generation Function ===
# Size of the latent space, read from the output shape of the autoencoder's
# "latent_vector" layer.
latent_dim = autoencoder.get_layer("latent_vector").output.shape[-1]

def run_promoter_generator(target_expr, steps=500, lr=0.05, temps=(0.4, 0.6, 0.8, 1.0)):
    """Optimize a latent vector towards ``target_expr`` and sample one sequence per temperature.

    A random latent vector is updated with Adam so that the expression model's
    prediction for it approaches ``target_expr`` (mean squared error). The
    decoder output for the optimized vector is then sampled once per entry of
    ``temps``. Sequences are returned unfiltered, so they may contain
    characters other than A/C/G/T (e.g. ``'N'``).

    Args:
        target_expr: Expression value the latent vector is optimized towards.
        steps: Number of gradient steps.
        lr: Adam learning rate.
        temps: Sampling temperatures; one result is produced per entry, in order.

    Returns:
        List of dicts with keys ``"temperature"``, ``"predicted_expression"``
        and ``"sequence"``. ``predicted_expression`` is the expression model's
        output for the optimized latent vector (rounded to 4 decimals), not a
        prediction for the sampled sequence, so it is the same in every dict.
    """
    z = tf.Variable(tf.random.normal([1, latent_dim]), trainable=True)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    for _ in range(steps):
        with tf.GradientTape() as tape:
            pred_expr = expression_model(z, training=False)
            loss = tf.reduce_mean(tf.square(pred_expr - target_expr))
        grads = tape.gradient(loss, [z])
        optimizer.apply_gradients(zip(grads, [z]))

    logits = decoder(z, training=False).numpy().squeeze()
    # The prediction depends only on z, so evaluate it once rather than once
    # per temperature.
    final_expr = round(float(expression_model(z, training=False).numpy().squeeze()), 4)

    results = []
    for temp in temps:
        seq = decode_sequence_with_temperature(logits, temperature=temp)
        results.append({
            "temperature": temp,
            "predicted_expression": final_expr,
            "sequence": seq,
        })

    return results






In [None]:
# Quick check of the generator: request a target expression of 70 and print
# every returned variant.
result = run_promoter_generator(70)
print("result:", result)
# Index 1 is the second entry of the returned list, i.e. the sample drawn at
# temperature 0.6 when the default `temps` are used.
print(result[1]["sequence"])

result: [{'temperature': 0.4, 'predicted_expression': 69.9947, 'sequence': 'ATCTTTCCACCCCACNAGTCNNGTNCGAAANCCACCGCCNACACTTGACCCCAACCCGGTAANNGCCNACGTCAACCACCAAAACAGCCNACACACCCATCANCATGCCTGNNAAAAACCACCACCATNACCCAAACCCGAANNCAANNANTNNCAATNTAAAAA'}, {'temperature': 0.6, 'predicted_expression': 69.9947, 'sequence': 'CNACGTNCGGAGCNCCNGCACCCNGCCCTTAAGATGCCNCCCAATNCANCACANACTANGCACTGGNNCNCCNTTCNACNNTCNCCNGACTNCACCGTGCTCNAAGCNCCACCATNCAANACCTCTCCGCCCTACGGGNCCGNCATANNNCNAGACANCCGGCNA'}, {'temperature': 0.8, 'predicted_expression': 69.9947, 'sequence': 'TCAATATGNTTCACCANNCACTCCTAAGCCATCATTGCCATAAGGNGCGNNGATCTGCTCATCCNAGCAAATCAACCNAGAAANAACCNAAGCNANACNAAATTACTACNNATCNNGGTNNGATAGAGNACANATGGCCAGCCCATGGCTGCCCTCAANAGTTNG'}, {'temperature': 1.0, 'predicted_expression': 69.9947, 'sequence': 'CTNACNTCANCGCCGCTCCCGCCNNCGNGTCGCGCGTGAATATACGACGNAANNANNTCTTGGGACATTACNGCCNTNCNCCGNAGCCGACCNAATTACTACTTTCCCGTAAACTCNAGNNCTGNCCGCANGAAAGCNTCCCTTNGACGNATGAGCGNTGTAAAT'}]
CNACGTNCGGAGCNCCNGCACCCNGCCCTTAAGATGCCNCCCAATNC

In [None]:
# ------------------ Imports ------------------
import os
from typing import TypedDict
from langchain_community.chat_models import BedrockChat as ChatBedrock
from langchain_core.prompts import ChatPromptTemplate
from langgraph.graph import StateGraph

# ------------------ AWS Credential Setup ------------------
import boto3
import json

# Replace with your actual credential logic
# `userdata` is imported here so this cell does not depend on an earlier cell
# having imported it.
from google.colab import userdata

access_key = userdata.get('aws_access_key_id')
secret_key = userdata.get('aws_secret_access_key')
if not access_key or not secret_key:
    # Fail with a readable message instead of a TypeError from os.environ or
    # an authentication error at request time.
    raise ValueError(
        "Colab secrets 'aws_access_key_id' and 'aws_secret_access_key' must both be set."
    )

os.environ["AWS_ACCESS_KEY_ID"] = access_key
os.environ["AWS_SECRET_ACCESS_KEY"] = secret_key
os.environ["AWS_REGION"] = "us-east-1"
# boto3 takes its default region from AWS_DEFAULT_REGION, so mirror the value.
os.environ["AWS_DEFAULT_REGION"] = os.environ["AWS_REGION"]

# ------------------ Claude via Bedrock ------------------

# Chat model used by every LLM-backed node defined in this cell.
claude = ChatBedrock(
    region_name="us-east-1",
    model_id="anthropic.claude-3-sonnet-20240229-v1:0",
)

# ------------------ State Schema ------------------

class GraphState(TypedDict):
    """State shared between the LangGraph nodes of the promoter agent.

    Each node returns a dict with the keys it wants to update. The ``summary``
    key is declared because ``assemble_report_node`` returns it; without the
    declaration the value would not be part of the final state
    (NOTE(review): confirm this for the installed LangGraph version).
    """
    user_query: str            # raw request text passed to workflow.invoke
    target_expression: float   # written by extract_expression_node
    promoter: str              # written by generate_promoter_node
    wet_lab: str               # written by suggest_wet_lab_node
    literature: str            # written by summarize_research_node
    report: str                # written by assemble_report_node
    summary: str               # written by assemble_report_node

# ------------------ LangGraph Nodes ------------------

# ✨ Node 1: Extract expression value from query
def extract_expression_node(state: GraphState) -> dict:
    """Ask Claude for the numeric expression value contained in the user query.

    Args:
        state: Graph state; only ``state["user_query"]`` is read.

    Returns:
        ``{"target_expression": <float>}``.

    Raises:
        ValueError: If the model reply contains no number.
    """
    import re  # local import: this cell does not import `re` itself

    prompt = ChatPromptTemplate.from_template(
        "Extract the numeric expression value from this query: {user_query}. Only return the number."
    )

    def _to_target_expression(message) -> dict:
        # Turn the model reply into the state update for this node.
        text = message.content.strip()
        try:
            value = float(text)
        except ValueError:
            # The model does not always return a bare number (e.g. "40." at the
            # end of a sentence or "The value is 40"); fall back to the first
            # numeric token in the reply.
            match = re.search(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?", text)
            if match is None:
                raise ValueError(
                    f"Could not find a numeric expression value in model reply: {text!r}"
                )
            value = float(match.group())
        return {"target_expression": value}

    chain = prompt | claude | _to_target_expression
    return chain.invoke({"user_query": state["user_query"]})


# ✨ Node 2: Use your own model to generate promoter
def generate_promoter_node(state: GraphState) -> dict:
    """Generate promoter variants for the target expression and pick one.

    Calls ``run_promoter_generator`` with ``state["target_expression"]`` and
    returns the sequence of the second entry of its result list (the sample
    drawn at temperature 0.6 with the generator's default temperatures).

    Returns:
        ``{"promoter": <sequence string>}``.
    """
    variants = run_promoter_generator(state["target_expression"])
    chosen_variant = variants[1]
    return {"promoter": chosen_variant["sequence"]}


# ✨ Node 3: Suggest wet lab experiment
def suggest_wet_lab_node(state: GraphState) -> dict:
    """Ask Claude for a wet-lab protocol that tests the generated promoter.

    Reads ``state["user_query"]`` and ``state["promoter"]`` and returns
    ``{"wet_lab": <stripped model reply>}``.
    """
    template_text = """You are a molecular biologist designing an experiment to validate the function of a synthetic promoter in *E. coli*.

The user has requested the following:
"{query}"

The promoter sequence is: {promoter}

Design a wet lab experiment to test its activity. Include:
- The experimental method (e.g., reporter assay, transformation protocol)
- The plasmid or vector system (if relevant)
- The host strain of *E. coli* to use
- What will be measured (e.g., fluorescence, enzymatic activity, mRNA level)
- Controls to include for comparison

Incorporate any specific requirements mentioned by the user.
Provide a clear and concise protocol outline."""

    def _as_state_update(message):
        return {"wet_lab": message.content.strip()}

    wet_lab_chain = ChatPromptTemplate.from_template(template_text) | claude | _as_state_update
    template_inputs = {
        "query": state["user_query"],
        "promoter": state["promoter"],
    }
    return wet_lab_chain.invoke(template_inputs)

# ✨ Node 4: Summarize research literature
def summarize_research_node(state: GraphState) -> dict:
    """Ask Claude for a literature summary related to the generated promoter.

    Builds a request text from ``state["user_query"]`` and
    ``state["promoter"]``, wraps it in a research-assistant prompt and returns
    ``{"literature": <stripped model reply>}``.
    """
    # Request text that is inserted into the prompt's {query} slot.
    research_request = f"""
The user is interested in this query: "{state['user_query']}"

Find and summarize recent research related to the synthetic promoter sequence:
"{state['promoter']}" in *E. coli*.

Focus on studies in:
- Synthetic biology
- Gene expression tuning
- Genetic circuit design

Especially highlight anything that aligns with the user's request.

Provide:
- A concise summary of relevant findings
- Bullet points for important insights (if applicable)
- Direct links to peer-reviewed publications, preprints (e.g., PubMed, bioRxiv, Nature, NAR, etc.)
- Mention if the exact promoter has been characterized or modified
"""
    assistant_prompt = ChatPromptTemplate.from_template(
        "You are a scientific research assistant helping summarize the literature.\n\n"
        "{query}\n\n"
        "Return your answer with:\n"
        "- A brief summary of the key research findings\n"
        "- Bullet points for important insights (if applicable)\n"
        "- Direct links to relevant publications (if available)\n"
        "- Focus on *E. coli* and synthetic promoter usage"
    )

    def _as_state_update(message):
        return {"literature": message.content.strip()}

    literature_chain = assistant_prompt | claude | _as_state_update
    return literature_chain.invoke({"query": research_request})

def assemble_report_node(state: GraphState) -> dict:
    """Assemble the final report and have Claude produce a reviewed version.

    Reads ``target_expression``, ``promoter``, ``wet_lab`` and ``literature``
    from ``state``.

    Returns:
        Dict with ``"report"`` (the assembled text, stripped) and ``"summary"``
        (Claude's revised version of that report).
    """
    # Step 1: Build the full report
    full_report = f"""
🧬 **Synthetic Promoter Generation Report**

🔹 **Target Expression:** {state['target_expression']}

🧬 **Generated Promoter Sequence:**
{state['promoter']}

🧪 **Wet Lab Experiment Suggestion:**
{state['wet_lab']}

📚 **Research Summary:**
{state['literature']}
"""

    # Step 2: Have Claude review the report.
    # The template must contain the {report} placeholder; without it the report
    # passed to invoke() is never inserted into the prompt and the model is
    # asked to review a text it cannot see.
    prompt = ChatPromptTemplate.from_template(
        """You are a synthetic biology expert reviewing this project report.

Please do the following:
- Carefully read the report.
- Identify and correct any factual, scientific, or formatting errors.
- Think critically about the experimental design and relevance of the research.
- Improve clarity where needed.
- Return a revised version that is suitable for inclusion in a scientific update or proposal.

Report:
{report}"""
    )
    chain = prompt | claude | (lambda x: {"summary": x.content.strip()})
    result = chain.invoke({"report": full_report})

    # Step 3: Return both the report and the summary
    return {
        "report": full_report.strip(),
        "summary": result["summary"]
    }


# ------------------ LangGraph Definition ------------------

# Build the agent as a linear LangGraph pipeline over GraphState.
graph = StateGraph(GraphState)

# Register one node per processing step; the string is the node name used in
# the edges below.
graph.add_node("extract", extract_expression_node)
graph.add_node("generate", generate_promoter_node)
graph.add_node("experiment", suggest_wet_lab_node)
graph.add_node("summarize", summarize_research_node)
graph.add_node("assemble_report", assemble_report_node)

# Wire the nodes in sequence:
# extract -> generate -> experiment -> summarize -> assemble_report.
graph.set_entry_point("extract")
graph.add_edge("extract", "generate")
graph.add_edge("generate", "experiment")
graph.add_edge("experiment", "summarize")
graph.add_edge("summarize", "assemble_report")
graph.set_finish_point("assemble_report")

# Compiled graph; `workflow.invoke(...)` runs the whole pipeline.
workflow = graph.compile()

# ------------------ Run the Agent ------------------

# Example request; the numeric target is extracted from this text by the
# "extract" node.
user_query = "Develop a synthetic promoter with 40 expression value"

# Only `user_query` is supplied; the remaining state keys are filled in by the
# nodes as the graph runs.
result = workflow.invoke({"user_query": user_query})

# Print the report assembled by the last node.
print(result["report"])


🧬 **Synthetic Promoter Generation Report**

🔹 **Target Expression:** 40.0

🧬 **Generated Promoter Sequence:**
NACCNCCCATCAANCGCNNATCTGGGCAAATAGATCNNCACCGGTCAAGCNGNTGAANCANCAGGACAACACNTAAAANCNNNCNAGGAANCGTTANCAAGTCCCGCNANTCGCCCATACTNNCAGAACATGTTTATCACAGANCNTAACNCCAAAANCGGNGAA

🧪 **Wet Lab Experiment Suggestion:**
To validate the function of the synthetic promoter sequence with the requested expression value of 40, we can design a reporter assay using a fluorescent protein as the reporter gene. Here is an outline of the experimental protocol:

Experimental Method: Fluorescent Protein Reporter Assay

Plasmid/Vector System:
- Use a low-copy plasmid vector compatible with E. coli, such as pBR322 or pUC19.
- Clone the synthetic promoter sequence upstream of a fluorescent reporter gene, such as GFP or RFP.

Host Strain:
- Use a common E. coli strain suitable for cloning and expression studies, such as DH5α or BL21(DE3).

Measurement:
- Measure the fluorescence intensity of the reporter protei

In [None]:
import gradio as gr
import tempfile
import zipfile
import os

def run_agent(user_query):
    """Run the promoter workflow for ``user_query`` and package the report.

    Args:
        user_query: Free-text request passed to ``workflow.invoke``.

    Returns:
        Tuple ``(report_text, zip_path)``. ``zip_path`` points to a ZIP archive
        in the temp directory containing the report as
        ``synthetic_promoter_report.txt``. On any error the tuple is
        ``("Error: <message>", None)`` so the message can be shown in the UI.
    """
    try:
        result = workflow.invoke({"user_query": user_query})
        report = result.get("report", "No report generated.")

        # Create a temp text file. The report contains emoji and other
        # non-ASCII characters, so write it as UTF-8 instead of relying on the
        # platform's default encoding.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".txt", encoding="utf-8"
        ) as tmp_txt:
            tmp_txt.write(report)
            txt_path = tmp_txt.name

        # Zip the text file to force download. Only the extension is swapped;
        # str.replace would also touch a ".txt" elsewhere in the path.
        zip_path = os.path.splitext(txt_path)[0] + ".zip"
        try:
            with zipfile.ZipFile(zip_path, 'w') as zipf:
                zipf.write(txt_path, arcname="synthetic_promoter_report.txt")
        finally:
            # The archive holds its own copy; do not leave the intermediate
            # text file behind in the temp directory.
            os.remove(txt_path)

        return report, zip_path

    except Exception as e:
        # Deliberately broad: the message is surfaced in the report textbox.
        return f"Error: {e}", None

# Gradio front end: one text input, and two outputs matching the
# (report, zip_path) tuple returned by run_agent.
iface = gr.Interface(
    fn=run_agent,
    inputs=gr.Textbox(
        label="Enter your promoter query or just enter the expression value",
        placeholder="e.g., Develop a synthetic promoter with 40 expression value",
    ),
    outputs=[
        gr.Textbox(label="Agent Report"),
        gr.File(label="Download Report (ZIP)"),
    ],
    title="Synthetic Promoter Agent",
    description="This agent designs synthetic promoters and provides a report using a multi-step LLM pipeline.",
)

# share=True requests a public gradio.live URL; anyone with the link can
# trigger the workflow, which runs with this notebook's AWS credentials.
iface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://b9f2256e8096cfa512.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


