In [None]:
import os
import requests
from datetime import datetime
import subprocess
import yaml
import glob
import tarfile
import time
import re
from pdbfixer import PDBFixer
from openmm.app import PDBFile
from typing import Literal

import os
import subprocess
from pathlib import Path


# Directory that subprocesses are run in; also where .tgz archives are looked up
TEST_RUN_DIR = "/home/ach/Documents/auto_cgui/auto_cgui_master"

# Notebook paths, expressed relative to TEST_RUN_DIR
MEMBRANE_NOTEBOOK = f"{TEST_RUN_DIR}/run_membrane.ipynb"
SOLUTION_NOTEBOOK = f"{TEST_RUN_DIR}/run_solution.ipynb"


def get_latest_tgz(directory):
    """Return the path of the most recently modified ``.tgz`` file in *directory*.

    Args:
        directory: Directory that is searched (non-recursively) for ``*.tgz``.

    Returns:
        The matching path with the largest modification time.

    Raises:
        FileNotFoundError: If *directory* contains no ``.tgz`` files.
    """
    archives = glob.glob(os.path.join(directory, "*.tgz"))
    if archives:
        return max(archives, key=os.path.getmtime)
    raise FileNotFoundError("No .tgz files found in the directory.")


def extract_tgz(tgz_path, extract_dir):
    """Extract the gzip-compressed tar archive *tgz_path* into *extract_dir*.

    When the running interpreter supports tarfile extraction filters, the
    ``"data"`` filter is applied so that archive members which would land
    outside *extract_dir* (absolute paths, ``..`` components, escaping links)
    are rejected instead of being written.

    Args:
        tgz_path: Path to the ``.tgz`` archive to unpack.
        extract_dir: Directory the archive members are extracted into.

    Raises:
        tarfile.TarError: If the archive cannot be read, or (with filter
            support) a member is rejected by the ``"data"`` filter.
    """
    with tarfile.open(tgz_path, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=extract_dir, filter="data")
        else:
            # Interpreter predates extraction filters: keep the legacy call.
            tar.extractall(path=extract_dir)




def run_solution_simulation():
    """Unpack the newest .tgz in TEST_RUN_DIR, then run the solution simulation and its analyses.

    Steps, in order: extract the most recently modified ``.tgz`` found in
    ``TEST_RUN_DIR`` into that same directory, locate the working directory
    via ``run_solution.search_dir``, clean it, run the simulation, and run the
    RMSD/RMSF, SASA, radius-of-gyration and hydrogen-bond analyses.

    Raises:
        FileNotFoundError: If ``TEST_RUN_DIR`` contains no ``.tgz`` archive.
    """
    import sys

    # Make run_solution importable without growing sys.path on every call.
    if TEST_RUN_DIR not in sys.path:
        sys.path.append(TEST_RUN_DIR)

    from run_solution import (
        search_dir,
        clean_file,
        run_simulation,
        rmsd_rmsf_solution_case,
        sasa_solution_case,
        radius_of_gyration_solution_case,
        hydrogen_bonds_solution_case,
    )

    tgz_path = get_latest_tgz(TEST_RUN_DIR)
    extract_tgz(tgz_path, TEST_RUN_DIR)

    base_dir = search_dir()
    clean_file(base_dir)
    run_simulation(base_dir)

    # Post-processing analyses all operate on the same base directory.
    rmsd_rmsf_solution_case(base_dir)
    sasa_solution_case(base_dir)
    radius_of_gyration_solution_case(base_dir)
    hydrogen_bonds_solution_case(base_dir)

    


def run_membrane_simulation():
    """Unpack the newest .tgz in TEST_RUN_DIR, then run the membrane simulation and its analyses.

    Steps, in order: extract the most recently modified ``.tgz`` found in
    ``TEST_RUN_DIR`` into that same directory, locate the working directory
    via ``run_membrane.search_dir``, clean it, run the simulation, and run the
    RMSD/RMSF, SASA, radius-of-gyration and hydrogen-bond analyses.

    Raises:
        FileNotFoundError: If ``TEST_RUN_DIR`` contains no ``.tgz`` archive.
    """
    import sys

    # Make run_membrane importable without growing sys.path on every call.
    if TEST_RUN_DIR not in sys.path:
        sys.path.append(TEST_RUN_DIR)

    # NOTE(review): the analysis helpers carry "_solution_case" names even
    # though they come from run_membrane — confirm these are the intended ones.
    from run_membrane import (
        search_dir,
        clean_file,
        run_simulation,
        rmsd_rmsf_solution_case,
        sasa_solution_case,
        radius_of_gyration_solution_case,
        hydrogen_bonds_solution_case,
    )

    tgz_path = get_latest_tgz(TEST_RUN_DIR)
    extract_tgz(tgz_path, TEST_RUN_DIR)

    base_dir = search_dir()
    clean_file(base_dir)
    run_simulation(base_dir)

    # Post-processing analyses all operate on the same base directory.
    rmsd_rmsf_solution_case(base_dir)
    sasa_solution_case(base_dir)
    radius_of_gyration_solution_case(base_dir)
    hydrogen_bonds_solution_case(base_dir)
    

def get_latest_yml_file():
    """Return the resolved path of the newest ``.yml`` file in the current directory.

    "Newest" means the largest modification time among the ``*.yml`` entries
    directly inside the current working directory.

    Raises:
        FileNotFoundError: If the current directory holds no ``.yml`` file.
    """
    newest = max(
        Path('.').glob('*.yml'),
        key=lambda entry: entry.stat().st_mtime,
        default=None,
    )
    if newest is None:
        raise FileNotFoundError("No .yml files found in the current directory.")
    return newest.resolve()

def charmm_gui_solution():
    """Run the test with -m solution and the latest .yml file and wait for the tool to finish executing.

    The newest ``.yml`` file in the current directory is passed (without its
    suffix) to ``run_tests.py -m solution -t ...``, launched from
    ``TEST_RUN_DIR`` inside a new gnome-terminal window. The launched process
    is not monitored: this function sleeps for a fixed 300 seconds and then
    returns.

    Returns:
        str: The fixed message ``"CHARMM-GUI solution builder completed"``.

    Raises:
        FileNotFoundError: If no ``.yml`` file exists in the current directory.
        ValueError: If the selected file path does not end with ``.yml``.
    """
    import shlex

    yml_path_str = str(get_latest_yml_file())

    # Remove '.yml' suffix
    if not yml_path_str.endswith('.yml'):
        raise ValueError("The latest file does not end with .yml")
    yml_path_no_suffix = yml_path_str.removesuffix('.yml')

    # Quote both paths so spaces or shell metacharacters cannot break
    # (or inject into) the command line handed to bash.
    command = (
        f"cd {shlex.quote(TEST_RUN_DIR)} && "
        f"python3 run_tests.py -m solution -t {shlex.quote(yml_path_no_suffix)}"
    )

    # "exec bash" keeps the terminal window open after the command ends.
    subprocess.run([
        "gnome-terminal", "--", "bash", "-c", f"{command}; exec bash"
    ])
    time.sleep(300)
    return "CHARMM-GUI solution builder completed"


def charmm_gui_membrane():
    """Run the test with -m bilayer and the latest .yml file and wait for the tool to finish executing.

    The newest ``.yml`` file in the current directory is passed (without its
    suffix) to ``run_tests.py -m bilayer -t ...``, launched from
    ``TEST_RUN_DIR`` inside a new gnome-terminal window. The launched process
    is not monitored: this function sleeps for a fixed 300 seconds and then
    returns.

    Returns:
        str: The fixed message ``"CHARMM-GUI membrane builder completed"``.

    Raises:
        FileNotFoundError: If no ``.yml`` file exists in the current directory.
        ValueError: If the selected file path does not end with ``.yml``.
    """
    import shlex

    yml_path_str = str(get_latest_yml_file())

    # Remove '.yml' suffix
    if not yml_path_str.endswith('.yml'):
        raise ValueError("The latest file does not end with .yml")
    yml_path_no_suffix = yml_path_str.removesuffix('.yml')

    # Quote both paths so spaces or shell metacharacters cannot break
    # (or inject into) the command line handed to bash.
    command = (
        f"cd {shlex.quote(TEST_RUN_DIR)} && "
        f"python3 run_tests.py -m bilayer -t {shlex.quote(yml_path_no_suffix)}"
    )

    # "exec bash" keeps the terminal window open after the command ends.
    subprocess.run([
        "gnome-terminal", "--", "bash", "-c", f"{command}; exec bash"
    ])
    time.sleep(300)

    return "CHARMM-GUI membrane builder completed"



def fix_and_clean_pdb(input_file, ph=7.0):
    """Repair a PDB file with PDBFixer and overwrite it in place.

    The PDBFixer steps run in this order: find missing residues, find and
    replace nonstandard residues, remove heterogens (water included), find
    and add missing atoms, add missing hydrogens at the given pH. The result
    is written back to *input_file*.

    Args:
        input_file: Path of the PDB file to load and overwrite.
        ph: pH passed to ``addMissingHydrogens``.
    """
    structure = PDBFixer(filename=input_file)
    print(f"Loaded PDB file: {input_file}")

    # Each entry pairs a progress message with the PDBFixer calls it announces;
    # the tuple order is the execution order.
    pipeline = (
        ("Finding missing residues...",
         (structure.findMissingResidues,)),
        ("Finding nonstandard residues...",
         (structure.findNonstandardResidues, structure.replaceNonstandardResidues)),
        ("Removing heterogens (ligands, ions, water)...",
         (lambda: structure.removeHeterogens(keepWater=False),)),
        ("Finding missing atoms...",
         (structure.findMissingAtoms, structure.addMissingAtoms)),
        (f"Adding missing hydrogens at pH {ph}...",
         (lambda: structure.addMissingHydrogens(pH=ph),)),
    )
    for announcement, actions in pipeline:
        print(announcement)
        for action in actions:
            action()

    print(f"Overwriting fixed structure to: {input_file}")
    with open(input_file, 'w') as destination:
        PDBFile.writeFile(structure.topology, structure.positions, destination)

    print("✅ Done!")

def download_pdb(pdb_id: str):
    """
    Downloads a PDB file from RCSB given its ID and saves it to the local pdb/basic directory.

    If a file for this ID already exists it is left untouched (it is neither
    downloaded again nor re-fixed). A freshly downloaded file is passed
    through fix_and_clean_pdb. The outcome is recorded in the module-level
    variable ``saved_pdb_path``: the file path on success, ``None`` when the
    server answers with a non-200 status.

    Args:
        pdb_id (str): The 4-character PDB ID (e.g., '1A2B').

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    global saved_pdb_path
    pdb_id = pdb_id.lower()

    # Define the target directory
    pdb_dir = "/home/ach/Documents/auto_cgui/auto_cgui_master/files/pdb/basic"
    pdb_filename = f"{pdb_id}.pdb"
    pdb_filepath = os.path.join(pdb_dir, pdb_filename)

    # Create directory if it doesn't exist
    os.makedirs(pdb_dir, exist_ok=True)

    # Check if file already exists
    if os.path.exists(pdb_filepath):
        print(f"{pdb_filename} already exists at {pdb_dir}")
        saved_pdb_path = pdb_filepath
        return

    # Download the file. Without a timeout requests waits indefinitely on an
    # unresponsive server, which would stall the calling agent.
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    response = requests.get(url, timeout=30)

    if response.status_code == 200:
        with open(pdb_filepath, "w") as f:
            f.write(response.text)
        print(f"Downloaded {pdb_filename} successfully to {pdb_dir}")
        saved_pdb_path = pdb_filepath

        # Fix and clean the newly downloaded PDB
        fix_and_clean_pdb(pdb_filepath)
    else:
        print(f"Failed to download {pdb_filename}. Status code: {response.status_code}")
        saved_pdb_path = None





def run_and_save_yml(query1: str, solution_or_bilayer: str):
    """Send a query to the RAG query engine and save the YAML block from its answer.

    The raw answer is written to ``claude_output.txt``; the first fenced
    ``yaml``/``yml`` code block found in it is written to
    ``<solution_or_bilayer>.yml`` in the same directory. If the answer has no
    such block, nothing is saved and a message is printed.

    Args:
        query1: Natural-language request describing the configuration file.
        solution_or_bilayer: Base name of the YAML file to write (the prompts
            in this notebook use "solution" or "bilayer").
    """
    output_file = "/home/ach/Documents/auto_cgui/new_rag/claude_output.txt"
    output_yml = f"/home/ach/Documents/auto_cgui/new_rag/{solution_or_bilayer}.yml"

    # Remove the old output file if it exists (no shell needed for this)
    try:
        os.remove(output_file)
    except FileNotFoundError:
        pass

    # The formatting hint has to be part of the query *before* it is sent;
    # the leading space keeps it from fusing with the caller's last word.
    query1 += " only use the field names in the context and do not forget the hyphen in front of the word label"

    response = query_engine.query(query1)
    output = response.response

    # Keep the raw answer on disk for inspection
    with open(output_file, 'w') as f:
        f.write(output)

    print("Written string to output file.")
    print("Captured output:\n", output)

    # Extract YAML content using regex; accept ```yaml and ```yml fences in any case
    match = re.search(r'```ya?ml(.*?)```', output, re.DOTALL | re.IGNORECASE)
    if match:
        yaml_content = match.group(1).strip()

        # Save the extracted YAML content
        with open(output_yml, 'w') as out_file:
            out_file.write(yaml_content)

        time.sleep(5)
        print(f"YAML content successfully saved to: {output_yml}")
    else:
        print("YAML block not found in the input file.")


def clean_yml():
    """Wrap every YAML file in the current directory into a single list item.

    For each ``*.yml`` / ``*.yaml`` file whose first line does not already
    start with ``- `` (after leading whitespace) or with two spaces, the first
    line gets a ``- `` prefix and every following line a two-space prefix.
    The file is rewritten in place without a trailing newline. Empty files
    are left untouched.

    Raises:
        FileNotFoundError: If the current directory has no YAML files.
    """
    candidates = [
        name
        for pattern in ("*.yml", "*.yaml")
        for name in glob.glob(pattern)
    ]
    if len(candidates) == 0:
        raise FileNotFoundError("No YAML files found in the current directory.")

    for path in candidates:
        with open(path, 'r') as handle:
            content = handle.readlines()

        # Nothing to reformat in an empty file
        if not content:
            continue

        head = content[0]
        already_formatted = head.lstrip().startswith('- ') or head.startswith('  ')
        if already_formatted:
            print(f"Skipped already formatted: {path}")
            continue

        # '- ' goes on the first line, '  ' on every other line
        rebuilt = []
        for position, raw in enumerate(content):
            prefix = '- ' if position == 0 else '  '
            rebuilt.append(prefix + raw.rstrip('\n'))

        # Overwrite the original file
        with open(path, 'w') as handle:
            handle.write('\n'.join(rebuilt))

        print(f"Formatted and saved: {path}")

In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.core.agent.workflow import ReActAgent
# from llama_index.llms.openai import OpenAI
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.workflow import Context
import asyncio
import os

# Read the Google API key from the environment (None if the variable is unset)
api_ky = os.getenv("GOOGLE_API_KEY")

# Chat model shared through Settings.llm below
llm = GoogleGenAI(
    model="gemini-2.0-flash",
    api_key=api_ky,  # uses GOOGLE_API_KEY env var by default
)


# Embedding model used when the index is built
# NOTE(review): no api_key is passed here, unlike `llm` above — presumably it
# falls back to the GOOGLE_API_KEY environment variable; confirm.
embed_model = GoogleGenAIEmbedding(
    model_name="text-embedding-004",
    embed_batch_size=100,
)

# Register both models as global defaults before the index is created
Settings.embed_model = embed_model
Settings.llm = llm

# Build the RAG query engine with LlamaIndex from the contents of "data";
# `query_engine` is the module-level name that run_and_save_yml() calls.
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()


In [None]:

from llama_index.core.agent.workflow import AgentStream, ToolCallResult
# Create a ReAct agent with the full tool set: YAML generation, PDB download,
# YAML clean-up, both CHARMM-GUI builders and both simulation runners
agent = ReActAgent(
    # Tools are plain Python functions defined earlier in this notebook
    tools=[run_and_save_yml, download_pdb, clean_yml, charmm_gui_membrane, charmm_gui_solution, run_solution_simulation, run_membrane_simulation],
    # NOTE(review): this creates a second GoogleGenAI client without the
    # explicit api_key given to the module-level `llm` — confirm it picks up
    # GOOGLE_API_KEY from the environment.
    llm = GoogleGenAI(
    model="gemini-2.0-flash",
    ),

    system_prompt=""" You are an agent that runs tools given to you. these tools aid in performing simulations down the line. 
    wait for each tool to return a signal of completion before proceeding with the next tool""",
)

# Create a context to store the conversation history/session state
ctx = Context(agent)

async def main():
    """Run the full 1UBQ solution pipeline prompt through the agent and stream its output.

    The prompt asks the agent to download the PDB file, generate and clean
    the YAML file, run the CHARMM-GUI builder and then the simulation.
    Streamed text deltas are printed as they arrive.

    NOTE(review): a second ``async def main()`` later in this cell rebinds the
    name, so the ``await main()`` at the bottom of the cell does not run this
    version.
    """
    prompt = """pass on this query after downloading the pdb file: Generate a YAML-formatted configuration file for a molecular dynamics system labeled 1UBQ solution system. 
                        The system uses the protein 1UBQ, sourced from the file 1ubq.pdb, and is prepared in explicit solvent with periodic boundary conditions. 
                        The simulation will be conducted using NAMD with hydrogen mass repartitioning enabled, at a temperature of 300 K. 
                        Use the ion type KCl at a concentration of 0.15 M. This setup corresponds to a solution case type. 
                        Ensure the PDB orientation is not adjusted. 
                        After generating and cleaning the YAML file, run the simulation and perform post-processing analysis.
you may pass only one of two cases to case_type: solution or bilayer

clean the yml file after generation
Then run the relevant charmm gui builder for it 
Then run the relevant simulation for it                        

"""
    run_handle = agent.run(prompt, ctx=ctx)

    # Only AgentStream events are printed; every other event type is skipped.
    async for event in run_handle.stream_events():
        if not isinstance(event, AgentStream):
            continue
        print(f"{event.delta}", end="", flush=True)

    # Wait for the run itself to finish once the event stream is exhausted
    final_response = await run_handle


async def main():
    """Ask the agent to run the solution-case simulation and stream its output.

    Reuses the shared context ``ctx``; streamed text deltas are printed as
    they arrive.
    """
    prompt = """   run the simulation for the solution case      

"""
    run_handle = agent.run(prompt, ctx=ctx)

    # Only AgentStream events are printed; every other event type is skipped.
    async for event in run_handle.stream_events():
        if not isinstance(event, AgentStream):
            continue
        print(f"{event.delta}", end="", flush=True)

    # Wait for the run itself to finish once the event stream is exhausted
    final_response = await run_handle



# Run the agent.
# NOTE: top-level `await` only works inside an IPython/Jupyter cell; in a
# plain .py script this line is a SyntaxError ('await' outside function).
if __name__ == "__main__":
    await main()

In [None]:
from llama_index.core.agent.workflow import AgentStream, ToolCallResult
# Rebuild the agent with a reduced tool set: YAML generation, PDB download,
# YAML clean-up and the two CHARMM-GUI builders (no simulation runners).
# This rebinds the module-level name `agent`.
agent = ReActAgent(
    tools=[run_and_save_yml, download_pdb, clean_yml, charmm_gui_membrane, charmm_gui_solution],
    # NOTE(review): no api_key is passed to this GoogleGenAI client —
    # presumably it relies on the GOOGLE_API_KEY environment variable; confirm.
    llm = GoogleGenAI(
    model="gemini-2.0-flash",
    ),

    system_prompt=""" You are an agent that runs tools given to you. these tools aid in performing simulations down the line.""",
)


# Ask the agent to generate, clean and build the 1L2Y solution system
async def main():
    """Run the 1L2Y solution prompt through the agent and stream its output.

    The prompt asks the agent to generate the YAML file, clean it and run the
    matching CHARMM-GUI builder. Streamed text deltas are printed as they
    arrive. No context object is passed, so this run does not share state
    with earlier runs.
    """
    # agent.run() must NOT be awaited here: the returned handler is what
    # exposes stream_events(). Awaiting it first would block until the run is
    # finished and hand back the final result instead of the handler.
    handler = agent.run(
        """pass on this query: Generate a YAML-formatted configuration file for a 
        molecular dynamics system labeled 1L2Y solution system. 
        The protein used is 1L2Y, sourced from the file 1l2y.pdb. 
        The system is solvated explicitly and periodic boundary conditions are enabled. 
        NAMD is the simulation engine, with hydrogen mass repartitioning active. 
        The simulation will be run at 300 K and categorized under the solution case type. 
        Do not apply orientation from the PDB. Once the YAML file is generated and cleaned, run the simulation and execute post-processing routines.
you may pass only one of two cases to case_type: solution or bilayer

clean the yml file after generation
Then run the relevant charmm gui builder for it 

"""
    )

    # Only AgentStream events are printed; every other event type is skipped.
    async for ev in handler.stream_events():
        if isinstance(ev, AgentStream):
            print(f"{ev.delta}", end="", flush=True)

    # Wait for the run itself to finish once the event stream is exhausted
    response = await handler



# Run the agent.
# NOTE: top-level `await` only works inside an IPython/Jupyter cell; in a
# plain .py script this line is a SyntaxError ('await' outside function).
if __name__ == "__main__":
    await main()