# Documentation Agent

This notebook demonstrates a documentation agent that combines:
1. Video-to-protocol conversion using Vertex AI
2. Knowledge from documents and pictures that are loaded into a context cache

In [1]:
# %pip install google-cloud-storage
# %pip install --upgrade --user --quiet google-cloud-aiplatform

In [2]:
# %load_ext autoreload
%reload_ext autoreload
%autoreload 2

import os
import sys
from pathlib import Path

import configparser
from IPython.display import Markdown

# Put the sibling "proteomics_specialist" directory on sys.path so the
# project helper module below can be imported from this notebook.
path_to_append = Path.cwd().parent / "proteomics_specialist"
sys.path.append(str(path_to_append))
import videoToProtocol as video_to_protocol

# Parse the local secrets file; as the cell's last expression, the list of
# files that were actually read is displayed.
config = configparser.ConfigParser()
config.read("../secrets.ini")

['../secrets.ini']

In [3]:
import configparser
import vertexai

# Load the project settings from the local secrets file.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it did read, so fail early with a clear message instead of a bare
# KeyError on the PROJECT_ID lookup below.
config = configparser.ConfigParser()
if not config.read("../secrets.ini"):
    raise FileNotFoundError(
        "Could not read ../secrets.ini; it must define PROJECT_ID in the [DEFAULT] section."
    )

PROJECT_ID = config["DEFAULT"]["PROJECT_ID"]
vertexai.init(project=PROJECT_ID, location="europe-west9")  # europe-west9 is Paris

In [4]:
import os
from google.cloud import storage

# Expose the project id through the environment before the client is created.
os.environ["GOOGLE_CLOUD_PROJECT"] = config["DEFAULT"]["PROJECT_ID"]

# Cloud Storage client and a handle to the bucket used for all uploads
bucket_name = "mannlab_videos"
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)

In [5]:
import datetime

from vertexai.generative_models import Part
from vertexai.preview import caching
from vertexai.preview.generative_models import GenerativeModel

# Model the context cache is created for (passed as model_id when the cache is built).
# NOTE(review): this is a pinned model version — confirm it is still available
# in the configured region before re-running.
MODEL_ID = "gemini-1.5-pro-001"

# Following: https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/context-caching/intro_context_caching_vertex_ai_sdk.ipynb

In [6]:
# Upload knowledge files to Google Cloud Storage
folder_path = '/Users/patriciaskowronek/Documents/documentation_agent_few_shot_examples/knowledge_base'
subfolder_in_bucket = "knowledge"

# Extensions picked up from the knowledge folder. '.png' is included because
# the caching step further down maps it to a MIME type.
# NOTE(review): '.gif', '.bmp', '.tiff' and '.tif' are uploaded here but have
# no MIME mapping in the caching step, so they are skipped there — confirm
# whether they should still be uploaded.
knowledge_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.pdf')

knowledge_uris = []
# os.listdir returns entries in arbitrary order; sorting keeps the upload order
# (and therefore the order of the cached contents) reproducible between runs.
for filename in sorted(os.listdir(folder_path)):
    if not filename.lower().endswith(knowledge_extensions):
        continue
    path = os.path.join(folder_path, filename)
    try:
        file_uri = video_to_protocol.upload_video_to_gcs(path, bucket, subfolder_in_bucket)
        knowledge_uris.append(file_uri)
    except Exception as e:
        # Best effort: report the failure and continue with the remaining files.
        print(f"Error processing {filename}: {e}")

Uploaded to: gs://mannlab_videos/knowledge/07a_Getting Started with NanoElute_Revision_F.pdf
Uploaded to: gs://mannlab_videos/knowledge/Evosep-One-User-Guide-v18.pdf
Uploaded to: gs://mannlab_videos/knowledge/01b(i)_Intro Trapped Ion Mobility_Pro2, fleX, HT_Revision_G.pdf
Uploaded to: gs://mannlab_videos/knowledge/02a_Introduction to Electrospray Ionization_Revision_F.pdf
Uploaded to: gs://mannlab_videos/knowledge/MSD-Queue and measure samples in HyStar-130225-101612.pdf
Uploaded to: gs://mannlab_videos/knowledge/41596_2024_1104_Fig6_ESM.jpg
Uploaded to: gs://mannlab_videos/knowledge/03a_TIMS-TOF Calibration with ESI Sources_Revision_F.pdf
Uploaded to: gs://mannlab_videos/knowledge/07h_diagonal-PASEF_User_Guide_timsControl_Revision_A.pdf
Uploaded to: gs://mannlab_videos/knowledge/41596_2024_1104_Fig7_ESM.jpg
Uploaded to: gs://mannlab_videos/knowledge/MSD-Troubleshooting- Refilling Tuning Mix in the timsTOF Ultra UltraSource-130225-101559.pdf
Uploaded to: gs://mannlab_videos/knowledge/0

In [13]:
# Create cache with Vertex AI
import os
from collections import defaultdict

# Supported knowledge-file types: lower-cased extension (with leading dot)
# mapped to the MIME type handed to Part.from_uri. Files with any other
# extension are skipped when the cache contents are assembled.
MIME_TYPES = {
    ".pdf": "application/pdf",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
}

def create_cached_content(knowledge_uris, bucket_name, subfolder_in_bucket, model_id, ttl_minutes=60):
    """Create a Vertex AI context cache from the uploaded knowledge files.

    Each URI whose extension appears in ``MIME_TYPES`` is wrapped in a ``Part``
    and added to the cache contents; every other URI is reported and skipped.
    A per-extension summary is printed before the cache is created.

    Args:
        knowledge_uris: Iterable of file URIs (e.g. ``gs://...``) to cache.
            ``None`` is treated like an empty list.
        bucket_name: Unused; kept so existing callers keep working.
        subfolder_in_bucket: Unused; kept so existing callers keep working.
        model_id: Model name the cached content is created for.
        ttl_minutes: Lifetime of the cache in minutes (default 60).

    Returns:
        The object returned by ``caching.CachedContent.create``, or ``None``
        when no URI had a supported extension.
    """
    contents = []
    file_counts = defaultdict(int)

    for file_path in knowledge_uris or []:
        filename = os.path.basename(file_path)
        file_ext = os.path.splitext(filename)[1].lower()

        mime_type = MIME_TYPES.get(file_ext)
        if mime_type is None:
            print(f"Skipping unsupported file: {filename}")
            continue

        try:
            contents.append(Part.from_uri(file_path, mime_type=mime_type))
            file_counts[file_ext] += 1
        except Exception as e:
            # Best effort: one bad URI should not prevent caching the rest.
            print(f"Error creating Part from {file_path}: {e}")

    print(f"Total files processed: {len(contents)}")
    for ext, count in file_counts.items():
        print(f"  {ext[1:].upper()}: {count}")

    if not contents:
        print("No matching files found. Cached content not created.")
        return None

    cached_content = caching.CachedContent.create(
        model_name=model_id,
        contents=contents,
        ttl=datetime.timedelta(minutes=ttl_minutes),
    )
    print("Cached content created successfully!")
    return cached_content

# Build the context cache from the URIs collected during the knowledge upload.
cached_content = create_cached_content(
    knowledge_uris=knowledge_uris,
    bucket_name=bucket_name,
    subfolder_in_bucket=subfolder_in_bucket,
    model_id=MODEL_ID,
)

Total files processed: 51
  PDF: 44
  JPG: 7
Cached content created successfully!


In [15]:
# Uncomment to delete the cached content manually (e.g. before its TTL expires):
# cached_content.delete()

In [16]:
# Print the cache identifier followed by its creation and expiry timestamps.
for attr in ("name", "create_time", "expire_time"):
    print(getattr(cached_content, attr))
# print(cached_content.resource_name)
# print(cached_content.model_name)

8978981388325748736
2025-03-06 16:52:46.391281+00:00
2025-03-06 17:52:46.385585+00:00


In [17]:
from vertexai.preview.generative_models import GenerativeModel
# Model bound to the cached content created above; the generation call later
# in the notebook goes through this object.
model = GenerativeModel.from_cached_content(cached_content=cached_content)

In [18]:
# Local recording to convert into a protocol. No subfolder is passed to the
# upload helper for this file.
video_path = (
    "/Users/patriciaskowronek/Documents/documentation_agent_few_shot_examples/"
    "ready_examples/Disconnect_IonOpticks_column_from_sample_line.mp4"
)
video_uri_input = video_to_protocol.upload_video_to_gcs(video_path, bucket)

Uploaded to: gs://mannlab_videos/Disconnect_IonOpticks_column_from_sample_line.mp4


In [19]:
# Instruction prompt sent together with the video: it sets the assistant's role,
# asks for a timestamped list of observed actions, and gives two example lines
# in the expected "[mm:ss] component: action" format.
# NOTE(review): the prompt text contains typos ("secound", "Take deep breath",
# "Answer direct"). It is left unchanged here because editing it changes what
# the model receives and may change the recorded output — fix deliberately.
prompt = """
You are a highly observant research assistant in Professor Matthias Mann's proteomics and mass spectrometry laboratory. Your expertise lies in detailed documentation of experimental procedures.

Analyze the video and reconstruct a step-by-step protocol by focusing on the actions in the video. Focus on user interactions with equipment, devices, and software. The goal is a clear, concise, unambiguous protocol reproducible by someone with no prior knowledge. "Think aloud" as if you were the researcher in the video that describes their work. Describe what you see at every secound. Take deep breath and think step-by-step. Answer direct.

For each action, describe:

* **Timestamp:** [timestamp]
* **Action:** [Specific Action/Change Observed (Include details of tools used, and observable results. (e.g., opening a lid, pressing a button, turning a knob, screwing/unscrewing, connecting/disconnecting, etc.))]

**Example:**
[02:15] timsControl Software: Mode changed from "Operate" to "Standby" by clicking the power button.
[03:45] Ion Source: NanoViper connector disconnected by unscrewing the nut counterclockwise.

"""

In [20]:
# Request layout: instructions, a label, the video itself, then a cue for the answer.
inputs = [
    prompt,
    "Input Video:",
    Part.from_uri(video_uri_input, mime_type="video/mp4"),
    "Observations:",
]

# The generation call is made with temperature 0.
response = model.generate_content(inputs, generation_config={"temperature": 0})
observation = response.text

# Token usage is printed; the model's answer is rendered as Markdown (cell output).
print(response.usage_metadata)
Markdown(observation)

prompt_token_count: 274027
candidates_token_count: 273
total_token_count: 274300
cached_content_token_count: 259807
prompt_tokens_details {
  modality: TEXT
  token_count: 256
}
prompt_tokens_details {
  modality: VIDEO
  token_count: 13965
}
prompt_tokens_details {
  modality: IMAGE
  token_count: 1806
}
prompt_tokens_details {
  modality: DOCUMENT
  token_count: 258000
}
candidates_tokens_details {
  modality: TEXT
  token_count: 273
}



[00:00] **Action:** The video starts with a view of a computer screen displaying a chromatogram and a list of files. The user is wearing a red sweater.
[00:08] **Action:** The user moves the camera to show the back of a timsTOF SCP instrument.
[00:12] **Action:** The user moves the camera to show the front of an Evosep One LC system.
[00:14] **Action:** Ion Source: The user starts to connect the transfer line to the timsTOF SCP instrument using both hands.
[00:20] **Action:** Ion Source: The user uses both hands to connect the transfer line to the column.
[00:33] **Action:** Ion Source: The user uses pliers to tighten the connection between the transfer line and the column.
[00:40] **Action:** Ion Source: The user removes the protective cap from the transfer line and places it in a yellow container.
[00:43] **Action:** The user moves the camera to show the front of the timsTOF SCP instrument.
[00:48] **Action:** The user moves the camera back to the computer screen displaying the chromatogram and files.