In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Metadata Exploration

### Install and import required packages

In [None]:
# Install packages
! pip install -U google-cloud-firestore google-cloud-workflows ipywidgets
! pip install "colabfold[alphafold] @ git+https://github.com/sokrypton/ColabFold"

In [None]:
# Enable IPython's autoreload extension so that imported modules are
# re-imported automatically before each cell executes (mode 2 = all modules).
# NOTE(review): this does not restart the kernel; if the installs above
# upgraded packages that were already loaded, restart the kernel manually
# before proceeding.
%load_ext autoreload
%autoreload 2

In [None]:
# Import packages
import numpy as np
import os
import pickle
import py3Dmol

from google.cloud import firestore
from google.cloud import storage
from IPython import display
from ipywidgets import GridspecLayout
from ipywidgets import Output
import matplotlib.pyplot as plt
from pathlib import Path

# Instantiate the Firestore client (experiment metadata) and the
# Cloud Storage client (job output files); both are constructed with
# default arguments and are reused by the cells below.
fs_client = firestore.Client()
gcs_client = storage.Client()

#### Succeeded pipelines

List up to 10 experiments with the selected status (`SUCCEEDED` by default).

In [None]:
# Status to filter on. You can change to 'RUNNING', 'FAILED' or 'SUCCEEDED'
status = 'SUCCEEDED'

# Query at most 10 experiments whose 'status' field matches
docs = fs_client.collection('colabfold-experiments').where(
    'status', '==', status).limit(10).get()

if not docs:
    # Report the status that was actually queried, not a hard-coded one
    raise ValueError(f'No experiment with state {status}')

# Print results
print(f'Found {len(docs)} experiment(s).')
for doc in docs:
    doc_dict = doc.to_dict()
    print(f'=> Experiment: {doc_dict["job_id"]}')

# Keep the last listed experiment for the detailed inspection that follows.
# Note: despite its name, `job_id` holds the document's 'firestore_ref'
# value, which is later used as the Firestore document ID.
job_id = doc_dict['firestore_ref']

The next cell will:
 - Query one specific job and bring its information from Firestore.
 - Get the list of all the files generated by this job.
 - Get the list of PDB files generated by this job.
 - List the models by their ranking confidence.
 - Display a sample image

In [None]:
# Fetch the Firestore document of the experiment selected in `job_id`
doc = fs_client.collection('colabfold-experiments').document(job_id).get()

if not doc.exists:
    raise ValueError('Document does not exist.')

doc_dict = doc.to_dict()
# 'job_gcs_path' is passed to the storage client as the bucket name
bucket_name = doc_dict['job_gcs_path']

# List only the objects directly under "<job_id>/" (no sub-folders)
blobs_listing = gcs_client.list_blobs(
    bucket_name,
    prefix=doc_dict['job_id'] + '/',
    delimiter='/')
filenames = [blob.name for blob in blobs_listing]

# Match on the file extension rather than a substring of the full blob path,
# so a job ID that happens to contain "pdb" does not select every file.
pdb_filenames = [name for name in filenames if Path(name).suffix == '.pdb']

print('Model Ranking')
# NOTE(review): the rank is simply the position in the listing; this assumes
# the listing order of the PDB files matches their ranking - confirm.
for rank, pdb_name in enumerate(pdb_filenames, start=1):
    print(f'Rank {rank}: {Path(pdb_name).name}')

bucket = gcs_client.bucket(bucket_name)
image_blobs = [name for name in filenames if Path(name).suffix == '.png']

# Download all the images into the working directory and remember the
# pLDDT plot. Resetting to None first avoids silently showing a stale image
# left over from a previous run of this cell.
image_to_display = None
for image_blob_name in image_blobs:
    local_name = Path(image_blob_name).name
    bucket.blob(image_blob_name).download_to_filename(local_name)
    if 'plddt.png' in local_name:
        image_to_display = local_name

if image_to_display is None:
    raise ValueError('No pLDDT image (*plddt.png) was found for this job.')

display.Image(image_to_display, width=800)

#### Search for jobs by their labels

List up to 10 experiments whose `experiment_name` label equals the variable `experiment_name`, and print the job ID of each match.

In [None]:
# Label value to search for; set a different value to search another label
experiment_name = 'experiment-1'

# Query at most 10 experiments whose 'labels.experiment_name' field matches
docs = fs_client.collection('colabfold-experiments').where(
    'labels.experiment_name', '==', experiment_name).limit(10).get()

if not docs:
    raise ValueError('No experiment with this specific label.')

# The label is called "experiment_name" (the original message misspelled it)
print(f'Experiments with experiment_name equal to: {experiment_name}')
# Print results
for idx, doc in enumerate(docs):
    doc_dict = doc.to_dict()
    print(f'=> Experiment {idx}: {doc_dict["job_id"]}')

# Visualize Prediction

Next you will visualise the prediction & confidence.

In [None]:
# Download the first listed PDB file into the working directory.
# Relies on `bucket` and `pdb_filenames` created by the job-inspection cell.
if not pdb_filenames:
    # Fail with a clear message instead of a bare IndexError
    raise ValueError('No PDB files were found for this job.')

top_pdb_blob_name = pdb_filenames[0]
prediction_filename = Path(top_pdb_blob_name).name
blob = bucket.blob(top_pdb_blob_name)
blob.download_to_filename(prediction_filename)

In [None]:
# Read the downloaded PDB file as text and hand it to a py3Dmol viewer.
pdb_file = Path(prediction_filename).read_text()

view = py3Dmol.view(width=800, height=600)
view.addModelsAsFrames(pdb_file)
# Last expression of the cell: fit the camera to the loaded model(s)
view.zoomTo()