In [1]:
#@title Install conda for colab
#@markdown Kernel will restart automatically, then you can continue to the next cell.
#@markdown <br> (Don't worry if you get a 'Your session crashed for an unknown reason.' error, this is the expected behaviour.)
!if [ -n "$COLAB_RELEASE_TAG" ]; then pip install condacolab; fi
import condacolab
condacolab.install()

Collecting condacolab
  Downloading condacolab-0.1.10-py3-none-any.whl.metadata (5.5 kB)
Downloading condacolab-0.1.10-py3-none-any.whl (7.2 kB)
Installing collected packages: condacolab
Successfully installed condacolab-0.1.10
⏬ Downloading https://github.com/jaimergp/miniforge/releases/download/24.11.2-1_colab/Miniforge3-colab-24.11.2-1_colab-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:14
🔁 Restarting kernel...


In [2]:
#@title Get density prediction code from GitHub repository
%%capture
import condacolab
%cd ~/../content

!rm -rf ProteinDensity
repo_url = "https://github.com/Degiacomi-Lab/DensiTree.git"
!git clone -b test https://github.com/Degiacomi-Lab/ProteinDensity.git


/content
Cloning into 'ProteinDensity'...
remote: Enumerating objects: 222, done.[K
remote: Counting objects: 100% (15/15), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 222 (delta 9), reused 0 (delta 0), pack-reused 207 (from 2)[K
Receiving objects: 100% (222/222), 22.80 MiB | 31.00 MiB/s, done.
Resolving deltas: 100% (125/125), done.


In [3]:
#@title Install dependencies and enable widgets
%%capture
!mamba env update -n base -f ProteinDensity/DensiTree/environment.yml

#from google.colab import output
#output.enable_custom_widget_manager()


AttributeError: 'CapturingDisplayPublisher' object has no attribute 'register_hook'

In [4]:
# @title Upload Protein Structure PDB File(s)
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, HTML
import os
import shutil

# Ensure correct cwd
%cd /content

# Create the upload folder. exist_ok=True makes re-running the cell safe without a
# separate existence check.
os.makedirs('structure_data', exist_ok=True)

# Path of the most recently uploaded file; assigned by the upload callback.
uploaded_filename = None

# file upload function
def file_upload(b, target_dir=os.path.abspath('structure_data')):
    """Open the Colab upload dialog and move every uploaded file into ``target_dir``.

    Args:
        b: Button instance handed over by ``on_click`` (unused).
        target_dir: Folder that receives the uploads. The default is resolved to an
            absolute path when the function is defined, so the callback keeps
            working if the working directory changes before the button is clicked.

    Side effects:
        Sets the global ``uploaded_filename`` to the destination of the last file moved.
    """
    global uploaded_filename
    os.makedirs(target_dir, exist_ok=True)
    uploaded = files.upload()
    for fname in uploaded:
        # The upload is addressed by its bare name, i.e. relative to the current
        # working directory; move it from there into the structure folder.
        uploaded_filename = os.path.join(target_dir, fname)
        shutil.move(fname, uploaded_filename)
        print(f"✅ Uploaded file saved to: {uploaded_filename}")

# Styled trigger button for the PDB upload dialog
btn_layout = widgets.Layout(height='75px', width='300px')
btn_style = {'font_size': '16px', 'font_weight': 'bold'}
btn = widgets.Button(
    button_style='success',
    description='📁 Upload PDB File(s)',
    layout=btn_layout,
    style=btn_style,
)
btn.on_click(file_upload)

# Render the heading first, then the button underneath it
display(
    HTML("<h3 style='font-family:sans-serif;'>Upload Protein Structure PDB File</h3>"),
    btn,
)



/content


Button(button_style='success', description='📁 Upload PDB File(s)', layout=Layout(height='75px', width='300px')…

Saving 3RY2_clean.pdb to 3RY2_clean.pdb
Saving 2ptn_clean.pdb to 2ptn_clean.pdb
Saving 1crm_clean.pdb to 1crm_clean.pdb
Saving 1bti_clean.pdb to 1bti_clean.pdb
Saving 5pti_clean.pdb to 5pti_clean.pdb
✅ Uploaded file saved to: structure_data/3RY2_clean.pdb
✅ Uploaded file saved to: structure_data/2ptn_clean.pdb
✅ Uploaded file saved to: structure_data/1crm_clean.pdb
✅ Uploaded file saved to: structure_data/1bti_clean.pdb
✅ Uploaded file saved to: structure_data/5pti_clean.pdb


In [5]:
# @title Upload Protein Sequences as FASTA file(s)

# Ensure correct cwd
%cd /content

# Create the upload folder. exist_ok=True makes re-running the cell safe without a
# separate existence check.
os.makedirs('sequence_data', exist_ok=True)

# Path of the most recently uploaded file; assigned by the upload callback.
uploaded_filename = None

# file upload function
def file_upload(b, target_dir=os.path.abspath('sequence_data')):
    """Open the Colab upload dialog and move every uploaded file into ``target_dir``.

    Args:
        b: Button instance handed over by ``on_click`` (unused).
        target_dir: Folder that receives the uploads. The default is resolved to an
            absolute path when the function is defined, so the callback keeps
            working if the working directory changes before the button is clicked.

    Side effects:
        Sets the global ``uploaded_filename`` to the destination of the last file moved.
    """
    global uploaded_filename
    os.makedirs(target_dir, exist_ok=True)
    uploaded = files.upload()
    for fname in uploaded:
        # The upload is addressed by its bare name, i.e. relative to the current
        # working directory; move it from there into the sequence folder.
        uploaded_filename = os.path.join(target_dir, fname)
        shutil.move(fname, uploaded_filename)
        print(f"✅ Uploaded file saved to: {uploaded_filename}")

# button widget
btn = widgets.Button(
    description='📁 Upload FASTA File(s)',
    button_style='success',
    layout=widgets.Layout(width='300px', height='75px'),
    style={'font_weight': 'bold', 'font_size': '16px'}
)

btn.on_click(file_upload)
# The heading names the FASTA upload so it matches this cell's button and title.
display(HTML("<h3 style='font-family:sans-serif;'>Upload Protein Sequence FASTA File</h3>"))
display(btn)

/content


Button(button_style='success', description='📁 Upload FASTA File(s)', layout=Layout(height='75px', width='300px…

Saving rcsb_pdb_5PTI.fasta to rcsb_pdb_5PTI.fasta
✅ Uploaded file saved to: sequence_data/rcsb_pdb_5PTI.fasta


In [6]:
#@title Add Protein Sequences as Text
#@markdown Protein amino acid residue sequences, written in either one-letter or three-letter codes; separate multiple sequences with commas.

# Free-text input for one or more comma-separated sequences
textarea = widgets.Textarea(
    placeholder='Add comma-separated protein sequences',
    description='Sequence(s):',
    disabled=False,
    style={'font_size': '16px', 'font_weight': 'bold'},
    layout=widgets.Layout(height='100px', width='600px'),
)

# Submit button, wrapped in a centred flex container
button = widgets.Button(button_style='success', description='Submit')
box_layout = widgets.Layout(
    width='50%',
    display='flex',
    flex_flow='column',
    align_items='center',
)
box = widgets.HBox(layout=box_layout, children=[button])

# Holds the most recently submitted text; read by later cells.
sequences_text = ""


def on_button_click(b):
  """Copy the current textarea content into ``sequences_text`` and echo it.

  ``b`` is the clicked button supplied by ``on_click``; it is not used.
  Returns the stored text.
  """
  global sequences_text
  submitted = textarea.value
  sequences_text = submitted
  print(f"Sequence(s) = {sequences_text}")
  return submitted


button.on_click(on_button_click)

# Show the input area above the submit button
display(textarea, box)

Textarea(value='', description='Sequence(s):', layout=Layout(height='100px', width='600px'), placeholder='Add …

HBox(children=(Button(button_style='success', description='Submit', style=ButtonStyle()),), layout=Layout(alig…

Sequence(s) = AAAAAAAAAAAAAAA, HHHHHHHHHHHHHH
Sequence(s) = AAAAAAAAAAAAAAA, AAAAAAAAAAAA
Sequence(s) = AAAAAAAAAAAAAAA, HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH


In [7]:
#@title Choose Random Forest Regressor options
#@markdown <br>'Sequence & Structure' estimates protein density using **both** the protein structure-derived characteristic features and the protein sequence (valid if protein structures in PDB format have been uploaded).
#@markdown <br>'20 best features' refers to the most important features when using structure-derived features.
import ipywidgets as widgets
from IPython.display import display

# Labels of the boolean options. A checkbox starts ticked only when its label
# contains the word "recommended".
options = ["Sequence & Structure?"]

checkboxes = []
for option in options:
    checkbox = widgets.Checkbox(description=option, value=("recommended" in option))
    checkboxes.append(checkbox)

# Stack the checkboxes vertically and render them
checks1 = widgets.VBox(checkboxes)
display(checks1)

# Temperature choices; the first entry is the pre-selected one
temp_options = ["300 K (recommended)","310.15 K"]

dropdown1 = widgets.Dropdown(
    description='Temperature:',
    options=temp_options,
    value=temp_options[0],
    disabled=False,
)
display(dropdown1)


# Feature-set choices; the first entry is the pre-selected one
feature_options = ["20 best features (recommended)", "all features"]
dropdown2 = widgets.Dropdown(
    description='Features:',
    options=feature_options,
    value=feature_options[0],
    disabled=False,
)
display(dropdown2)




VBox(children=(Checkbox(value=False, description='Sequence & Structure?'),))

Dropdown(description='Temperature:', options=('300 K (recommended)', '310.15 K'), value='300 K (recommended)')

Dropdown(description='Features:', options=('20 best features (recommended)', 'all features'), value='20 best f…

In [18]:
#@title Calculate results and save to file (predictions.txt) in results folder

%cd ~/../content/ProteinDensity/DensiTree

import DensiTree as DT

structure_data_files = os.listdir("../../structure_data")

sequence_data_files = os.listdir("../../sequence_data")

for data_file in structure_data_files:
  if not data_file.endswith(".pdb"):
    print(f"{data_file} is not a PDB file, please only upload files with the '.pdb' extension to the data folder.")

if not os.path.exists('results'):
    os.makedirs('results')

temp = dropdown1.value.strip("(recommended)").rstrip()

if dropdown2.value == "20 best features (recommended)":
  important_features = True
else:
  important_features = False

with open("results/predictions.txt", "w") as w_file:
  for data_file in sequence_data_files:
    seq = DT.Sequence(f"../../sequence_data/{data_file}", temp=temp)
    prediction, feats = seq.predict()
    print(seq.sequence.split("/")[-1], prediction)
    w_file.write(f"{seq.sequence.split('/')[-1]}, {prediction}, {temp}, sequence\n")

  for data_file in structure_data_files:
    if not data_file.endswith(".pdb"): continue
    struct = DT.Structure(f"../../structure_data/{data_file}", temp=temp, important_features=important_features)
    prediction, feats = struct.predict()
    print(struct.structure.split("/")[-1], prediction)
    w_file.write(f"{struct.structure.split('/')[-1]}, {prediction}, {temp}, structure\n")
    if checkboxes[0].value == True:
      seq = DT.Sequence(f"../../structure_data/{data_file}", temp=temp)
      prediction, feats = seq.predict()
      print(seq.sequence.split("/")[-1], prediction)
      w_file.write(f"{seq.sequence.split('/')[-1]}, {prediction}, {temp}, sequence\n")
  for sequence in sequences_text.split(","):
    sequence = sequence.strip()
    seq = DT.Sequence(sequence, temp=temp)
    prediction, feats = seq.predict()
    print(seq.sequence, prediction)
    w_file.write(f"{seq.sequence}, {prediction}, {temp}, sequence\n")




rcsb_pdb_5PTI.fasta 1.3138007663123212


  self.times[idx] = ts.time


1crm_clean.pdb 1.3099975662717478
1crm_clean.pdb 1.2945663516495904
5pti_clean.pdb 1.2988566592946427
5pti_clean.pdb 1.3138007663123212
AAAAAAAAAAAAAAA 1.3253078977550965
HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH 1.3259360923826236


  self.times[idx] = ts.time
