<a href="https://colab.research.google.com/github/KhondamirRustamov/AF-Multimer-cluster/blob/main/AF_Multimer_Cluster-colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#@title Installing ColabFold
%%time
!pip install -q biopython
!pip install -q MDAnalysis

import warnings
warnings.filterwarnings("ignore")
import json
import numpy as np
import matplotlib.pyplot as plt

import glob
from scipy.spatial import distance_matrix

import os
import sys
import urllib.request

import Bio
import Bio.PDB
import Bio.SeqRecord
from sklearn.metrics.pairwise import euclidean_distances

from sklearn.decomposition import PCA

import pandas as pd

import MDAnalysis as mda
from MDAnalysis.analysis import pca, align, rms
#print('installing ColabFold...')
if not os.path.isfile("COLABFOLD_READY"):
  print("installing colabfold...")
  os.system("pip install -q --no-warn-conflicts 'colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold'")
  if os.environ.get('TPU_NAME', False) != False:
    os.system("pip uninstall -y jax jaxlib")
    os.system("pip install --no-warn-conflicts --upgrade dm-haiku==0.0.10 'jax[cuda12_pip]'==0.3.25 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html")
  os.system("ln -s /usr/local/lib/python3.*/dist-packages/colabfold colabfold")
  os.system("ln -s /usr/local/lib/python3.*/dist-packages/alphafold alphafold")
  os.system("touch COLABFOLD_READY")

!git clone https://github.com/HWaymentSteele/AF_Cluster
!pip install -r /content/AF_Cluster/requirements.txt
print('successful')



fatal: destination path 'AF_Cluster' already exists and is not an empty directory.
successful
CPU times: user 1.12 s, sys: 124 ms, total: 1.24 s
Wall time: 9.1 s


In [7]:
#@markdown **Input options**
#@markdown metamorphic protein sequence first
sequence = 'MEGISIYTSDNYTEEMGSGDYDSMKEPCFREENANFNKIFLPTIYSIIFLTGIVGNGLVILVMGYQKKLRSMTDKYRLHLSVADLLFVITLPFWAVDAVANWYFGNFLCKAVHVIYTVNLYSSVLILAFISLDRYLAIVHATNSQRPRKLLAEKVVYVGVWIPALLLTIPDFIFANVSEADDRYICDRFYPNDLWVVVFQFQHIMVGLILPGIVILSCYCIIISKLSHSKGHQKRKALKTTVILILAFFACWLPYYIGISIDSFILLEIIKQGCEFENTVHKWISITEALAFFHCCLNPILYAFLGAKFKTSAQHALTSVSRGSSLKILSKGKRGGHSSVSTESESSSFHSS:MNAKVVVVLVLVLTALCLSDGKPVSLSYRCPCRFFESHVARANVKHLKILNTPNCALQIVARLKNNNRQVCIDPKLKWIQEYLEKALNKRFKM' #@param {type:"string"}
name = 'test' #@param {type:"string"}
os.system(f'mkdir {name}/')
file_fasta = open(f'{name}/complex.fasta', 'w')
file_fasta.write(f'>{name}\n{sequence}\n')
file_fasta.close()
file_fasta = open(f'{name}/metamorphic.fasta', 'w')
file_fasta.write(f">{name}\n{sequence.split(':')[0]}\n")
file_fasta.close()
num_models = 5 #@param ["1", "2", "3", "4", "5"] {type:"raw"}

print('Predicting metamorphic protein using ColabFold')
#os.system(f'colabfold_batch /content/{name}/metamorphic.fasta /content/{name}/metamorphic/ --num-recycle 3 --num-models 1')

print('Predicting complex using ColabFold')
os.system(f'colabfold_batch /content/{name}/complex.fasta /content/{name}/complex/ --num-recycle 3 --num-models 1 --pair-mode unpaired')

Predicting metamorphic protein using ColabFold
Predicting complex using ColabFold


2

In [10]:
#@markdown **Cluster MSAs using AFcluster and merge with with**
# Cluster MSA of metamorphic protein using AFcluster
os.system(f"python /content/AF_Cluster/scripts/ClusterMSA.py EX -i /content/{name}/metamorphic/test.a3m -o /content/{name}/met_msas")

name = 'test'
filein = open(f'/content/{name}/complex/test.a3m').readlines()
ligand_unpaired = 0
for i in filein:
  if i=='>102\n':
    break
  else:
    ligand_unpaired+=1
filein=filein[ligand_unpaired:]
binder_seq = sequence.split(':')[1]
receptor_seq = sequence.split(':')[0]

# Merge clusters of unpaired Metamorphic protein (receptor) MSA to full-MSA of ligand
for x in glob.glob(f'/content/{name}/met_msas/*.a3m'):
    if 'EX' in x:
        file_test = open(x).readlines()
        seq_name = '>102\n'
        initial_seq = '-'*len(binder_seq)
        new_msa = []
        new_msa.append(seq_name)
        for i in file_test[1:]:
            if i[:1]!='>':
                new_msa.append(i[:-2]+initial_seq+'\n')
            else:
                new_msa.append(i)
        cluster_name=x.split('/')[-1]
        os.system(f'mkdir complex_msas/')
        file=open(f'/content/complex_msas/{cluster_name}', 'w')
        file.write(f'#{len(receptor_seq)},{len(binder_seq)}\t1,1\n')
        file.write('>101\t102\n')
        file.write(f'{receptor_seq}{binder_seq}\n')
        for i in filein[1:]:
            file.write(i)
        for i in new_msa:
            file.write(i)
        file.close()

In [None]:
#@markdown **Predict clusters with AlphaFold2-Multimer**
for i in glob.glob('complex_msas/*.a3m'):
  os.system(f'colabfold_batch {i} /content/complex_msas/pdbs/ --num-recycle 3 --num-models 1')

In [None]:
#@markdown **Plot results**
#@markdown Download the reference structures and input their names

import warnings
warnings.filterwarnings("ignore")
pdbs = []
contacts = []
names = []
plddts = []
plddts_full = []
iptms = []
ipaes = []

fold1_pdb = 'active.pdb' #@param {type:"string"}
fold2_pdb = 'inactive.pdb' #@param {type:"string"}

ref_inactive = mda.Universe(f'/content/{fold1_pdb}')
ref_inactive = ref_inactive.select_atoms('protein and name CA')
ref_active = mda.Universe(f'/content/{fold2_pdb}')
ref_active = ref_active.select_atoms('protein and name CA')

for i in glob.glob('/content/complex_msas/pdbs/*.pdb'):

    u = mda.Universe(i)
    u = u.select_atoms("chainID A and name CA")
    R_inactive = mda.analysis.rms.RMSD(u, ref_inactive,)
    R_inactive.run()

    R_active = mda.analysis.rms.RMSD(u, ref_active,)
    R_active.run()

    name = i.split('/')[-1].split('_')[1]

    data = open(f'result_folder/pdbs/EX_{name}_scores_rank_001_alphafold2_ptm_model_1_seed_000.json')
    #conf_prediction/a2a/a2a_gi/pdbs/EX_000_scores_rank_001_alphafold2_multimer_v3_model_1_seed_000.json
    #conf_prediction/a2a/a2a_msas/pdbs/EX_000_scores_rank_001_alphafold2_ptm_model_1_seed_000.json
    #conf_prediction/a2a/a2a_gi/pdbs/EX_000_predicted_aligned_error_v1.json
    data = json.load(data)
    plddts.append(np.mean(data['plddt']))
    plddts_full.append(data['plddt'])
    #iptms.append(data['iptm'])
    #ipaes.append(np.mean(np.array([np.min(np.array(data['pae'])[chain_a_hotspots[0]:chain_a_hotspots[1],i]) for i in chain_b_hotspots])))
    pdbs.append(np.array([R_inactive.rmsd[0][-1], R_active.rmsd[0][-1]]))
    names.append(i)
    contacts.append(euclidean_distances(u.atoms.positions,u.atoms.positions).reshape((1,-1))[0])
pdbs = np.array(pdbs).T
contacts.append(euclidean_distances(ref_inactive.atoms.positions,ref_inactive.atoms.positions).reshape((1,-1))[0])
contacts.append(euclidean_distances(ref_active.atoms.positions,ref_active.atoms.positions).reshape((1,-1))[0])

plt.rcParams["figure.figsize"] = (7,6)
data = pd.DataFrame({'rmsd1':pdbs[0],
                     'rmsd2':pdbs[1],
                     'plddts1':plddts,
                     'name':names})
data = data.sort_values(by='plddts1')

plt.scatter(data['rmsd1'],data['rmsd2'], c=data['plddts1'], cmap='rainbow_r', alpha=0.95, s=200, vmin=40, vmax=90)
plt.ylim(0,20)
plt.xlim(0,20)
plt.xlabel('RMSD to inactive', fontsize=14)
plt.ylabel('RMSD to active', fontsize=14)
#plt.axhline(y = 0.8, xmin = 0, xmax = 1.2)
#plt.axvline(x = 0.8, ymin = 0, ymax = 1.2)
plt.colorbar()
plt.plot([0, 20], [0,20], ls="--", c='black')
plt.savefig('results/image.png', dpi=600)