<a href="https://colab.research.google.com/github/Hoopsforever/AlphaFold-capabilities-with-nanobody-antigen-complex-prediction/blob/main/Special_RMSD_calculator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the later cells read and write files under that path.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#@title Install conda
%%bash
# Download a pinned Miniconda installer (Python 3.10 build) and install it on top of /usr/local.
MINICONDA_INSTALLER_SCRIPT=Miniconda3-py310_23.3.1-0-Linux-x86_64.sh
MINICONDA_PREFIX=/usr/local
wget https://repo.continuum.io/miniconda/$MINICONDA_INSTALLER_SCRIPT
chmod +x $MINICONDA_INSTALLER_SCRIPT
# -b: batch mode (no prompts); -f: do not fail if the prefix already exists; -p: install prefix.
./$MINICONDA_INSTALLER_SCRIPT -b -f -p $MINICONDA_PREFIX

# Keep conda's interpreter at Python 3.10, then update every package from the defaults channel.
conda install --channel defaults conda python=3.10 --yes
conda update --channel defaults --all --yes

In [None]:
#@title Import libraries
import sys
# Add the Python 3.10 site-packages directory under /usr/local to the import path
# (presumably where the conda install from the previous cell places its packages).
_ = (sys.path
     .append("/usr/local/lib/python3.10/site-packages"))

# ANARCI is installed from the bioconda channel right before it is imported.
!conda install -c bioconda anarci
import anarci
import Bio
from Bio.PDB import Superimposer, PDBParser
from Bio.PDB.PDBIO import PDBIO
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
import pandas as pd
from Bio.SeqUtils import seq1
import math
import os

In [None]:
#@title define functions
def clean(pdb_fnm, target_chain):
  """Parse a PDB file and reduce it to the heavy atoms of a single chain.

  In every model of the structure this removes:
    * all chains whose id differs from ``target_chain``,
    * hetero residues (residue id with a non-blank hetero flag),
    * hydrogen atoms (element ``"H"``),
    * terminal ``OXT`` atoms (matched by atom name; the element of an OXT
      atom is ``"O"``, so it cannot be matched by element).

  Args:
    pdb_fnm: path of the PDB file to read.
    target_chain: id of the chain to keep.

  Returns:
    The parsed structure, modified in place as described above.
  """
  structure = PDBParser().get_structure("pdb", pdb_fnm)

  for model in structure:
    # Collect ids first and detach afterwards: detaching while iterating
    # would mutate the child list that is being walked.
    unwanted_chains = [chain.id for chain in model if chain.id != target_chain]
    for chain_id in unwanted_chains:
      model.detach_child(chain_id)

    for chain in model:
      # Get rid of heteroatoms (whole hetero residues).
      hetero_ids = [residue.get_id() for residue in chain
                    if residue.get_id()[0] != " "]
      for residue_id in hetero_ids:
        chain.detach_child(residue_id)

      # Get rid of hydrogen and OXT atoms in the remaining residues.
      for residue in chain:
        unwanted_atoms = [atom.id for atom in residue
                          if atom.element == "H" or atom.id == "OXT"]
        for atom_id in unwanted_atoms:
          residue.detach_child(atom_id)

  return structure

def normalize(pdb1_fnm, chain_1, pdb2_fnm, chain_2, out_dir):
  """Trim two chains to the residues they share in a global alignment and save both.

  The one-letter sequences of ``chain_1`` (first file) and ``chain_2`` (second
  file) are globally aligned (match 2, mismatch -1, gap open -0.5, gap extend
  -0.1). Every residue that is aligned against a gap in the other sequence is
  removed from its chain; positions that merely differ in residue type are
  kept. Each structure is then written to
  ``<out_dir>/<pdb>_<chain>_nomr.pdb``.

  Args:
    pdb1_fnm: path of the first PDB file.
    chain_1: id of the chain to trim in the first file.
    pdb2_fnm: path of the second PDB file.
    chain_2: id of the chain to trim in the second file.
    out_dir: directory that receives both output files.
  """
  def _residues_facing_gaps(structure, chain_id, own_aligned, other_aligned):
    """Return (chain, residue id) pairs whose alignment partner is a gap."""
    doomed = []
    column = 0
    # Walk residues exactly the way struct_seq builds the sequence (same
    # chain filter, same skipping of residues that map to "X") so that
    # residue k always corresponds to sequence letter k.
    for model in structure.get_models():
      for chain in model.get_chains():
        if chain.get_id() != chain_id:
          continue
        for residue in chain.get_residues():
          if seq1(residue.get_resname()) == "X":
            continue
          # Skip columns where this sequence itself has a gap.
          while column < len(own_aligned) and own_aligned[column] == '-':
            column += 1
          if column < len(other_aligned) and other_aligned[column] == '-':
            doomed.append((chain, residue.get_id()))
          column += 1
    return doomed

  parser = PDBParser()
  structure1 = parser.get_structure("pdb1", pdb1_fnm)
  structure2 = parser.get_structure("pdb2", pdb2_fnm)

  # Characters 10..7 from the end of the path are used as the PDB code
  # (presumably file names look like "<4-char code>_<chain>.pdb" - confirm).
  pdb1 = pdb1_fnm[-10:-6]
  pdb2 = pdb2_fnm[-10:-6]

  alignments = pairwise2.align.globalms(
      struct_seq(structure1, chain_1), struct_seq(structure2, chain_2),
      2, -1, -.5, -.1)  # find alignments

  # Only the first reported alignment is used.
  aligned_sequence1 = alignments[0][0]
  aligned_sequence2 = alignments[0][1]

  # Collect everything before detaching so the walks see unmodified chains.
  doomed = (
      _residues_facing_gaps(structure1, chain_1, aligned_sequence1, aligned_sequence2)
      + _residues_facing_gaps(structure2, chain_2, aligned_sequence2, aligned_sequence1)
  )
  for chain, residue_id in doomed:
    chain.detach_child(residue_id)

  io = PDBIO()
  io.set_structure(structure1)
  io.save(out_dir + f'/{pdb1}_{chain_1}_nomr.pdb', preserve_atom_numbering = True)

  io.set_structure(structure2)
  io.save(out_dir + f'/{pdb2}_{chain_2}_nomr.pdb', preserve_atom_numbering = True)

def normalize_struct(struct1, pdb1, chain1, struct2, pdb2, chain2, out_dir1, out_dir2):
  """Trim two parsed chains to their aligned residues, renumber from 1 and save.

  The one-letter sequences of ``chain1`` in ``struct1`` and ``chain2`` in
  ``struct2`` are globally aligned (match 2, mismatch -1, gap open -0.5, gap
  extend -0.1). Residues aligned against a gap in the other sequence are
  removed; positions that only differ in residue type are kept. Afterwards
  every chain of both structures is renumbered consecutively from 1 (blank
  hetero flag and insertion code) and the structures are written to
  ``<out_dir1>/<pdb1>_<chain1>_nomr.pdb`` and
  ``<out_dir2>/<pdb2>_<chain2>_nomr.pdb``. Both structures are modified in
  place.

  Args:
    struct1, struct2: parsed structures to normalize.
    pdb1, pdb2: labels used as the first part of the output file names.
    chain1, chain2: ids of the chains to align and trim.
    out_dir1, out_dir2: output directories for the first/second structure.
  """
  def _residues_facing_gaps(struct, chain_id, own_aligned, other_aligned):
    """Return (chain, residue id) pairs whose alignment partner is a gap."""
    doomed = []
    column = 0
    # Walk residues exactly the way struct_seq builds the sequence (same
    # chain filter, same skipping of residues that map to "X") so that
    # residue k always corresponds to sequence letter k.
    for model in struct.get_models():
      for chain in model.get_chains():
        if chain.get_id() != chain_id:
          continue
        for residue in chain.get_residues():
          if seq1(residue.get_resname()) == "X":
            continue
          # Skip columns where this sequence itself has a gap.
          while column < len(own_aligned) and own_aligned[column] == '-':
            column += 1
          if column < len(other_aligned) and other_aligned[column] == '-':
            doomed.append((chain, residue.get_id()))
          column += 1
    return doomed

  def _renumber_from_one(struct):
    """Give the residues of every chain the ids 1..n in their current order."""
    for model in struct:
      for chain in model:
        residues = list(chain)
        if not residues:
          continue
        # Two passes avoid id clashes between siblings: first move every
        # residue to a temporary number above both the largest existing
        # number and the final range 1..n, then assign the final numbers.
        offset = max(len(residues), max(r.get_id()[1] for r in residues)) + 1
        for shift, residue in enumerate(residues):
          residue.id = (' ', offset + shift, ' ')
        for number, residue in enumerate(residues, start=1):
          residue.id = (' ', number, ' ')

  alignments = pairwise2.align.globalms(
      struct_seq(struct1, chain1), struct_seq(struct2, chain2),
      2, -1, -.5, -.1)  # find alignments

  # Only the first reported alignment is used.
  aligned_sequence1 = alignments[0][0]
  aligned_sequence2 = alignments[0][1]

  # Collect everything before detaching so the walks see unmodified chains.
  doomed = (
      _residues_facing_gaps(struct1, chain1, aligned_sequence1, aligned_sequence2)
      + _residues_facing_gaps(struct2, chain2, aligned_sequence2, aligned_sequence1)
  )
  for chain, residue_id in doomed:
    chain.detach_child(residue_id)

  #renumber PDBs from 1
  _renumber_from_one(struct1)
  _renumber_from_one(struct2)

  #save normalized PDBs
  io = PDBIO()
  io.set_structure(struct1)
  io.save(out_dir1 + f'/{pdb1}_{chain1}_nomr.pdb', preserve_atom_numbering = True)

  io.set_structure(struct2)
  io.save(out_dir2 + f'/{pdb2}_{chain2}_nomr.pdb', preserve_atom_numbering = True)

def nanobody_sequence(pdb_fnm, nb_chain):
  """Read a PDB file and return the one-letter sequence of one chain.

  Residues of every chain whose id equals ``nb_chain`` are converted with
  ``seq1``; residues that convert to "X" are left out. Matching chains from
  all models contribute, in file order.

  Args:
    pdb_fnm: path of the PDB file to parse.
    nb_chain: id of the chain to read.

  Returns:
    The concatenated one-letter codes as a string ('' if no chain matches).
  """
  structure = PDBParser().get_structure("pdb", pdb_fnm)
  letters = []
  for model in structure.get_models():
    for chain in model.get_chains():
      if chain.get_id() != nb_chain:
        continue
      for residue in chain.get_residues():
        code = seq1(residue.get_resname())
        if code != "X":
          letters.append(code)
  return "".join(letters)

def struct_seq(struct, nb_chain):
  """Return the one-letter sequence of one chain of an already parsed structure.

  Residues of every chain whose id equals ``nb_chain`` are converted with
  ``seq1``; residues that convert to "X" are left out. Matching chains from
  all models contribute, in structure order.

  Args:
    struct: parsed structure to read from.
    nb_chain: id of the chain to read.

  Returns:
    The concatenated one-letter codes as a string ('' if no chain matches).
  """
  letters = []
  for model in struct.get_models():
    for chain in model.get_chains():
      if chain.get_id() != nb_chain:
        continue
      for residue in chain.get_residues():
        code = seq1(residue.get_resname())
        if code != "X":
          letters.append(code)
  return "".join(letters)

def anarci_df(sequence):
  """Number a sequence with ANARCI and return the numbering as a table.

  Uses element ``[1][0][0]`` of the ``run_anarci`` result (presumably the
  first numbered domain of the first sequence - confirm against the ANARCI
  documentation). Its first item is the list of numbered positions, its
  second item is returned unchanged as ``start``.

  Args:
    sequence: the sequence passed to ``anarci.run_anarci``.

  Returns:
    (table, start): ``table`` is a DataFrame with columns ``res`` (residue
    letter) and ``num`` (position number followed by its insertion code,
    spaces removed); rows whose ``res`` is '-' are dropped and the index is
    reset to 0..n-1.
  """
  domain = anarci.run_anarci(sequence)[1][0][0]
  numbering = domain[0]
  start = domain[1]

  rows = [
      {
          'res': entry[-1],
          'num': (str(entry[0][0]) + entry[0][1]).replace(' ', ''),
      }
      for entry in numbering
  ]
  table = pd.DataFrame(rows)
  # Gap positions carry '-' as residue; they have no counterpart in the sequence.
  table = table.loc[table['res'] != '-'].reset_index(drop = True)
  return table, start

def superimpose(pdb_file1, pdb_file2):
  """Superimpose the first chain of one PDB file onto the first chain of another.

  All atoms of the first chain of model 0 are used from both files; the two
  chains must therefore contain the same number of atoms in the same order
  (``Superimposer.set_atoms`` rejects lists of different length).

  Args:
    pdb_file1: path of the fixed (reference) structure.
    pdb_file2: path of the structure that is moved.

  Returns:
    The structure parsed from ``pdb_file2`` with its first chain transformed
    onto the reference. Other chains, if any, are left where they were.
  """
  parser = PDBParser()
  structure1 = parser.get_structure("Protein 1", pdb_file1)
  structure2 = parser.get_structure("Protein 2", pdb_file2)

  # First chain of the first model in each structure.
  chain1 = structure1[0].get_list()[0]
  chain2 = structure2[0].get_list()[0]

  fixed_atoms = list(chain1.get_atoms())
  moving_atoms = list(chain2.get_atoms())

  # Fit structure 2 onto structure 1 and move the whole fitted chain.
  super_imposer = Superimposer()
  super_imposer.set_atoms(fixed_atoms, moving_atoms)
  super_imposer.apply(moving_atoms)

  return structure2

def superimpose_backbone(pdb_file1, pdb_file2):
  """Superimpose two chains on their backbone atoms and return the moved structure.

  The fit uses the atoms named C, N, O and CA of the first chain of model 0
  in each file; both chains must provide the same number of such atoms in
  the same order (``Superimposer.set_atoms`` rejects lists of different
  length). The resulting rotation/translation is applied to every atom of
  the moved chain, so side chains stay attached to the moved backbone.

  Args:
    pdb_file1: path of the fixed (reference) structure.
    pdb_file2: path of the structure that is moved.

  Returns:
    The structure parsed from ``pdb_file2`` with its first chain transformed
    onto the reference. Other chains, if any, are left where they were.
  """
  backbone_names = ('C', 'N', 'O', 'CA')

  parser = PDBParser()
  structure1 = parser.get_structure("Protein 1", pdb_file1)
  structure2 = parser.get_structure("Protein 2", pdb_file2)

  # First chain of the first model in each structure.
  chain1 = structure1[0].get_list()[0]
  chain2 = structure2[0].get_list()[0]

  fixed_backbone = [atom for atom in chain1.get_atoms() if atom.id in backbone_names]
  moving_backbone = [atom for atom in chain2.get_atoms() if atom.id in backbone_names]

  # Fit on the backbone only, but move the complete chain: applying the
  # transform to the backbone atoms alone would leave the side chains in
  # the old coordinate frame.
  super_imposer = Superimposer()
  super_imposer.set_atoms(fixed_backbone, moving_backbone)
  super_imposer.apply(list(chain2.get_atoms()))

  return structure2

In [None]:
# Exploratory cell: load the two reference charts and superimpose one
# without-target / with-target file pair from the Compare_Contacts folders.
summary1 = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Conformations/Reference_chart.txt",header=None)
summary2 = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Conformations/Reference_chart_prediction_w_model.txt",header = None)

# Here dir1/dir2 are sorted LISTS of file names (later cells rebind them to path strings).
dir1 = sorted(os.listdir('/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Contacts/Final/without_target'))
dir2 = sorted(os.listdir('/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Contacts/Final/with_target'))

# The third file (index 2) of each folder is used; the two are paired by position only.
pdb1_fnm = f'/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Contacts/Final/without_target/{dir1[2]}'
pdb2_fnm = f'/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Contacts/Final/with_target/{dir2[2]}'

# The chain id is read from character 5 of the file name
# (presumably names look like "<4-char code>_<chain>..." - confirm).
nb_notar = dir1[2][5]
nb_orig = dir2[2][5]

seq_AA = nanobody_sequence(pdb1_fnm, nb_notar)
imgt, start = anarci_df(seq_AA)

parser = PDBParser()
structure1 = parser.get_structure('Protein 1', pdb1_fnm)
# All-atom superposition; structure2 is the second file moved onto the first.
structure2 = superimpose(pdb1_fnm, pdb2_fnm)

model1 = structure1[0]
model2 = structure2[0]

# First chain of each model.
chain1 = model1.get_list()[0]
chain2 = model2.get_list()[0]

In [None]:
# Display every atom of chain1 for inspection.
list(chain1.get_atoms())

In [None]:
#@title normalize pdbs
# Clean and normalize every target / no-target model pair. The two folders
# are paired by sorted position, so file i of dir1 must belong with file i
# of dir2.
dir1 = '/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Conformations/models_target'
dir2 = '/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Conformations/models_notarget'
out_dir1 = '/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Conformations/normalized_pdbs/Normalized_target' # file where you want to save to
out_dir2 = '/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Conformations/normalized_pdbs/Normalized_notarget'

files1 = sorted(os.listdir(dir1))
files2 = sorted(os.listdir(dir2))

for i, name1 in enumerate(files1):
  name2 = files2[i]
  # File names carry the PDB code in characters 0-3 and the chain id in character 5.
  pdb1, pdb2 = name1[0:4], name2[0:4]
  chain1, chain2 = name1[5], name2[5]

  # Keep only the chain of interest (no hydrogens / hetero residues) ...
  clean1 = clean(f'{dir1}/{name1}', chain1)
  clean2 = clean(f'{dir2}/{name2}', chain2)

  # ... then trim both to their common residues, renumber and save.
  normalize_struct(clean1, pdb1, chain1, clean2, pdb2, chain2, out_dir1, out_dir2)

In [None]:
# Debug display of the residue list as text.
# NOTE(review): no module-level `chain` is assigned anywhere in the visible cells
# (only `chain1`/`chain2`); confirm this cell still runs or switch it to one of those.
str(list(chain.get_residues()))

In [None]:
# Debug cell: run the per-pair setup of the RMSD loop for the third file pair only.
# Relies on dir1/dir2 (path strings) and files1/files2 from a previously run cell.
pdb1_fnm = dir1 + f'/{files1[2]}'
pdb2_fnm = dir2 + f'/{files2[2]}'
# Chain id = character 5 of the file name.
chain1 = files1[2][5]
chain2 = files2[2][5]

#nb_notar = summary1.iloc[i,0][5]
#nb_orig = summary2.iloc[i,0][5]

seq_AA = nanobody_sequence(pdb1_fnm, chain1)
imgt, start = anarci_df(seq_AA)

parser = PDBParser()
structure1 = parser.get_structure('Protein 1', pdb1_fnm)
# Backbone-based superposition of file 2 onto file 1.
structure2 = superimpose_backbone(pdb1_fnm, pdb2_fnm)

model1 = structure1[0]
model2 = structure2[0]

# From here on chain1/chain2 are chain objects, no longer the id strings set above.
chain1 = model1.get_list()[0]
chain2 = model2.get_list()[0]

#start += chain1.get_list()[0].get_id()[1] - 1

#loop indexes
# Row index of an IMGT position label in the numbering table, shifted by `start`.
# NOTE(review): these look like 0-based sequence offsets; confirm they are meant to be
# compared with residue numbers, which normalize_struct writes starting at 1.
CDR1_start = imgt['num'][imgt['num'] == '27'].index[0]+start
CDR1_end = imgt['num'][imgt['num'] == '39'].index[0]+start

CDR2_start = imgt['num'][imgt['num'] == '56'].index[0]+start
CDR2_end = imgt['num'][imgt['num'] == '67'].index[0]+start

CDR3_start = imgt['num'][imgt['num'] == '105'].index[0]+start
CDR3_end = imgt['num'][imgt['num'] == '117'].index[0]+start

In [None]:
#@title run rmsd
# For every with-target / without-target pair: superimpose the pair on the
# backbone, split the chain into FR1..FR4 / CDR1..CDR3 windows and record the
# backbone RMSD of each window plus the combined framework, CDR and total
# RMSD. One row per pair is appended to RMSDs:
#   [file name, 'FR1:x', 'CDR1:x', 'FR2:x', 'CDR2:x', 'FR3:x', 'CDR3:x',
#    'FR4:x', 'FR_avg:x', 'CDR_avg:x', 'TOT_RMSD:x']

dir1 = '/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Conformations/control_nbs/normalized_with_target'
dir2 = '/content/drive/MyDrive/Colab Notebooks/Rishi_project_files/Compare_Conformations/control_nbs/normalized_without_target'
files1 = sorted(os.listdir(dir1))
files2 = sorted(os.listdir(dir2))

# Files are paired purely by sorted position, so both folders must line up.
if len(files1) != len(files2):
  raise ValueError(
      f'cannot pair files by position: {len(files1)} files in dir1 but {len(files2)} in dir2')

BACKBONE_ATOMS = ('C', 'N', 'O', 'CA')
REGION_NAMES = ('FR1', 'CDR1', 'FR2', 'CDR2', 'FR3', 'CDR3', 'FR4')
# Numbering labels at which a new region starts (CDR1, FR2, CDR2, FR3, CDR3, FR4).
# NOTE(review): with '67' and '117' as boundaries, CDR2 covers 56-66 and CDR3
# covers 105-116; confirm this is the intended region definition.
REGION_BOUNDARIES = ('27', '39', '56', '67', '105', '117')


def _boundary_row(imgt, label):
  """Return the row index of numbering label `label` in the ANARCI table."""
  hits = imgt['num'][imgt['num'] == label].index
  if len(hits) == 0:
    raise ValueError(f'position {label} is missing from the ANARCI numbering')
  return hits[0]


def _region_backbone(chain, lower, upper):
  """Backbone atoms of the residues numbered lower <= n < upper."""
  return [atom for atom in chain.get_atoms()
          if lower <= atom.get_parent().get_id()[1] < upper
          and atom.id in BACKBONE_ATOMS]


def _squared_deviation(atoms_a, atoms_b):
  """Sum of squared distances between position-matched atoms."""
  total = 0.0
  for atom_a, atom_b in zip(atoms_a, atoms_b):
    total += math.dist(atom_a.get_coord(), atom_b.get_coord()) ** 2
  return total


def _rmsd(squared_total, n_atoms):
  """RMSD from a squared-distance sum; NaN when the region has no atoms."""
  return math.sqrt(squared_total / n_atoms) if n_atoms else float('nan')


RMSDs = []
for i in range(len(files1)):
  pdb1_fnm = dir1 + f'/{files1[i]}'
  pdb2_fnm = dir2 + f'/{files2[i]}'
  # Chain id = character 5 of the file name.
  chain_id1 = files1[i][5]

  seq_AA = nanobody_sequence(pdb1_fnm, chain_id1)
  imgt, start = anarci_df(seq_AA)

  parser = PDBParser()
  structure1 = parser.get_structure('Protein 1', pdb1_fnm)
  structure2 = superimpose_backbone(pdb1_fnm, pdb2_fnm)

  chain1 = structure1[0].get_list()[0]
  chain2 = structure2[0].get_list()[0]

  # Region edges, compared against residue numbers below.
  # NOTE(review): `start` and the table rows look like 0-based sequence
  # offsets. If the input files are numbered from 1 (as normalize_struct
  # writes them) every window is shifted one residue towards the N-terminus
  # and the last residue is never counted - confirm, and add the first
  # residue number to the edges if so.
  edges = [start]
  edges += [_boundary_row(imgt, label) + start for label in REGION_BOUNDARIES]
  edges.append(len(imgt) + start)

  labelled = []
  fr_sq, fr_n = 0.0, 0
  cdr_sq, cdr_n = 0.0, 0
  for name, lower, upper in zip(REGION_NAMES, edges[:-1], edges[1:]):
    atoms1 = _region_backbone(chain1, lower, upper)
    atoms2 = _region_backbone(chain2, lower, upper)
    # Atoms are matched by list position; unequal counts would silently
    # compare the wrong atoms, so fail loudly instead.
    if len(atoms1) != len(atoms2):
      raise ValueError(
          f'{files1[i]} / {files2[i]}: {name} has {len(atoms1)} vs {len(atoms2)} backbone atoms')

    region_sq = _squared_deviation(atoms1, atoms2)
    labelled.append(f'{name}:{_rmsd(region_sq, len(atoms1)):.3f}')

    if name.startswith('FR'):
      fr_sq += region_sq
      fr_n += len(atoms1)
    else:
      cdr_sq += region_sq
      cdr_n += len(atoms1)

  # Combined values are atom-count-weighted, i.e. the RMSD over the pooled atoms.
  fr_avg = _rmsd(fr_sq, fr_n)
  cdr_avg = _rmsd(cdr_sq, cdr_n)
  tot_avg = _rmsd(fr_sq + cdr_sq, fr_n + cdr_n)

  RMSDs.append([files1[i]] + labelled
               + [f'FR_avg:{fr_avg:.3f}', f'CDR_avg:{cdr_avg:.3f}', f'TOT_RMSD:{tot_avg:.3f}'])

In [None]:
# Display the rows collected by the RMSD loop.
RMSDs

In [None]:
#@title test individual full rmsd values
# All-atom superposition of the first normalized no-target / target pair;
# the last expression displays the RMSD of that fit.
#
# The cell lists its own folders instead of indexing dir1/dir2: earlier cells
# rebind those names to path strings, so dir1[0] would just be the character '/'.
# NOTE(review): these paths have no 'Colab Notebooks' component, unlike the
# folders used by the other cells - confirm which location is current.
notarget_dir = '/content/drive/MyDrive/Rishi_project_files/Compare_Conformations/normalized_pdbs/Normalized_notarget'
target_dir = '/content/drive/MyDrive/Rishi_project_files/Compare_Conformations/normalized_pdbs/Normalized_target'

pdb1_fnm = f'{notarget_dir}/{sorted(os.listdir(notarget_dir))[0]}'
pdb2_fnm = f'{target_dir}/{sorted(os.listdir(target_dir))[0]}'

parser = PDBParser()

# Get the structures
structure1 = parser.get_structure("Protein 1", pdb1_fnm)
structure2 = parser.get_structure("Protein 2", pdb2_fnm)

# Get the first model in the structures
model1 = structure1[0]
model2 = structure2[0]

# Choose the first chain in the model
chain1 = model1.get_list()[0]
chain2 = model2.get_list()[0]

# Get the list of atoms for the full chains (must be equally long and in the same order)
full_atoms1 = list(chain1.get_atoms())
full_atoms2 = list(chain2.get_atoms())

# Superimpose structure2 on structure1
super_imposer = Superimposer()
super_imposer.set_atoms(full_atoms1, full_atoms2)
super_imposer.apply(full_atoms2)

super_imposer.rms