In [1]:
import Bio.PDB
import numpy as np
import os
from matplotlib import pyplot as plt
% matplotlib inline



In [None]:
# Characters accepted as chain identifiers when chains are filtered by id:
# the lower-case ASCII alphabet followed by the upper-case one.
ascii_letters = (
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
)

def get_atom_distance(atom1, atom2):
    """Return the Euclidean distance between the ``.coord`` vectors of two atoms."""
    delta = atom1.coord - atom2.coord
    squared_length = np.sum(np.square(delta))
    return np.sqrt(squared_length)

def calc_residue_dist(residue_one, residue_two, mode='min'):
    """Return the distance between two residues.

    Parameters
    ----------
    residue_one, residue_two
        Residue-like objects. For ``mode='CA'`` they must be indexable by the
        atom name ``"CA"``; for ``mode='min'`` they must be iterable over
        atoms. Every atom must expose a ``.coord`` vector.
    mode : {'min', 'CA'}
        ``'CA'`` measures the distance between the two ``"CA"`` atoms;
        ``'min'`` returns the smallest distance over all atom pairs.

    Returns
    -------
    NumPy scalar holding the requested distance.

    Raises
    ------
    AssertionError
        If ``mode`` is neither ``'CA'`` nor ``'min'``.
    ValueError
        If ``mode='min'`` and either residue contains no atoms.
    """
    assert mode in ['CA', 'min'], "mode must be 'CA' or 'min', got %r" % (mode,)
    if mode == 'CA':
        diff_vector = residue_one["CA"].coord - residue_two["CA"].coord
        return np.sqrt(np.sum(diff_vector * diff_vector))

    # Iterate the residues directly instead of calling Residue.get_atom(),
    # which is deprecated/removed in newer Biopython releases.
    coords_one = np.array([atom.coord for atom in residue_one])
    coords_two = np.array([atom.coord for atom in residue_two])
    if coords_one.size == 0 or coords_two.size == 0:
        raise ValueError("cannot compute a minimal distance for a residue without atoms")

    # Broadcast to an (n_atoms_one, n_atoms_two, n_dims) array of differences
    # so every atom pair is evaluated without a Python-level double loop.
    diffs = coords_one[:, np.newaxis, :] - coords_two[np.newaxis, :, :]
    squared_distances = np.sum(diffs * diffs, axis=-1)
    # sqrt is monotonic, so take the minimum first and the root once.
    return np.sqrt(squared_distances.min())

def strip_non_aa_residues(residues):
    """Return the leading run of amino-acid residues.

    The sequence is cut at the first residue whose ``resname`` is not listed
    in the module-level ``aa3``; that residue and everything after it are
    dropped. If every residue is listed in ``aa3`` the whole sequence is
    returned, and an empty input yields an empty result.

    Parameters
    ----------
    residues
        Sliceable sequence (e.g. a list) of objects exposing ``.resname``.

    Returns
    -------
    A slice of ``residues`` holding the leading amino-acid residues.
    """
    # Default to "keep everything"; only shorten when a non-amino-acid
    # residue is actually found. Slicing at the last loop index would drop
    # the final residue of an all-amino-acid chain and fail on empty input.
    cutoff = len(residues)
    for index, residue in enumerate(residues):
        if residue.resname not in aa3:
            cutoff = index
            break
    return residues[:cutoff]

def dist_to_other_residues(residue, all_residues, distance_type='CA'):
    """Return the distances from ``residue`` to each entry of ``all_residues``.

    Parameters
    ----------
    residue
        Reference residue.
    all_residues
        Sized, iterable collection of residues to measure against.
    distance_type : {'CA', 'min'}
        Forwarded to ``calc_residue_dist`` as its ``mode`` argument.

    Returns
    -------
    numpy.ndarray
        1-D float array with one distance per entry of ``all_residues``,
        in the same order.
    """
    # Allocate an explicit float vector: np.empty_like on a list of residue
    # objects infers dtype/shape from the objects themselves.
    distances = np.empty(len(all_residues), dtype=float)
    for index, other_residue in enumerate(all_residues):
        # Honour the requested metric; it used to be silently ignored.
        distances[index] = calc_residue_dist(residue, other_residue, mode=distance_type)
    return distances

def calc_dist_matrix(chain1, chain2, stripping=True):
    """Return the matrix of residue-residue distances between two chains.

    Distances come from ``calc_residue_dist`` with its default mode, i.e.
    the minimal inter-atomic distance for each residue pair.

    Parameters
    ----------
    chain1, chain2
        Objects exposing ``get_residues()``.
    stripping : bool
        When true (the default), each residue list is first passed through
        ``strip_non_aa_residues``; when false, all residues are used.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_residues_chain1, n_residues_chain2)``;
        entry ``[row, col]`` is the distance between residue ``row`` of
        ``chain1`` and residue ``col`` of ``chain2``.
    """
    chain1_residues = list(chain1.get_residues())
    chain2_residues = list(chain2.get_residues())
    if stripping:
        chain1_residues = strip_non_aa_residues(chain1_residues)
        chain2_residues = strip_non_aa_residues(chain2_residues)

    # Use the builtin float: the np.float alias was removed from NumPy.
    answer = np.zeros((len(chain1_residues), len(chain2_residues)), dtype=float)
    for row, residue_one in enumerate(chain1_residues):
        for col, residue_two in enumerate(chain2_residues):
            answer[row, col] = calc_residue_dist(residue_one, residue_two)
    return answer


def get_minimal_distances_in_a_complex(chains):
    """Return the element-wise minimum of all pairwise chain distance matrices.

    Only chains whose ``id`` is found in ``ascii_letters`` are considered.
    A distance matrix is computed with ``calc_dist_matrix`` for every ordered
    pair of those chains (each chain is also paired with itself), and the
    matrices are reduced with an element-wise minimum along the pair axis.
    """
    lettered_chains = [candidate for candidate in chains if candidate.id in ascii_letters]
    pairwise_matrices = [
        calc_dist_matrix(first, second)
        for first in lettered_chains
        for second in lettered_chains
    ]
    stacked = np.array(pairwise_matrices)
    return np.amin(stacked, axis=0)

In [2]:
# Root folder under which the structure files used by this notebook are looked up.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
files_dump_folder = '/Users/karen/HIS3InterspeciesEpistasis/Analysis/Karen/files_dump/'

# Three-letter residue names accepted as amino acids by strip_non_aa_residues.
aa3 = [
    'ALA', 'CYS', 'ASP', 'GLU', 'PHE', 'GLY', 'HIS',
    'ILE', 'LYS', 'LEU', 'MET', 'ASN', 'PRO', 'GLN',
    'ARG', 'SER', 'THR', 'VAL', 'TRP', 'TYR', 'XXX',
]

In [3]:
# Locate the predicted structure inside the files dump and parse it.
structure_predictions_dir = os.path.join(files_dump_folder, 'structure_predictions')
pdb_file = os.path.join(
    structure_predictions_dir,
    'HIS3_saccharomyces_cerevisiae__swissmodel_prediction.pdb',
)
pdb_parser = Bio.PDB.PDBParser()
# NOTE(review): the structure id says 'itasser' while the file name says
# 'swissmodel' -- confirm which label is intended.
structure = pdb_parser.get_structure('His3_itasser', pdb_file)
# Keep the model stored at index 0 of the parsed structure.
model = structure[0]

In [6]:
# Upper-case ASCII alphabet; indexed to pick one-letter chain names.
ascii_letters_upper = 'abcdefghijklmnopqrstuvwxyz'.upper()

In [None]:
# NOTE(review): `cmd` is not defined anywhere in this notebook; these lines
# look like they are meant to be run inside PyMOL -- confirm.
# Commands noted as preparation:
#   cd /Users/karen/HIS3InterspeciesEpistasis/Analysis/Karen/files_dump/structure_visualizations/
#   load 4lom_assembly.pdb

# Load the monomer model once per index 1..24 as objects swiss-1 .. swiss-24.
for copy_index in range(1, 25):
    cmd.do("load HIS3_saccharomyces_cerevisiae__swissmodel_prediction__mono.pdb, swiss-%s" % copy_index)

# Align copy N onto 4lom_assembly with target_state=N.
for copy_index in range(1, 25):
    cmd.do("align swiss-%s, 4lom_assembly, target_state=%s" % (copy_index, copy_index))

# Rename chains because currently there is only chain A.
# Indexing starts at 1, so the letters used are 'B'..'Y' ('A' is skipped).
for copy_index in range(1, 25):
    chain_letter = ascii_letters_upper[copy_index]
    cmd.do("alter swiss-%s, chain='%s'" % (copy_index, chain_letter))

In [5]:
# Join the names swiss-1 .. swiss-24 with 'or' into a single expression.
# Each name keeps its surrounding spaces, so the result reads
# " swiss-1 or swiss-2 or ... swiss-24 ".
merger = 'or'.join([" swiss-%s " % i for i in range(1, 25)])
# Function-call form prints the same text on Python 2 and also runs on Python 3.
print(merger)

 swiss-1 or swiss-2 or swiss-3 or swiss-4 or swiss-5 or swiss-6 or swiss-7 or swiss-8 or swiss-9 or swiss-10 or swiss-11 or swiss-12 or swiss-13 or swiss-14 or swiss-15 or swiss-16 or swiss-17 or swiss-18 or swiss-19 or swiss-20 or swiss-21 or swiss-22 or swiss-23 or swiss-24 
