In [38]:
import numpy as np

AAs = ['A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
code3to1 = {'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K',
     'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N', 
     'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W', 
     'ALA': 'A', 'VAL':'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M'}

def get_contacts(chain, chains_list, data):
  """
  get the contact (residue, pos) between `chain' and each chain in `chain_list' using `data'
  `data' is dictionary containing information of interchain contact points computed using ChimeraX
  """
  chain_contacts = {AA:[] for AA in AAs}
  for i in range(len(data['c1'])):
    if data['c1'][i] in chains_list and data['c2'][i] in chains_list:
      if data['c1'][i]!=data['c2'][i]:
        if chain in [data['c1'][i],data['c2'][i]]:
          if chain==data['c1'][i] and data['AA1'][i] in code3to1.keys():
            chain_contacts[code3to1[data['AA1'][i]]].append(data['pos1'][i])
          elif chain==data['c2'][i] and data['AA2'][i] in code3to1.keys():
            chain_contacts[code3to1[data['AA2'][i]]].append(data['pos2'][i])
  return chain_contacts

def get_data(filepath):
  """
  Create a dictionary of chain ID, amino-acid type, and position 
  for each pair of inter-chain contacts computed using ChimeraX
  """
  f = open(filepath, "r")
  file_text = f.read()
  f.close()
  data = {'c1':[], 'AA1':[], 'pos1':[], 'c2':[], 'AA2':[], 'pos2':[]}
  for entry in file_text.split('\n'):
    entry = entry.split()
    data['c1'].append(entry[0][-1])
    data['AA1'].append(entry[1])
    data['pos1'].append(entry[2])
  
    data['c2'].append(entry[4][-1])
    data['AA2'].append(entry[5])
    data['pos2'].append(entry[6])
  return data

def get_AA_contact_count(chain, chain_list, data):
  chain_contacts = get_contacts(chain, chain_list, data)
  for aa in chain_contacts: # remove duplicates
    chain_contacts[aa] = list(np.unique(chain_contacts[aa]))
  aa_contact_count ={aa:0 for aa in chain_contacts}
  for aa in chain_contacts:
    aa_contact_count[aa] = len(chain_contacts[aa])
  return aa_contact_count

In [39]:
def print_fss_counts(p1, p1_chain, FSS_p1, p2, p2_chain, FSS_p2, all_chains, data):
  p1_contacts = get_AA_contact_count(p1_chain, all_chains, data)
  p2_contacts = get_AA_contact_count(p2_chain, all_chains, data)
  print('FSS(',p1, ') count in', p1, 'and', p2)
  total_fss_p1_in_p1 = 0
  total_fss_p1_in_p2 = 0
  for aa in FSS_p1:
    print(aa,':',p1, ':',p1_contacts[aa], '\t|', p2,':',p2_contacts[aa])
    total_fss_p1_in_p1 += p1_contacts[aa]
    total_fss_p1_in_p2 += p2_contacts[aa]
  print('Total\n#',':',p1, ':',total_fss_p1_in_p1, '\t|', p2,':',total_fss_p1_in_p2)
  
  print('_'*50)
  print('FSS(',p2, ') count in', p1, 'and', p2)
  total_fss_p2_in_p1 = 0
  total_fss_p2_in_p2 = 0
  for aa in FSS_p2:
    print(aa,':',p1, ':',p1_contacts[aa], '\t|', p2,':',p2_contacts[aa])
    total_fss_p2_in_p1 += p1_contacts[aa]
    total_fss_p2_in_p2 += p2_contacts[aa]
  print('Total\n#',':',p1, ':',total_fss_p2_in_p1, '\t|', p2,':',total_fss_p2_in_p2)

# Histone H2A and H2B

In [40]:
FSS_h2a = ['L', 'G', 'N']
FSS_h2b = ['S', 'M', 'K', 'T', 'Y', 'F']

In [41]:
filepath = "./datasets/histone/1aoi_contacts"
data = get_data(filepath)
h2a_chains = ['C', 'G']
h2b_chains = ['D', 'H']
other_chains = ['A', 'E', 'B', 'F', 'I', 'J']
p1 = 'h2a'
p2 = 'h2b'
FSS_p1 = FSS_h2a
FSS_p2 = FSS_h2b
p1_chain = 'C'
p2_chain = 'D'
all_chains = h2a_chains+h2b_chains+other_chains

FileNotFoundError: [Errno 2] No such file or directory: './datasets/histone/1aoi_contacts'

In [None]:
print_fss_counts(p1, p1_chain, FSS_p1, p2, p2_chain, FSS_p2, all_chains, data)

In [None]:
filepath = "./datasets/histone/3kwq_contacts"
data = get_data(filepath)
h2a_chains = ['C', 'G']
h2b_chains = ['D', 'H']
other_chains = ['A', 'E', 'B', 'F', 'I', 'J']
p1 = 'h2a'
p2 = 'h2b'
FSS_p1 = FSS_h2a
FSS_p2 = FSS_h2b
p1_chain = 'C'
p2_chain = 'D'
all_chains = h2a_chains+h2b_chains+other_chains
print_fss_counts(p1, p1_chain, FSS_p1, p2, p2_chain, FSS_p2, all_chains, data)

# Tubulin-$\alpha$ vs Tubulin-$\beta$

In [None]:
FSS_alpha = ['K', 'I', 'H', 'C', 'Y']
FSS_beta = ['M', 'Q', 'N', 'F', 'A']

In [None]:
filepath = "../datasets/tubulin/3jar_contacts"
data = get_data(filepath)
alpha_chains = ['E', 'J', 'C', 'L', 'A', 'K']
beta_chains = ['F', 'G', 'D', 'I', 'B', 'H']
all_chains = alpha_chains+beta_chains
p1 = 'alpha'
p2 = 'beta'
FSS_p1 = FSS_alpha
FSS_p2 = FSS_beta
p1_chain = 'A'
p2_chain = 'B'
print_fss_counts(p1, p1_chain, FSS_p1, p2, p2_chain, FSS_p2, all_chains, data)

In [None]:
filepath = "../datasets/tubulin/5n5n_contacts"
data = get_data(filepath)
alpha_chains = ['G', 'H', 'I', 'J', 'K', 'L']
beta_chains = ['A', 'B', 'C', 'D', 'E', 'F']
all_chains = alpha_chains+beta_chains
p1 = 'alpha'
p2 = 'beta'
FSS_p1 = FSS_alpha
FSS_p2 = FSS_beta
p1_chain = 'G'
p2_chain = 'B'
print_fss_counts(p1, p1_chain, FSS_p1, p2, p2_chain, FSS_p2, all_chains, data)