In [1]:
# One-letter codes of the 20 standard amino acids. get_contacts builds its
# per-residue dictionary by iterating this list, so the order here is the
# order in which residues appear in the printed results.
AAs = [
    'A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I',
    'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V',
]

# Three-letter residue name -> one-letter code.
code3to1 = {
    'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q',
    'LYS': 'K', 'ILE': 'I', 'PRO': 'P', 'THR': 'T',
    'PHE': 'F', 'ASN': 'N', 'GLY': 'G', 'HIS': 'H',
    'LEU': 'L', 'ARG': 'R', 'TRP': 'W', 'ALA': 'A',
    'VAL': 'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M',
}

# Residue subsets that are tallied separately further down.
# NOTE(review): the notebook never spells out what "FSS" stands for - document it.
FSS_alpha = ['K', 'I', 'H', 'C', 'Y']
FSS_beta = ['M', 'Q', 'N', 'F', 'A']

In [2]:
import numpy as np

In [3]:
def get_contacts(chain='A'):
  """Group the contact positions found on `chain` by one-letter residue code.

  Reads the notebook globals `data`, `tubulin_chains`, `AAs` and `code3to1`
  at call time, so they must be defined before this function is called.

  A row of `data` is used only when both of its chain ids are listed in
  `tubulin_chains`, the two ids differ, and one of them equals `chain`.
  The residue and position are taken from whichever side of the row belongs
  to `chain`; rows whose residue name is not a key of `code3to1` are ignored.

  Returns a dict with one entry per code in `AAs`, each holding the matching
  position values in row order (duplicates are kept).
  """
  grouped = {AA: [] for AA in AAs}
  for i, first_chain in enumerate(data['c1']):
    # Both partners have to be tubulin chains.
    if first_chain not in tubulin_chains or data['c2'][i] not in tubulin_chains:
      continue
    second_chain = data['c2'][i]
    # Contacts inside a single chain are not counted.
    if first_chain == second_chain:
      continue
    # Pick the side of the row that belongs to the requested chain.
    if chain == first_chain:
      residue, position_key = data['AA1'][i], 'pos1'
    elif chain == second_chain:
      residue, position_key = data['AA2'][i], 'pos2'
    else:
      continue
    if residue in code3to1:
      grouped[code3to1[residue]].append(data[position_key][i])
  return grouped

In [4]:
def get_data(filepath):
  """Parse a whitespace-delimited contacts file into per-column lists.

  Each non-blank line is split on whitespace and read positionally:
    token 0 -> 'c1'   (only its last character is kept)
    token 1 -> 'AA1'
    token 2 -> 'pos1'
    token 4 -> 'c2'   (only its last character is kept)
    token 5 -> 'AA2'
    token 6 -> 'pos2'
  Token 3 and anything after token 6 are ignored. All values stay strings.

  Blank or whitespace-only lines (including the one produced by a trailing
  newline) are skipped instead of raising IndexError, so an empty file
  yields six empty lists.

  Args:
    filepath: path of the text file to read.

  Returns:
    dict with keys 'c1', 'AA1', 'pos1', 'c2', 'AA2', 'pos2'; the lists all
    have the same length, one element per parsed line.

  Raises:
    IndexError: a non-blank line has fewer than seven tokens.
  """
  data = {'c1':[], 'AA1':[], 'pos1':[], 'c2':[], 'AA2':[], 'pos2':[]}
  # The context manager closes the file even if a malformed line raises.
  with open(filepath, "r") as f:
    for line in f:
      entry = line.split()
      if not entry:
        # Nothing to parse on an empty line.
        continue
      data['c1'].append(entry[0][-1])
      data['AA1'].append(entry[1])
      data['pos1'].append(entry[2])

      data['c2'].append(entry[4][-1])
      data['AA2'].append(entry[5])
      data['pos2'].append(entry[6])
  return data

In [5]:
# Chain ids grouped as alpha / beta for the 3jar contacts file.
alpha_chains = ['E', 'J', 'C', 'L', 'A', 'K']
beta_chains = ['F', 'G', 'D', 'I', 'B', 'H']
tubulin_chains = [*alpha_chains, *beta_chains]

# get_contacts reads `data` and `tubulin_chains` as globals, so both have to
# be in place before it is called in the following cells.
filepath = "./datasets/tubulin/3jar_contacts.txt"
data = get_data(filepath)

In [6]:
# Reduce each residue's position list to its unique values, for chain 'A' and
# chain 'B'. np.unique also sorts; the positions are strings, so the
# resulting order is lexicographic rather than numeric.
A_chain_contacts = {
    aa: list(np.unique(positions))
    for aa, positions in get_contacts('A').items()
}
B_chain_contacts = {
    aa: list(np.unique(positions))
    for aa, positions in get_contacts('B').items()
}

In [7]:
# Number of unique contact positions per residue code in A_chain_contacts.
A_contact_count = {aa: len(positions) for aa, positions in A_chain_contacts.items()}
print(A_contact_count)

{'A': 3, 'R': 4, 'N': 4, 'D': 1, 'C': 0, 'E': 5, 'Q': 5, 'G': 1, 'H': 3, 'I': 1, 'L': 1, 'K': 8, 'M': 1, 'F': 2, 'P': 5, 'S': 2, 'T': 6, 'W': 2, 'Y': 3, 'V': 6}


In [8]:
# Number of unique contact positions per residue code in B_chain_contacts.
B_contact_count = {aa: len(positions) for aa, positions in B_chain_contacts.items()}
print(B_contact_count)

{'A': 6, 'R': 7, 'N': 5, 'D': 3, 'C': 1, 'E': 1, 'Q': 6, 'G': 1, 'H': 1, 'I': 1, 'L': 2, 'K': 6, 'M': 2, 'F': 3, 'P': 7, 'S': 2, 'T': 3, 'W': 2, 'Y': 2, 'V': 5}


In [9]:
# Per-residue counts for the FSS_alpha residues, then the two column totals
# on a final line marked with '#'.
for aa in FSS_alpha:
  print(aa,': alpha:',A_contact_count[aa], '| beta:',B_contact_count[aa])
total_fss_alpha_in_A = sum(A_contact_count[aa] for aa in FSS_alpha)
total_fss_alpha_in_B = sum(B_contact_count[aa] for aa in FSS_alpha)
print('#',': alpha:',total_fss_alpha_in_A, '| beta:',total_fss_alpha_in_B)

K : alpha: 8 | beta: 6
I : alpha: 1 | beta: 1
H : alpha: 3 | beta: 1
C : alpha: 0 | beta: 1
Y : alpha: 3 | beta: 2
# : alpha: 15 | beta: 11


In [10]:
# Per-residue counts for the FSS_beta residues, then the two column totals
# on a final line marked with '#'.
for aa in FSS_beta:
  print(aa,': alpha:',A_contact_count[aa], '| beta:',B_contact_count[aa])
total_fss_beta_in_A = sum(A_contact_count[aa] for aa in FSS_beta)
total_fss_beta_in_B = sum(B_contact_count[aa] for aa in FSS_beta)
print('#',': alpha:',total_fss_beta_in_A, '| beta:',total_fss_beta_in_B)

M : alpha: 1 | beta: 2
Q : alpha: 5 | beta: 6
N : alpha: 4 | beta: 5
F : alpha: 2 | beta: 3
A : alpha: 3 | beta: 6
# : alpha: 15 | beta: 22


In [11]:
# Show the unique contact positions collected for chain 'A', keyed by one-letter residue code.
A_chain_contacts

{'A': ['100', '180', '403'],
 'R': ['105', '2', '214', '221'],
 'N': ['101', '249', '258', '329'],
 'D': ['76'],
 'C': [],
 'E': ['254', '284', '77', '90', '97'],
 'Q': ['11', '128', '176', '256', '85'],
 'G': ['131'],
 'H': ['283', '406', '88'],
 'I': ['332'],
 'L': ['248'],
 'K': ['280', '326', '336', '352', '394', '401', '60', '96'],
 'M': ['398'],
 'F': ['404', '87'],
 'P': ['222', '261', '263', '325', '348'],
 'S': ['178', '439'],
 'T': ['179', '223', '253', '257', '349', '73'],
 'W': ['346', '407'],
 'Y': ['210', '224', '262'],
 'V': ['177', '181', '260', '324', '353', '62']}

In [12]:
# Show the unique contact positions collected for chain 'B', keyed by one-letter residue code.
B_chain_contacts

{'A': ['285', '397', '403', '438', '56', '57'],
 'R': ['2', '253', '284', '400', '401', '48', '88'],
 'N': ['101', '249', '258', '349', '350'],
 'D': ['130', '179', '329'],
 'C': ['131'],
 'E': ['183'],
 'Q': ['11', '247', '282', '394', '85', '96'],
 'G': ['100'],
 'H': ['406'],
 'I': ['347'],
 'L': ['248', '333'],
 'K': ['105', '176', '254', '326', '352', '60'],
 'M': ['325', '398'],
 'F': ['214', '262', '404'],
 'P': ['184', '222', '245', '261', '348', '72', '89'],
 'S': ['178', '324'],
 'T': ['180', '221', '314'],
 'W': ['346', '407'],
 'Y': ['210', '283'],
 'V': ['177', '181', '257', '260', '62']}

In [13]:
# Chain ids grouped as alpha / beta for the 5n5n contacts file. These rebind
# the globals used for the previous file, so every get_contacts call from
# here on works on the 5n5n data.
alpha_chains = ['G', 'H', 'I', 'J', 'K', 'L']
beta_chains = ['A', 'B', 'C', 'D', 'E', 'F']
tubulin_chains = [*alpha_chains, *beta_chains]

filepath = "./datasets/tubulin/5n5n_contacts.txt"
data = get_data(filepath)

In [14]:
def get_contacts(chain='A'):
  """Group the contact positions found on `chain` by one-letter residue code.

  NOTE(review): `get_contacts` is already defined earlier in this notebook
  with the same behavior; this redefinition can probably be dropped.

  Reads the notebook globals `data`, `tubulin_chains`, `AAs` and `code3to1`
  at call time. A row of `data` is used only when both of its chain ids are
  listed in `tubulin_chains`, the two ids differ, and one of them equals
  `chain`. The residue and position come from the side of the row that
  belongs to `chain`; residue names missing from `code3to1` are ignored.

  Returns a dict with one entry per code in `AAs`, each holding the matching
  position values in row order (duplicates are kept).
  """
  grouped = {AA: [] for AA in AAs}
  for i, first_chain in enumerate(data['c1']):
    # Both partners have to be tubulin chains.
    if first_chain not in tubulin_chains or data['c2'][i] not in tubulin_chains:
      continue
    second_chain = data['c2'][i]
    # Contacts inside a single chain are not counted.
    if first_chain == second_chain:
      continue
    # Pick the side of the row that belongs to the requested chain.
    if chain == first_chain:
      residue, position_key = data['AA1'][i], 'pos1'
    elif chain == second_chain:
      residue, position_key = data['AA2'][i], 'pos2'
    else:
      continue
    if residue in code3to1:
      grouped[code3to1[residue]].append(data[position_key][i])
  return grouped

In [15]:
# Same de-duplication as for the first file. The A_/B_ variable prefixes are
# carried over from that analysis: here they hold chain 'G' (listed in
# alpha_chains) and chain 'B' (listed in beta_chains).
# np.unique sorts the string positions lexicographically.
A_chain_contacts = {
    aa: list(np.unique(positions))
    for aa, positions in get_contacts('G').items()
}
B_chain_contacts = {
    aa: list(np.unique(positions))
    for aa, positions in get_contacts('B').items()
}

In [16]:
# Number of unique contact positions per residue code in A_chain_contacts.
A_contact_count = {aa: len(positions) for aa, positions in A_chain_contacts.items()}
print(A_contact_count)

{'A': 3, 'R': 3, 'N': 4, 'D': 3, 'C': 1, 'E': 5, 'Q': 5, 'G': 1, 'H': 3, 'I': 0, 'L': 2, 'K': 4, 'M': 2, 'F': 3, 'P': 6, 'S': 1, 'T': 7, 'W': 2, 'Y': 3, 'V': 4}


In [17]:
# Number of unique contact positions per residue code in B_chain_contacts.
B_contact_count = {aa: len(positions) for aa, positions in B_chain_contacts.items()}
print(B_contact_count)

{'A': 5, 'R': 6, 'N': 5, 'D': 4, 'C': 0, 'E': 3, 'Q': 6, 'G': 1, 'H': 1, 'I': 1, 'L': 3, 'K': 5, 'M': 2, 'F': 3, 'P': 6, 'S': 2, 'T': 7, 'W': 2, 'Y': 3, 'V': 6}


In [18]:
# Residue subsets used by the two tally cells that follow; the values match
# the lists assigned in the notebook's first cell.
FSS_alpha = list('KIHCY')
FSS_beta = list('MQNFA')

In [19]:
# Per-residue counts for the FSS_alpha residues, then the two column totals
# on a final line marked with '#'.
for aa in FSS_alpha:
  print(aa,': alpha:',A_contact_count[aa], '| beta:',B_contact_count[aa])
total_fss_alpha_in_A = sum(A_contact_count[aa] for aa in FSS_alpha)
total_fss_alpha_in_B = sum(B_contact_count[aa] for aa in FSS_alpha)
print('#',': alpha:',total_fss_alpha_in_A, '| beta:',total_fss_alpha_in_B)

K : alpha: 4 | beta: 5
I : alpha: 0 | beta: 1
H : alpha: 3 | beta: 1
C : alpha: 1 | beta: 0
Y : alpha: 3 | beta: 3
# : alpha: 11 | beta: 10


In [20]:
# Per-residue counts for the FSS_beta residues, then the two column totals
# on a final line marked with '#'.
for aa in FSS_beta:
  print(aa,': alpha:',A_contact_count[aa], '| beta:',B_contact_count[aa])
total_fss_beta_in_A = sum(A_contact_count[aa] for aa in FSS_beta)
total_fss_beta_in_B = sum(B_contact_count[aa] for aa in FSS_beta)
print('#',': alpha:',total_fss_beta_in_A, '| beta:',total_fss_beta_in_B)

M : alpha: 2 | beta: 2
Q : alpha: 5 | beta: 6
N : alpha: 4 | beta: 5
F : alpha: 3 | beta: 3
A : alpha: 3 | beta: 5
# : alpha: 17 | beta: 21


In [21]:
# Show the unique contact positions collected for chain 'G' (stored under the A_ name), keyed by one-letter residue code.
A_chain_contacts

{'A': ['100', '180', '247'],
 'R': ['105', '214', '221'],
 'N': ['101', '249', '258', '329'],
 'D': ['251', '76', '98'],
 'C': ['347'],
 'E': ['254', '290', '55', '71', '77'],
 'Q': ['128', '15', '176', '285', '85'],
 'G': ['350'],
 'H': ['283', '406', '88'],
 'I': [],
 'L': ['248', '397'],
 'K': ['326', '352', '401', '96'],
 'M': ['1', '398'],
 'F': ['351', '404', '87'],
 'P': ['222', '261', '263', '325', '348', '72'],
 'S': ['178'],
 'T': ['179', '223', '253', '257', '349', '56', '73'],
 'W': ['346', '407'],
 'Y': ['210', '224', '262'],
 'V': ['177', '181', '324', '62']}

In [22]:
# Show the unique contact positions collected for chain 'B', keyed by one-letter residue code.
B_chain_contacts

{'A': ['256', '285', '397', '403', '56'],
 'R': ['2', '253', '284', '401', '48', '88'],
 'N': ['101', '249', '258', '349', '350'],
 'D': ['179', '251', '329', '90'],
 'C': [],
 'E': ['127', '327', '71'],
 'Q': ['11', '247', '282', '394', '85', '96'],
 'G': ['100'],
 'H': ['406'],
 'I': ['347'],
 'L': ['248', '286', '333'],
 'K': ['254', '326', '338', '352', '60'],
 'M': ['325', '398'],
 'F': ['214', '262', '404'],
 'P': ['222', '245', '261', '263', '348', '89'],
 'S': ['178', '324'],
 'T': ['180', '220', '221', '223', '314', '353', '57'],
 'W': ['346', '407'],
 'Y': ['210', '224', '283'],
 'V': ['177', '181', '257', '260', '351', '62']}