In [1]:
import MDAnalysis as mda
import numpy as np
import pandas as pd
import sys
#sys.path.append('/Users/dburns/Library/CloudStorage/Box-Box/my_scripts')
from ChACRA.ContactAnalysis.ContactFrequencies import *
import os
from ChACRA.ContactAnalysis.contact_functions import _parse_id

In [3]:
#u = mda.Universe('/Users/dburns/Library/CloudStorage/Box-Box/1_GROUP_data/Enzymes/atcase/from_pronto/1q95/1q95_minimized_nohoh.pdb')
u = mda.Universe('../../../from_box/atcase/1q95_minimized_nohoh.pdb')
protein = u.select_atoms('protein')
u = mda.Merge(protein)
universe = u.copy()
residues = {seg.segid: seg.residues.resnames for seg in universe.segments}
segids = list(residues.keys())
# make a square matrix that will be filled with True values for identical subunits
array = np.zeros((len(segids),len(segids)),dtype=np.bool_)
# every subunit is identical with itself
np.fill_diagonal(array,True)
# work with it as a df
identical_table = pd.DataFrame(array, columns=segids, index=segids)

#contact_dir = '/Users/dburns/Library/CloudStorage/Box-Box/1_GROUP_data/Enzymes/atcase/from_nova/contacts/freqs'
contact_dir = '../../../from_box/atcase/freqs'
contact_files = [f'{contact_dir}/{file}' for file in os.listdir(contact_dir)]
contact_files.sort()
cont = make_contact_frequency_dictionary(contact_files)
cont = ContactFrequencies(pd.DataFrame(cont))

In [4]:
identical_subunits = find_identical_subunits(u)

In [5]:
sorted_distances = get_chain_distances(identical_subunits, u)

In [7]:
sorted_distances

{0: {('A', 'C'): 1.7065504023539435,
  ('A', 'B'): 1.7316963647164045,
  ('A', 'E'): 2.276290121014368,
  ('A', 'D'): 14.695149666859432,
  ('A', 'F'): 23.72274977485476},
 1: {('G', 'J'): 1.7411957335158068,
  ('G', 'I'): 27.48132477068823,
  ('G', 'H'): 28.549628162117425,
  ('G', 'K'): 30.40205853744378,
  ('G', 'L'): 44.52822064542099},
 (0, 1): {('A', 'G'): 1.6783015165014643,
  ('A', 'H'): 2.3374572559516813,
  ('A', 'K'): 10.352570156109394,
  ('A', 'I'): 11.368680308402194,
  ('A', 'J'): 19.089641017069628,
  ('A', 'L'): 27.828149816818694}}

In [6]:
sorted_all_chain_dists =  get_all_chain_dists(u)

In [9]:
sorted_all_chain_dists

{'F': {'L': 1.6868845940431783,
  'E': 1.6997656953686828,
  'D': 1.7251738858528087,
  'B': 1.8194105875476971,
  'K': 2.539416945533425,
  'H': 11.278661873099622,
  'J': 12.173696860953113,
  'C': 14.49427711966249,
  'I': 16.762685420723876,
  'A': 23.72274977485476,
  'G': 32.02890115513436},
 'B': {'C': 1.6813929744436025,
  'H': 1.6994563762386845,
  'A': 1.7316963647164045,
  'F': 1.8194105875476971,
  'I': 2.405174661796874,
  'L': 10.596713522639728,
  'G': 11.396500598235063,
  'E': 12.988872906635818,
  'K': 16.547568071963777,
  'D': 24.546360514064247,
  'J': 30.74457695856177},
 'A': {'G': 1.6783015165014643,
  'C': 1.7065504023539435,
  'B': 1.7316963647164045,
  'E': 2.276290121014368,
  'H': 2.3374572559516813,
  'K': 10.352570156109394,
  'I': 11.368680308402194,
  'D': 14.695149666859432,
  'J': 19.089641017069628,
  'F': 23.72274977485476,
  'L': 27.828149816818694},
 'I': {'C': 1.649130675274525,
  'L': 1.8766633250333378,
  'B': 2.405174661796874,
  'D': 10.76695

# calculate the distances between each chain and all others

### Use contact df to determine which chains actually make contact

In [7]:

df = cont.freqs
partner_chains = get_contacting_chains(df)

In [19]:
partner_chains

{'B': {'A', 'B', 'C', 'E', 'F', 'H', 'I'},
 'C': {'A', 'B', 'C', 'D', 'F', 'G', 'I', 'J'},
 'D': {'C', 'D', 'E', 'F', 'I', 'J', 'L'},
 'E': {'A', 'B', 'D', 'E', 'F', 'J', 'K'},
 'A': {'A', 'B', 'C', 'E', 'G', 'H'},
 'L': {'D', 'F', 'I', 'L'},
 'J': {'C', 'D', 'E', 'G', 'J'},
 'I': {'B', 'C', 'D', 'I', 'L'},
 'G': {'A', 'C', 'G', 'J'},
 'H': {'A', 'B', 'H', 'K'},
 'F': {'B', 'C', 'D', 'E', 'F', 'K', 'L'},
 'K': {'E', 'F', 'H', 'K'}}

# New Averaging Loop

In [94]:
df_copy = df.copy()
# hold the averaged data
averaged_data = {}
# select one of each type of chain to be the representative chain for average naming
representative_chains = ['A', 'G']
# determine what the equivalent chain interactions relative to representative chains are for all the subunits
equivalent_interactions = get_equivalent_interactions(representative_chains,u)
# Retrieve/assign these from sorted identical subunits first indices.... although we want the other identical subunits first index to neighbor the chain in
# the first set... A needs to be next to G... This is the case for ATCASE - can just edit the chains in the PDB.... and produce a PDB for visualization along with averaging...
# identify priority name from each set of identical subunits

starting_length = len(df_copy.columns)

while len(df_copy.columns) > 0:
     resids = _parse_id(df_copy.columns[0])

     # find all of the other contacts that involve these residue names and numbers
     # intersubunit contacts can have swapped resids
     # so search with both regexes 
     regex1 = f"[A-Z1-9]+:{resids['resna']}:{resids['resida']}(?!\d)-[A-Z1-9]+:{resids['resnb']}:{resids['residb']}(?!\d)"
     regex2 = f"[A-Z1-9]+:{resids['resnb']}:{resids['residb']}(?!\d)-[A-Z1-9]+:{resids['resna']}:{resids['resida']}(?!\d)"
     regex = f"{regex1}|{regex2}"
     to_average = list(df_copy.filter(regex=regex, axis=1).columns)
     # now filter these to ensure you're only taking ones with equivalent_interaction names
     chaina, chainb = resids['chaina'], resids['chainb']




     if chaina == chainb:
          identical_pair = True
          for key, identical_subunit_list in identical_subunits.items():
               if chaina in identical_subunit_list:
                    for representative_chain in representative_chains:
                         if representative_chain in representative_chains:
                              representative_pair = (representative_chain, representative_chain)
                              
     # determine which equivalent_interaction set it came from 
     else:
          paira = (chaina,chainb)
          #pairb = (chainb,chaina)
          for representative_pair_name, equivalent_interaction_list in equivalent_interactions.items():
               for pair in equivalent_interaction_list:
                    if paira == pair: #or pairb == pair:
                         representative_pair = representative_pair_name
                         matching_pair = pair
                         break

     

     # make is_identical_subunit function
     # Can't deal the same with things ivolving a single subunit because not accounting for it in equivalent_interactions
     if identical_pair:
          averaged_name =  f"{representative_pair[0]}:{resids['resna']}:{resids['resida']}-{representative_pair[1]}:{resids['resnb']}:{resids['residb']}"
          to_drop = []
          for contact_name in to_average:
               contact_info = _parse_id(contact_name)
               # the chain equivalency check isn't necessary
               # but if if the chaina variable isn't in the same identical subunit group as the current contact's chain
               # drop it (that means it's happens to have the same resname and resnum but is happening on a different kind of subunit)
               if (contact_info['chaina'] != contact_info['chainb']) or \
               (contact_info['chaina'] not in identical_subunits[get_chain_group(chaina, identical_subunits)]):
                    to_drop.append(contact_name)
          for contact_name in to_drop:
               to_average.remove(contact_name)

     else:
          # if they're not identical and the first representative pair member matches is from the same identical subunit set as chaina
          # this only works for inter-hetero-subunit contacts NOT for inter-homo-subunit

          # inter-homo-subunit then need to first drop everything that involves the hetero-subunit
          to_drop = []
          resnums = []
          matched_name = None
          #REMOVE
          #flipped = None
          for contact_name in to_average:
               contact_info = _parse_id(contact_name)
               if (contact_info['chaina'],contact_info['chainb']) not in equivalent_interactions[representative_pair]:
                    to_drop.append(contact_name)
               else: 
                    # use this to identify the outlier that would give a flipped naming scheme
                    resnums.append(contact_info['resida'])
                    
                    if (contact_info['chaina'], contact_info['chainb']) == representative_pair:
                         matched_name = contact_name
                    # this doesn't make sense, comparing resids to chainids REMOVE
                    #elif (contact_info['residb'], contact_info['resida']) == representative_pair:
                         #flipped = True
          # drop the ones that don't fit the original pair
          for contact_name in to_drop:
               to_average.remove(contact_name)
          # if inter-hetero subunit     
          if get_chain_group(representative_pair[0], identical_subunits) != get_chain_group(representative_pair[1], identical_subunits):
               # and the hetero unit order matches (This shouldn't be happening in the test case)
               if get_chain_group(representative_pair[0], identical_subunits) ==  get_chain_group(resids['chaina'], identical_subunits):
                    averaged_name =  f"{representative_pair[0]}:{resids['resna']}:{resids['resida']}-{representative_pair[1]}:{resids['resnb']}:{resids['residb']}"
               else:
                    averaged_name =  f"{representative_pair[0]}:{resids['resnb']}:{resids['residb']}-{representative_pair[1]}:{resids['resna']}:{resids['resida']}"
          # it's inter-homo subunit
          else:
               # this should be the only case where you have to guess.. 
               # if the contact chains are the same as the representative pair, you eliminate one situation
               if matched_name is not None:
                    averaged_name = matched_name
               elif flipped is not None:
                    averaged_name = f"{representative_pair[0]}:{resids['resnb']}:{resids['residb']}-{representative_pair[1]}:{resids['resna']}:{resids['resida']}"
               else:
                    #measure the distance between the contacting residues for each contact
                    # can probably move this to top of the parent else (3rd back)
                    contact_distances = []
                    for contact_name in to_average:
                         contact_info = _parse_id(contact_name)
                         sel1 = f"chain {contact_info['chaina']} and resnum {contact_info['resida']} and name CA"
                         sel2 = f"chain {contact_info['chainb']} and resnum {contact_info['residb']} and name CA"
                         contact_distances.append(get_pair_distance(sel1, sel2, u))

                    mean_distance = np.mean(contact_distances)

                    testa1 = f"chain {representative_pair[0]} and resnum {contact_info['resida']} and name CA"
                    testa2 = f"chain {representative_pair[1]} and resnum {contact_info['residb']} and name CA"
                    testb1 = f"chain {representative_pair[1]} and resnum {contact_info['resida']} and name CA"
                    testb2 = f"chain {representative_pair[0]} and resnum {contact_info['residb']} and name CA"

                    testa = get_pair_distance(testa1, testa2, u)
                    testb = get_pair_distance(testb1, testb2, u)
                    
                    # if the difference between the mean contact distance distance and the chain swapped one is greater....
                    if np.abs(testa-mean_distance) < np.bas(testb-mean_distance):
                         averaged_name = f"{representative_pair[0]}:{resids['resna']}:{resids['resida']}-{representative_pair[1]}:{resids['resnb']}:{resids['residb']}"
                    else:
                         averaged_name =  f"{representative_pair[0]}:{resids['resnb']}:{resids['residb']}-{representative_pair[1]}:{resids['resna']}:{resids['resida']}"
                    # now get both distances for the representative pair with resids swapped 
                    # and take the name that has the distance most closely matching the mean.
                    #compare_distances between the resids (and possibly all the others to get avg dist) and then the representative_pair and flipped representative_pair



         

         
         
     ## TODO adjust average to account for number of subunits
     #if chaina == chainb:
     #    denominator = len(identical_subunits(get_chain_group(chain)))
     #else:
     #  denominator = len(longest_identical_chain_group participating in intersubunit contact)
     averaged_data[averaged_name] = df_copy[to_average].sum(axis=1)/6
     df_copy.drop(to_average, axis=1, inplace=True)
     # report length of columns or percent remaining at intervals.
     if len(df_copy.columns)%10 == 0:
          print(len(df_copy.columns))

In [None]:
sorted_distances ## The distances between priority_subunit and the other subunits
                # dictionary keys are integer or tuples for inter-subunit data
segids  # the list of all the segids
sorted_all_chain_dists ## the minimum distances between the segid key and all other segments 
                # the other segments are in a nested dictionary holding their distances as values
partner_chains # the dictionary of segid keys and lists of all other segids it makes contact with from the contact df
identical_subunits # the dictionary containing lists of identical subunit segids
#angles # the angles between a list of contacting residues using the com as the vertex
#distances # the distances between a list of contacts 

# Averaging Loop

In [14]:
def make_equivalent_contact_regex(resids):
     '''
     resids : the _parse_id dictionary containing the contact data
     '''
     regex1 = f"[A-Z1-9]+:{resids['resna']}:{resids['resida']}(?!\d)-[A-Z1-9]+:{resids['resnb']}:{resids['residb']}(?!\d)"
     regex2 = f"[A-Z1-9]+:{resids['resnb']}:{resids['residb']}(?!\d)-[A-Z1-9]+:{resids['resna']}:{resids['resida']}(?!\d)"
     return f"{regex1}|{regex2}"

In [25]:
def get_representative_pair_name(chaina, chainb, identical_subunits, equivalent_interactions):
     '''
     provide the chains from the contact and get the chain names to use for making a generalized/averaged contact name
     '''
     if chaina == chainb:
          for key, identical_subunit_list in identical_subunits.items():
               if chaina in identical_subunit_list:
                    for representative_chain in representative_chains:
                         if representative_chain in identical_subunit_list:
                              representative_pair = (representative_chain, representative_chain)
                                   
     # determine which equivalent_interaction set it came from 
     else:
          paira = (chaina,chainb)
          
          for representative_pair_name, equivalent_interaction_list in equivalent_interactions.items():
               for pair in equivalent_interaction_list:
                    if paira == pair: #or pairb == pair:
                         representative_pair = representative_pair_name
                         
                         break
     
     return representative_pair


In [26]:
identical_subunits

{0: ['A', 'B', 'C', 'D', 'E', 'F'], 1: ['G', 'H', 'I', 'J', 'K', 'L']}

In [27]:
get_chain_group(chaina, identical_subunits)

0

In [29]:
identical_subunits[get_chain_group(chaina, identical_subunits)]

['A', 'B', 'C', 'D', 'E', 'F']

In [35]:
resids = _parse_id('B:ALA:112-B:ASN:126')
sel1 = f"chainID {contact_info['chaina']} and resnum {contact_info['resida']} and name CA"
sel2 = f"chainID {contact_info['chainb']} and resnum {contact_info['residb']} and name CA"
get_pair_distance(sel1, sel2, u)

9.083493

In [36]:
df_copy = df.copy()
# hold the averaged data
averaged_data = {}
# select one of each type of chain to be the representative chain for average naming
representative_chains = ['A', 'G']
# determine what the equivalent chain interactions relative to representative chains are for all the subunits
equivalent_interactions = get_equivalent_interactions(representative_chains,u)
# Retrieve/assign these from sorted identical subunits first indices.... although we want the other identical subunits first index to neighbor the chain in
# the first set... A needs to be next to G... This is the case for ATCASE - can just edit the chains in the PDB.... and produce a PDB for visualization along with averaging...
# identify priority name from each set of identical subunits

starting_length = len(df_copy.columns)

while len(df_copy.columns) > 0:
     resids = _parse_id(df_copy.columns[0])

     # find all of the other contacts that involve these residue names and numbers
     # intersubunit contacts can have swapped resids
     # so search with both regexes 
    
     regex = make_equivalent_contact_regex(resids)
     to_average = list(df_copy.filter(regex=regex, axis=1).columns)
     # now filter these to ensure you're only taking ones with equivalent_interaction names
     # chains should always be in alphabetical order
     chaina, chainb = resids['chaina'], resids['chainb']
     
     if chaina == chainb:
          identical_pair = True

     representative_pair = get_representative_pair_name(chaina, chainb, identical_subunits, equivalent_interactions)
    


     
     # Can't deal the same with things ivolving a single subunit because not accounting for it in equivalent_interactions
     if identical_pair:
          averaged_name =  f"{representative_pair[0]}:{resids['resna']}:{resids['resida']}-{representative_pair[1]}:{resids['resnb']}:{resids['residb']}"
          to_drop = []
          for contact_name in to_average:
               contact_info = _parse_id(contact_name)
               # the chain equivalency check isn't necessary
               # but if if the chaina variable isn't in the same identical subunit group as the current contact's chain
               # drop it (that means it's happens to have the same resname and resnum but is happening on a different kind of subunit)
               if (contact_info['chaina'] not in identical_subunits[get_chain_group(chaina, identical_subunits)]):
                    to_drop.append(contact_name)
          for contact_name in to_drop:
               to_average.remove(contact_name)

     else:
          # if they're not identical and the first representative pair member matches is from the same identical subunit set as chaina
          # this only works for inter-hetero-subunit contacts NOT for inter-homo-subunit

          # inter-homo-subunit then need to first drop everything that involves the hetero-subunit
          to_drop = []
          resnums = []
          matched_name = None
          #REMOVE
          #flipped = None
          for contact_name in to_average:
               contact_info = _parse_id(contact_name)
               if (contact_info['chaina'],contact_info['chainb']) not in equivalent_interactions[representative_pair]:
                    to_drop.append(contact_name)
               # else: 
               #     # use this to identify the outlier that would give a flipped naming scheme
               #    resnums.append(contact_info['resida'])
                    
               if (contact_info['chaina'], contact_info['chainb']) == representative_pair:
                    matched_name = contact_name
                    # this doesn't make sense, comparing resids to chainids REMOVE
                    #elif (contact_info['residb'], contact_info['resida']) == representative_pair:
                         #flipped = True


          # drop the ones that don't fit the original pair
          for contact_name in to_drop:
               to_average.remove(contact_name)


          # if inter-hetero subunit     
          if get_chain_group(representative_pair[0], identical_subunits) != get_chain_group(representative_pair[1], identical_subunits):
               # and the order of the hetero subunit matches the representative_pairs, you can name it without further consideration 
               if get_chain_group(representative_pair[0], identical_subunits) ==  get_chain_group(resids['chaina'], identical_subunits):
                    averaged_name =  f"{representative_pair[0]}:{resids['resna']}:{resids['resida']}-{representative_pair[1]}:{resids['resnb']}:{resids['residb']}"
               # else:
               #      averaged_name =  f"{representative_pair[0]}:{resids['resnb']}:{resids['residb']}-{representative_pair[1]}:{resids['resna']}:{resids['resida']}"
          # it's inter-homo subunit
          else:
               # this should be the only case where you have to guess.. 
               # if the contact chains are the same as the representative pair, you eliminate one situation
               if matched_name is not None:
                    averaged_name = matched_name
               # elif flipped is not None:
               #      averaged_name = f"{representative_pair[0]}:{resids['resnb']}:{resids['residb']}-{representative_pair[1]}:{resids['resna']}:{resids['resida']}"
               else:
                    #measure the distance between the contacting residues for each contact
                    # can probably move this to top of the parent else (3rd back) when it originally iterates over to_average.  
                    contact_distances = []
                    for contact_name in to_average:
                         contact_info = _parse_id(contact_name)
                         sel1 = f"chainID {contact_info['chaina']} and resnum {contact_info['resida']} and name CA"
                         sel2 = f"chainID {contact_info['chainb']} and resnum {contact_info['residb']} and name CA"
                         contact_distances.append(get_pair_distance(sel1, sel2, u))

                    mean_distance = np.mean(contact_distances)

                    testa1 = f"chainID {representative_pair[0]} and resnum {contact_info['resida']} and name CA"
                    testa2 = f"chainID {representative_pair[1]} and resnum {contact_info['residb']} and name CA"
                    testb1 = f"chainID {representative_pair[1]} and resnum {contact_info['resida']} and name CA"
                    testb2 = f"chainID {representative_pair[0]} and resnum {contact_info['residb']} and name CA"

                    testa = get_pair_distance(testa1, testa2, u)
                    testb = get_pair_distance(testb1, testb2, u)
                    
                    # if the difference between the mean contact distance distance and the chain swapped one is greater....
                    if np.abs(testa-mean_distance) < np.bas(testb-mean_distance):
                         averaged_name = f"{representative_pair[0]}:{resids['resna']}:{resids['resida']}-{representative_pair[1]}:{resids['resnb']}:{resids['residb']}"
                    else:
                         # TODO Fix bad swap
                         averaged_name =  f"{representative_pair[0]}:{resids['resnb']}:{resids['residb']}-{representative_pair[1]}:{resids['resna']}:{resids['resida']}"
                    # now get both distances for the representative pair with resids swapped 
                    # and take the name that has the distance most closely matching the mean.
                    #compare_distances between the resids (and possibly all the others to get avg dist) and then the representative_pair and flipped representative_pair



         

         
         
     ## TODO adjust average to account for number of subunits
     #if chaina == chainb:
     #    denominator = len(identical_subunits(get_chain_group(chain)))
     #else:
     #  denominator = len(longest_identical_chain_group participating in intersubunit contact)
     averaged_data[averaged_name] = df_copy[to_average].sum(axis=1)/6
     df_copy.drop(to_average, axis=1, inplace=True)
     # report length of columns or percent remaining at intervals.
     if len(df_copy.columns)%10 == 0:
          print(len(df_copy.columns))

31530
31500
31380
31350
31330
31300
31270
31070
31010
30980
30950
30910
30880
30850
30630
30600
30570
30540
30510
30450
30420
30390
29810
29780
29750
29720
29690
29660
29630
29570
29220
29190
28970
28940
28860
28830
28800
28770
28610
28590
28560
28530
28490
28460
28430
28400
28370
28340
28240
28210
28180
28150
28120
28090
28060
28030
28000
27970
27940
27910
27880
27850
27820
27790
27760
27620
27590
27560
27530
27500
27370
27340
27310
27280
27250
27220
27120
27090
27070
27040
27010
26980
26950
26930
26900
26560
26540
26510
26490
26460
26430
26400
26280
26250
26220
26190
25850
25820
25760
25710
25680
25630
25620
25590
25560
25530
25500
25470
25440
25390
25360
25330
25290
25260
25230
25200
25170
25140
24820
24790
24760
24730
24700
24670
24560
24530
24500
24440
24210
24160
24130
24100
24080
24050
24020
23880
23850
23610
23580
23550
23520
23470
23350
23320
23290
23260
23200
23060
23030
22970
22940
22910
22780
22620
22600
22570
22540
22510
22480
22450
22420
22340
22310
22280
22000
21970
2188

In [37]:
avg = pd.DataFrame(averaged_data)

In [38]:
avg

Unnamed: 0,A:ALA:112-A:ASN:126,A:GLN:146-A:LEU:152,A:THR:26-A:VAL:309,A:LEU:192-A:PRO:189,A:ARG:65-A:GLN:297,A:ARG:56-A:SER:74,A:ARG:105-A:PHE:49,A:ALA:175-A:PHE:179,A:GLU:60-A:SER:62,G:ASP:19-G:PHE:65,...,A:PHE:294-C:TYR:98,G:CYS:141-G:GLU:144,G:THR:82-G:VAL:9,A:ALA:274-A:ILE:273,G:GLU:68-G:LEU:99,A:LEU:254-A:VAL:160,A:ASN:256-A:HIS:282,A:ASN:13-G:CYS:138,A:ASP:278-A:MET:227,A:GLU:109-G:GLU:119
0,0.427167,0.477833,0.239167,0.996667,0.685167,0.924,0.999,0.869333,0.999833,0.044,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.4315,0.487,0.249667,0.997667,0.6855,0.915333,0.998833,0.869,0.9995,0.051833,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.438167,0.483167,0.258167,0.9975,0.688,0.908667,0.999,0.868333,0.9995,0.060333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.440833,0.482,0.2735,0.997333,0.6875,0.902667,0.998333,0.866167,0.999167,0.055333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.431667,0.4825,0.288667,0.997167,0.681167,0.900333,0.9985,0.862667,0.999667,0.051167,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.431,0.480667,0.2945,0.997333,0.6815,0.893167,0.9985,0.859,0.999167,0.044167,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.436,0.469,0.294833,0.9975,0.680333,0.889,0.998333,0.854833,0.999167,0.036167,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.4565,0.470833,0.305,0.9965,0.675667,0.8905,0.998333,0.849667,0.9995,0.029,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.4755,0.4735,0.3095,0.997167,0.671333,0.886333,0.998,0.840333,0.998833,0.023833,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.485667,0.4655,0.320333,0.997167,0.673167,0.883667,0.997833,0.836667,0.999,0.022333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
cpca = ContactPCA(avg)

Opening the chacras.


In [47]:
to_visualize = []
for i in range(1,5):
    to_visualize.extend(cpca.sorted_norm_loadings(i).index[:20])


In [50]:
avg_cont = ContactFrequencies(avg)

In [52]:
from ChACRA.ContactAnalysis.contacts_to_pymol import to_pymol

to_pymol(to_visualize,avg_cont,cpca,
    f'/home/kanaka/from_box/atcase/chacras.pml',pc_range=(1,4))