In [3]:
import MDAnalysis as mda
import numpy as np
import pandas as pd
import sys
#sys.path.append('/Users/dburns/Library/CloudStorage/Box-Box/my_scripts')
from ChACRA.ContactAnalysis.ContactFrequencies import *
import os

In [4]:
#u = mda.Universe('/Users/dburns/Library/CloudStorage/Box-Box/1_GROUP_data/Enzymes/atcase/from_pronto/1q95/1q95_minimized_nohoh.pdb')
u = mda.Universe('../../../from_box/atcase/1q95_minimized_nohoh.pdb')
protein = u.select_atoms('protein')
u = mda.Merge(protein)



In [5]:
universe = u.copy()
residues = {seg.segid: seg.residues.resnames for seg in universe.segments}
segids = list(residues.keys())
# make a square matrix that will be filled with True values for identical subunits
array = np.zeros((len(segids),len(segids)),dtype=np.bool_)
# every subunit is identical with itself
np.fill_diagonal(array,True)
# work with it as a df
identical_table = pd.DataFrame(array, columns=segids, index=segids)



In [11]:
#contact_dir = '/Users/dburns/Library/CloudStorage/Box-Box/1_GROUP_data/Enzymes/atcase/from_nova/contacts/freqs'
contact_dir = '../../../from_box/atcase/freqs'
contact_files = [f'{contact_dir}/{file}' for file in os.listdir(contact_dir)]
contact_files.sort()
cont = make_contact_frequency_dictionary(contact_files)
cont = ContactFrequencies(pd.DataFrame(cont))

In [6]:
# Go through all pairwise combinations of subunits
for combo in combinations(segids,2):
    # not identical if lengths are different
    if len(residues[combo[0]]) != len(residues[combo[1]]):
         identical_table[combo[1]][combo[0]], identical_table[combo[0]][combo[1]] = False, False
    else:
        # catch anything that might have same number of residues but different sequences
        bool = np.all(np.equal(residues[combo[0]],residues[combo[1]]))
        # Enter True or False in both the upper tri and lower tri  
        identical_table[combo[1]][combo[0]], identical_table[combo[0]][combo[1]] =  bool, bool

# Only keep one representative row for each unique chain that gives the identical sets of subunits
identical_table.drop_duplicates(inplace=True)

identical_subunits = {}
# Add the lists of identical subunits to the dictionary
# You can deal with different sets of identical subunits in complex situations\
for i, segid in enumerate(identical_table.index):
    subunits = identical_table.T[identical_table.loc[segid]==True].index
    # if there is only one, it's just identical with itself
    if len(subunits) >= 2:
        identical_subunits[i] = list(subunits)

In [7]:
identical_table

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L
A,True,True,True,True,True,True,False,False,False,False,False,False
G,False,False,False,False,False,False,True,True,True,True,True,True


In [8]:
identical_subunits

{0: ['A', 'B', 'C', 'D', 'E', 'F'], 1: ['G', 'H', 'I', 'J', 'K', 'L']}

In [9]:
combos = list(combinations(identical_subunits.keys(),2))

In [12]:
df = cont.freqs
non_identical_subunit_sets = {}
combos = combinations(identical_subunits.keys(),2)
for combo in combos:
    regex1 = f"[{'|'.join(identical_subunits[combo[0]])}]:[A-Z]+:[1-9]+-[{'|'.join(identical_subunits[combo[1]])}]:[A-Z]+:[1-9]+"
    regex2 = f"[{'|'.join(identical_subunits[combo[1]])}]:[A-Z]+:[1-9]+-[{'|'.join(identical_subunits[combo[0]])}]:[A-Z]+:[1-9]+"
    regex = f"{regex1}|{regex2}"
#this will have the identical_subunit key pairs and corresponding column names
# you can create the regex using these keys and identical_subunits dictionary
non_identical_subunit_sets[combo] = list(df.filter(regex=regex, axis=1).columns)

In [36]:
regex1

'[A|B|C|D|E|F]:[A-Z]+:[1-9]+-[G|H|I|J|K|L]:[A-Z]+:[1-9]+'

In [13]:
from MDAnalysis.analysis.distances import distance_array
# can make a table of the equivalent contacts...
### NEED TO HAVE THE DISTANCES FOR ALL SUBUNIT KEYS
sorted_distances = {}

#for identical_subunits_key in identical_subunits
# find distances between identical and non identical and for all non identical, choose the closest one to priority as the rep for those
for key in identical_subunits:
    chain_distances = {}
    priority = identical_subunits[key][0]
    sel1 = u.select_atoms(f'chainID {priority}')
    for chain in identical_subunits[key][1:]:
        sel2 = u.select_atoms(f'chainID {chain}')

        min_dist = distance_array(sel1.atoms, sel2.atoms,).min()
        chain_distances[(priority, chain)] = min_dist
    sorted_distances[key]={k:v for k, v in sorted(chain_distances.items(), key=lambda x:x[1])}

# then get mixed subunit distances
# this should possibly go after determining the priority chain
# but before determing the other identical subunit priority naming since it will find which of the 
# non identical subunits is closest....
for combo in combinations(identical_subunits.keys(),2):
    chain_distances = {}
    priority = identical_subunits[combo[0]][0]
    sel1 = u.select_atoms(f'chainID {priority}')
    for chain in identical_subunits[combo[1]]:
        sel2 = u.select_atoms(f'chainID {chain}')

        min_dist = distance_array(sel1.atoms, sel2.atoms,).min()
        chain_distances[(priority, chain)] = min_dist
    sorted_distances[combo]={k:v for k, v in sorted(chain_distances.items(), key=lambda x:x[1])}

# After determining the priority nameing scheme, contact distances can be checked to find the best partner subunit with allclose(2 decimals)
# in this case - intra is going to be A-A
# adjacent will be A-C, if it also occurs AB use all close to determine if it's equivalent 



In [30]:
segids

['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L']

# calculate the distances between each chain and all others

In [14]:
sorted_all_chain_dists = {}
all_chain_dists = {chain:{} for chain in segids}
for chain in segids:
    sel1 = u.select_atoms(f'chainID {chain}')
    for chain2 in segids:
        if chain2 != chain:
            sel2 = u.select_atoms(f'chainID {chain2}')
            min_dist = distance_array(sel1.atoms, sel2.atoms,).min()
            all_chain_dists[chain][chain2] = min_dist
    sorted_all_chain_dists[chain]={k:v for k, v in sorted(all_chain_dists[chain].items(), key=lambda x:x[1])}

In [15]:
# can also use the df to identify which chain pairs have identical contact names
# basically use the priority chain's contacts to compare the incoming contacts to
sorted_all_chain_dists

{'A': {'G': 1.6783015165014643,
  'C': 1.7065504023539435,
  'B': 1.7316963647164045,
  'E': 2.276290121014368,
  'H': 2.3374572559516813,
  'K': 10.352570156109394,
  'I': 11.368680308402194,
  'D': 14.695149666859432,
  'J': 19.089641017069628,
  'F': 23.72274977485476,
  'L': 27.828149816818694},
 'B': {'C': 1.6813929744436025,
  'H': 1.6994563762386845,
  'A': 1.7316963647164045,
  'F': 1.8194105875476971,
  'I': 2.405174661796874,
  'L': 10.596713522639728,
  'G': 11.396500598235063,
  'E': 12.988872906635818,
  'K': 16.547568071963777,
  'D': 24.546360514064247,
  'J': 30.74457695856177},
 'C': {'I': 1.649130675274525,
  'B': 1.6813929744436025,
  'A': 1.7065504023539435,
  'D': 2.4535570955604586,
  'G': 2.4838233865839077,
  'J': 11.460028863512493,
  'H': 11.803342818008126,
  'F': 14.49427711966249,
  'L': 19.50460708068936,
  'E': 23.112452434865222,
  'K': 28.91113139744162},
 'D': {'J': 1.6731507547147908,
  'F': 1.7251738858528087,
  'E': 1.7337653050411017,
  'L': 2.3691

In [16]:
segids = set(u.residues.segids)
# hold the coms for each protomer 
seg_sels = {seg: u.select_atoms(f'segid {seg}') for seg in set(u.residues.segids)} # if segid in subunits

In [6]:
# access a subunit com
from itertools import combinations, permutations
triples = [[a,b,c] for a,b,c in permutations(segids, 3)]
coms = {segid: seg_sels[segid].center_of_mass() for segid in segids}


In [7]:
import numpy as np
# triple selections without duplicating angle created by vertex and permutations of the 2 points
trips = []
for segid in segids:
    trips.append([[ch1,segid,ch2] for ch1,ch2 in combinations([ch for ch in segids if ch != segid],2)])

In [8]:
trips

[[['A', 'C', 'D'], ['A', 'C', 'B'], ['D', 'C', 'B']],
 [['C', 'A', 'D'], ['C', 'A', 'B'], ['D', 'A', 'B']],
 [['C', 'D', 'A'], ['C', 'D', 'B'], ['A', 'D', 'B']],
 [['C', 'B', 'A'], ['C', 'B', 'D'], ['A', 'B', 'D']]]

In [9]:
all_seg_sel_string = ''.join([f'segid {seg} or ' for seg in list(segids)[:-1]])
all_seg_sel_string += f'segid {list(segids)[-1]}'
all_com = u.select_atoms(all_seg_sel_string).center_of_mass()

In [10]:
def get_angle(a,b,c):
    ba = a - b
    bc = c - b

    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    angle = np.arccos(cosine_angle)
    return angle

In [11]:
opposing_subunits = []
check = []
for seg in segids:
    if seg not in check:
        for seg2 in [segid for segid in segids if segid != seg]:
            if np.abs(180-np.rad2deg(get_angle(coms[seg],all_com,coms[seg2]))) <= 2:
                opposing_subunits.append((seg,seg2))
                check.append(seg)
                break




In [12]:
opposing_subunits

[('C', 'B'), ('A', 'D'), ('D', 'A'), ('B', 'C')]

In [80]:
np.rad2deg(get_angle(coms['A'],all_com,coms['D']))

179.92590933219583

### Use contact df to determine which chains actually make contact

In [13]:
df = cont.freqs

In [14]:
from ChACRA.ContactAnalysis.contact_functions import _parse_id

In [15]:
partner_chains = {}

for contact in df.columns:
    data = _parse_id(contact)
    if data['chaina'] not in partner_chains:
        partner_chains[data['chaina']] = set(data['chainb'])
    else:
        partner_chains[data['chaina']].add(data['chainb'])
    if data['chainb'] not in partner_chains:
        partner_chains[data['chainb']] = set(data['chaina'])
    else:
        partner_chains[data['chainb']].add(data['chaina'])

In [16]:
partner_chains

{'B': {'A', 'B', 'C', 'E', 'F', 'H', 'I'},
 'C': {'A', 'B', 'C', 'D', 'F', 'G', 'I', 'J'},
 'D': {'C', 'D', 'E', 'F', 'I', 'J', 'L'},
 'E': {'A', 'B', 'D', 'E', 'F', 'J', 'K'},
 'A': {'A', 'B', 'C', 'E', 'G', 'H'},
 'L': {'D', 'F', 'I', 'L'},
 'J': {'C', 'D', 'E', 'G', 'J'},
 'I': {'B', 'C', 'D', 'I', 'L'},
 'G': {'A', 'C', 'G', 'J'},
 'H': {'A', 'B', 'H', 'K'},
 'F': {'B', 'C', 'D', 'E', 'F', 'K', 'L'},
 'K': {'E', 'F', 'H', 'K'}}

In [17]:
sorted_distances
# nee to make this data for each chain so you can identify which A partners correspond to the rare I and J partners for C for instance

{0: {('A', 'C'): 1.7065504023539435,
  ('A', 'B'): 1.7316963647164045,
  ('A', 'E'): 2.276290121014368,
  ('A', 'D'): 14.695149666859432,
  ('A', 'F'): 23.72274977485476},
 1: {('G', 'J'): 1.7411957335158068,
  ('G', 'I'): 27.48132477068823,
  ('G', 'H'): 28.549628162117425,
  ('G', 'K'): 30.40205853744378,
  ('G', 'L'): 44.52822064542099},
 (0, 1): {('A', 'G'): 1.6783015165014643,
  ('A', 'H'): 2.3374572559516813,
  ('A', 'K'): 10.352570156109394,
  ('A', 'I'): 11.368680308402194,
  ('A', 'J'): 19.089641017069628,
  ('A', 'L'): 27.828149816818694}}

In [18]:
cols = []
for contact in df.columns:
    data = _parse_id(contact)
    if 'C' in data.values() and ('F' in data.values() or "J" in data.values()):
        cols.append(contact)
df[cols]


Unnamed: 0,C:LYS:244-J:ILE:115,C:LYS:244-J:ASN:113,C:GLN:246-J:ASN:113,C:PRO:237-F:LYS:84,C:GLN:246-J:SER:112,C:SER:238-F:LYS:84,C:GLU:239-F:LYS:83,C:SER:238-F:LYS:83,C:TYR:240-F:LYS:83
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
# use the all_chain_distance dictionary to identify the appropriate priority nameing when a non-priority chain has an unseen contact
# if a priority naming scheme is
#  not available after the regex filter gets all of the duplicate contacts, use the all_chain_distance dict to find the correct way to depict it
# this might involve doing a distance comparison for a contact between AB versus AC working down through the sorted_distances and returning
# the chain that corresponds to the minimum contact distance measurement
# this could break in a channel pore situation where neighboring and opposing are almost the same

# if a priority name is in the regex filtered stuff, then just use that as the template name
# if the priority name isn't in there, then you have to determine the proper partner
# you do this by finding which group of identical subunits each chain in the pair is from
# and then making the appropriate combination.
sorted_distances 

{0: {('A', 'C'): 1.7065504023539435,
  ('A', 'B'): 1.7316963647164045,
  ('A', 'E'): 2.276290121014368,
  ('A', 'D'): 14.695149666859432,
  ('A', 'F'): 23.72274977485476},
 1: {('G', 'J'): 1.7411957335158068,
  ('G', 'I'): 27.48132477068823,
  ('G', 'H'): 28.549628162117425,
  ('G', 'K'): 30.40205853744378,
  ('G', 'L'): 44.52822064542099},
 (0, 1): {('A', 'G'): 1.6783015165014643,
  ('A', 'H'): 2.3374572559516813,
  ('A', 'K'): 10.352570156109394,
  ('A', 'I'): 11.368680308402194,
  ('A', 'J'): 19.089641017069628,
  ('A', 'L'): 27.828149816818694}}

In [None]:
'C:PRO:237-F:LYS:84'

In [50]:
resids = _parse_id('C:GLN:146-C:LEU:152')
regex1 = f"[A-Z1-9]+:{resids['resna']}:{resids['resida']}(?!\d)-[A-Z1-9]+:{resids['resnb']}:{resids['residb']}(?!\d)"
regex2 = f"[A-Z1-9]+:{resids['resnb']}:{resids['residb']}(?!\d)-[A-Z1-9]+:{resids['resna']}:{resids['resida']}(?!\d)"
regex = f"{regex1}|{regex2}"
to_average = list(df.filter(regex=regex, axis=1).columns)

In [51]:
to_average

['C:GLN:146-C:LEU:152',
 'E:GLN:146-E:LEU:152',
 'A:GLN:146-A:LEU:152',
 'B:GLN:146-B:LEU:152',
 'D:GLN:146-D:LEU:152',
 'F:GLN:146-F:LEU:152']

In [20]:
def get_chain_group(chain, identical_subunits):
    '''
    Return the identical subunit group that the subunit is in
    '''
    for group in identical_subunits:
        if chain in identical_subunits[group]:
            return group
        
def get_chain_tuples(group1,group2):
    if group1 != group2:
        return tuple(sorted((group1,group2)))
    else:
        return group1

def check_distances(group, contact, u, sorted_distances):

    dists = {}
    chain_pairs = list(sorted_distances[group].keys())
    resids = _parse_id(contact)
    for pair in chain_pairs:
        atoma = u.select_atoms(f'chainid {pair[0]} and resnum \
                            {resids["resida"]} and name CA').positions
        atomb = u.select_atoms(f'chainid {pair[1]} and resnum \
                            {resids["residb"]} and name CA').positions
        dists[f'{pair[0]}:{resids["resna"]}:{resids["resida"]}-{pair[1]}:{resids["resnb"]}:{resids["residb"]}'] = np.linalg.norm((atoma-atomb)) 
    return {k:v for k, v in sorted(dists.items(), key=lambda x:x[1])}
    #list(distances.keys())[0]

# def get_possible_priority_name_combos(group1, group2, priority_ids, identical_subunits):
#     if priority_ids[0] in identical_subunits[group1] and priority_ids[0] in identical_subunits[group2]:
#         return [priority_ids[0], priority_ids[0]]
#     elif priority_ids[1] in identical_subunits[group1] and priority_ids[1] in identical_subunits[group2]:
#         return [priority_ids[0], priority_ids[0]]
#     elif group1 != group2:





In [21]:
chain1 = 'C'
chain2 = 'F'
group1 = get_chain_group(chain1, identical_subunits)
group2 = get_chain_group(chain2, identical_subunits)
key = get_chain_tuples(group1, group2)
distances = check_distances(key, 'C:PRO:237-F:LYS:84', u, sorted_distances)

In [25]:
all_seg_sel_string = ''.join([f'segid {seg} or ' for seg in list(segids)[:-1]])
all_seg_sel_string += f'segid {list(segids)[-1]}'
all_com = u.select_atoms(all_seg_sel_string).center_of_mass()

chain1 = 'C'
chain2 = 'F'
group1 = get_chain_group(chain1, identical_subunits)
group2 = get_chain_group(chain2, identical_subunits)
key = get_chain_tuples(group1, group2)
distances = check_distances(key, 'C:PRO:237-F:LYS:84', u, sorted_distances)

In [44]:
a = u.select_atoms('chainID C and resnum 237 and name CA').positions[0]
b = u.select_atoms('chainID F and resnum 84 and name CA').positions[0]
np.rad2deg(get_angle(a,all_com,b))

49.203095605716605

In [45]:
a = u.select_atoms('chainID C and resnum 237 and name CA').positions[0]
b = u.select_atoms('chainID A and resnum 84 and name CA').positions[0]
np.rad2deg(get_angle(a,all_com,b))

48.5597753102801

In [43]:
# check template contact distance and filter out the regex filter using this
# can use the condition to do this when the number of contacts filtered out with regex is greater than the length of identical subunits

atoma = u.select_atoms(f'chainID C and resnum 237 and name CA').positions
atomb = u.select_atoms(f'chainID F and resnum 84 and name CA').positions
np.linalg.norm((atoma-atomb))

18.497358

In [53]:
def check_angles(contacts, u, com_selection='protein'):
    '''
    calculate angles between contacting residues using a structure's center of mass as the vertex.

    contacts : list
            The contacts to retrieve angles for.
    
    u : MDA.Universe
            The universe containing the crystal structure that was used for generating the contact data.

    '''
    angles = {}
    com = u.select_atoms(com_selection).center_of_mass()
    for contact in contacts:
        resids = _parse_id(contact)
        a = u.select_atoms(f"chainid {resids['chaina']} and resnum \
                            {resids['resida']} and name CA").positions[0]
        c = u.select_atoms(f"chainid {resids['chainb']} and resnum \
                                {resids['residb']} and name CA").positions[0]
        angles[contact] = np.rad2deg(get_angle(a,com,c))
    return angles





In [63]:
angles = check_angles(to_average,u)


In [64]:
to_average


['C:GLN:146-C:LEU:152',
 'E:GLN:146-E:LEU:152',
 'A:GLN:146-A:LEU:152',
 'B:GLN:146-B:LEU:152',
 'D:GLN:146-D:LEU:152',
 'F:GLN:146-F:LEU:152']

In [68]:
def find_non_matching_angles(reference_contact, angles, cutoff=1):
    '''
    Return a list of contacts whose angles don't match the reference contact

    reference_contact : string
        The contact name whose angle will be used for comparison with the other angles.

    angles : dict
        The dictionary of contacts and corresponding angles 

    cutoff : float or int
        The maximum difference between reference_contact's angle and the comparison angle to be considered the same.
        Contacts that differ in angle above this value will be returned in the list.

    '''
    non_matching_contacts = []

    for contact in angles:
        if np.abs(angles[reference_contact]-angles[contact]) > cutoff:
            non_matching_contacts.append(contact)

    return non_matching_contacts

# New Averaging Loop

In [69]:
sorted_all_chain_dists

{'A': {'G': 1.6783015165014643,
  'C': 1.7065504023539435,
  'B': 1.7316963647164045,
  'E': 2.276290121014368,
  'H': 2.3374572559516813,
  'K': 10.352570156109394,
  'I': 11.368680308402194,
  'D': 14.695149666859432,
  'J': 19.089641017069628,
  'F': 23.72274977485476,
  'L': 27.828149816818694},
 'B': {'C': 1.6813929744436025,
  'H': 1.6994563762386845,
  'A': 1.7316963647164045,
  'F': 1.8194105875476971,
  'I': 2.405174661796874,
  'L': 10.596713522639728,
  'G': 11.396500598235063,
  'E': 12.988872906635818,
  'K': 16.547568071963777,
  'D': 24.546360514064247,
  'J': 30.74457695856177},
 'C': {'I': 1.649130675274525,
  'B': 1.6813929744436025,
  'A': 1.7065504023539435,
  'D': 2.4535570955604586,
  'G': 2.4838233865839077,
  'J': 11.460028863512493,
  'H': 11.803342818008126,
  'F': 14.49427711966249,
  'L': 19.50460708068936,
  'E': 23.112452434865222,
  'K': 28.91113139744162},
 'D': {'J': 1.6731507547147908,
  'F': 1.7251738858528087,
  'E': 1.7337653050411017,
  'L': 2.3691

8.40284

In [74]:
get_chain_distances(identical_subunits,u)

{0: {('A', 'C'): 1.7065504023539435,
  ('A', 'B'): 1.7316963647164045,
  ('A', 'E'): 2.276290121014368,
  ('A', 'D'): 14.695149666859432,
  ('A', 'F'): 23.72274977485476},
 1: {('G', 'J'): 1.7411957335158068,
  ('G', 'I'): 27.48132477068823,
  ('G', 'H'): 28.549628162117425,
  ('G', 'K'): 30.40205853744378,
  ('G', 'L'): 44.52822064542099},
 (0, 1): {('A', 'G'): 1.6783015165014643,
  ('A', 'H'): 2.3374572559516813,
  ('A', 'K'): 10.352570156109394,
  ('A', 'I'): 11.368680308402194,
  ('A', 'J'): 19.089641017069628,
  ('A', 'L'): 27.828149816818694}}

In [95]:
def establish_priority_naming(identical_subunits, u):
    segids = [j for i in identical_subunits.values() for j in i]
    all_chain_dists = []
    for subunit_set, subunits in identical_subunits.items():
        priority1 = subunits[0]
        

def establish_equivalent_chain_interactions(seg_combo, identical_subunits, u):
    '''
    Use the closest residue pairs to determine equivalent orientations.
    seg_combo : tuple
        two segid/ chains that you want to identify equivalent interactions for for the rest of the chains
        i.e. ('A','B')

    identical_subunits : dictionary
        The dictionary containing integer keys and lists of identical chains
    '''
    # get the indices of the atoms from each chain
    A = np.where(u.atoms.segids == f'{seg_combo[0]}')[0]
    B = np.where(u.atoms.segids == f'{seg_combo[1]}')[0]
    # calculate all of the distances beteen the atoms from each chain
    da = distance_array(u.atoms[A], u.atoms[B])
    # find the index of the minimum distance value
    Amin, Bmin = np.where(da == da.min())
    # get the residue id and atom name for the closest distance atoms
    resa, atoma = u.atoms[A][Amin[0]].resid, u.atoms[A][Amin[0]].name
    resb, atomb= u.atoms[B][Bmin[0]].resid, u.atoms[B][Bmin[0]].name
    # this identifies A's relationship to B.  Now you can establish this same relationship by going through all the other combos
    for key, seg_list in identical_subunits.items():
        if seg_combo[0] in seg_list and seg_combo[1] in seg_list:
            A_group, B_group = key, key
        elif seg_combo[0] in seg_list and seg_combo[1] not in seg_list:
            A_group = key
        elif seg_combo[1] in seg_list and seg_combo[0] not in seg_list:
            B_group = key
    relationships = []
    for seg1 in identical_subunits[A_group]:
        distances = {}
        for seg2 in identical_subunits[B_group]:
            if seg1 != seg2:
                test_chain_1 = 'B'  # set this as the next identical chain
                test_chain_2 = 'C'  # test against all others of the identical chain type and take min to determine the equivalent relationship of A to B
                distances[(seg1,seg2)] = np.linalg.norm(u.select_atoms(f'chainID {seg1} and resnum {resa} and name {atoma}').positions - 
                            u.select_atoms(f'chainID {seg2} and resnum {resb} and name {atomb}').positions)
        relationships.append(min(distances, key=distances.get))
    return relationships
        

In [96]:
establish_equivalent_chain_interactions(('A','B'), identical_subunits, u)

[('A', 'B'), ('B', 'C'), ('C', 'A'), ('D', 'F'), ('E', 'D'), ('F', 'E')]

In [None]:
sorted_distances ## The distances between priority_subunit and the other subunits
                # dictionary keys are integer or tuples for inter-subunit data
segids  # the list of all the segids
sorted_all_chain_dists ## the minimum distances between the segid key and all other segments 
                # the other segments are in a nested dictionary holding their distances as values
partner_chains # the dictionary of segid keys and lists of all other segids it makes contact with from the contact df
identical_subunits # the dictionary containing lists of identical subunit segids
angles # the angles between a list of contacting residues using the com as the vertex
distances # the distances between a list of contacts 

In [None]:
df_copy = df.copy()
contacts = df.columns
averaged_data = {}
# Retrieve/assign these from sorted identical subunits first indices.... although we want the other identical subunits first index to neighbor the chain in
# the first set... A needs to be next to G... This is the case for ATCASE - can just edit the chains in the PDB.... and produce a PDB for visualization along with averaging...
# identify priority name from each set of identical subunits

priority_1 = 'A'
priority_2 = 'G'
# can you use symmetry axes?
while len(df_copy.columns) > 0:
     resids = _parse_id(df_copy.columns[0])
     # intersubunit contacts can have swapped resids
     # so search with both regexes 
     regex1 = f"[A-Z1-9]+:{resids['resna']}:{resids['resida']}(?!\d)-[A-Z1-9]+:{resids['resnb']}:{resids['residb']}(?!\d)"
     regex2 = f"[A-Z1-9]+:{resids['resnb']}:{resids['residb']}(?!\d)-[A-Z1-9]+:{resids['resna']}:{resids['resida']}(?!\d)"
     regex = f"{regex1}|{regex2}"
     
     to_average = list(df.filter(regex=regex, axis=1).columns)
     #################################################################
     for contact in to_average:
          if (priority_1 in _parse_id(contact).values()) or (priority_2 in _parse_id(contact).values()):
               name = contact
               
     # if priority name not in to_average, create the name by finding the matching interaction involving the priority chain
     else:
          group1 = get_chain_group(resids['chaina'], identical_subunits)
          group2 = get_chain_group(resids['chaina'], identical_subunits)
          key = get_chain_tuples(group1, group2)

          # Have to go back and figure out which resid belongs to which chain before making the contact name
          # Have to make sure you're constructing the right contact name especially when it's between 
          # non-identical subunits (can't put chain A with a chain G residue.)
          name = check_distances(key, contact, u, sorted_distances)
          
          
     # average and add to averaged_data
     # drop the to_average columns