In [127]:
import numpy as np

# Skeleton of the output dictionary.
# Every Wnt system gets the same empty layout:
#   'original_residues' -> the system's own residue ids and residue labels
#   'wnt8a_alignment'   -> mapping for the alignment onto Wnt8a (starts empty)
# Each entry is built fresh by the comprehension, so no lists/dicts are shared
# between systems.
wnt_residue_mapping = {
    f"wnt{system_name}": {
        'original_residues': {
            'residue_ids': [],
            'residue_labels': [],
        },
        'wnt8a_alignment': {},
    }
    for system_name in ("1", "3a", "5a", "8a")
}

In [128]:
# For every Wnt system, collect:
#   resnames_from_pdb[i]        -> ordered, de-duplicated residue labels read from the PDB
#   system_idx_from_mapping[i]  -> first column of the mapping CSV (this system's indices)
#   mapping_idx_from_mapping[i] -> second column of the mapping CSV (Wnt8a indices)
#   system_res_maps[i]          -> dict {system index: Wnt8a index}
# Object arrays are used so that each slot can hold a list/dict of its own length.
systems = ["1", "3a", "5a", "8a"]
resnames_from_pdb = np.empty(len(systems), dtype=object)
system_idx_from_mapping = np.empty(len(systems), dtype=object)
mapping_idx_from_mapping = np.empty(len(systems), dtype=object)
system_res_maps = np.empty(len(systems), dtype=object)

for i, system in enumerate(systems):

    # Load in the mapping files generated from 02*ipynb.
    # ndmin=2 keeps the column slicing below valid even for a one-row file,
    # where loadtxt would otherwise return a 1-D array.
    data = np.loadtxt(f"output/{system}_to_8a_map.csv", delimiter=",", skiprows=1, dtype=str, ndmin=2)

    # First col = 1/3a/5a/8a indices
    curr_idx = data[:, 0].tolist()

    # Second col = 8a mapping indices
    map_idx = data[:, 1].tolist()

    # Load in the pdb file as whitespace-separated string columns, skipping the
    # CRYST1 / TER / END records.
    col = np.loadtxt(f"input/Wnt{system}_align.pdb", comments=["CRYST1", "TER", "END"], dtype=str, ndmin=2)

    # Residue label = column 3 joined with column 4 or 5.
    # NOTE(review): the second column differs for 3a/8a, presumably because those
    # PDB files carry one extra whitespace-separated field before it - confirm
    # against the input files. Whitespace splitting of PDB records is fragile;
    # fixed-width parsing would be more robust.
    label_col = 5 if system in ("3a", "8a") else 4
    resnames = np.char.add(col[:, 3], col[:, label_col])

    # Read in molecule id (WNT or WNTLESS) and generate list of WNT resnames.
    # Only lines whose second-to-last column is "PROA" are kept. A set tracks the
    # labels already recorded so that the first-seen order is preserved without
    # re-scanning the list for every atom line.
    seg = col[:, -2]
    unique_labels = []
    seen_labels = set()
    for label, segment in zip(resnames.tolist(), seg.tolist()):
        if segment == "PROA" and label not in seen_labels:
            seen_labels.add(label)
            unique_labels.append(label)

    # Output
    resnames_from_pdb[i] = unique_labels
    system_idx_from_mapping[i] = curr_idx
    mapping_idx_from_mapping[i] = map_idx
    system_res_maps[i] = dict(zip(curr_idx, map_idx))

In [129]:
# Take arrays and store in the dictionary.
# Print out sizes to make sure number of elements in each dictionary item are the same:
#   - "from PDB" is the number of residue labels extracted from the PDB file
#   - "from Map" is the number of system indices in the mapping file
#   - "aligned from map" is the number of Wnt8a indices in the mapping file
for i, system in enumerate(systems):
    dict_item = f"wnt{system}"
    n_from_pdb = len(resnames_from_pdb[i])
    n_from_map = len(system_idx_from_mapping[i])
    n_aligned = len(mapping_idx_from_mapping[i])
    print("Wnt" + system)
    print(f"Number of Residues from PDB: {n_from_pdb}\nNumber of Residues from Map: {n_from_map}\nNumber of aligned from map: {n_aligned}\n")

    # residue_ids and residue_labels are stored side by side below; refuse to
    # store them if they cannot correspond one-to-one.
    if n_from_pdb != n_from_map:
        raise ValueError(
            f"Wnt{system}: {n_from_pdb} residue labels from the PDB but "
            f"{n_from_map} residue ids from the mapping file"
        )

    # Wnt8a is the alignment target, so its own 'wnt8a_alignment' entry is left
    # as the empty placeholder.
    if system != '8a':
        wnt_residue_mapping[dict_item]['wnt8a_alignment'] = system_res_maps[i]
    wnt_residue_mapping[dict_item]['original_residues']['residue_ids'] = system_idx_from_mapping[i]
    wnt_residue_mapping[dict_item]['original_residues']['residue_labels'] = resnames_from_pdb[i]

Wnt1
Number of Residues from PDB: 338
Number of Residues from Map: 338
Number of aligned from map: 338

Wnt3a
Number of Residues from PDB: 334
Number of Residues from Map: 334
Number of aligned from map: 334

Wnt5a
Number of Residues from PDB: 337
Number of Residues from Map: 337
Number of aligned from map: 337

Wnt8a
Number of Residues from PDB: 316
Number of Residues from Map: 316
Number of aligned from map: 316



In [130]:
# save the dictionary as a json file
import json

output_file_location = "output/wnt8a_residue_alignment_map.json"

# serialize data into file; the context manager guarantees the handle is
# flushed and closed even if serialization fails part-way
with open(output_file_location, 'w') as json_file:
    json.dump(wnt_residue_mapping, json_file)
# read data from file:
# with open(output_file_location) as json_file:
#     wnt_residue_mapping = json.load(json_file)

In [131]:
## DOUBLE-CHECK ALIGNMENT
# Pick one system and display its stored alignment mapping onto Wnt8a.
# For 'wnt8a' itself this is the empty placeholder, because the storing loop
# only fills 'wnt8a_alignment' for the other systems.
wnt_sys = 'wnt8a'
wnt_entry = wnt_residue_mapping[wnt_sys]
wnt_entry['wnt8a_alignment']

{}

In [126]:
# Compare the selected system's original residue ids with the keys of its
# alignment mapping; the expression is True only when both lists are identical
# (same ids, same order).
wnt_record = wnt_residue_mapping[wnt_sys]
org_ids = wnt_record['original_residues']['residue_ids']
aligned_ids = [*wnt_record['wnt8a_alignment']]
org_ids == aligned_ids

False