#### Validating features

|  *Residue-based*      | bEncode     | JSON     |
| :------------- | :----------: | -----------: |
|  **Single** | [WIP](#bencode_single)   | Yes    |
| **Dual**   | Yes |  Yes  |


|  *Atom-based*      | bEncode     | JSON     |
| :------------- | :----------: | -----------: |
|  **Single** | No   | Yes    |
| **Dual**   | No |  Yes  |


<a id='cmap_single_residue'></a>
<a id='cmap_single_atomic'></a>
<a id='cmap_dual_residue'></a>
<a id='cmap_dual_atomic'></a>

<a id='lmap_single_residue'></a>
<a id='lmap_single_atomic'></a>
<a id='lmap_dual_residue'></a>
<a id='lmap_dual_atomic'></a>

<a id='zmap_dual_residue'></a>
<a id='zmap_dual_atomic'></a>



In [1]:
import sys, json

sys.path.append("/Users/guillaumelaunay/work/tmp/ccmap2/build/lib.macosx-10.9-x86_64-3.8")
import ccmap
import pyproteinsExt.structure.coordinates as PDB

# Folder holding the test fixtures (PDB files and JSON specifications).
folderTests = "/Users/guillaumelaunay/work/tmp/ccmap2/tests"

# Pose specifications: the cells below read the 'receptorFile', 'ligandFile',
# 'euler', 'translation', 'recOffset' and 'ligOffset' keys.
with open(f"{folderTests}/poses_specs.json") as fp:
    inputs = json.load(fp)

# Reference results: oracle['oracle'][i] is compared with the contacts computed for pose i.
with open(f"{folderTests}/oracle.json") as fp:
    oracle = json.load(fp)

In [2]:
def getCCasString(i, pdbRec, pdbLig):
    """Decode an integer-encoded contact into a readable "receptor -- ligand" string.

    The index is interpreted as ``receptorRank * nLigand + ligandRank`` where
    the ranks index the ``getResID`` sequences of the two structures and
    ``nLigand`` is the length of the ligand sequence.

    Parameters
    ----------
    i : int
        Encoded contact index.
    pdbRec, pdbLig :
        Receptor and ligand objects exposing a ``getResID`` sequence.

    Returns
    -------
    str
        ``"<receptor residue> -- <ligand residue>"``.
    """
    recIDs = pdbRec.getResID
    ligIDs = pdbLig.getResID

    # Exact integer arithmetic: int(i / n) goes through a float and can be
    # rounded to the wrong receptor rank for very large indexes.
    iRec, iLig = divmod(i, len(ligIDs))

    return f"{recIDs[iRec]} -- {ligIDs[iLig]}"

def zmapTest(folderTests, inputs, oracle, v=None):
    """Compare ``ccmap.zmap`` encoded contact maps with the oracle, pose by pose.

    Parameters
    ----------
    folderTests : str
        Folder containing the receptor and ligand PDB files.
    inputs : dict
        Pose specifications; the keys read here are ``receptorFile``,
        ``ligandFile``, ``euler``, ``translation``, ``recOffset`` and
        ``ligOffset``.
    oracle : dict
        Reference results; ``oracle['oracle'][i]`` is the expected contact
        list of pose ``i``.
    v : int, optional
        When given, only the pose of that index is checked (index ``0``
        included). By default every pose is checked.

    Returns
    -------
    tuple
        ``(x, y)``, the computed and reference contacts of the first pose for
        which the oracle holds contacts missing from the zmap result, or
        ``(None, None)`` when there is no such pose. Contacts found only in
        the zmap result are reported as a warning and do not stop the run.
    """
    parser = PDB.Parser()
    pdbREC = parser.load(file=f"{folderTests}/{inputs['receptorFile']}")
    pdbDictREC = pdbREC.atomDictorize
    pdbLIG = parser.load(file=f"{folderTests}/{inputs['ligandFile']}")
    pdbDictLIG = pdbLIG.atomDictorize

    # Pose-independent parameters
    d = 5
    ro = tuple(inputs['recOffset'])
    lo = tuple(inputs['ligOffset'])

    # Test against None rather than truthiness so that pose 0 can be selected.
    poseIndexes = range(len(inputs['euler'])) if v is None else [v]

    tests = 0
    for i in poseIndexes:
        e = inputs['euler'][i]
        t = inputs['translation'][i]

        x = ccmap.zmap(pdbDictREC, pdbDictLIG, tuple(e), tuple(t),
                       offsetRec=ro, offsetLig=lo, distance=d,
                       encode=True, apply=False)
        y = oracle['oracle'][i]

        diff1 = set(x) - set(y)  # contacts only found by zmap
        diff2 = set(y) - set(x)  # contacts only found in the oracle

        if diff1 or diff2:
            print(f"{tests} test WARNING")
            print("Excess in zmap")
            print("\n".join([getCCasString(cc, pdbREC, pdbLIG) for cc in diff1]), "\n")
            if diff2:
                print("[FATAL] Excess in oracle")
                print("\n".join([getCCasString(cc, pdbREC, pdbLIG) for cc in diff2]), "\n")
                return x, y

        tests = tests + 1

    print(f"{tests} tests successfull")
    return None, None
                         
def lzmapTest(folderTests, inputs, oracle):
    """Check the contact maps of all poses, computed in one ``ccmap.lzmap`` call, against the oracle.

    Parameters
    ----------
    folderTests : str
        Folder containing the receptor and ligand PDB files.
    inputs : dict
        Pose specifications (``receptorFile``, ``ligandFile``, ``euler``,
        ``translation``, ``recOffset`` and ``ligOffset`` keys are read).
    oracle : dict
        Reference results, one contact list per pose under the ``oracle`` key.

    Returns
    -------
    tuple
        ``(x, y)`` for the first pose whose oracle holds contacts absent from
        the computed map, ``(None, None)`` otherwise.
    """
    cutoff = 5
    recShift = inputs['recOffset']
    ligShift = inputs['ligOffset']

    loader = PDB.Parser()
    receptor = loader.load(file=f"{folderTests}/{inputs['receptorFile']}")
    receptorDict = receptor.atomDictorize
    ligand = loader.load(file=f"{folderTests}/{inputs['ligandFile']}")
    ligandDict = ligand.atomDictorize

    computedMaps = ccmap.lzmap(
        receptorDict, ligandDict, inputs['euler'], inputs['translation'],
        offsetRec=recShift, offsetLig=ligShift, distance=cutoff,
        encode=True,
    )

    def report(title, codes):
        # One decoded contact per line, followed by a blank separator line.
        print(title)
        print("\n".join(getCCasString(cc, receptor, ligand) for cc in codes), "\n")

    passed = 0
    for x, y in zip(computedMaps, oracle['oracle']):
        computed, expected = set(x), set(y)
        onlyComputed = computed - expected
        onlyOracle = expected - computed

        if onlyComputed or onlyOracle:
            print(f"{passed} test WARNING")
            report("Excess in zmap", onlyComputed)
            if onlyOracle:
                # Contacts missing from the computed map stop the run.
                report("[FATAL] Excess in oracle", onlyOracle)
                return x, y

        passed += 1

    print(f"{passed} tests successfull")
    return None, None
                         
def applyTransform(folderTests, inputs, i, tag="default"):
    """Apply pose ``i`` to the receptor/ligand pair and write the resulting structures.

    Two files are written in ``folderTests``: ``<tag>_receptor.pdb`` and
    ``<tag>_ligand.pdb``.

    Parameters
    ----------
    folderTests : str
        Folder containing the input PDB files; also the output folder.
    inputs : dict
        Pose specifications (``receptorFile``, ``ligandFile``, ``euler``,
        ``translation``, ``recOffset`` and ``ligOffset`` keys are read).
    i : int
        Index of the pose in ``inputs['euler']`` / ``inputs['translation']``.
    tag : str, optional
        Prefix of the output file names.

    Returns
    -------
    tuple
        ``(pdbREC, pdbLIG)``, the structure objects after their coordinates
        were reset from the coordinate dictionaries.
    """
    parser = PDB.Parser()
    pdbREC = parser.load(file=f"{folderTests}/{inputs['receptorFile']}")
    pdbDictREC = pdbREC.atomDictorize
    pdbLIG = parser.load(file=f"{folderTests}/{inputs['ligandFile']}")
    pdbDictLIG = pdbLIG.atomDictorize

    e = inputs['euler'][i]
    t = inputs['translation'][i]
    d = 5
    ro = inputs['recOffset']
    lo = inputs['ligOffset']

    # The call is made with apply=True so that the coordinate dictionaries
    # carry the pose; the returned contact map is not needed here.
    ccmap.zmap(pdbDictREC, pdbDictLIG, e, t,
               offsetRec=tuple(ro), offsetLig=lo, distance=d,
               encode=True, apply=True)
    pdbREC.setCoordinateFromDictorize(pdbDictREC)
    pdbLIG.setCoordinateFromDictorize(pdbDictLIG)
    with open(f"{folderTests}/{tag}_receptor.pdb", "w") as fp:
        fp.write(str(pdbREC))
    # Same "<tag>_<role>.pdb" pattern as the receptor (the underscore was missing).
    with open(f"{folderTests}/{tag}_ligand.pdb", "w") as fp:
        fp.write(str(pdbLIG))

    return pdbREC, pdbLIG

def displayCtcs(encodeVec, inputs, folder=None):
    """Print the residue pairs corresponding to a vector of encoded contacts.

    Parameters
    ----------
    encodeVec : iterable of int
        Integer-encoded contacts, decoded with ``getCCasString``.
    inputs : dict
        Pose specifications; ``receptorFile`` and ``ligandFile`` are read.
    folder : str, optional
        Folder containing the PDB files. Defaults to the notebook-level
        ``folderTests`` variable, which was the only option before.
    """
    if folder is None:
        # Backward-compatible default: the notebook-level test folder.
        folder = folderTests

    parser = PDB.Parser()
    pdbREC = parser.load(file=f"{folder}/{inputs['receptorFile']}")
    pdbLIG = parser.load(file=f"{folder}/{inputs['ligandFile']}")

    print("Total contacts sel")
    print("\n".join(getCCasString(cc, pdbREC, pdbLIG) for cc in encodeVec), "\n")

<a id='bencode_dual'></a> 

## Demonstrating bEncode for single two-body PDB

##### Single call getting the integer-encoded residue-residue contact map

In [13]:
# Structures and their coordinate dictionaries
parser = PDB.Parser()
pdbREC = parser.load(file=f"{folderTests}/{inputs['receptorFile']}")
pdbDictREC = pdbREC.atomDictorize
pdbLIG = parser.load(file=f"{folderTests}/{inputs['ligandFile']}")
pdbDictLIG = pdbLIG.atomDictorize

# First pose of the specification file
e, t = inputs['euler'][0], inputs['translation'][0]
d = 5
ro, lo = inputs['recOffset'], inputs['ligOffset']

# Contact map of that pose, requested in its encoded form (encode=True)
results = ccmap.zmap(
    pdbDictREC, pdbDictLIG, e, t,
    offsetRec=ro, offsetLig=lo, distance=d,
    encode=True,
)

Visualize the output with
```python 
print(results)
```

##### Single call getting json-string representation of the residue-residue contact map
Note that setting the optional *apply* argument to True will modify the passed dictionaries of coordinates, so we can generate the corresponding structures.

In [3]:
# Structures and their coordinate dictionaries
parser = PDB.Parser()
pdbREC = parser.load(file=f"{folderTests}/{inputs['receptorFile']}")
pdbDictREC = pdbREC.atomDictorize
pdbLIG = parser.load(file=f"{folderTests}/{inputs['ligandFile']}")
pdbDictLIG = pdbLIG.atomDictorize

# First pose of the specification file
e, t = inputs['euler'][0], inputs['translation'][0]
d = 5
ro, lo = inputs['recOffset'], inputs['ligOffset']

# Contact map requested without encoding (encode=False); apply=True so that
# the coordinate dictionaries are modified by the call
results = ccmap.zmap(
    pdbDictREC, pdbDictLIG, e, t,
    offsetRec=tuple(ro), offsetLig=lo, distance=d,
    encode=False, apply=True,
)

# Copy the coordinates of the dictionaries back into the structure objects,
# then write the pose-specific PDB files
pdbREC.setCoordinateFromDictorize(pdbDictREC)
pdbLIG.setCoordinateFromDictorize(pdbDictLIG)
with open(f"{folderTests}/demo_receptor.pdb", "w") as fp:
    fp.write(str(pdbREC))
with open(f"{folderTests}/demo_ligand.pdb", "w") as fp:
    fp.write(str(pdbLIG))

Visualize the output with
```python 
print(results)
```

## No Transformation

<a id='cmap_single'></a> 

## Testing bEncode for two-body PDB

Let's use the previously generated structures

In [7]:
# Reload the pose-specific structures written to disk by the zmap demo cell
parser = PDB.Parser()
pdbRECa = parser.load(file=f"{folderTests}/demo_receptor.pdb")
# Build the dictionaries from the freshly loaded structures; the original code
# read them from the in-memory pdbREC / pdbLIG, so the files were never used.
pdbDictRECa = pdbRECa.atomDictorize
pdbLIGa = parser.load(file=f"{folderTests}/demo_ligand.pdb")
pdbDictLIGa = pdbLIGa.atomDictorize

# NOTE(review): the zmap demo uses distance=5 while this call uses d=4.5;
# confirm the cutoffs are meant to differ before comparing the two outputs.
results_a = ccmap.cmap(pdbDictRECa, y=pdbDictLIGa, d=4.5, encode=False)

Visualize the output with
```python 
print(results_a)
```
which should be identical to the previous one

#### Atomic Contact maps
```json
{ "type":"atomic", "data" : [] }
```

Where `"data"` is a JSON array of 2-tuples, each 2-tuple encoding a pair of atoms in contact. Atoms are specified as 4-element arrays of the shape `[NAME, RESN, RESID, SEGID]`.


#### Getting the atomic contact map between two bodies

In [None]:
# Atomic contact map (atomic=True) between the two coordinate dictionaries;
# the value is not stored, it is only shown as the cell output.
ccmap.cmap(pdbDictRECa, y=pdbDictLIGa, d=4.5, atomic=True)

#### Getting the "self" atomic contact map of one body

In [12]:
# Only one coordinate dictionary is passed (no `y`): this is the "self"
# atomic contact map of the receptor.
results_c = ccmap.cmap(pdbDictRECa, d=4.5, atomic=True)

Visualize the output with
```python 
print(results_c)
```

#### Getting the  atomic contact map between two bodies

In [14]:
# Atomic contact map between the receptor and ligand dictionaries, kept in results_d.
results_d = ccmap.cmap(pdbDictRECa, y = pdbDictLIGa, d=4.5, atomic=True)

Visualize the output with
```python 
print(results_d)
```

In [3]:
# Run the batch check. x, y receive the computed and reference contacts of the
# first pose with contacts missing from the computed map, or (None, None).
x, y = lzmapTest(folderTests, inputs, oracle)

Building encoding results
Excess in zmap
HIS 124 :B -- VAL 40 :A
HIS 124 :B -- THR 42 :A 

Excess in zmap
HIS 124 :B -- VAL 40 :A
HIS 124 :B -- THR 42 :A 

Excess in zmap
HIS 124 :B -- VAL 40 :A
HIS 124 :B -- THR 42 :A 

Excess in zmap
HIS 124 :B -- LYS 123 :A
HIS 124 :B -- GDP 220 :A
HIS 124 :B -- GLY 20 :A
HIS 124 :B -- THR 21 :A
HIS 124 :B -- ASP 91 :A
HIS 124 :B -- THR 93 :A
HIS 124 :B -- SER 94 :A 

Excess in zmap
HIS 124 :B -- LYS 167 :A
HIS 124 :B -- LEU 168 :A
HIS 124 :B -- ILE 169 :A
HIS 124 :B -- GLY 170 :A
HIS 124 :B -- PHE 11 :A
HIS 124 :B -- ASN 114 :A
HIS 124 :B -- GLN 84 :A
HIS 124 :B -- PRO 116 :A 

Excess in zmap
HIS 124 :B -- PHE 176 :A
HIS 124 :B -- VAL 177 :A
HIS 124 :B -- LEU 50 :A
HIS 124 :B -- VAL 51 :A
HIS 124 :B -- PHE 52 :A
HIS 124 :B -- HIS 53 :A
HIS 124 :B -- THR 54 :A
HIS 124 :B -- ALA 178 :A
HIS 124 :B -- MET 179 :A
HIS 124 :B -- PRO 180 :A
HIS 124 :B -- ILE 59 :A
HIS 124 :B -- HIS 30 :A 

Excess in zmap
HIS 124 :B -- LYS 123 :A
HIS 124 :B -- GLY 20 :A
HIS

'{ "type":"atomic", "data" : [[ [ "CG", "TRP", "41 ", "A" ], [ "OE2", "GLU", "36 ", "A" ],  4.16 ], [ [ "CD1", "TRP", "41 ", "A" ], [ "CD", "GLU", "36 ", "A" ],  4.22 ], [ [ "CD1", "TRP", "41 ", "A" ], [ "OE2", "GLU", "36 ", "A" ],  3.17 ], [ [ "NE1", "TRP", "41 ", "A" ], [ "OE2", "GLU", "36 ", "A" ],  3.85 ], [ [ "O", "TRP", "41 ", "A" ], [ "CD", "GLU", "36 ", "A" ],   4.3 ], [ [ "O", "TRP", "41 ", "A" ], [ "OE2", "GLU", "36 ", "A" ],  4.15 ], [ [ "CB", "GLU", "42 ", "A" ], [ "NZ", "LYS", "60 ", "A" ],  4.44 ], [ [ "O", "GLU", "42 ", "A" ], [ "NZ", "LYS", "60 ", "A" ],  4.49 ], [ [ "CD", "GLU", "42 ", "A" ], [ "CD", "GLN", "8 ", "A" ],  4.33 ], [ [ "CD", "GLU", "42 ", "A" ], [ "OE1", "GLN", "8 ", "A" ],  4.36 ], [ [ "CD", "GLU", "42 ", "A" ], [ "NE2", "GLN", "8 ", "A" ],  3.42 ], [ [ "OE1", "GLU", "42 ", "A" ], [ "NZ", "LYS", "60 ", "A" ],  2.53 ], [ [ "NH1", "ARG", "120 ", "A" ], [ "NZ", "LYS", "60 ", "A" ],   3.6 ], [ [ "NZ", "LYS", "60 ", "A" ], [ "OE1", "GLU", "42 ", "A" ],  2.53 

In [47]:
# Write the structures of pose 1 to disk (files prefixed with the tag).
pdbRec, pdbLig = applyTransform(folderTests, inputs,1, tag="1A2K_pose_n1")
# Print the residue pairs encoded in x; x must have been set by an earlier cell.
displayCtcs(x, inputs)

Total contacts sel
GLY 2 :A -- GDP 220 :A
ASP 3 :A -- GDP 220 :A
ASP 3 :A -- LYS 123 :A
ASP 3 :A -- ASP 91 :A
ASP 3 :A -- GLY 20 :A
LYS 4 :A -- GLY 20 :A
LYS 4 :A -- LYS 71 :A
LYS 4 :A -- GLY 19 :A
GLU 8 :A -- LYS 71 :A
SER 12 :A -- LYS 71 :A
SER 12 :A -- GLY 68 :A
SER 12 :A -- TYR 72 :A
ILE 15 :A -- TYR 72 :A
GLN 16 :A -- LYS 71 :A
GLN 16 :A -- TYR 72 :A
GLN 16 :A -- GLY 73 :A
GLN 20 :A -- TYR 72 :A
SER 67 :A -- ARG 76 :A
ILE 68 :A -- GLN 69 :A
ILE 68 :A -- GLY 44 :A
ILE 68 :A -- TYR 72 :A
THR 69 :A -- GLN 69 :A
THR 69 :A -- ALA 41 :A
THR 69 :A -- VAL 45 :A
THR 69 :A -- GLY 44 :A
THR 69 :A -- VAL 40 :A
THR 69 :A -- LEU 43 :A
THR 69 :A -- THR 42 :A
ALA 70 :A -- ALA 41 :A
ALA 70 :A -- GLY 44 :A
ALA 70 :A -- TYR 72 :A
ALA 70 :A -- ALA 67 :A
ALA 70 :A -- TYR 39 :A
GLN 71 :A -- GLY 68 :A
GLN 71 :A -- TYR 72 :A
GLN 71 :A -- ALA 67 :A
GLN 71 :A -- GLN 69 :A
VAL 86 :A -- ALA 41 :A
VAL 86 :A -- TYR 39 :A
GLY 87 :A -- ALA 41 :A
GLN 88 :A -- THR 42 :A
GLN 88 :A -- ARG 76 :A
ILE 96 :A -- ARG 76 :

In [7]:
# Structures and their coordinate dictionaries
parser = PDB.Parser()
pdbREC = parser.load(file=f"{folderTests}/{inputs['receptorFile']}")
pdbDictREC = pdbREC.atomDictorize
pdbLIG = parser.load(file=f"{folderTests}/{inputs['ligandFile']}")
pdbDictLIG = pdbLIG.atomDictorize

# Pose selected by its index in the specification file
i = 0
e, t = inputs['euler'][i], inputs['translation'][i]
d = 5
ro, lo = inputs['recOffset'], inputs['ligOffset']

# lzmap takes lists of poses: wrap the single pose in one-element lists
x = ccmap.lzmap(
    pdbDictREC, pdbDictLIG, [e], [t],
    offsetRec=ro, offsetLig=lo, distance=d,
    encode=True,
)
x

Building encoding results


[[7947,
  8122,
  8174,
  8556,
  8557,
  8555,
  8962,
  8961,
  10585,
  10788,
  10789,
  10785,
  11800,
  11949,
  18848,
  18927,
  19050,
  19332,
  19337,
  24008,
  23958,
  23956,
  24159,
  24158,
  24362,
  24360,
  24361,
  24412,
  24564,
  24566,
  24615,
  24616,
  24617,
  24567,
  24769,
  24818,
  24819,
  24817,
  24816,
  24972,
  25132,
  25175,
  25335,
  25225,
  25334,
  25220,
  25340,
  25221,
  25407,
  38574,
  38572,
  38571,
  38777,
  42021,
  42022,
  42023,
  42225,
  42224,
  42428,
  44457,
  44458,
  44660,
  44661,
  44863,
  49329,
  24565,
  25177,
  11397,
  11601,
  24608,
  18723,
  18926,
  25222,
  25223,
  25224,
  25021]]

In [4]:
# Structures and their coordinate dictionaries
parser = PDB.Parser()
pdbREC = parser.load(file=f"{folderTests}/{inputs['receptorFile']}")
pdbDictREC = pdbREC.atomDictorize
pdbLIG = parser.load(file=f"{folderTests}/{inputs['ligandFile']}")
pdbDictLIG = pdbLIG.atomDictorize

# Pose selected by its index in the specification file
i = 0
e, t = inputs['euler'][i], inputs['translation'][i]
d = 5
ro, lo = inputs['recOffset'], inputs['ligOffset']

# Same single-pose lzmap call, this time passing the pose vectors as tuples
x = ccmap.lzmap(
    pdbDictREC, pdbDictLIG, [tuple(e)], [tuple(t)],
    offsetRec=ro, offsetLig=lo, distance=d,
    encode=True,
)

Unpacking -1.961403 2.066354 -2.354699 // 7.200000 16.800000 28.800000
Building encoding results


76

In [4]:
import sys, json

sys.path.append("/Users/guillaumelaunay/work/tmp/ccmap2/build/lib.macosx-10.9-x86_64-3.8")
import ccmap
import pyproteinsExt.structure.coordinates as PDB

folderRoot="/Users/guillaumelaunay/work/tmp/ccmap2/tests"
# json.loads() parses a JSON *document*; given the file path it raises
# JSONDecodeError. Open the file and parse its content instead.
with open(f"{folderRoot}/poses_specs.json", "r") as fp:
    inputs = json.load(fp)


# Hard-coded pose (Euler angles, translation) and offsets used for the comparison
e = (-1.9614028226597258, 2.066353592203531, -2.354698828551145)
t = (7.199999999999999, 16.8, 28.799999999999997)
lo = (-67.006, 0.11, -77.27)
ro = (-27.553, -8.229, -80.604)


parser = PDB.Parser()
pdbDictREC = parser.load(file=f"{folderRoot}/1A2K_r_u.pdb").atomDictorize
pdbDictLIG = parser.load(file=f"{folderRoot}/1A2K_l_u.pdb").atomDictorize

# Independent copies of the same structures for the lzmap call
pdbDictRECa = parser.load(file=f"{folderRoot}/1A2K_r_u.pdb").atomDictorize
pdbDictLIGb = parser.load(file=f"{folderRoot}/1A2K_l_u.pdb").atomDictorize

# Repeat the same zmap call on the same dictionaries and print a summary each time
for i in range(4):
    d1=ccmap.zmap(pdbDictREC, pdbDictLIG, e, t, offsetRec=list(ro), offsetLig=list(lo), distance=5.0, encode=True, apply=False)
    print(len(d1), ":", d1[0],d1[-1])

# Same pose through the list-based entry point
d2=ccmap.lzmap(pdbDictRECa, pdbDictLIGb, [e], [t], offsetRec=list(ro), offsetLig=list(lo), distance=5.0, encode=True)
print(len(d2[0]), ":", d2[0][0],d2[0][-1])


# First x coordinate of the receptor dictionary, shown as the cell output
pdbDictREC['x'][0]

69 : 7947 49329
69 : 7947 49329
69 : 7947 49329
69 : 7947 49329
Unpacking -1.961403 2.066354 -2.354699 // 7.200000 16.800000 28.800000
Building encoding results
69 : 7947 49329


25.503

In [2]:
eList    = [(-1.9614028226597258, 2.066353592203531, -2.354698828551145), (-0.7010661915165238, 0.951968804736611, -0.5323630719195723)]
tList = [(7.199999999999999, 16.8, 28.799999999999997), (21.599999999999998, -7.199999999999999, -20.4)]
ligOffset = [-67.006, 0.11, -77.27]
recOffset = [-27.553, -8.229, -80.604]

import sys, threading

sys.path.append("/Users/guillaumelaunay/work/tmp/ccmap2/build/lib.macosx-10.9-x86_64-3.8")

import json, ccmap
import pyproteinsExt.structure.coordinates as PDB
folderRoot="/Users/guillaumelaunay/work/tmp/ccmap2/data/lzmap"

parser = PDB.Parser()
pdbDictREC = parser.load(file=f"{folderRoot}/1A2K_r_u.pdb").atomDictorize
pdbDictLIG = parser.load(file=f"{folderRoot}/1A2K_l_u.pdb").atomDictorize

# Pose vectors, converted to tuples
with open(f"{folderRoot}/euler_translate_1A2K_1000.json", 'rb') as fp:
    vectors = json.load(fp)
eulers     = [tuple(_) for _ in vectors['euler']]
translations  = [tuple(_) for _ in vectors['translation']]

nThreads = 5
n=200  # number of poses handed to each lzmap call

def lzThread(d, eulers, translations):
    """Run ``ccmap.lzmap`` on the first ``n`` poses with distance ``d`` and return its result.

    The receptor/ligand dictionaries, the offsets and ``n`` are read from the
    notebook namespace.
    """
    data = ccmap.lzmap(pdbDictREC, pdbDictLIG, eulers[:n], translations[:n],
            offsetRec=recOffset, offsetLig=ligOffset, distance=d, encode=True)
    return data

dValues = [5.0] * nThreads
# lzThread takes three positional arguments: passing only (d,) makes every
# thread fail with a TypeError. Note that Thread discards the return value.
threadPool = [threading.Thread(target=lzThread, args=(d, eulers, translations)) for d in dValues]



print(f"Starting {nThreads} threads")

for th in threadPool:
    th.start()

for th in threadPool:
    th.join()

    
#th.join()

#results=[None, None, None]
#for i,d in enumerate(dValues):
#    print(i)
 #   results[i] = ccmap.lzmap(pdbDictREC, pdbDictLIG, eulers[:n], translations[:n], \
 #           offsetRec=recOffset, offsetLig=ligOffset, distance=d, encode=True)


Starting 5 threads
Unpacking 200  euler and translation poses with bEncode:true and D=5.000000
Unpacking 200  euler and translation poses with bEncode:true and D=5.000000
Unpacking 200  euler and translation poses with bEncode:true and D=5.000000
Unpacking 200  euler and translation poses with bEncode:true and D=5.000000
Unpacking 200  euler and translation poses with bEncode:true and D=5.000000


ccmap_compute_zdock_pose_listccmap_compute_zdock_pose_listccmap_compute_zdock_pose_listccmap_compute_zdock_pose_listccmap_compute_zdock_pose_list

Building encoding results
Building encoding results
Building encoding results
Building encoding results
Building encoding results


In [4]:
#residueCCmap