DeepRank · CunliangGeng · Aug 12, 2019 · Jul 2, 2019 · Jul 2, 2019 · Jul 5, 2019
diff --git a/deeprank/learn/NeuralNet.py b/deeprank/learn/NeuralNet.py
@@ -975,7 +975,7 @@ def _plot_boxplot_class(self,figname):
                 for pts,t in zip(out,tar):
                     r = F.softmax(torch.FloatTensor(pts), dim=0).data.numpy()
                     data[t].append(r[1])
-                    confusion[t][r[1]>0.5] += 1
+                    confusion[t][bool(r[1]>0.5)] += 1
 
                 #print("  {:5s}: {:s}".format(l,str(confusion)))
 

diff --git a/deeprank/learn/rankingMetrics.py b/deeprank/learn/rankingMetrics.py
@@ -16,8 +16,8 @@ def hitrate(rs):
 
     Example:
 
-    >>> r = [0,1,1]
-    >>> hit_rate(r,nr)
+    >>> rs = [0,1,1]
+    >>> hitrate(rs)
 
 
     Attributes:
@@ -27,14 +27,35 @@ def hitrate(rs):
     Returns:
         hirate (array): [recall@1,recall@2,...]
     """
-    nr = np.max((1,np.sum(rs)))
+    rs = np.array(rs)
+    nr = np.max((1, np.sum(rs)))
     return np.cumsum(rs) / nr
 
 
+def success(rs):
+    """Flag, for every rank k, whether a relevant item occurs at position ≤ k.
+
+    Example:
+    >>> rs = [0, 0, 1, 0, 1, 0]
+    >>> success(rs)
+    array([0, 0, 1, 1, 1, 1])
+
+    Args:
+        rs (array): binary relevance array
+
+    Returns:
+        success (array): int array [success@≤1, success@≤2,...]
+    """
+    # np.int was removed in NumPy 1.24; the builtin int is its exact equivalent.
+    hit_so_far = np.cumsum(rs) > 0
+    return hit_so_far.astype(int)
+
+
 def avprec(rs):
-    return [average_precision(rs[:i]) for i in range(1,len(rs))]
+    return [average_precision(rs[:i]) for i in range(1, len(rs))]
+
 
-def recall(rs,nr):
+def recall(rs, nr):
     """recall rate
     First element is rank 1, Relevance is binray
 
@@ -56,6 +77,7 @@ def recall(rs,nr):
 
     return np.sum(rs)/nr
 
+
 def mean_reciprocal_rank(rs):
     """Score is reciprocal of the rank of the first relevant item
 
@@ -272,4 +294,4 @@ def ndcg_at_k(r, k, method=0):
     dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
     if not dcg_max:
         return 0.
-    return dcg_at_k(r, k, method) / dcg_max
+    return dcg_at_k(r, k, method) / dcg_max
diff --git a/deeprank/utils/count_hits.py b/deeprank/utils/count_hits.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# Li Xue
+# 12-Jul-2019 21:13
+
+# Count the hits.
+
+import h5py
+
+# def count_hits_from_input(hdf5_DIR = None):
+#
+#
+# def main():
+#
+#     hdf5_DIR = '/home/lixue/DBs/BM5-haddock24/hdf5_withGridFeature'
+#     count_hits_from_input(hdf5_DIR)
+
+#h5FL = '/projects/0/deeprank/BM5/hdf5/hdf5_woGridFeature/000_1ACB.hdf5'
+h5FL = '/projects/0/deeprank/BM5/hdf5/hdf5_withGridFeature/013_2ABZ.hdf5'
+
+# Models whose IRMSD is at or below this cutoff are reported.
+IRMSD_CUTOFF = 4
+
+# The context manager closes the HDF5 file even if a target is missing.
+with h5py.File(h5FL, 'r') as f:
+    # Iterating the file yields its top-level names (one per model).
+    for modelID in f:
+        BIN_CLASS = f[modelID + '/targets/BIN_CLASS'][()]
+        IRMSD = f[modelID + '/targets/IRMSD'][()]
+
+        if IRMSD <= IRMSD_CUTOFF:
+            print(f"modelID: {modelID}, BIN: {BIN_CLASS}, irmsd: {IRMSD}")
+
diff --git a/deeprank/utils/get_h5subset.py b/deeprank/utils/get_h5subset.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# Li Xue
+#  2-May-2019 11:24
+#
+# Extract five molecules from each hdf5 file and write into a new h5 file.
+#
+# This script is used to generate a small set for debugging.
+
+import sys
+import h5py
+import os
+
+h5FL = sys.argv[1]  # input file, e.g. '001_1GPW.hdf5'
+outDIR = sys.argv[2]  # output directory, e.g. '.../'
+
+# os.path.join keeps the path valid even when outDIR lacks a trailing
+# separator; plain concatenation would glue the two names together.
+filename = os.path.basename(h5FL)
+new_h5FL = os.path.join(outDIR, filename)
+
+# The context managers close both files even if a copy fails midway.
+with h5py.File(h5FL, 'r') as f_in, h5py.File(new_h5FL, 'w') as f_out:
+    modelIDs = list(f_in)
+
+    # Copy only the first five top-level entries into the new file.
+    for x in modelIDs[0:5]:
+        print(x)
+        f_in.copy(f_in[x], f_out)
+
+print(f"{new_h5FL} generated.")
+
diff --git a/deeprank/utils/get_subset.py b/deeprank/utils/get_subset.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# Li Xue
+#  2-May-2019 11:24
+#
+# Extract five molecules from each hdf5 file and write into a new h5 file.
+#
+# This script is used to generate a small set for debugging.
+
+import sys
+import h5py
+import os
+
+h5FL = sys.argv[1]  # input file, e.g. '001_1GPW.hdf5'
+outDIR = sys.argv[2]  # output directory, e.g. '.../'
+
+# os.path.join keeps the path valid even when outDIR lacks a trailing
+# separator; plain concatenation would glue the two names together.
+filename = os.path.basename(h5FL)
+new_h5FL = os.path.join(outDIR, filename)
+
+# The context managers close both files even if a copy fails midway.
+with h5py.File(h5FL, 'r') as f_in, h5py.File(new_h5FL, 'w') as f_out:
+    modelIDs = list(f_in)
+
+    # Copy only the first five top-level entries into the new file.
+    for x in modelIDs[0:5]:
+        print(x)
+        f_in.copy(f_in[x], f_out)
+
+print(f"{new_h5FL} generated.")
+
diff --git a/deeprank/utils/plot_hitrate.py b/deeprank/utils/plot_hitrate.py