Skip to content
This repository has been archived by the owner on Nov 28, 2023. It is now read-only.

added some utility codes #87

Merged
merged 35 commits into from
Aug 12, 2019
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
34d66d3
updated plot_utils.py
LilySnow Jul 2, 2019
adf0460
add SuccessHitRate.py to utils
LilySnow Jul 2, 2019
eb40e12
updates on plot_utils.py
LilySnow Jul 5, 2019
8e3e77b
updated plot_utils.py
LilySnow Jul 8, 2019
7829a47
add count_hits.py
LilySnow Jul 12, 2019
a513ae5
updatedcount_hits.py
LilySnow Jul 13, 2019
a02e32d
added run_slurmFLs.py for cartesius
LilySnow Jul 13, 2019
31103db
updated run_slurmFLs.py
LilySnow Jul 15, 2019
5035ed3
fixed issue85
LilySnow Jul 16, 2019
7b6c0f5
rename get_subset.py get_h5subset.py
LilySnow Jul 16, 2019
a95b7f8
tidy up code
LilySnow Jul 16, 2019
a0789a6
fixed the x-label of success rate plot
LilySnow Jul 16, 2019
4300fe6
update success function
CunliangGeng Jul 22, 2019
dd338a8
tidied up code
LilySnow Jul 25, 2019
a9bc9ac
Merge branch 'successrate' of https://github.com/DeepRank/deeprank in…
LilySnow Jul 25, 2019
e0642e1
tidy up code
LilySnow Jul 25, 2019
18ce03a
added counting cases_wo_hits to plot_utils.py
LilySnow Jul 25, 2019
23efdfd
Update rankingMetrics.py
CunliangGeng Jul 26, 2019
03b8590
Update rankingMetrics.py
CunliangGeng Jul 26, 2019
7a5c1ad
Delete count_hits.py
CunliangGeng Jul 26, 2019
413800c
Update get_h5subset.py
CunliangGeng Jul 26, 2019
23fa906
Delete get_subset.py
CunliangGeng Jul 26, 2019
9ad85e4
added unitest for plot_util.py
LilySnow Jul 26, 2019
6cf93c1
Merge branch 'successrate' of https://github.com/DeepRank/deeprank in…
LilySnow Jul 26, 2019
c44b9f4
add rpy2
CunliangGeng Jul 26, 2019
5705205
Update get_h5subset.py
CunliangGeng Jul 26, 2019
dac611e
Update test_hitrate_successrate.py
CunliangGeng Jul 26, 2019
c2dd6ab
Update setup.py
CunliangGeng Jul 26, 2019
1a0a2b1
Update setup.py
CunliangGeng Jul 26, 2019
67c0404
test import
CunliangGeng Jul 29, 2019
36e6cde
split plot_utils to calcualtion and plot
CunliangGeng Jul 29, 2019
058e867
Merge pull request #95 from DeepRank/test_successrate
CunliangGeng Jul 29, 2019
2ed7614
update h5subset
CunliangGeng Jul 29, 2019
435f02b
update plot_utils
CunliangGeng Jul 29, 2019
3a21ae1
update test_hitrate_sucrate
CunliangGeng Jul 29, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deeprank/learn/NeuralNet.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ def _plot_boxplot_class(self,figname):
for pts,t in zip(out,tar):
r = F.softmax(torch.FloatTensor(pts), dim=0).data.numpy()
data[t].append(r[1])
confusion[t][r[1]>0.5] += 1
confusion[t][bool(r[1]>0.5)] += 1

#print(" {:5s}: {:s}".format(l,str(confusion)))

Expand Down
34 changes: 28 additions & 6 deletions deeprank/learn/rankingMetrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def hitrate(rs):

Example:

>>> r = [0,1,1]
>>> hit_rate(r,nr)
>>> rs = [0,1,1]
>>> hitrate(rs)


Attributes:
Expand All @@ -27,14 +27,35 @@ def hitrate(rs):
Returns:
hitrate (array): [recall@1,recall@2,...]
"""
nr = np.max((1,np.sum(rs)))
rs = np.array(rs)
nr = np.max((1, np.sum(rs)))
return np.cumsum(rs) / nr


def success(rs):
    """Success for positions ≤ k.

    Position k is a success (1) when at least one relevant item occurs at
    or before rank k, otherwise it is 0.

    Example:
        >>> rs = [0, 0, 1, 0, 1, 0]
        >>> success(rs)
        array([0, 0, 1, 1, 1, 1])

    Args:
        rs (array): binary relevance array

    Returns:
        success (array): [success@≤1, success@≤2,...]
    """
    # For binary relevance the running sum never decreases, so once it is
    # positive every later position is a success as well.
    success = np.cumsum(rs) > 0

    # ``np.int`` was only an alias of the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so use the builtin directly.
    return success.astype(int)


def avprec(rs):
    """Average precision of each leading slice of ``rs``.

    Returns:
        list: ``average_precision(rs[:i])`` for ``i`` from 1 up to, but not
        including, ``len(rs)``.
    """
    cutoffs = range(1, len(rs))
    return [average_precision(rs[:k]) for k in cutoffs]


def recall(rs,nr):
def recall(rs, nr):
"""recall rate
First element is rank 1. Relevance is binary.

Expand All @@ -56,6 +77,7 @@ def recall(rs,nr):

return np.sum(rs)/nr


def mean_reciprocal_rank(rs):
"""Score is reciprocal of the rank of the first relevant item

Expand Down Expand Up @@ -272,4 +294,4 @@ def ndcg_at_k(r, k, method=0):
dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
if not dcg_max:
return 0.
return dcg_at_k(r, k, method) / dcg_max
return dcg_at_k(r, k, method) / dcg_max
32 changes: 32 additions & 0 deletions deeprank/utils/count_hits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env python
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will replace count_hits.py with my code for checking statistics including hits, which is more general.

# Li Xue
# 12-Jul-2019 21:13

# Count the hits.

import h5py

# def count_hits_from_input(hdf5_DIR = None):
#
#
# def main():
#
# hdf5_DIR = '/home/lixue/DBs/BM5-haddock24/hdf5_withGridFeature'
# count_hits_from_input(hdf5_DIR)

#h5FL = '/projects/0/deeprank/BM5/hdf5/hdf5_woGridFeature/000_1ACB.hdf5'
# Hard-coded input: one HDF5 file whose top-level groups are the model IDs.
h5FL = '/projects/0/deeprank/BM5/hdf5/hdf5_withGridFeature/013_2ABZ.hdf5'

# Open through a context manager so the file handle is released even when a
# model lacks one of the expected target datasets.
with h5py.File(h5FL, 'r') as f:
    modelIDs = list(f)

    for modelID in modelIDs:
        # Only the targets that are actually reported are read.
        BIN_CLASS = f[modelID + '/targets/BIN_CLASS'][()]
        IRMSD = f[modelID + '/targets/IRMSD'][()]

        # Report the models whose IRMSD is at most 4
        # (presumably Angstrom -- TODO confirm the unit).
        if IRMSD <= 4:
            print(f"modelID: {modelID}, BIN: {BIN_CLASS}, irmsd: {IRMSD}")

31 changes: 31 additions & 0 deletions deeprank/utils/get_h5subset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be updated to extract the first N models rather than only the first 5.

# Li Xue
# 2-May-2019 11:24
#
# Extract five molecules from each hdf5 file and write into a new h5 file.
#
# This script is used to generate a small set for debugging.

import sys
import h5py
import os

h5FL = sys.argv[1]  # input file, e.g. '001_1GPW.hdf5'
outDIR = sys.argv[2]  # output directory, e.g. '.../'
# Optional third argument: number of models to copy (defaults to 5).
n_models = int(sys.argv[3]) if len(sys.argv) > 3 else 5

filename = os.path.basename(h5FL)
# os.path.join yields the same path as plain concatenation when outDIR ends
# with a separator, and still lands inside outDIR when it does not.
new_h5FL = os.path.join(outDIR, filename)

# Context managers close both files even if a copy fails part-way.
with h5py.File(h5FL, 'r') as f_in, h5py.File(new_h5FL, 'w') as f_out:
    modelIDs = list(f_in)

    # Copy the first n_models top-level groups into the new file.
    for x in modelIDs[:n_models]:
        print(x)
        f_in.copy(f_in[x], f_out)

print(f"{new_h5FL} generated.")

31 changes: 31 additions & 0 deletions deeprank/utils/get_subset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file will be deleted. It is the same as get_h5subset.py.

# Li Xue
# 2-May-2019 11:24
#
# Extract five molecules from each hdf5 file and write into a new h5 file.
#
# This script is used to generate a small set for debugging.

import sys
import h5py
import os

h5FL = sys.argv[1]  # input file, e.g. '001_1GPW.hdf5'
outDIR = sys.argv[2]  # output directory, e.g. '.../'

filename = os.path.basename(h5FL)
# os.path.join yields the same path as plain concatenation when outDIR ends
# with a separator, and still lands inside outDIR when it does not.
new_h5FL = os.path.join(outDIR, filename)

# Context managers close both files even if a copy fails part-way.
with h5py.File(h5FL, 'r') as f_in, h5py.File(new_h5FL, 'w') as f_out:
    modelIDs = list(f_in)

    # Copy the first five top-level groups into the new file.
    for x in modelIDs[0:5]:
        print(x)
        f_in.copy(f_in[x], f_out)

print(f"{new_h5FL} generated.")

207 changes: 0 additions & 207 deletions deeprank/utils/plot_hitrate.py

This file was deleted.

Loading