Skip to content
Permalink
Browse files

Improve efficiency in _iter_train_pairs:

 - replacing random.shuffle with random.choice (#4)
 - using a set to check whether a candidate negative sample also appears
   as a positive sample
  • Loading branch information...
seanmacavaney committed Jul 31, 2019
1 parent a40feaf commit a378205ca46c99f376d27944c928a1ce116e3184
Showing with 4 additions and 5 deletions.
  1. +4 −5 data.py
@@ -67,14 +67,13 @@ def _iter_train_pairs(model, dataset, train_pairs, qrels):
pos_ids = [did for did in train_pairs[qid] if qrels.get(qid, {}).get(did, 0) > 0]
if len(pos_ids) == 0:
continue
random.shuffle(pos_ids)
pos_id = pos_ids[0]
pos_id = random.choice(pos_ids)
pos_ids_lookup = set(pos_ids)
pos_ids = set(pos_ids)
neg_ids = [did for did in train_pairs[qid] if did not in pos_ids]
neg_ids = [did for did in train_pairs[qid] if did not in pos_ids_lookup]
if len(neg_ids) == 0:
continue
random.shuffle(neg_ids)
neg_id = neg_ids[0]
neg_id = random.choice(neg_ids)
query_tok = model.tokenize(ds_queries[qid])
pos_doc = ds_docs.get(pos_id)
neg_doc = ds_docs.get(neg_id)

0 comments on commit a378205

Please sign in to comment.
You can’t perform that action at this time.