Skip to content

Commit

Permalink
Improve efficiency in _iter_train_pairs:
Browse files Browse the repository at this point in the history
 - replace random.shuffle with random.choice (#4)
 - use a set to check whether a candidate negative sample also appears
   as a positive sample
  • Loading branch information
seanmacavaney committed Jul 31, 2019
1 parent a40feaf commit a378205
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions data.py
Expand Up @@ -67,14 +67,13 @@ def _iter_train_pairs(model, dataset, train_pairs, qrels):
pos_ids = [did for did in train_pairs[qid] if qrels.get(qid, {}).get(did, 0) > 0]
if len(pos_ids) == 0:
continue
random.shuffle(pos_ids)
pos_id = pos_ids[0]
pos_id = random.choice(pos_ids)
pos_ids_lookup = set(pos_ids)
pos_ids = set(pos_ids)
neg_ids = [did for did in train_pairs[qid] if did not in pos_ids]
neg_ids = [did for did in train_pairs[qid] if did not in pos_ids_lookup]
if len(neg_ids) == 0:
continue
random.shuffle(neg_ids)
neg_id = neg_ids[0]
neg_id = random.choice(neg_ids)
query_tok = model.tokenize(ds_queries[qid])
pos_doc = ds_docs.get(pos_id)
neg_doc = ds_docs.get(neg_id)
Expand Down

0 comments on commit a378205

Please sign in to comment.