In [1]:
import os
import argparse
import subprocess
import random
import tempfile
from tqdm import tqdm
import torch
import modeling
import data
import pytrec_eval
from statistics import mean
from collections import defaultdict



SEED = 42
LR = 0.001
BERT_LR = 2e-5
MAX_EPOCH = 100
BATCH_SIZE = 16
BATCHES_PER_EPOCH = 32
GRAD_ACC_SIZE = 2
#other possibilities: ndcg
VALIDATION_METRIC = 'P_20'
PATIENCE = 20 # how many epochs to wait for validation improvement

torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
random.seed(SEED)


MODEL_MAP = {
    'vanilla_bert': modeling.VanillaBertRanker,
    'cedr_pacrr': modeling.CedrPacrrRanker,
    'cedr_knrm': modeling.CedrKnrmRanker,
    'cedr_drmm': modeling.CedrDrmmRanker
}



In [2]:
def train_iteration(model, optimizer, dataset, train_pairs, qrels):
    
    total = 0
    model.train()
    total_loss = 0.
    with tqdm('training', total=BATCH_SIZE * BATCHES_PER_EPOCH, ncols=80, desc='train', leave=False) as pbar:
        for record in data.iter_train_pairs(model, dataset, train_pairs, qrels, GRAD_ACC_SIZE):
            scores = model(record['query_tok'],
                           record['query_mask'],
                           record['doc_tok'],
                           record['doc_mask'])
            count = len(record['query_id']) // 2
            scores = scores.reshape(count, 2)
            loss = torch.mean(1. - scores.softmax(dim=1)[:, 0]) # pariwse softmax
            loss.backward()
            total_loss += loss.item()
            total += count
            if total % BATCH_SIZE == 0:
                optimizer.step()
                optimizer.zero_grad()
            pbar.update(count)
            if total >= BATCH_SIZE * BATCHES_PER_EPOCH:
                return total_loss


def validate(model, dataset, run, valid_qrels, epoch):
    run_scores = run_model(model, dataset, run)
    metric = VALIDATION_METRIC
    if metric.startswith("P_"):
        metric = "P"
    trec_eval = pytrec_eval.RelevanceEvaluator(valid_qrels, {metric})
    eval_scores = trec_eval.evaluate(run_scores)
    print(eval_scores)
    return mean([d[VALIDATION_METRIC] for d in eval_scores.values()])


def run_model(model, dataset, run, desc='valid'):
    rerank_run = defaultdict(dict)
    with torch.no_grad(), tqdm(total=sum(len(r) for r in run.values()), ncols=80, desc=desc, leave=False) as pbar:
        model.eval()
        for records in data.iter_valid_records(model, dataset, run, BATCH_SIZE):
            scores = model(records['query_tok'],
                           records['query_mask'],
                           records['doc_tok'],
                           records['doc_mask'])
            for qid, did, score in zip(records['query_id'], records['doc_id'], scores):
                rerank_run[qid][did] = score.item()
            pbar.update(len(records['query_id']))
    return rerank_run
    

def write_run(rerank_run, runf):
    '''
        Utility method to write a file to disk. Now unused
    '''
    with open(runf, 'wt') as runfile:
        for qid in rerank_run:
            scores = list(sorted(rerank_run[qid].items(), key=lambda x: (x[1], x[0]), reverse=True))
            for i, (did, score) in enumerate(scores):
                runfile.write(f'{qid} 0 {did} {i+1} {score} run\n')




In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MODEL_MAP["vanilla_bert"]().to(device)

In [4]:
dataset = data.read_datafiles([open("../data/robust/queries.tsv", "rt"), open("../data/trec45_docs.txt", "rt")])
qrels = data.read_qrels_dict(open("../data/robust/qrels", "rt"))
train_pairs = data.read_pairs_dict(open("../data/trec45.fold1.train.pairs", "rt"))
valid_run = data.read_run_dict(open("../data/trec45.fold1.test.pairs", "rt"))

loading datafile (by line): 103089it [00:02, 27652.32it/s]

skipping line: `doc	FR941017-2-00027`


loading datafile (by line): 112060it [00:02, 26402.51it/s]

skipping line: `doc	FR940414-2-00083`
skipping line: `doc	FR940906-2-00080`


                                                          

In [6]:
params = [(k, v) for k, v in model.named_parameters() if v.requires_grad]
non_bert_params = {'params': [v for k, v in params if not k.startswith('bert.')]}
bert_params = {'params': [v for k, v in params if k.startswith('bert.')], 'lr': BERT_LR}
optimizer = torch.optim.Adam([non_bert_params, bert_params], lr=LR)

In [None]:
epoch = 0
top_valid_score = None
print(f'Starting training, upto {MAX_EPOCH} epochs, patience {PATIENCE} LR={LR} BERT_LR={BERT_LR}', flush=True)
for epoch in range(MAX_EPOCH):

    loss = train_iteration(model, optimizer, dataset, train_pairs, qrels)
    print(f'train epoch={epoch} loss={loss}')
    
    if epoch > 0 and epoch % 100 == 0:
        valid_score = validate(model, dataset, valid_run, qrels, epoch)
        print(f'validation epoch={epoch} score={valid_score}')

        if top_valid_score is None or valid_score > top_valid_score:
            top_valid_score = valid_score
            print('new top validation score, saving weights', flush=True)
            model.save(os.path.join(model_out_dir, 'weights.p'))
            top_valid_score_epoch = epoch
        if top_valid_score is not None and epoch - top_valid_score_epoch > PATIENCE:
            print(f'no validation improvement since {top_valid_score_epoch}, early stopping', flush=True)
            break

Starting training, upto 100 epochs, patience 20 LR=0.001 BERT_LR=2e-05


train:  20%|██████▊                           | 102/512 [00:36<02:31,  2.70it/s]

no positive labels for query 672 


train:  50%|█████████████████▏                | 258/512 [01:32<01:36,  2.62it/s]

no positive labels for query 672 


train:   0%|                                            | 0/512 [00:00<?, ?it/s]

train epoch=0 loss=55.30670562386513


train:  16%|█████▋                             | 84/512 [00:28<02:16,  3.13it/s]

no positive labels for query 672 


train:  52%|█████████████████▊                | 268/512 [01:33<01:29,  2.72it/s]

no positive labels for query 672 


train:  86%|█████████████████████████████     | 438/512 [02:34<00:27,  2.68it/s]

no positive labels for query 672 


train:   0%|                                            | 0/512 [00:00<?, ?it/s]

train epoch=1 loss=45.86682218313217


train:  29%|█████████▉                        | 150/512 [00:53<01:37,  3.70it/s]

no positive labels for query 672 


train:  57%|███████████████████▎              | 290/512 [01:41<01:21,  2.73it/s]

no positive labels for query 672 


train:  85%|████████████████████████████▉     | 436/512 [02:32<00:25,  2.98it/s]

no positive labels for query 672 


train:   0%|                                            | 0/512 [00:00<?, ?it/s]

train epoch=2 loss=44.66483294963837


train:  10%|███▌                               | 52/512 [00:19<02:49,  2.71it/s]

no positive labels for query 672 


train:  75%|█████████████████████████▋        | 386/512 [02:21<00:39,  3.20it/s]

no positive labels for query 672 


train:  82%|███████████████████████████▊      | 418/512 [02:32<00:34,  2.73it/s]

no positive labels for query 672 


train:   0%|                                            | 0/512 [00:00<?, ?it/s]

train epoch=3 loss=43.895596116781235


train:  34%|███████████▋                      | 176/512 [00:59<01:57,  2.85it/s]

no positive labels for query 672 


train:  73%|████████████████████████▊         | 374/512 [02:08<00:50,  2.74it/s]

no positive labels for query 672 


train: 100%|█████████████████████████████████▊| 510/512 [02:57<00:00,  2.73it/s]

no positive labels for query 672 


train:   0%|                                            | 0/512 [00:00<?, ?it/s]

train epoch=4 loss=36.274590253829956


train:   1%|▍                                   | 6/512 [00:02<03:07,  2.70it/s]

no positive labels for query 672 


train:  39%|█████████████▏                    | 198/512 [01:14<02:02,  2.57it/s]

no positive labels for query 672 


train:  79%|██████████████████████████▊       | 404/512 [02:26<00:38,  2.80it/s]

no positive labels for query 672 


train:   0%|                                            | 0/512 [00:00<?, ?it/s]

train epoch=5 loss=33.09615474939346


train:  30%|██████████                        | 152/512 [00:51<02:02,  2.93it/s]

no positive labels for query 672 


train:  53%|█████████████████▉                | 270/512 [01:32<01:21,  2.97it/s]

no positive labels for query 672 


train:  89%|██████████████████████████████▍   | 458/512 [02:40<00:20,  2.64it/s]

no positive labels for query 672 


train:   0%|                                            | 0/512 [00:00<?, ?it/s]

train epoch=6 loss=42.27794000506401


train:  21%|███████▏                          | 108/512 [00:38<02:03,  3.28it/s]

no positive labels for query 672 


train:  77%|██████████████████████████        | 392/512 [02:18<00:40,  2.95it/s]

no positive labels for query 672 


train:   0%|                                            | 0/512 [00:00<?, ?it/s]

train epoch=7 loss=36.1418998837471


train:  24%|████████▏                         | 124/512 [00:45<02:24,  2.69it/s]

no positive labels for query 672 


train:  75%|█████████████████████████▎        | 382/512 [02:17<00:47,  2.73it/s]

no positive labels for query 672 


train:   0%|                                            | 0/512 [00:00<?, ?it/s]

train epoch=8 loss=29.864446610212326


train:  18%|██████▍                            | 94/512 [00:33<02:29,  2.79it/s]

no positive labels for query 672 


train:  66%|██████████████████████▍           | 338/512 [01:56<00:58,  2.99it/s]

no positive labels for query 672 


train:   0%|                                            | 0/512 [00:00<?, ?it/s]

train epoch=9 loss=36.376192420721054


train:  19%|██████▋                            | 98/512 [00:34<02:07,  3.25it/s]

no positive labels for query 672 


train:  48%|████████████████▎                 | 246/512 [01:27<01:39,  2.68it/s]

no positive labels for query 672 


valid:   0%|                                          | 0/93907 [00:00<?, ?it/s]

train epoch=10 loss=31.25358495116234


valid:  27%|███████▌                    | 25312/93907 [36:07<1:13:11, 15.62it/s]

In [17]:
total = 0
model.train()
total_loss = 0.
with tqdm('training', total=BATCH_SIZE * BATCHES_PER_EPOCH, ncols=80, desc='train', leave=False) as pbar:
    for record in data.iter_train_pairs(model, dataset, train_pairs, qrels, GRAD_ACC_SIZE):
        scores = model(record['query_tok'],
                       record['query_mask'],
                       record['doc_tok'],
                       record['doc_mask'])
        count = len(record['query_id']) // 2
        scores = scores.reshape(count, 2)
        loss = torch.mean(1. - scores.softmax(dim=1)[:, 0]) # pariwse softmax
        loss.backward()
        total_loss += loss.item()
        total += count
        if total % BATCH_SIZE == 0:
            optimizer.step()
            optimizer.zero_grad()
        pbar.update(count)
       

                                                                                

RuntimeError: Expected object of device type cuda but got device type cpu for argument #3 'index' in call to _th_index_select

In [22]:
model(record['query_tok'],record['query_mask'],record['doc_tok'],record['doc_mask'])

RuntimeError: Expected object of device type cuda but got device type cpu for argument #3 'index' in call to _th_index_select

In [20]:
train_pairs

{'301': {'FBIS4-41991': 1,
  'FBIS4-38364': 1,
  'FBIS4-55395': 1,
  'FBIS4-7811': 1,
  'FBIS3-23986': 1,
  'FBIS3-24143': 1,
  'FBIS3-37418': 1,
  'FBIS4-22471': 1,
  'FBIS4-46734': 1,
  'FBIS4-16951': 1,
  'FBIS3-19646': 1,
  'FBIS3-21961': 1,
  'FBIS4-46846': 1,
  'FBIS4-68801': 1,
  'FBIS3-18129': 1,
  'FBIS4-46780': 1,
  'FBIS3-58058': 1,
  'FBIS3-58028': 1,
  'FBIS3-24277': 1,
  'FBIS4-19535': 1,
  'FBIS4-7688': 1,
  'FBIS3-41247': 1,
  'FBIS3-26415': 1,
  'FBIS3-19199': 1,
  'FBIS4-40260': 1,
  'FBIS4-45477': 1,
  'FBIS4-43801': 1,
  'FBIS4-43965': 1,
  'FBIS4-10817': 1,
  'FBIS4-41541': 1,
  'LA121990-0141': 1,
  'LA102290-0116': 1,
  'LA041789-0010': 1,
  'FBIS3-24145': 1,
  'FBIS4-40936': 1,
  'FBIS4-22596': 1,
  'LA080489-0020': 1,
  'FBIS4-18122': 1,
  'FBIS3-22085': 1,
  'FBIS3-9399': 1,
  'FBIS4-16126': 1,
  'FBIS3-21770': 1,
  'LA100790-0068': 1,
  'FBIS4-54904': 1,
  'LA032790-0103': 1,
  'FBIS3-17175': 1,
  'FBIS4-9451': 1,
  'FBIS4-41396': 1,
  'FBIS3-19420': 1,
  'FB

In [7]:
import train
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = train.MODEL_MAP["vanilla_bert"]().to(device)
dataset = data.read_datafiles([open("../data/robust/queries.tsv"),
                               open("../data/trec45_docs.txt")])
run = data.read_run_dict(open("../data/trec45.fold1.test.pairs"))
model.load("models/vbert/weights.p")

loading datafile (by line): 114936it [00:01, 107983.50it/s]

skipping line: `doc	FR941017-2-00027`
skipping line: `doc	FR940414-2-00083`
skipping line: `doc	FR940906-2-00080`


                                                           

In [8]:
pred = train.run_model(model, dataset, run, desc='rerank')

                                                                                

KeyboardInterrupt: 

In [9]:
P = {'302': {'P_5': 0.6, 'P_10': 0.5, 'P_15': 0.6, 'P_20': 0.55, 'P_30': 0.4666666666666667, 'P_100': 0.35, 'P_200': 0.205, 'P_500': 0.098, 'P_1000': 0.062}, '303': {'P_5': 0.0, 'P_10': 0.2, 'P_15': 0.13333333333333333, 'P_20': 0.2, 'P_30': 0.2, 'P_100': 0.1, 'P_200': 0.05, 'P_500': 0.02, 'P_1000': 0.01}, '309': {'P_5': 0.0, 'P_10': 0.0, 'P_15': 0.0, 'P_20': 0.0, 'P_30': 0.0, 'P_100': 0.0, 'P_200': 0.0, 'P_500': 0.002, 'P_1000': 0.001}, '316': {'P_5': 0.8, 'P_10': 0.8, 'P_15': 0.8, 'P_20': 0.8, 'P_30': 0.8333333333333334, 'P_100': 0.25, 'P_200': 0.125, 'P_500': 0.05, 'P_1000': 0.025}, '317': {'P_5': 0.6, 'P_10': 0.3, 'P_15': 0.26666666666666666, 'P_20': 0.25, 'P_30': 0.16666666666666666, 'P_100': 0.09, 'P_200': 0.045, 'P_500': 0.024, 'P_1000': 0.012}, '319': {'P_5': 0.2, 'P_10': 0.4, 'P_15': 0.3333333333333333, 'P_20': 0.3, 'P_30': 0.3, 'P_100': 0.19, 'P_200': 0.155, 'P_500': 0.078, 'P_1000': 0.09}, '323': {'P_5': 1.0, 'P_10': 0.7, 'P_15': 0.5333333333333333, 'P_20': 0.45, 'P_30': 0.3333333333333333, 'P_100': 0.14, 'P_200': 0.08, 'P_500': 0.034, 'P_1000': 0.018}, '331': {'P_5': 0.8, 'P_10': 0.9, 'P_15': 0.9333333333333333, 'P_20': 0.9, 'P_30': 0.8333333333333334, 'P_100': 0.55, 'P_200': 0.415, 'P_500': 0.184, 'P_1000': 0.107}, '336': {'P_5': 0.2, 'P_10': 0.2, 'P_15': 0.13333333333333333, 'P_20': 0.1, 'P_30': 0.1, 'P_100': 0.03, 'P_200': 0.015, 'P_500': 0.01, 'P_1000': 0.01}, '341': {'P_5': 1.0, 'P_10': 0.9, 'P_15': 0.6666666666666666, 'P_20': 0.55, 'P_30': 0.5, 'P_100': 0.31, 'P_200': 0.175, 'P_500': 0.096, 'P_1000': 0.054}, '356': {'P_5': 0.0, 'P_10': 0.0, 'P_15': 0.06666666666666667, 'P_20': 0.05, 'P_30': 0.03333333333333333, 'P_100': 0.02, 'P_200': 0.01, 'P_500': 0.004, 'P_1000': 0.003}, '357': {'P_5': 0.8, 'P_10': 0.6, 'P_15': 0.6666666666666666, 'P_20': 0.6, 'P_30': 0.6333333333333333, 'P_100': 0.48, 'P_200': 0.33, 'P_500': 0.236, 'P_1000': 0.167}, '370': {'P_5': 0.0, 'P_10': 0.0, 'P_15': 0.0, 'P_20': 0.1, 'P_30': 0.1, 'P_100': 0.26, 'P_200': 0.32, 'P_500': 0.228, 'P_1000': 0.141}, '373': {'P_5': 0.8, 'P_10': 0.6, 'P_15': 0.4666666666666667, 'P_20': 0.45, 'P_30': 0.36666666666666664, 'P_100': 0.12, 'P_200': 0.06, 'P_500': 0.024, 'P_1000': 0.019}, '378': {'P_5': 0.2, 'P_10': 0.1, 'P_15': 0.06666666666666667, 'P_20': 0.05, 'P_30': 0.03333333333333333, 'P_100': 0.04, 'P_200': 0.045, 'P_500': 0.03, 'P_1000': 0.022}, '381': {'P_5': 0.4, 'P_10': 0.2, 'P_15': 0.13333333333333333, 'P_20': 0.1, 'P_30': 0.06666666666666667, 'P_100': 0.03, 'P_200': 0.025, 'P_500': 0.016, 'P_1000': 0.012}, '383': {'P_5': 1.0, 'P_10': 0.6, 'P_15': 0.4666666666666667, 'P_20': 0.45, 'P_30': 0.3, 'P_100': 0.15, 'P_200': 0.1, 'P_500': 0.054, 'P_1000': 0.03}, '392': {'P_5': 0.8, 'P_10': 0.8, 'P_15': 0.8, 'P_20': 0.75, 'P_30': 0.6, 'P_100': 0.34, 'P_200': 0.26, 'P_500': 0.176, 'P_1000': 0.103}, '394': {'P_5': 0.6, 'P_10': 0.3, 'P_15': 0.2, 'P_20': 0.15, 'P_30': 0.1, 'P_100': 0.04, 'P_200': 0.025, 'P_500': 0.012, 'P_1000': 0.006}, '406': {'P_5': 0.8, 'P_10': 0.5, 'P_15': 0.3333333333333333, 'P_20': 0.35, 'P_30': 0.23333333333333334, 'P_100': 0.09, 'P_200': 0.055, 'P_500': 0.024, 'P_1000': 0.013}, '410': {'P_5': 0.8, 'P_10': 0.9, 'P_15': 0.9333333333333333, 'P_20': 0.85, 'P_30': 0.9, 'P_100': 0.41, 'P_200': 0.22, 'P_500': 0.112, 'P_1000': 0.061}, '411': {'P_5': 0.8, 'P_10': 0.6, 'P_15': 0.6, 'P_20': 0.5, 'P_30': 0.43333333333333335, 'P_100': 0.15, 'P_200': 0.075, 'P_500': 0.032, 'P_1000': 0.017}, '414': {'P_5': 0.2, 'P_10': 0.3, 'P_15': 0.2, 'P_20': 0.15, 'P_30': 0.1, 'P_100': 0.06, 'P_200': 0.095, 'P_500': 0.048, 'P_1000': 0.029}, '426': {'P_5': 1.0, 'P_10': 1.0, 'P_15': 0.8666666666666667, 'P_20': 0.8, 'P_30': 0.6333333333333333, 'P_100': 0.39, 'P_200': 0.28, 'P_500': 0.156, 'P_1000': 0.091}, '428': {'P_5': 0.4, 'P_10': 0.4, 'P_15': 0.4, 'P_20': 0.4, 'P_30': 0.3333333333333333, 'P_100': 0.21, 'P_200': 0.17, 'P_500': 0.11, 'P_1000': 0.096}, '433': {'P_5': 0.0, 'P_10': 0.0, 'P_15': 0.0, 'P_20': 0.0, 'P_30': 0.0, 'P_100': 0.0, 'P_200': 0.0, 'P_500': 0.002, 'P_1000': 0.004}, '447': {'P_5': 0.2, 'P_10': 0.1, 'P_15': 0.2, 'P_20': 0.15, 'P_30': 0.1, 'P_100': 0.05, 'P_200': 0.035, 'P_500': 0.02, 'P_1000': 0.011}, '448': {'P_5': 0.2, 'P_10': 0.3, 'P_15': 0.2, 'P_20': 0.15, 'P_30': 0.1, 'P_100': 0.08, 'P_200': 0.065, 'P_500': 0.034, 'P_1000': 0.02}, '601': {'P_5': 0.6, 'P_10': 0.3, 'P_15': 0.2, 'P_20': 0.15, 'P_30': 0.1, 'P_100': 0.03, 'P_200': 0.015, 'P_500': 0.006, 'P_1000': 0.005}, '607': {'P_5': 0.2, 'P_10': 0.2, 'P_15': 0.3333333333333333, 'P_20': 0.25, 'P_30': 0.16666666666666666, 'P_100': 0.07, 'P_200': 0.035, 'P_500': 0.018, 'P_1000': 0.009}, '608': {'P_5': 0.0, 'P_10': 0.0, 'P_15': 0.0, 'P_20': 0.0, 'P_30': 0.0, 'P_100': 0.01, 'P_200': 0.045, 'P_500': 0.03, 'P_1000': 0.021}, '612': {'P_5': 0.8, 'P_10': 0.7, 'P_15': 0.5333333333333333, 'P_20': 0.5, 'P_30': 0.36666666666666664, 'P_100': 0.11, 'P_200': 0.055, 'P_500': 0.03, 'P_1000': 0.015}, '617': {'P_5': 0.2, 'P_10': 0.2, 'P_15': 0.2, 'P_20': 0.25, 'P_30': 0.26666666666666666, 'P_100': 0.18, 'P_200': 0.12, 'P_500': 0.056, 'P_1000': 0.041}, '619': {'P_5': 0.8, 'P_10': 0.7, 'P_15': 0.5333333333333333, 'P_20': 0.65, 'P_30': 0.5, 'P_100': 0.18, 'P_200': 0.09, 'P_500': 0.042, 'P_1000': 0.021}, '635': {'P_5': 0.6, 'P_10': 0.6, 'P_15': 0.6666666666666666, 'P_20': 0.6, 'P_30': 0.4, 'P_100': 0.17, 'P_200': 0.085, 'P_500': 0.034, 'P_1000': 0.017}, '641': {'P_5': 0.8, 'P_10': 0.6, 'P_15': 0.5333333333333333, 'P_20': 0.45, 'P_30': 0.36666666666666664, 'P_100': 0.25, 'P_200': 0.19, 'P_500': 0.092, 'P_1000': 0.047}, '642': {'P_5': 0.2, 'P_10': 0.2, 'P_15': 0.26666666666666666, 'P_20': 0.2, 'P_30': 0.13333333333333333, 'P_100': 0.09, 'P_200': 0.075, 'P_500': 0.052, 'P_1000': 0.038}, '646': {'P_5': 0.2, 'P_10': 0.2, 'P_15': 0.2, 'P_20': 0.15, 'P_30': 0.16666666666666666, 'P_100': 0.09, 'P_200': 0.05, 'P_500': 0.02, 'P_1000': 0.01}, '647': {'P_5': 0.8, 'P_10': 0.5, 'P_15': 0.4, 'P_20': 0.35, 'P_30': 0.3, 'P_100': 0.13, 'P_200': 0.085, 'P_500': 0.04, 'P_1000': 0.02}, '654': {'P_5': 0.4, 'P_10': 0.2, 'P_15': 0.13333333333333333, 'P_20': 0.1, 'P_30': 0.06666666666666667, 'P_100': 0.07, 'P_200': 0.065, 'P_500': 0.034, 'P_1000': 0.02}, '656': {'P_5': 1.0, 'P_10': 0.9, 'P_15': 0.8666666666666667, 'P_20': 0.75, 'P_30': 0.6333333333333333, 'P_100': 0.44, 'P_200': 0.255, 'P_500': 0.114, 'P_1000': 0.067}, '662': {'P_5': 1.0, 'P_10': 0.8, 'P_15': 0.6, 'P_20': 0.45, 'P_30': 0.3, 'P_100': 0.09, 'P_200': 0.045, 'P_500': 0.018, 'P_1000': 0.01}, '665': {'P_5': 0.2, 'P_10': 0.2, 'P_15': 0.3333333333333333, 'P_20': 0.25, 'P_30': 0.2, 'P_100': 0.09, 'P_200': 0.06, 'P_500': 0.026, 'P_1000': 0.017}, '669': {'P_5': 0.0, 'P_10': 0.0, 'P_15': 0.0, 'P_20': 0.0, 'P_30': 0.0, 'P_100': 0.03, 'P_200': 0.015, 'P_500': 0.008, 'P_1000': 0.014}, '670': {'P_5': 0.2, 'P_10': 0.2, 'P_15': 0.2, 'P_20': 0.25, 'P_30': 0.16666666666666666, 'P_100': 0.13, 'P_200': 0.07, 'P_500': 0.028, 'P_1000': 0.014}, '679': {'P_5': 1.0, 'P_10': 0.5, 'P_15': 0.3333333333333333, 'P_20': 0.25, 'P_30': 0.2, 'P_100': 0.06, 'P_200': 0.03, 'P_500': 0.012, 'P_1000': 0.006}, '684': {'P_5': 0.6, 'P_10': 0.3, 'P_15': 0.4, 'P_20': 0.3, 'P_30': 0.23333333333333334, 'P_100': 0.12, 'P_200': 0.115, 'P_500': 0.05, 'P_1000': 0.028}, '690': {'P_5': 0.2, 'P_10': 0.1, 'P_15': 0.06666666666666667, 'P_20': 0.05, 'P_30': 0.03333333333333333, 'P_100': 0.02, 'P_200': 0.02, 'P_500': 0.008, 'P_1000': 0.007}, '692': {'P_5': 0.4, 'P_10': 0.2, 'P_15': 0.3333333333333333, 'P_20': 0.25, 'P_30': 0.2, 'P_100': 0.16, 'P_200': 0.09, 'P_500': 0.044, 'P_1000': 0.023}}


In [12]:
prec = []
for q in P:
    prec.append(P[q]["P_20"])

In [14]:
sum(prec)/len(prec)

0.3336734693877551