In [95]:
import json
from elasticsearch import Elasticsearch
from elasticsearch.helpers import parallel_bulk
import os
from tqdm import tqdm_notebook as tqdm
import time
from lxml import etree
from sklearn.metrics import r2_score
from datetime import timedelta
import numpy as np

In [96]:
def create_es_action(index, doc_id, document):
    return {
        '_index': index,
        '_id': doc_id,
        '_source': document
    }

def pretty_print_result(search_result, fields=None):
    if fields is None:
        fields = []
    res = search_result['hits']
    print(f'Total documents: {res["total"]["value"]}')
    for hit in res['hits']:
        print(f'Doc {hit["_id"]}, score is {hit["_score"]}')
        for field in fields:
            print(f'{field}: {hit["_source"][field]}')


def get_score(search_result):
    res = []
    for hit in search_result['hits']['hits']:
        res.append((hit["_id"], hit["_score"]))
    res.sort(key = lambda x: x[1], reverse = True)
    return res


class Index:
    def __init__(self, index, settings):
        self.index_name = index
        self.settings = settings
        self.es = Elasticsearch([{'host': 'localhost', 'port': 9200, 'timeout': 360}])
        if self.es.indices.exists(index=index):
            self.es.indices.delete(index=index)
        self.es.indices.create(index=index, body=settings)

    def es_actions_generator(self, path_to_docs):
        for doc_name in tqdm(os.listdir(path_to_docs)):
            with open(f"{path_to_docs}/{doc_name}", "r", encoding="utf-8") as inf:
                doc_id = int(''.join(list(filter(str.isdigit, doc_name))))
                doc = json.load(inf)           
            yield create_es_action(self.index_name, doc_id, doc)


    def add_documents(self, path_to_docs):
        try:
            for ok, result in parallel_bulk(self.es, self.es_actions_generator(path_to_docs), queue_size=4, thread_count=4,
                                        chunk_size=1000):
                  if not ok:
                     print(result)
        except Exception as e: 
            print(e)

    def get_doc_by_id(self, doc_id):
        return self.es.get(index=self.index_name, id=doc_id)['_source']

    def search(self, query, *args):
        return self.es.search(index=self.index_name, body=query, size=20)
        # note that size set to 20 just because default value is 10 and we know that we have 12 docs and 10 < 12 < 20



In [97]:
settings_1 = {
    "mappings": {
        "properties": {
            "text": {
                "type": "text"
            }
        }
    }
}

In [98]:
settings = {
        'mappings': {
            'properties': {
                'text': {
                    'type': 'text',
                    'analyzer': 'russian_complex',
                    'search_analyzer': 'russian_complex'
                }
            }
            
        },
        "settings": {
        "analysis" : {
            "analyzer" : {
                "my_analyzer" : {
                    "tokenizer" : "standard",
                    "filter" : ["lowercase", "russian_snow", "english_snow"]
                },
                'russian_complex': {
                    'char_filter': [
                        'yont'
                    ],
                    'tokenizer': 'word_longer_2',
                    'filter': [
                        'lowercase',
                        'russian_snow'
                    ]
                }
            },
            'char_filter': {
                'yont': {
                    'type': 'mapping',
                    'mappings': [
                        'ё => е'
                    ]
                }
            },
            'tokenizer': {
                'word_longer_2': {
                    'type': 'pattern',
                    'pattern': '[a-zA-Z_0-9\u0400-\u04FF]{2,}',
                    'group': 0
                },
                'white_20': {
                    'type': 'whitespace',
                    'max_token_length': 5
                }
            },
            "filter" : {
                "russian_snow" : {
                    "type" : "snowball",
                    "language" : "Russian"
                },
                "english_snow" : {
                    "type" : "snowball",
                    "language" : "English"
                }
            }
        }
    }
}

In [99]:
index = Index("docs", settings_1)

In [64]:
start = time.time()
index.add_documents("res/json")
elapsed = time.time() - start
print(str(timedelta(seconds=elapsed)))

HBox(children=(IntProgress(value=0, max=200001), HTML(value='')))


0:04:22.547225


In [65]:
stem_index = Index('stem_docs', settings)

In [66]:
start = time.time()
stem_index.add_documents("res/json")
elapsed = time.time() - start
print(str(timedelta(seconds=elapsed)))

HBox(children=(IntProgress(value=0, max=200001), HTML(value='')))


0:05:26.785592


In [67]:
stem_index.get_doc_by_id('1000039')

{'text': 'сервисы | о компании | партнерам Описание Рубрикатор Мобильный переводчик SMS-ИНФО - услуга, позволяющая с помощью SMS-запроса оперативно получать информацию различного \nхарактера в текстовом виде. Информация SMS-ИНФО мобильна, доступна 24 часа в сутки на всей территории \nзоны действия сети VELCOM. Ответ на запрос приходит оперативно - в течение нескольких секунд. Для получения информации необходимо отправить SMS-запрос (латиницей или кириллицей) на номер 511. Кроме того, можно сформировать \nтакой SMS-запрос с помощью SIM-карты нового образца с VELCOM-меню. Таблица транслитерации (соответствия \nрусских букв латинским) представлена в разделе Инструкция по SMS-ИНФО. Полный список рубрик SMS-ИНФО можно получить, отправив SMS-запрос HELP на номер 511. Стоимость одного запроса на номер 511 составляет 110 рублей без учета НДС и налога с продаж; для абонентов. Подробнее об услуге SMS-ИНФО читайте в разделе Инструкция по SMS-ИНФО. Внимание! Информацию можно получать и в виде MMS-

In [100]:
class Query:
    def __init__(self, task_id, query, relevant_docs):
        self.task_id = task_id
        self.query = query
        self.relevant_docs = relevant_docs


In [101]:

def json_query(query):
    return {
    'query': {
        'bool': {
            'should': [
                {
                    'match': {
                        'text': query.query
                    }
                }
            ]
        }
    }
}
        


In [102]:
def pagerank_query(query):
    return  {
    'query': {
        'bool': {
            'should': [
                {
                    'match': {
                        'text': query.query
                    }
                },
                {
                    'rank_feature': {
                        'field': 'pagerank',
                        'saturation': {
                            'pivot': 10
                        },
                        'boost': '5.0'
                    }
                }
            ]
        }
    }
}

In [120]:
class Metrics:
    def __init__(self, p, r, r_precision, map_score):
        self.r = r
        self.p = p
        self.r_precision = r_precision
        self.map_score = map_score
    
    def __str__(self):
        return f"r = {self.r}\np = {self.p}\nr_precision = {self.r_precision}\nMAP = {self.map_score}"
    
    __repr__ = __str__


class SearchQualityChecker:
    def __init__(self, queries, index):
        self.queries = queries
        self.index = index
        self.results = {}
        self.metrics = {}
        
    def get_results(self, get_query=json_query):
        r_precision_total = 0
        map_score_total = 0
        r_total = 0
        p_total = 0
        for q in tqdm(self.queries):
            res = self.index.search(get_query(q))
            print(q.task_id)
            pretty_print_result(res)
            scores = get_score(res)
            metric = Metrics(p=self.p(20, q, scores), r=self.r(20, q, scores), r_precision=self.r_precision(q, scores),
                            map_score=self.map_score(q, scores, 20))
            p_total += metric.p
            r_total += metric.r
            r_precision_total += metric.r_precision
            map_score_total += metric.map_score
            self.metrics[q.task_id] = metric
        Q = len(self.queries)
        print(Q)
        return Metrics(p=p_total / Q, r=r_total / Q, r_precision=r_precision_total / Q, map_score=map_score_total / Q)
    
    def r_precision(self, query, search_res_score):
        return self.r(len(query.relevant_docs), query, search_res_score)
    
    def map_score(self, query, search_res_score, n):
        m = 0
        for k in range(1, n):       
            m += self.p(k, query, search_res_score)
        R = len(query.relevant_docs)
        return m / n
    
    def p(self, k, query, search_res_score):
        r = 0
        for doc, _ in search_res_score[:k]:
            if doc in query.relevant_docs:
                r += 1
        return r / k
    
    def r(self, k, query, search_res_score):
        R = len(query.relevant_docs)
        r = 0
        for doc, _ in search_res_score[:k]:
            if doc in query.relevant_docs:
                r += 1
        return r / R if R != 0 else 0 if len(search_res_score) > 0 else 1



In [117]:
def get_relevance():
    res = {}
    xml_tree = etree.parse("data/or_relevant-minus_table.xml")
    root = xml_tree.getroot()
    for task in root.getchildren():
        relevant_docs = set()
        for document in task.getchildren():
            if document.get("relevance") == "vital":
                relevant_docs.add(document.get("id"))
        res[task.get("id")] = relevant_docs
    print(len(res))
    return res

task_id_to_q = {}
def generate_queries_plain_texts():
    relevances = get_relevance()
    xml_tree = etree.parse("data/web2008_adhoc.xml")
    root = xml_tree.getroot()
    res = []
    for task in root.getchildren():
        if task.get("id") is not None:
            for query_text in task.getchildren():
                try:
                    task_id_to_q[task.get("id")] = query_text.text
                    res.append(Query(task.get("id"), query_text.text, relevances[task.get("id")]))
                except:
                    pass
    print(len(res))
    return res

def find_diff_metrics(quality_checker, other_checker, k=20, comp=lambda x : x.map_score):
     res = []
     for task_id in quality_checker.metrics:
        metric = quality_checker.metrics[task_id]
        other_metric = other_checker.metrics[task_id]
        res.append((task_id_to_q[task_id], abs(comp(metric) - comp(other_metric)), metric, other_metric))
     res.sort(reverse=True, key=lambda x: x[1])
     return res

In [112]:
relev = get_relevance()

547


In [113]:
relev['arw30000']

KeyError: 'arw30000'

In [121]:
queries = generate_queries_plain_texts()
#print(queries)
quality_checker = SearchQualityChecker(queries, index)
plain_text_res = quality_checker.get_results()

547
547


HBox(children=(IntProgress(value=0, max=547), HTML(value='')))

arw49633
Total documents: 0
arw49662
Total documents: 0
arw49674
Total documents: 0
arw49692
Total documents: 0
arw49707
Total documents: 0
arw49720
Total documents: 0
arw49723
Total documents: 0
arw49731
Total documents: 0
arw49740
Total documents: 0
arw49745
Total documents: 0
arw49782
Total documents: 0
arw49838
Total documents: 0
arw49843
Total documents: 0
arw49893
Total documents: 0
arw49917
Total documents: 0
arw49924
Total documents: 0
arw49936
Total documents: 0
arw49938
Total documents: 0
arw49944
Total documents: 0
arw49950
Total documents: 0
arw49953
Total documents: 0
arw49958
Total documents: 0
arw49983
Total documents: 0
arw50017
Total documents: 0
arw50040
Total documents: 0
arw50064
Total documents: 0
arw50082
Total documents: 0
arw50104
Total documents: 0
arw50109
Total documents: 0
arw50122
Total documents: 0
arw50191
Total documents: 0
arw50197
Total documents: 0
arw50201
Total documents: 0
arw50227
Total documents: 0
arw50231
Total documents: 0
arw50276
Total docum

arw55770
Total documents: 0
arw55786
Total documents: 0
arw55807
Total documents: 0
arw55811
Total documents: 0
arw55815
Total documents: 0
arw55856
Total documents: 0
arw55859
Total documents: 0
arw55894
Total documents: 0
arw55895
Total documents: 0
arw55963
Total documents: 0
arw55989
Total documents: 0
arw55998
Total documents: 0
arw55999
Total documents: 0
arw56015
Total documents: 0
arw56036
Total documents: 0
arw56043
Total documents: 0
arw56056
Total documents: 0
arw56058
Total documents: 0
arw56068
Total documents: 0
arw56082
Total documents: 0
arw56114
Total documents: 0
arw56131
Total documents: 0
arw56143
Total documents: 0
arw56153
Total documents: 0
arw56155
Total documents: 0
arw56173
Total documents: 0
arw56192
Total documents: 0
arw56202
Total documents: 0
arw56218
Total documents: 0
arw56232
Total documents: 0
arw56247
Total documents: 0
arw56345
Total documents: 0
arw56405
Total documents: 0
arw56407
Total documents: 0
arw56409
Total documents: 0
arw56439
Total docum

In [122]:
plain_text_res

r = 0.09506398537477148
p = 0.0
r_precision = 0.09506398537477148
MAP = 0.0

In [115]:
from pymystem3 import Mystem
from nltk.corpus import stopwords
import nltk
#nltk.download('punkt')
#nltk.download('stopwords')
from string import punctuation
from nltk.corpus import stopwords

russian_stopwords = stopwords.words("russian")
english_stopwords = stopwords.words("english")
black_list = ["°", "№", "©", "...", "//", "://", "</", "\">", "=\"", "=\'", "\r", "\n", "\t"]
stem = Mystem()

def lemmatize(text):
    words = nltk.word_tokenize(text.lower())
    tokens = []
    for word in words:
        tokens.extend(stem.lemmatize(word))
    tokens = [token for token in tokens if token != " " and token.strip() not in punctuation \
              and token not in russian_stopwords and token not in english_stopwords \
              and token not in black_list \
              and token.find("\r") == -1 \
              and token.find("\n") == -1 \
              and token.find("\t") == -1 \
              and not (token.isdigit() and len(token) == 1)]
    return ' '.join(tokens)

def generate_queries_lemmas():
    relevances = get_relevance()
    xml_tree = etree.parse("data/web2008_adhoc.xml")
    root = xml_tree.getroot()
    res = []
    for task in tqdm(root.getchildren()):
        if task.get("id") is not None:
            for query_text in task.getchildren():
                try:
                    res.append(Query(task.get("id"), lemmatize(query_text.text), relevances[task.get("id")]))
                except:
                    pass
    print(len(res))
    return res



In [116]:
queries_lemmas = generate_queries_lemmas()

547


HBox(children=(IntProgress(value=0, max=29232), HTML(value='')))


547


In [78]:
queries_lemmas[8].query

'инвесткапиталбанк'

In [79]:
lemma_index = Index("lemma_docs", settings_1)

start = time.time()
lemma_index.add_documents("data/json_filtered_tokens_texts")
elapsed = time.time() - start
print(str(timedelta(seconds=elapsed)))

HBox(children=(IntProgress(value=0, max=200000), HTML(value='')))


0:03:15.451904


In [123]:
lemma_quality_checker = SearchQualityChecker(queries_lemmas, lemma_index)
lemma_res = lemma_quality_checker.get_results()

HBox(children=(IntProgress(value=0, max=547), HTML(value='')))

arw49633
Total documents: 5080
Doc 914238, score is 13.799336
Doc 1393503, score is 13.701229
Doc 1407642, score is 13.604507
Doc 567839, score is 13.571222
Doc 1385740, score is 13.571222
Doc 1504166, score is 12.54204
Doc 1016244, score is 12.132923
Doc 993272, score is 11.921361
Doc 990390, score is 11.921361
Doc 359461, score is 11.921361
Doc 360857, score is 11.359218
Doc 495806, score is 10.947564
Doc 854005, score is 10.142758
Doc 691503, score is 9.414591
Doc 1340878, score is 9.019914
Doc 1319901, score is 7.9332757
Doc 35586, score is 7.8946466
Doc 114021, score is 7.8873105
Doc 599962, score is 7.8450575
Doc 863522, score is 7.8382044
arw49662
Total documents: 0
arw49674
Total documents: 10000
Doc 1503640, score is 22.287449
Doc 674640, score is 22.287449
Doc 876069, score is 21.912388
Doc 49202, score is 20.201603
Doc 575107, score is 19.590178
Doc 1109272, score is 19.590178
Doc 664883, score is 19.063917
Doc 74265, score is 18.712236
Doc 284872, score is 18.100986
Doc 131

arw49944
Total documents: 10000
Doc 7317, score is 20.949793
Doc 942354, score is 20.436937
Doc 624744, score is 19.993721
Doc 1173677, score is 19.758795
Doc 1480002, score is 19.615366
Doc 362460, score is 19.473494
Doc 1339324, score is 19.33974
Doc 1309622, score is 19.226002
Doc 1369472, score is 19.144844
Doc 1296174, score is 19.084396
Doc 1339827, score is 19.04328
Doc 721485, score is 19.01279
Doc 811305, score is 18.912838
Doc 1295722, score is 18.862757
Doc 1479613, score is 18.62207
Doc 1401652, score is 18.451096
Doc 1314801, score is 18.273771
Doc 857813, score is 18.250877
Doc 378916, score is 17.994081
Doc 696409, score is 17.863583
arw49950
Total documents: 10000
Doc 866689, score is 24.263437
Doc 1299659, score is 24.263437
Doc 489340, score is 24.096928
Doc 494355, score is 23.283482
Doc 1012870, score is 22.982218
Doc 269146, score is 22.97311
Doc 586658, score is 22.898985
Doc 586432, score is 22.726059
Doc 583987, score is 22.726059
Doc 448137, score is 22.604153


arw50276
Total documents: 10000
Doc 4053, score is 22.949055
Doc 675632, score is 19.641657
Doc 878847, score is 19.247711
Doc 771209, score is 19.166359
Doc 422593, score is 18.929113
Doc 26891, score is 18.830061
Doc 47141, score is 18.696009
Doc 649450, score is 18.668884
Doc 1449392, score is 18.485468
Doc 815863, score is 18.419012
Doc 417668, score is 18.243214
Doc 1365208, score is 18.068588
Doc 679077, score is 17.922443
Doc 988276, score is 17.8506
Doc 280888, score is 17.714481
Doc 968919, score is 17.714481
Doc 41888, score is 17.666801
Doc 1253795, score is 17.597158
Doc 474809, score is 17.428158
Doc 522355, score is 17.425522
arw50280
Total documents: 9877
Doc 858190, score is 19.791473
Doc 969597, score is 17.541035
Doc 251914, score is 17.37619
Doc 983109, score is 17.069805
Doc 1136235, score is 16.337282
Doc 872253, score is 16.337282
Doc 1378648, score is 15.773499
Doc 47426, score is 15.609777
Doc 878497, score is 15.38065
Doc 880747, score is 14.811039
Doc 1435138,

arw50554
Total documents: 10000
Doc 345185, score is 24.099396
Doc 262690, score is 24.025612
Doc 345151, score is 24.025612
Doc 1014941, score is 23.87983
Doc 975894, score is 23.87983
Doc 811298, score is 23.512405
Doc 942482, score is 22.724968
Doc 1013100, score is 22.278471
Doc 976116, score is 22.197683
Doc 975879, score is 22.185574
Doc 1099902, score is 21.845137
Doc 262686, score is 21.760603
Doc 345180, score is 21.760603
Doc 1100437, score is 21.694735
Doc 29035, score is 21.60046
Doc 975925, score is 21.44722
Doc 583410, score is 21.39835
Doc 117519, score is 21.18834
Doc 1390461, score is 21.174843
Doc 345130, score is 21.060555
arw50570
Total documents: 1035
Doc 942328, score is 38.501312
Doc 117451, score is 36.326347
Doc 565497, score is 21.746073
Doc 870682, score is 20.051968
Doc 366438, score is 16.123363
Doc 117450, score is 15.802971
Doc 1357039, score is 14.343457
Doc 14701, score is 14.127384
Doc 1457840, score is 14.074379
Doc 271110, score is 14.073408
Doc 1415

arw50882
Total documents: 10000
Doc 555823, score is 16.200846
Doc 1461224, score is 16.167263
Doc 1369813, score is 16.167263
Doc 721769, score is 16.167263
Doc 55205, score is 16.167263
Doc 582597, score is 15.59793
Doc 629298, score is 15.402403
Doc 1463068, score is 15.402403
Doc 986592, score is 15.402403
Doc 723113, score is 15.402403
Doc 500408, score is 15.262093
Doc 1132232, score is 15.248726
Doc 495990, score is 15.1806755
Doc 264600, score is 14.902269
Doc 497097, score is 14.851916
Doc 1017338, score is 14.669085
Doc 550647, score is 14.657657
Doc 1295896, score is 14.651387
Doc 1370494, score is 14.547202
Doc 825538, score is 14.448936
arw50885
Total documents: 10000
Doc 48644, score is 15.000532
Doc 1215048, score is 14.944739
Doc 356431, score is 14.935413
Doc 701450, score is 14.935413
Doc 860954, score is 14.926313
Doc 1249603, score is 14.907803
Doc 1029799, score is 14.907803
Doc 355577, score is 14.796501
Doc 539471, score is 14.796501
Doc 1353433, score is 14.7336

arw51157
Total documents: 5143
Doc 558549, score is 15.908091
Doc 1124634, score is 15.759075
Doc 788121, score is 14.731487
Doc 48499, score is 14.170832
Doc 792826, score is 13.820897
Doc 852519, score is 13.820897
Doc 768594, score is 13.2666855
Doc 1039164, score is 13.033407
Doc 779954, score is 12.964983
Doc 1505165, score is 12.7337475
Doc 1038947, score is 12.7337475
Doc 403567, score is 12.698229
Doc 833413, score is 12.525492
Doc 1224097, score is 12.525492
Doc 1392649, score is 12.421162
Doc 1038325, score is 12.047079
Doc 691863, score is 12.047079
Doc 1505749, score is 11.944106
Doc 774276, score is 11.750982
Doc 1505191, score is 11.588743
arw51174
Total documents: 10000
Doc 885688, score is 31.16859
Doc 793299, score is 28.242865
Doc 596996, score is 28.242865
Doc 793342, score is 28.242865
Doc 887417, score is 28.242865
Doc 793329, score is 28.242865
Doc 1011275, score is 28.242865
Doc 887482, score is 27.980974
Doc 298217, score is 27.78096
Doc 1102411, score is 27.659

arw51477
Total documents: 1683
Doc 639392, score is 10.228449
Doc 1374302, score is 10.141502
Doc 618335, score is 10.113364
Doc 375805, score is 10.093764
Doc 1101508, score is 10.059125
Doc 1373132, score is 9.992448
Doc 374724, score is 9.990441
Doc 1294693, score is 9.980067
Doc 1308517, score is 9.975995
Doc 1267556, score is 9.952096
Doc 1373656, score is 9.943589
Doc 1276679, score is 9.943589
Doc 839066, score is 9.91173
Doc 329954, score is 9.91173
Doc 374725, score is 9.891414
Doc 1350151, score is 9.881308
Doc 1101504, score is 9.879406
Doc 1100350, score is 9.879406
Doc 618951, score is 9.82263
Doc 592921, score is 9.802347
arw51494
Total documents: 1993
Doc 1319611, score is 15.2352295
Doc 1320903, score is 14.197548
Doc 54367, score is 13.92263
Doc 1418701, score is 13.779297
Doc 1271051, score is 13.576499
Doc 646826, score is 11.856376
Doc 1136211, score is 11.851261
Doc 1512351, score is 11.6971655
Doc 984319, score is 11.505921
Doc 647176, score is 11.407853
Doc 10914

arw51835
Total documents: 924
Doc 1014077, score is 11.099433
Doc 893724, score is 10.944355
Doc 574918, score is 10.852521
Doc 1299084, score is 10.627455
Doc 52205, score is 10.564227
Doc 28454, score is 10.538191
Doc 1222770, score is 10.378982
Doc 1484824, score is 10.220455
Doc 1211339, score is 10.189329
Doc 1209978, score is 10.158392
Doc 1331299, score is 10.127642
Doc 295447, score is 10.1170025
Doc 435396, score is 10.006482
Doc 1382285, score is 9.957268
Doc 840302, score is 9.913015
Doc 443498, score is 9.903468
Doc 606670, score is 9.883395
Doc 607881, score is 9.854006
Doc 286656, score is 9.772656
Doc 825659, score is 9.772656
arw51844
Total documents: 10000
Doc 283844, score is 20.656525
Doc 1036895, score is 20.56541
Doc 707211, score is 19.891476
Doc 307027, score is 19.802536
Doc 9119, score is 19.74901
Doc 284803, score is 19.694746
Doc 1136520, score is 19.01958
Doc 687759, score is 17.054613
Doc 1142025, score is 16.944338
Doc 12468, score is 16.896263
Doc 284793,

arw52120
Total documents: 10000
Doc 926878, score is 5.9104567
Doc 565376, score is 5.8994484
Doc 1167383, score is 5.893324
Doc 1100877, score is 5.8875256
Doc 984467, score is 5.886509
Doc 1444069, score is 5.883734
Doc 1032398, score is 5.883159
Doc 1435135, score is 5.8782015
Doc 1278726, score is 5.8771243
Doc 1415992, score is 5.8770185
Doc 1321986, score is 5.8758526
Doc 76983, score is 5.873372
Doc 118903, score is 5.8733625
Doc 801450, score is 5.8728943
Doc 564909, score is 5.8709545
Doc 768014, score is 5.8695006
Doc 1437202, score is 5.866928
Doc 33590, score is 5.866148
Doc 804464, score is 5.8650675
Doc 74923, score is 5.863978
arw52125
Total documents: 3316
Doc 261838, score is 14.896479
Doc 1227844, score is 14.736691
Doc 1225588, score is 14.439638
Doc 1100542, score is 13.993925
Doc 667525, score is 13.146842
Doc 1506536, score is 12.821537
Doc 1376937, score is 12.794041
Doc 1385812, score is 12.691777
Doc 453319, score is 12.532604
Doc 664704, score is 12.485756
Doc

arw52331
Total documents: 521
Doc 1226725, score is 20.658468
Doc 288999, score is 16.594387
Doc 288644, score is 16.594387
Doc 543444, score is 15.504138
Doc 521964, score is 14.44684
Doc 1513428, score is 13.28931
Doc 418377, score is 12.968007
Doc 1488993, score is 12.600571
Doc 1099936, score is 12.444351
Doc 950129, score is 12.380592
Doc 968309, score is 12.380592
Doc 895303, score is 12.380592
Doc 563161, score is 12.380592
Doc 968322, score is 12.380592
Doc 950162, score is 12.380592
Doc 895154, score is 12.380592
Doc 563162, score is 12.380592
Doc 1498650, score is 12.380592
Doc 968253, score is 12.380592
Doc 17259, score is 12.380592
arw52373
Total documents: 10000
Doc 959208, score is 16.702938
Doc 1423129, score is 14.989463
Doc 1483303, score is 14.779626
Doc 868908, score is 14.779155
Doc 1124572, score is 14.26338
Doc 1335960, score is 14.198751
Doc 1246999, score is 14.180422
Doc 1124571, score is 14.121132
Doc 395247, score is 13.951416
Doc 95262, score is 13.951416
Do

arw52592
Total documents: 4173
Doc 1261371, score is 21.248539
Doc 784261, score is 15.739177
Doc 1314929, score is 13.297675
Doc 1005332, score is 12.557145
Doc 817121, score is 12.233827
Doc 413204, score is 12.233827
Doc 1512156, score is 12.21697
Doc 962179, score is 11.470317
Doc 458835, score is 11.416898
Doc 767192, score is 11.080731
Doc 1382701, score is 11.080731
Doc 1032258, score is 8.222207
Doc 1247882, score is 8.138321
Doc 631012, score is 8.125585
Doc 1042858, score is 8.041361
Doc 933281, score is 7.9765353
Doc 1491007, score is 7.9363275
Doc 402831, score is 7.9227595
Doc 1246209, score is 7.875208
Doc 917282, score is 7.8569083
arw52658
Total documents: 3145
Doc 268139, score is 8.963035
Doc 342742, score is 8.946811
Doc 558824, score is 8.946811
Doc 1244342, score is 8.929818
Doc 379989, score is 8.834843
Doc 117359, score is 8.824602
Doc 444199, score is 8.811391
Doc 1099581, score is 8.79842
Doc 1468188, score is 8.792615
Doc 1425658, score is 8.792065
Doc 1276366

arw52863
Total documents: 10000
Doc 591389, score is 16.92527
Doc 33277, score is 14.07008
Doc 341184, score is 13.733011
Doc 1011258, score is 13.15017
Doc 583282, score is 12.259573
Doc 1515916, score is 11.629486
Doc 688754, score is 11.4819565
Doc 265299, score is 11.4819565
Doc 1397037, score is 11.4819565
Doc 379873, score is 11.45671
Doc 975422, score is 11.218019
Doc 581253, score is 11.197847
Doc 1166134, score is 11.197847
Doc 981747, score is 11.197847
Doc 805177, score is 10.92746
Doc 805183, score is 10.669821
Doc 908606, score is 10.637989
Doc 1038157, score is 10.424051
Doc 971519, score is 10.424051
Doc 1056130, score is 9.964982
arw52883
Total documents: 4264
Doc 1479463, score is 16.616117
Doc 420648, score is 13.302734
Doc 1110190, score is 12.657314
Doc 942962, score is 12.618049
Doc 1166417, score is 12.423098
Doc 908724, score is 12.1996565
Doc 107137, score is 11.980234
Doc 1411589, score is 11.959089
Doc 1449279, score is 11.959089
Doc 1111558, score is 11.95908

arw53207
Total documents: 10000
Doc 82135, score is 14.379086
Doc 1225746, score is 13.890604
Doc 428042, score is 12.768988
Doc 391149, score is 12.105948
Doc 1056685, score is 12.086409
Doc 1474358, score is 12.086409
Doc 1456018, score is 12.078672
Doc 15150, score is 12.005146
Doc 519948, score is 11.998784
Doc 632994, score is 11.895075
Doc 865017, score is 11.823892
Doc 544069, score is 11.823892
Doc 1042036, score is 11.805725
Doc 1156804, score is 11.801494
Doc 1358083, score is 11.740604
Doc 924194, score is 11.718252
Doc 55314, score is 11.6778
Doc 903451, score is 11.620518
Doc 687995, score is 11.620518
Doc 445638, score is 11.604671
arw53247
Total documents: 1590
Doc 981278, score is 12.550236
Doc 1332346, score is 10.402282
Doc 444291, score is 10.402282
Doc 555576, score is 10.236873
Doc 683471, score is 10.12512
Doc 809497, score is 9.901794
Doc 1184156, score is 9.742247
Doc 281502, score is 9.708272
Doc 91987, score is 9.705616
Doc 282357, score is 9.64899
Doc 863767,

arw53549
Total documents: 3
Doc 1437026, score is 16.391418
Doc 525231, score is 8.960033
Doc 1185848, score is 7.534296
arw53563
Total documents: 5210
Doc 1381890, score is 38.314686
Doc 1013443, score is 36.698112
Doc 263062, score is 36.13031
Doc 93823, score is 34.459103
Doc 696729, score is 33.97647
Doc 267560, score is 33.80158
Doc 499580, score is 30.693363
Doc 1454039, score is 29.708
Doc 445941, score is 29.235655
Doc 768014, score is 28.719696
Doc 1197037, score is 27.31491
Doc 268686, score is 27.265263
Doc 1139244, score is 27.2006
Doc 1020588, score is 27.069475
Doc 499583, score is 27.041935
Doc 1120266, score is 27.028072
Doc 1410685, score is 26.747646
Doc 547869, score is 26.658535
Doc 1271863, score is 26.635775
Doc 1487726, score is 26.554281
arw53589
Total documents: 9632
Doc 1145863, score is 26.57812
Doc 482545, score is 26.480091
Doc 1183772, score is 26.480091
Doc 641930, score is 26.480091
Doc 361548, score is 26.480091
Doc 641929, score is 26.480091
Doc 939899

arw53738
Total documents: 10000
Doc 625082, score is 20.00069
Doc 1519082, score is 19.307243
Doc 1256027, score is 18.96116
Doc 809115, score is 18.052792
Doc 808365, score is 17.826563
Doc 1293069, score is 17.826563
Doc 1195083, score is 17.826563
Doc 1195090, score is 17.062023
Doc 628187, score is 16.95065
Doc 930312, score is 16.849298
Doc 627440, score is 16.849298
Doc 1251553, score is 16.742628
Doc 970670, score is 16.742628
Doc 113300, score is 16.650959
Doc 1206835, score is 16.650959
Doc 1206045, score is 16.650959
Doc 809113, score is 16.650959
Doc 1519063, score is 16.650959
Doc 809111, score is 16.650959
Doc 1195879, score is 16.650959
arw53743
Total documents: 10000
Doc 266492, score is 28.578897
Doc 571757, score is 25.441935
Doc 497005, score is 24.684238
Doc 41664, score is 24.610907
Doc 822662, score is 24.070648
Doc 1064868, score is 23.119371
Doc 976533, score is 23.10943
Doc 266528, score is 22.34615
Doc 981144, score is 22.155186
Doc 1281936, score is 21.563026


arw53873
Total documents: 10000
Doc 930268, score is 21.071514
Doc 1436188, score is 19.856262
Doc 972400, score is 19.512135
Doc 844834, score is 17.967022
Doc 666473, score is 15.398507
Doc 1103555, score is 15.255659
Doc 444737, score is 15.01556
Doc 595603, score is 14.969457
Doc 1473437, score is 14.705008
Doc 91323, score is 14.487661
Doc 1467282, score is 14.270422
Doc 861931, score is 14.208132
Doc 781831, score is 13.960293
Doc 1131960, score is 13.960293
Doc 19214, score is 13.665497
Doc 19995, score is 13.665497
Doc 844818, score is 13.52719
Doc 467082, score is 13.457834
Doc 1509861, score is 13.309282
Doc 1516282, score is 13.283537
arw53874
Total documents: 10000
Doc 1243992, score is 15.1493435
Doc 363194, score is 14.330003
Doc 1082011, score is 14.247816
Doc 1384168, score is 13.161551
Doc 413648, score is 13.161551
Doc 888436, score is 12.72921
Doc 1323148, score is 12.584463
Doc 814253, score is 12.352543
Doc 624489, score is 12.314097
Doc 23662, score is 12.28207
Do

arw54134
Total documents: 10000
Doc 969894, score is 16.908789
Doc 280687, score is 16.67922
Doc 969886, score is 16.568022
Doc 895187, score is 16.069885
Doc 262508, score is 15.50986
Doc 1258459, score is 15.372868
Doc 951893, score is 15.361183
Doc 951891, score is 15.093506
Doc 105927, score is 15.04859
Doc 265139, score is 14.611222
Doc 497084, score is 14.6102915
Doc 71349, score is 14.534732
Doc 262386, score is 14.328196
Doc 269153, score is 14.318507
Doc 847343, score is 14.063364
Doc 764607, score is 13.970462
Doc 494544, score is 13.913701
Doc 449840, score is 13.792343
Doc 865166, score is 13.741689
Doc 264951, score is 13.7053585
arw54165
Total documents: 406
Doc 1319916, score is 13.346436
Doc 34112, score is 12.681206
Doc 1414662, score is 12.615314
Doc 72621, score is 12.568396
Doc 511351, score is 12.525935
Doc 1041256, score is 12.408085
Doc 1509752, score is 12.385901
Doc 724649, score is 12.377616
Doc 1479374, score is 12.299952
Doc 987189, score is 12.166935
Doc 49

arw54453
Total documents: 880
Doc 116408, score is 22.060673
Doc 1312963, score is 20.435413
Doc 516202, score is 20.065842
Doc 332681, score is 20.065842
Doc 58070, score is 18.712202
Doc 1111998, score is 18.712202
Doc 1157207, score is 16.29727
Doc 1355403, score is 16.29727
Doc 59174, score is 16.29727
Doc 1464746, score is 15.480217
Doc 695575, score is 14.987844
Doc 1311788, score is 14.868698
Doc 1312801, score is 14.868698
Doc 330980, score is 13.787298
Doc 58076, score is 13.40342
Doc 469973, score is 12.92076
Doc 632292, score is 12.447621
Doc 56367, score is 11.6807375
Doc 1028846, score is 11.641571
Doc 1112190, score is 11.424415
arw54459
Total documents: 9778
Doc 997536, score is 15.256807
Doc 994863, score is 15.070854
Doc 376757, score is 15.064968
Doc 1350719, score is 14.934696
Doc 1513382, score is 14.849888
Doc 995648, score is 14.771549
Doc 1507697, score is 14.658503
Doc 1369662, score is 14.629578
Doc 98247, score is 14.625345
Doc 1351462, score is 14.613853
Doc 

arw54588
Total documents: 10000
Doc 562653, score is 18.594326
Doc 818227, score is 13.822156
Doc 1482597, score is 13.440811
Doc 543417, score is 13.286085
Doc 874482, score is 12.609288
Doc 112463, score is 12.31119
Doc 1517136, score is 11.99952
Doc 264644, score is 11.547705
Doc 1302457, score is 11.547705
Doc 659744, score is 11.145444
Doc 978815, score is 11.145444
Doc 1267492, score is 10.561011
Doc 1502225, score is 10.561011
Doc 248799, score is 10.194263
Doc 331500, score is 10.13008
Doc 960438, score is 10.123819
Doc 1011962, score is 10.090495
Doc 806334, score is 9.748201
Doc 294121, score is 9.31957
Doc 926512, score is 9.296198
arw54589
Total documents: 1837
Doc 1101769, score is 19.35949
Doc 1362922, score is 16.441317
Doc 1403043, score is 16.284895
Doc 806960, score is 14.623015
Doc 942390, score is 14.016721
Doc 616401, score is 13.858178
Doc 475876, score is 12.486859
Doc 714536, score is 11.955623
Doc 303540, score is 11.782636
Doc 1237117, score is 11.578497
Doc 6

arw54795
Total documents: 10000
Doc 1514583, score is 21.322556
Doc 392486, score is 19.513378
Doc 406998, score is 16.880152
Doc 258059, score is 16.74067
Doc 505600, score is 16.648506
Doc 1257021, score is 16.489983
Doc 1318317, score is 16.422287
Doc 570572, score is 16.417261
Doc 1458625, score is 16.105452
Doc 565526, score is 16.004381
Doc 62634, score is 15.944899
Doc 1296076, score is 15.519772
Doc 1207608, score is 15.458894
Doc 1326458, score is 15.448164
Doc 1243458, score is 15.364578
Doc 1284975, score is 15.184522
Doc 484652, score is 15.157489
Doc 975481, score is 15.101499
Doc 667106, score is 14.959204
Doc 292189, score is 14.942378
arw54841
Total documents: 10000
Doc 597131, score is 10.376363
Doc 794362, score is 10.319612
Doc 886594, score is 10.245647
Doc 1266884, score is 10.181144
Doc 888628, score is 10.160238
Doc 597993, score is 10.024829
Doc 1268169, score is 10.024829
Doc 886782, score is 10.024829
Doc 794373, score is 9.948458
Doc 888464, score is 9.948458

Doc 1170196, score is 9.413507
Doc 567484, score is 9.361765
arw55045
Total documents: 10000
Doc 294198, score is 17.473225
Doc 851771, score is 17.473225
Doc 293146, score is 17.473225
Doc 866291, score is 17.138798
Doc 1136535, score is 17.039326
Doc 488507, score is 16.872597
Doc 870823, score is 16.608818
Doc 1186747, score is 16.448257
Doc 560338, score is 16.110167
Doc 1136508, score is 16.110167
Doc 1361496, score is 16.081768
Doc 535407, score is 15.986673
Doc 716636, score is 15.986673
Doc 866241, score is 15.813996
Doc 561030, score is 15.813996
Doc 431617, score is 15.605247
Doc 866363, score is 15.480501
Doc 509853, score is 15.259752
Doc 560713, score is 15.229319
Doc 316156, score is 15.200188
arw55066
Total documents: 90
Doc 1076586, score is 15.054999
Doc 992280, score is 14.894327
Doc 925266, score is 14.143327
Doc 1272307, score is 13.952637
Doc 1171073, score is 13.525525
Doc 820472, score is 13.525525
Doc 1129721, score is 13.525525
Doc 1319907, score is 13.082497
D

arw55421
Total documents: 10000
Doc 1259817, score is 12.3737335
Doc 77600, score is 12.068944
Doc 968524, score is 11.974531
Doc 595660, score is 11.816653
Doc 855971, score is 11.816653
Doc 479680, score is 11.79944
Doc 1180765, score is 11.79944
Doc 479679, score is 11.79944
Doc 1146756, score is 11.79944
Doc 856702, score is 11.79944
Doc 981402, score is 11.706053
Doc 1420178, score is 11.648146
Doc 941972, score is 11.63689
Doc 791852, score is 11.607586
Doc 645613, score is 11.453756
Doc 863571, score is 11.350966
Doc 1487548, score is 11.268817
Doc 457621, score is 11.217819
Doc 1276341, score is 11.18487
Doc 258945, score is 11.18487
arw55448
Total documents: 3814
Doc 618280, score is 23.191616
Doc 624875, score is 23.09771
Doc 1293738, score is 22.570492
Doc 1119095, score is 18.18261
Doc 707765, score is 17.47686
Doc 1256684, score is 14.785578
Doc 1519676, score is 14.710144
Doc 278452, score is 13.781896
Doc 323839, score is 13.281343
Doc 1043705, score is 12.716349
Doc 660

arw55609
Total documents: 3371
Doc 1113573, score is 8.872518
Doc 1470678, score is 8.808384
Doc 382495, score is 8.808384
Doc 1139263, score is 8.808384
Doc 1036231, score is 8.808384
Doc 281095, score is 8.808384
Doc 620792, score is 8.808384
Doc 1470680, score is 8.808384
Doc 306315, score is 8.808384
Doc 462448, score is 8.808384
Doc 303233, score is 8.808384
Doc 1470679, score is 8.808384
Doc 1033185, score is 8.808384
Doc 459319, score is 8.808384
Doc 1409795, score is 8.808384
Doc 618633, score is 8.808384
Doc 1036232, score is 8.808384
Doc 281091, score is 8.808384
Doc 705078, score is 8.808384
Doc 1409800, score is 8.808384
arw55639
Total documents: 10000
Doc 1511348, score is 12.959768
Doc 1228774, score is 12.156329
Doc 380837, score is 11.800053
Doc 1510490, score is 11.667295
Doc 554036, score is 11.657457
Doc 704927, score is 11.500686
Doc 1035811, score is 11.500686
Doc 881280, score is 11.480803
Doc 1347837, score is 11.206587
Doc 1274451, score is 10.967402
Doc 486101,

arw55894
Total documents: 10000
Doc 387729, score is 19.060238
Doc 1135815, score is 17.191551
Doc 556704, score is 17.131777
Doc 580620, score is 16.360083
Doc 295083, score is 15.975343
Doc 1084416, score is 15.908112
Doc 1218891, score is 15.898284
Doc 1299420, score is 15.756528
Doc 394683, score is 15.747303
Doc 792448, score is 15.711765
Doc 1082342, score is 15.053974
Doc 772519, score is 15.053974
Doc 1460362, score is 15.028725
Doc 547177, score is 14.841218
Doc 1460353, score is 14.808573
Doc 1426697, score is 14.694637
Doc 93260, score is 14.528345
Doc 723159, score is 14.195434
Doc 1268829, score is 14.165369
Doc 551700, score is 14.149717
arw55895
Total documents: 1101
Doc 872151, score is 20.316324
Doc 1066363, score is 14.486357
Doc 1300160, score is 14.383087
Doc 1361553, score is 14.383087
Doc 678952, score is 14.051785
Doc 1043316, score is 12.742228
Doc 1043510, score is 12.742228
Doc 79768, score is 12.704931
Doc 865773, score is 12.704931
Doc 109302, score is 12.70

arw56173
Total documents: 10000
Doc 1489138, score is 17.7325
Doc 381986, score is 17.67261
Doc 1377453, score is 17.614037
Doc 1490424, score is 17.484669
Doc 1283818, score is 17.321686
Doc 578503, score is 16.33094
Doc 991842, score is 16.250666
Doc 44152, score is 16.16038
Doc 869040, score is 16.16038
Doc 1112775, score is 16.16038
Doc 1217591, score is 16.00217
Doc 1262714, score is 15.902371
Doc 549928, score is 15.88541
Doc 296674, score is 15.690415
Doc 1315604, score is 15.38333
Doc 296570, score is 15.279762
Doc 1487170, score is 15.237532
Doc 289362, score is 15.048439
Doc 623515, score is 15.001775
Doc 1214944, score is 14.9790125
arw56192
Total documents: 4265
Doc 1000332, score is 8.2919855
Doc 1411661, score is 8.2919855
Doc 503424, score is 8.262338
Doc 1366126, score is 8.25256
Doc 525314, score is 8.242855
Doc 378775, score is 8.238884
Doc 1422083, score is 8.233512
Doc 645335, score is 8.230492
Doc 1055180, score is 8.224
Doc 791703, score is 8.222268
Doc 810540, sc

arw56631
Total documents: 10000
Doc 798670, score is 18.68607
Doc 1264432, score is 18.437872
Doc 593280, score is 17.718445
Doc 476510, score is 17.718445
Doc 1480963, score is 17.649654
Doc 1503640, score is 17.524794
Doc 5353, score is 17.35132
Doc 1144510, score is 17.318552
Doc 572168, score is 17.244217
Doc 477259, score is 17.195377
Doc 1090591, score is 17.148563
Doc 858890, score is 17.062614
Doc 703091, score is 16.929153
Doc 1349952, score is 16.895844
Doc 12354, score is 16.565033
Doc 674640, score is 16.558487
Doc 1116795, score is 16.502981
Doc 797343, score is 16.489782
Doc 1418798, score is 16.438068
Doc 798669, score is 16.31219
arw56634
Total documents: 10000
Doc 351063, score is 17.210978
Doc 350656, score is 17.210978
Doc 1242876, score is 17.210978
Doc 1468994, score is 15.352023
Doc 1143179, score is 15.352023
Doc 287866, score is 14.837293
Doc 1229151, score is 14.138861
Doc 941107, score is 13.929463
Doc 336077, score is 13.726273
Doc 335790, score is 13.726273


Doc 376505, score is 21.310488
Doc 1183156, score is 20.877449
Doc 1008521, score is 20.877449
Doc 93895, score is 20.608633
Doc 457547, score is 20.4875
Doc 669633, score is 20.352888
Doc 650809, score is 19.572681
Doc 373526, score is 18.808802
Doc 1339053, score is 18.665237
Doc 289526, score is 18.524584
Doc 1031893, score is 18.506453
Doc 1365780, score is 18.472458
Doc 1088691, score is 18.32307
Doc 1232461, score is 18.283592
Doc 1339051, score is 17.876635
Doc 373512, score is 17.845263
Doc 724644, score is 17.845263
arw56865
Total documents: 10000
Doc 1298824, score is 15.355054
Doc 32213, score is 15.159924
Doc 870119, score is 15.132101
Doc 1001471, score is 15.1004505
Doc 819119, score is 15.037994
Doc 409451, score is 15.035363
Doc 1134971, score is 14.990128
Doc 911877, score is 14.955579
Doc 781520, score is 14.947057
Doc 37290, score is 14.943733
Doc 930794, score is 14.877909
Doc 1167300, score is 14.875786
Doc 1217813, score is 14.850342
Doc 104378, score is 14.850306

arw57198
Total documents: 28
Doc 1366402, score is 16.88967
Doc 1315462, score is 16.88967
Doc 446830, score is 16.88967
Doc 861688, score is 14.254751
Doc 607342, score is 14.048165
Doc 45591, score is 12.29699
Doc 861110, score is 12.29699
Doc 963071, score is 12.29699
Doc 1275012, score is 12.203628
Doc 85865, score is 12.203628
Doc 937573, score is 12.203628
Doc 88949, score is 11.642551
Doc 78531, score is 11.163991
Doc 1518417, score is 10.249338
Doc 1217081, score is 10.2221575
Doc 1252081, score is 9.85064
Doc 982932, score is 9.85064
Doc 625691, score is 9.0739
Doc 900963, score is 9.0739
Doc 1513639, score is 8.157559
arw57204
Total documents: 0
arw57208
Total documents: 23
Doc 869933, score is 16.324434
Doc 560699, score is 16.04657
Doc 1361711, score is 15.516119
Doc 1136944, score is 14.507965
Doc 1135481, score is 13.942958
Doc 560380, score is 13.6390915
Doc 295504, score is 13.069434
Doc 1299025, score is 12.889977
Doc 1187196, score is 12.545455
Doc 1361207, score is 1

arw57561
Total documents: 10000
Doc 1480605, score is 21.129482
Doc 36100, score is 21.112293
Doc 1446336, score is 21.09491
Doc 273796, score is 21.078665
Doc 1001803, score is 21.024513
Doc 1411853, score is 21.010721
Doc 409328, score is 21.010252
Doc 406460, score is 20.999863
Doc 30625, score is 20.977322
Doc 1001794, score is 20.974422
Doc 34045, score is 20.96778
Doc 409886, score is 20.961567
Doc 1131853, score is 20.960703
Doc 21036, score is 20.952192
Doc 908577, score is 20.949484
Doc 1128472, score is 20.932533
Doc 819585, score is 20.927853
Doc 270458, score is 20.926498
Doc 273787, score is 20.924923
Doc 1415168, score is 20.91123
arw57564
Total documents: 10000
Doc 1272965, score is 10.672628
Doc 1419080, score is 10.592924
Doc 1500222, score is 10.592724
Doc 863013, score is 10.569754
Doc 1435299, score is 10.567914
Doc 53337, score is 10.55538
Doc 53444, score is 10.55414
Doc 985067, score is 10.55386
Doc 1353811, score is 10.530202
Doc 366701, score is 10.479406
Doc 1

arw57780
Total documents: 10000
Doc 66224, score is 14.772831
Doc 1136428, score is 14.086187
Doc 442500, score is 13.733908
Doc 792566, score is 13.610376
Doc 37738, score is 13.584221
Doc 671326, score is 13.584221
Doc 756, score is 13.500149
Doc 1256, score is 13.500149
Doc 1064754, score is 13.441223
Doc 1283696, score is 13.434716
Doc 1472200, score is 13.399244
Doc 530247, score is 13.389842
Doc 872815, score is 13.3070545
Doc 37737, score is 13.301415
Doc 901270, score is 13.282023
Doc 53480, score is 13.246046
Doc 986928, score is 13.246046
Doc 1196346, score is 13.242689
Doc 1295306, score is 13.225278
Doc 495229, score is 13.167212
arw57796
Total documents: 545
Doc 288197, score is 11.972996
Doc 826937, score is 11.900883
Doc 941027, score is 11.900883
Doc 1211020, score is 11.89547
Doc 1486360, score is 11.830142
Doc 891675, score is 11.699818
Doc 869844, score is 11.635059
Doc 1300219, score is 11.635059
Doc 449962, score is 11.611083
Doc 1211150, score is 11.258378
Doc 821

Doc 820667, score is 12.002942
Doc 319083, score is 11.688693
Doc 69947, score is 11.333111
Doc 295338, score is 11.271469
arw58075
Total documents: 3
Doc 440227, score is 14.791274
Doc 1502239, score is 8.960033
Doc 794459, score is 8.960033
arw58091
Total documents: 6854
Doc 465440, score is 16.277695
Doc 1369844, score is 16.207653
Doc 1215858, score is 15.603117
Doc 489486, score is 14.764254
Doc 579824, score is 13.774291
Doc 1311702, score is 13.76248
Doc 606020, score is 13.263706
Doc 1304366, score is 13.187832
Doc 1174799, score is 12.720526
Doc 789473, score is 12.718945
Doc 1303808, score is 12.553311
Doc 1522815, score is 12.553311
Doc 1383860, score is 12.316208
Doc 1062867, score is 12.316208
Doc 1438138, score is 12.21697
Doc 1062876, score is 12.114094
Doc 1307866, score is 12.041233
Doc 1508, score is 12.041233
Doc 1472142, score is 12.041233
Doc 1483681, score is 12.041233
arw58105
Total documents: 10000
Doc 254958, score is 14.551487
Doc 306674, score is 14.495684
Do

Doc 1184393, score is 10.463611
Doc 1212955, score is 10.144438
Doc 290692, score is 9.941316
Doc 1413460, score is 9.715718
Doc 1185524, score is 9.641542
Doc 1375331, score is 9.580658
Doc 499930, score is 9.493524
Doc 373197, score is 9.479729
Doc 10556, score is 9.404857
Doc 1102138, score is 9.366734
Doc 837585, score is 9.3624525
Doc 1438155, score is 9.308499
Doc 1342135, score is 9.255008
Doc 1038512, score is 9.241669
Doc 1429272, score is 9.182791
Doc 269044, score is 9.172964
Doc 291730, score is 9.1679325
Doc 1343238, score is 9.1679325
Doc 1008617, score is 9.126504
Doc 1317114, score is 9.12624
arw58367
Total documents: 10000
Doc 289175, score is 23.005093
Doc 1207656, score is 22.657616
Doc 565677, score is 21.709888
Doc 587740, score is 21.450203
Doc 65401, score is 21.422377
Doc 345295, score is 21.272724
Doc 581596, score is 21.26323
Doc 345398, score is 21.056969
Doc 496746, score is 20.31652
Doc 1013397, score is 20.305555
Doc 97296, score is 20.131762
Doc 265994, s

arw58635
Total documents: 10000
Doc 1175993, score is 20.202055
Doc 1432566, score is 19.585688
Doc 13664, score is 19.400328
Doc 1338748, score is 19.320623
Doc 374549, score is 18.909096
Doc 1192644, score is 18.682201
Doc 251909, score is 18.611181
Doc 884083, score is 18.14953
Doc 670299, score is 18.115309
Doc 681982, score is 17.99147
Doc 112544, score is 17.98618
Doc 1027149, score is 17.974731
Doc 884024, score is 17.961994
Doc 324434, score is 17.940313
Doc 1036637, score is 17.819166
Doc 1102292, score is 17.707811
Doc 930956, score is 17.658352
Doc 628778, score is 17.322922
Doc 618395, score is 17.14383
Doc 1180989, score is 17.129427
arw58642
Total documents: 10000
Doc 293718, score is 16.622305
Doc 1016103, score is 16.19177
Doc 1491107, score is 15.559959
Doc 809588, score is 15.497698
Doc 688000, score is 15.3197775
Doc 938863, score is 15.302586
Doc 1037761, score is 15.271928
Doc 63780, score is 15.113415
Doc 884211, score is 15.037497
Doc 443660, score is 15.009531
D

arw58958
Total documents: 6653
Doc 534475, score is 32.570084
Doc 690561, score is 32.206234
Doc 614546, score is 31.906248
Doc 1275812, score is 29.957657
Doc 1147078, score is 29.417034
Doc 621531, score is 29.090626
Doc 620719, score is 28.861893
Doc 386081, score is 28.741215
Doc 1470406, score is 28.640118
Doc 1471385, score is 28.582157
Doc 1033730, score is 28.57412
Doc 283819, score is 28.495167
Doc 459477, score is 28.284939
Doc 1007008, score is 28.121979
Doc 688660, score is 27.73383
Doc 532539, score is 27.621464
Doc 601244, score is 25.93834
Doc 76461, score is 25.93834
Doc 601524, score is 25.93834
Doc 862312, score is 25.93834
arw58970
Total documents: 10000
Doc 16942, score is 13.816998
Doc 942385, score is 13.163893
Doc 257487, score is 13.000159
Doc 1401472, score is 12.903826
Doc 626159, score is 12.842594
Doc 1421173, score is 12.696351
Doc 1402179, score is 12.518756
Doc 388790, score is 12.4870615
Doc 910530, score is 12.426571
Doc 1262228, score is 12.303615
Doc 

In [81]:
lemma_res

r = 0.2535327962534067
p = 0.3577696526508227
r_precision = 0.2121663681252442
MAP = 0.38583429879378817

In [124]:
find_diff_metrics(quality_checker, lemma_quality_checker)

[('борьба с терроризмом', 0.95, r = 0.0
  p = 0.0
  r_precision = 0.0
  MAP = 0.0, r = 0.29411764705882354
  p = 1.0
  r_precision = 0.29411764705882354
  MAP = 0.95), ('форд фокус', 0.95, r = 0.0
  p = 0.0
  r_precision = 0.0
  MAP = 0.0, r = 0.4
  p = 1.0
  r_precision = 0.4
  MAP = 0.95), ('Вакансии медицина фармация', 0.95, r = 0.0
  p = 0.0
  r_precision = 0.0
  MAP = 0.0, r = 0.37037037037037035
  p = 1.0
  r_precision = 0.37037037037037035
  MAP = 0.95), ('гипоксия', 0.95, r = 0.0
  p = 0.0
  r_precision = 0.0
  MAP = 0.0, r = 0.3389830508474576
  p = 1.0
  r_precision = 0.3389830508474576
  MAP = 0.95), ('Фитнес клуб Палестра', 0.95, r = 0.0
  p = 0.0
  r_precision = 0.0
  MAP = 0.0, r = 0.18867924528301888
  p = 1.0
  r_precision = 0.18867924528301888
  MAP = 0.95), ('Спа центры', 0.95, r = 0.0
  p = 0.0
  r_precision = 0.0
  MAP = 0.0, r = 0.2898550724637681
  p = 1.0
  r_precision = 0.2898550724637681
  MAP = 0.95), ('газпромбанк', 0.95, r = 0.0
  p = 0.0
  r_precision = 0.0

In [119]:
id_to_pagerank = {}
with open('res/pagerank.txt','r') as f:
    for line in f:
        docId, docURL, rank = line.split()
        id_to_pagerank[int(docId)] = float(rank)

In [None]:
len(id_to_pagerank)


In [None]:
 for doc_name in tqdm(os.listdir("data/json_filtered_tokens_texts")):
        with open(f"data/json_filtered_tokens_texts/{doc_name}", "r+", encoding="utf-8") as inf:
            doc_id = int(''.join(list(filter(str.isdigit, doc_name))))
            doc = json.load(inf)
            try:
                doc["pagerank"] = id_to_pagerank.get(doc_id)
            except:
                pass
            inf.seek(0)        # <--- should reset file position to the beginning.
            json.dump(doc, inf, indent=4, ensure_ascii=False)
            inf.truncate()

In [None]:
settings_with_pagerank = {
    "mappings": {
        "properties": {
            "text": {
                "type": "text"
            },
            "pagerank": {
                "type": "rank_feature"
            }
        }
    }
}

In [None]:
pr_index = Index("pagerank_index", settings_with_pagerank)

In [None]:
pr_index.add_documents("data/json_filtered_tokens_texts")

In [None]:
pr_quality_checker = SearchQualityChecker(queries_lemmas, pr_index)
pr_res = pr_quality_checker.get_results(pagerank_query)

In [None]:
pr_res

In [None]:
pr_index.get_doc_by_id(1000039)

In [None]:
settings_titles = {
        'mappings': {
            'properties': {
                'content': {
                    'type': 'text',
                },
                'title': {
                    'type': 'text'
                }
            }
            
        }
    }