In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_colwidth', 150)

import pyterrier as pt
import os

import string

In [2]:
if not pt.started():
    pt.init()

PyTerrier 0.7.1 has loaded Terrier 5.6 (built by craigmacdonald on 2021-09-17 13:27)


In [3]:
corpusDf = pd.read_csv('query_11_20_annotation.csv')
corpusDf = corpusDf.astype({'id': str})
corpusDf['text'] = corpusDf['text'].str.replace('\n', ' ')
corpusDf = corpusDf.rename(columns={'id': 'docno', 'label': 'readability'})
corpusDf.head()

Unnamed: 0,text,readability,int-1,int-2,int-3,int-4,int-5,int-6,int-7,int-8,...,int-12,int-13,int-14,int-15,int-16,int-17,int-18,int-19,int-20,docno
0,"Hi! I've been meaning to write for ages and finally today I'm actually doing something about it. Not that I'm trying to make excuses for myself, i...",B2,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,...,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0
1,﻿It was not so much how hard people found the challenge but how far they would go to avoid it that left researchers gobsmacked. The task? To sit i...,B2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1
2,"Keith recently came back from a trip to Chicago, Illinois. This midwestern metropolis is found along the shore of Lake Michigan. During his visit,...",B2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3,"The Griffith Observatory is a planetarium, and an exhibit hall located in Los Angeles's Griffith Park. It features several astronomical displays a...",B2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,3
4,-LRB- The Hollywood Reporter -RRB- It's official: AMC's The Walking Dead companion series has a title. Executive producer Robert Kirkman announced...,B2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4


In [4]:
queryDf = pd.read_csv('all_queries.csv')
queryDf = queryDf.rename(columns={'QueryId': 'qid', 'Query Description': 'query'})
queryDf

Unnamed: 0,qid,query,Readability
0,1,What is father's love like?,1
1,1,What is father's love like?,0
2,1,What is father's love like?,2
3,2,Is climbing mountains happy?,1
4,3,Why do people eat apples?,1
5,4,Suggestions on where to travel.,1
6,5,How do I repair my phone?,1
7,6,Clothes to wear in winter.,1
8,7,What is a healthy lifestyle?,1
9,8,How do I learn to drive cars?,1


In [5]:
# preprocessing queries
## eliminating punctuations
queryDf['query'] = queryDf['query'].apply(lambda s: s.translate(str.maketrans('', '', string.punctuation)))
queryDf

Unnamed: 0,qid,query,Readability
0,1,What is fathers love like,1
1,1,What is fathers love like,0
2,1,What is fathers love like,2
3,2,Is climbing mountains happy,1
4,3,Why do people eat apples,1
5,4,Suggestions on where to travel,1
6,5,How do I repair my phone,1
7,6,Clothes to wear in winter,1
8,7,What is a healthy lifestyle,1
9,8,How do I learn to drive cars,1


In [6]:
topics = queryDf.drop(columns=['Readability'])
topics = topics.astype({'qid': str})
# choose training data
train_topics = topics[15:]
train_topics = train_topics.reset_index()
train_topics = train_topics.drop(columns=['index'])
train_topics

Unnamed: 0,qid,query
0,14,What kind of advice would you give to a freshman at your college
1,15,How did the elderly in your family adjust to the age of the Internet
2,16,Any tips for travelling abroad
3,17,Share a scientific explanation for a common natural phenomenon
4,18,How do international students view America
5,19,Has any of your family members experienced the Second World War What was it like
6,20,What kind of advice would you give to a new employee at your company


In [7]:
# choose test data
test_topics = topics[12:14]
test_topics

Unnamed: 0,qid,query
12,11,Whats your favorite cooking recipe
13,12,How did you come up with names for your pets


In [8]:
qid = str(topics['qid'][0])
labelColName = 'int-' + qid
qrels = pd.DataFrame(data={'qid': qid, 'docno': corpusDf['docno'], 'label': corpusDf[labelColName]})

for qid in topics['qid'][1:]:
    qid = str(qid)
    # qid = '12'
    # 'int-12' column in corpusDf means the 12th query
    labelColName = 'int-' + qid
    tempDf = pd.DataFrame(data={'qid': qid, 'docno': corpusDf['docno'], 'label': corpusDf[labelColName]})
    qrels = qrels.append(tempDf, ignore_index=True)
qrels = qrels.astype({'label': int})
# qrels['iteration'] = '2.5'
qrels

Unnamed: 0,qid,docno,label
0,1,0,1
1,1,1,0
2,1,2,0
3,1,3,0
4,1,4,0
...,...,...,...
32863,20,1489,0
32864,20,1490,0
32865,20,1491,0
32866,20,1492,0


In [9]:
index_dir = './first_index'
indexer = pt.DFIndexer(index_dir, blocks=True, overwrite=True)
index_ref = indexer.index(corpusDf["text"], corpusDf["text"], corpusDf["docno"], corpusDf['readability'])
index_ref.toString()

'./first_index/data.properties'

In [10]:
index = pt.IndexFactory.of(index_ref)

In [11]:
bm25 = pt.BatchRetrieve(index, wmodel="BM25")

In [12]:
bm25.search("What is your favorite cooking recipe")[:10]

Unnamed: 0,qid,docid,docno,rank,score,query
0,1,32,32,0,11.718433,What is your favorite cooking recipe
1,1,17,17,1,10.66591,What is your favorite cooking recipe
2,1,30,30,2,10.5086,What is your favorite cooking recipe
3,1,1475,1475,3,10.484189,What is your favorite cooking recipe
4,1,1061,1061,4,9.975495,What is your favorite cooking recipe
5,1,651,651,5,9.770045,What is your favorite cooking recipe
6,1,820,820,6,9.616181,What is your favorite cooking recipe
7,1,1054,1054,7,9.46745,What is your favorite cooking recipe
8,1,1127,1127,8,9.031279,What is your favorite cooking recipe
9,1,643,643,9,8.219655,What is your favorite cooking recipe


In [13]:
pt.Experiment(
    [bm25],
    train_topics,
    qrels,
    names=['bm25'],
    eval_metrics=["map", "ndcg", "ndcg_cut_5", "ndcg_cut_10"])

Unnamed: 0,name,map,ndcg,ndcg_cut_5,ndcg_cut_10
0,bm25,0.884952,0.806663,0.360986,0.446218


In [14]:
sdm = pt.rewrite.SDM()
qe = pt.rewrite.Bo1QueryExpansion(index)

In [15]:
# solve the metadata confusion, could use readability level as a feature to train
pt.text.get_text(index, ["text"])

<pyterrier.transformer.ApplyGenericTransformer at 0x2af2a3b7e790>

In [16]:
RANK_CUTOFF = 100
SEED=42

int_feats = (bm25 % RANK_CUTOFF) >> pt.text.get_text(index, ["readability"]) >> (
    pt.transformer.IdentityTransformer()
    ** # sequential dependence
    (sdm >> bm25)
#     ** # match the intermediate readability
#     (pt.apply.doc_score(lambda row: int("B" in row["readability"])*20))
)

# for reference, lets record the feature names here too
fnames=["BM25", "SDM", "Readability"]

In [17]:
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(n_estimators=400, verbose=1, random_state=SEED, n_jobs=2)

rf_pipe = int_feats >> pt.ltr.apply_learned_model(rf)

%time rf_pipe.fit(train_topics, qrels)

[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.1s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.2s


CPU times: user 3.21 s, sys: 282 ms, total: 3.5 s
Wall time: 2.95 s


[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.4s finished


In [18]:
bm25_top100 = bm25 % RANK_CUTOFF
pt.Experiment(
    [rf_pipe, bm25_top100],
    topics,
    qrels, 
    names=['rf', 'bm25'],
    eval_metrics=["map", "ndcg", "ndcg_cut_10", "mrt"])

[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.1s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.2s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.4s finished


Unnamed: 0,name,map,ndcg,ndcg_cut_10,mrt
0,rf,0.712573,0.807924,0.872683,300.298351
1,bm25,0.735113,0.771651,0.773166,202.399461


In [22]:
import fastrank

train_request = fastrank.TrainRequest.coordinate_ascent()

params = train_request.params
params.init_random = True
params.normalize = True
params.seed = 1234567

ca_pipe = int_feats >> pt.ltr.apply_learned_model(train_request, form='fastrank')

%time ca_pipe.fit(train_topics, qrels)

AssertionError: 

In [23]:
# first compute the portion of matched readability level
# use qid, docid, score to match expected readability level (define a weight function) to adjust rank
# then recompute ndcg, map, the portion of matched readability level, etc. by hand to compare

In [24]:
import onir_pt
import pyterrier_doc2query

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [25]:
knrm = onir_pt.reranker('knrm', 'wordvec_hash', text_field='text')

config file not found: config
[2021-12-11 22:31:32,288][WordvecHashVocab][DEBUG] [starting] reading cached at /home/zyzhe/data/onir/vocab/wordvec_hash/fasttext-wiki-news-300d-1M.p
[2021-12-11 22:31:33,764][WordvecHashVocab][DEBUG] [finished] reading cached at /home/zyzhe/data/onir/vocab/wordvec_hash/fasttext-wiki-news-300d-1M.p [1.49s]


In [26]:
br = pt.BatchRetrieve(index) % RANK_CUTOFF
pipeline1 = br >> pt.text.get_text(index, 'text') >> knrm
pipeline1.fit(
    train_topics,
    qrels)

[2021-12-11 22:31:41,047][onir_pt][DEBUG] using GPU (deterministic)
[2021-12-11 22:31:45,172][onir_pt][DEBUG] [starting] training
[2021-12-11 22:31:45,174][onir_pt][DEBUG] [starting] train pairs


                                                                             

[2021-12-11 22:31:49,119][onir_pt][DEBUG] [finished] train pairs: [3.94s] [1024it] [259.71it/s]


                                          

[2021-12-11 22:31:49,123][onir_pt][DEBUG] [finished] training [3.95s]
[2021-12-11 22:31:49,124][onir_pt][INFO] training   it=0 loss=0.2157


In [27]:
pt.Experiment(
    [rf_pipe, bm25_top100, pipeline1],
    test_topics,
    qrels,
    names=['RF', 'BM25', 'DPH >> KNRM'],
    eval_metrics=["map", "ndcg", 'ndcg_cut.10', 'P.10', 'mrt']
)

[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.1s finished


[2021-12-11 22:31:50,417][onir_pt][DEBUG] using GPU (deterministic)
[2021-12-11 22:31:50,419][onir_pt][DEBUG] [starting] batches


                                                        

[2021-12-11 22:31:50,550][onir_pt][DEBUG] [finished] batches: [129ms] [50it] [387.72it/s]


Unnamed: 0,name,map,ndcg,ndcg_cut.10,P.10,mrt
0,RF,0.649307,0.722958,0.60167,0.9,292.7775
1,BM25,0.672997,0.804989,0.884659,1.0,156.41565
2,DPH >> KNRM,0.577674,0.632757,0.377966,0.75,243.933837


In [28]:
del knrm # clear out memory from KNRM (useful for GPU)
bert = onir_pt.reranker('vanilla_transformer', 'bert', text_field='text', vocab_config={'train': True})

In [29]:
pipeline2 = bm25 >> pt.text.get_text(index, 'text') >> bert
pipeline2.fit(
    train_topics,
    qrels)

[2021-12-11 22:31:58,493][onir_pt][DEBUG] using GPU (deterministic)
[2021-12-11 22:31:58,630][onir_pt][DEBUG] [starting] training
[2021-12-11 22:31:58,632][onir_pt][DEBUG] [starting] train pairs


                                                                         

RuntimeError: CUDA out of memory. Tried to allocate 70.00 MiB (GPU 0; 15.78 GiB total capacity; 13.90 GiB already allocated; 45.50 MiB free; 14.38 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
pt.Experiment(
    [rf_pipe, bm25_top100, pipeline1, pipeline2],
    test_topics,
    qrels,
    names=['RF', 'BM25', 'DPH >> KNRM', 'BM25 >> BERT'],
    eval_metrics=["map", "ndcg", 'ndcg_cut.10', 'P.10', 'mrt']
)

In [30]:
example_results = rf_pipe.search(topics['query'][13])
example_results[:10]

[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


Unnamed: 0,qid,docid,docno,score,query,readability,features,rank
24,1,995,995,2.96325,How did you come up with names for your pets,B1,"[5.113012050337778, 5.113012050337778]",0
2,1,548,548,2.5875,How did you come up with names for your pets,A2,"[15.432539997331556, 15.585670577626193]",1
16,1,1028,1028,2.295,How did you come up with names for your pets,A1,"[5.905342808881625, 5.905342808881625]",2
25,1,362,362,1.7175,How did you come up with names for your pets,A2,"[5.097060048155976, 5.097060048155976]",3
9,1,82,82,1.6525,How did you come up with names for your pets,B2,"[10.351873941780271, 10.351873941780271]",4
1,1,723,723,1.64,How did you come up with names for your pets,C1,"[16.189571317158613, 16.27085192611112]",5
27,1,311,311,1.3425,How did you come up with names for your pets,A2,"[5.028265931716659, 5.099897412595251]",6
43,1,1476,1476,1.297096,How did you come up with names for your pets,C2,"[4.329630568960405, 4.329630568960405]",7
42,1,37,37,1.282096,How did you come up with names for your pets,B2,"[4.332982536013016, 4.332982536013016]",8
5,1,365,365,1.165,How did you come up with names for your pets,A2,"[11.889049577273891, 11.889049577273891]",9


In [69]:
# phase 2 (?): re-rank the top 100 results from models in phase 1
def code2num(code):
    if 'A' in code or code == 'ele':
        return 0
    if 'B' in code or code == 'int':
        return 1
    if 'C' in code or code == 'adv':
        return 2
    print("Invalid code")
    return -1


def rerank(model, query, readability, top_N):
    results = model.search(query)
    rs = np.array(results['readability'].apply(lambda x: code2num(x)))
    R = code2num(readability)
    b = 5
    weights = 1/((rs-R)*(rs-R)+b)
    results['rerank_score'] = results['score'] * weights
    results = results.sort_values(by=['rerank_score'], ascending=False)
    results = results.reset_index()
    print(weights)
    return results[:top_N]

In [71]:
example_results_2 = rerank(rf_pipe, topics['query'][13], 'int', 10)
example_results_2

[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


[0.2        0.16666667 0.16666667 0.16666667 0.2        0.16666667
 0.16666667 0.16666667 0.2        0.16666667 0.2        0.16666667
 0.2        0.16666667 0.16666667 0.16666667 0.16666667 0.16666667
 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667
 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667
 0.2        0.2        0.16666667 0.16666667 0.16666667 0.16666667
 0.2        0.16666667 0.16666667 0.16666667 0.16666667 0.2
 0.16666667 0.16666667 0.16666667 0.2        0.16666667 0.2
 0.2        0.2        0.16666667 0.16666667 0.2        0.16666667
 0.16666667 0.16666667 0.16666667 0.2        0.16666667 0.16666667
 0.16666667 0.2        0.16666667 0.16666667 0.2        0.2
 0.16666667 0.2        0.16666667 0.16666667 0.16666667 0.2
 0.16666667 0.2        0.16666667 0.2        0.16666667 0.2
 0.2        0.16666667 0.16666667 0.2        0.16666667 0.16666667
 0.2        0.2        0.16666667 0.16666667 0.16666667 0.16666667
 0.2        0.2        0.16666

Unnamed: 0,index,qid,docid,docno,score,query,readability,features,rank,rerank_score
0,24,1,995,995,2.96325,How did you come up with names for your pets,B1,"[5.113012050337778, 5.113012050337778]",0,0.59265
1,2,1,548,548,2.5875,How did you come up with names for your pets,A2,"[15.432539997331556, 15.585670577626193]",1,0.43125
2,16,1,1028,1028,2.295,How did you come up with names for your pets,A1,"[5.905342808881625, 5.905342808881625]",2,0.3825
3,9,1,82,82,1.6525,How did you come up with names for your pets,B2,"[10.351873941780271, 10.351873941780271]",4,0.3305
4,25,1,362,362,1.7175,How did you come up with names for your pets,A2,"[5.097060048155976, 5.097060048155976]",3,0.28625
5,1,1,723,723,1.64,How did you come up with names for your pets,C1,"[16.189571317158613, 16.27085192611112]",5,0.273333
6,42,1,37,37,1.282096,How did you come up with names for your pets,B2,"[4.332982536013016, 4.332982536013016]",8,0.256419
7,6,1,997,997,1.15,How did you come up with names for your pets,B1,"[11.305271781907903, 11.305271781907903]",10,0.23
8,27,1,311,311,1.3425,How did you come up with names for your pets,A2,"[5.028265931716659, 5.099897412595251]",6,0.22375
9,23,1,80,80,1.0925,How did you come up with names for your pets,B2,"[5.278943898393761, 5.423070183221565]",12,0.2185


In [77]:
# phase 1 metric writing
def naive_metric_phase1(model, topics, readability, corpusDf, top_N):
    readability_code = ''
    if readability == 'int':
        readability_code = 'B'
    if readability == 'adv':
        readability_code = 'C'
    if readability == 'ele':
        readability_code = 'A'
    for i in range(len(topics)):
        query = topics['query'][i]
        top_results = model.search(query)[:top_N]
        col_name = 'int-' + topics['qid'][i]
        total_true_scores = 0
        matched_readability_docs = 0
        for docid in top_results['docid']:
            total_true_scores += int(corpusDf.loc[corpusDf['docno'] == str(docid)].reset_index()[col_name][0])
            # print(docid)
            temp = corpusDf.loc[corpusDf['docno'] == str(docid)].reset_index()
            # print(temp)
            if readability_code in temp['readability'][0]:
                matched_readability_docs += 1
        print("For query ", i, query)
        print("The total scores is ", total_true_scores)
        print("The matched readability document portion is ", matched_readability_docs/top_N)
        print("\n")

def naive_metric(results, qid, readability, corpusDf, top_N):
    readability_code = ''
    if readability == 'int':
        readability_code = 'B'
    if readability == 'adv':
        readability_code = 'C'
    if readability == 'ele':
        readability_code = 'A'
    top_results = results[:top_N]
    col_name = 'int-' + str(qid)
    total_true_scores = 0
    matched_readability_docs = 0
    for docid in top_results['docid']:
        temp = corpusDf.loc[corpusDf['docno'] == str(docid)].reset_index()
        # print(temp)
        total_true_scores += int(temp[col_name][0])
        print(docid)
        if readability_code in temp['readability'][0]:
            matched_readability_docs += 1
    return (total_true_scores, matched_readability_docs/top_N)

In [84]:
# qid is 12 for query 13 (12 is the order of distinct queries (start from 1), 
# 13 is the order of distinct queries and readability (start from 0) )
naive_metric(example_results_2, 12, 'int', corpusDf, 10)

995
548
1028
82
362
723
37
997
311
80


(13, 0.5)

In [79]:
naive_metric_phase1(rf_pipe, topics, 'int', corpusDf, 10)

[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  0  What is fathers love like
The total scores is  9
The matched readability document portion is  0.5




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  1  What is fathers love like
The total scores is  9
The matched readability document portion is  0.5




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  2  What is fathers love like
The total scores is  9
The matched readability document portion is  0.5




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  3  Is climbing mountains happy
The total scores is  10
The matched readability document portion is  0.4




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  4  Why do people eat apples
The total scores is  7
The matched readability document portion is  0.3




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  5  Suggestions on where to travel
The total scores is  9
The matched readability document portion is  0.7




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  6  How do I repair my phone
The total scores is  9
The matched readability document portion is  0.3




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  7  Clothes to wear in winter
The total scores is  10
The matched readability document portion is  0.6




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  8  What is a healthy lifestyle
The total scores is  10
The matched readability document portion is  0.7




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  9  How do I learn to drive cars
The total scores is  9
The matched readability document portion is  0.3




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  10  Is programming becoming important now
The total scores is  10
The matched readability document portion is  0.4




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  11  Is the sea beautiful
The total scores is  10
The matched readability document portion is  0.4




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  12  Whats your favorite cooking recipe
The total scores is  18
The matched readability document portion is  0.3




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  13  How did you come up with names for your pets
The total scores is  15
The matched readability document portion is  0.3




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  14  How do you resolve conflicts with your loved ones
The total scores is  5
The matched readability document portion is  0.3




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  15  What kind of advice would you give to a freshman at your college
The total scores is  15
The matched readability document portion is  0.4




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  16  How did the elderly in your family adjust to the age of the Internet
The total scores is  20
The matched readability document portion is  0.6




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  17  Any tips for travelling abroad
The total scores is  21
The matched readability document portion is  0.8




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  18  Share a scientific explanation for a common natural phenomenon
The total scores is  42
The matched readability document portion is  0.5




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  19  How do international students view America
The total scores is  15
The matched readability document portion is  0.5




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  20  Has any of your family members experienced the Second World War What was it like
The total scores is  21
The matched readability document portion is  0.7




[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    0.0s finished


For query  21  What kind of advice would you give to a new employee at your company
The total scores is  26
The matched readability document portion is  0.5


