In [1]:
import pandas as pd
import numpy as np
import csv
import nltk
from nltk.stem import PorterStemmer
from nltk.tokenize import WordPunctTokenizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

In [2]:
nltk.download("stopwords")
from nltk.corpus import stopwords
esw = stopwords.words("english")
from string import punctuation

#remove "num", because "num" has the highest term frequency(45538) at the orinal file, 
#the second most frequent term only has a freq. of 3750
esw = esw + ['abstract', 'ci', 'hr','l','pubmed', 'num'] 

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\D070678\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
from collections import defaultdict
import random
import datetime
import math

In [4]:
####read file

#read train.docs file
train_doc_file = pd.read_csv("train.docs", encoding = 'utf-8', sep='\t', header=None)
train_doc_file.columns = ['id', 'text']

#read train.nontopic-titles.queries file
train_query_file = pd.read_csv("train.nontopic-titles.queries", encoding = 'utf-8', sep='\t', header=None)
train_query_file.columns = ['id', 'text']

#read example query file, only one query
#train_query_file = pd.read_csv("example.queries", encoding = 'utf-8', sep='\t', header=None)
#train_query_file.columns = ['id', 'text']



# Part 1: build functions for Random Leader IR method
texts are stemmed
1. preprocessing
2. build DTM, query vector
3. randomly select doc leader, and cluster the docs through cosine similarity
4. IR

# preprocessing functions

In [5]:
## def preprocessing functions which fit with our dataset
# input file: train_doc_file["text"]/ train_query_file["text"]

def removeStopwords(file):
    i = 0
    for el in file:
        el = ' '.join([word for word in el.lower().split() if word not in esw])
        file[i] = el
        i += 1
def removePunctuation(file):
    i = 0
    for el in file:
        el = ' '.join([word for word in el.lower().split() if word not in punctuation])
        file[i] = el
        i += 1

def stemming(file):
    ps = PorterStemmer()
    i = 0
    for el in file:
        el = ' '.join([ps.stem(word) for word in el.lower().split() ])
        file[i] = el
        i += 1
        


# DTM, Query vector generating Functions

In [6]:
##### build functions to generate document-term matrix

## get DTM, weighted by tfidf, the sqrt of the query vector is 1
# therefore, in the retrive phase, 
# dot product of the doc vector and query vector can be used to represent the cosine similarity

# input format: train_doc_file.text

 ## get DTM, weighted by tfidf
def get_DTM_tfidf(file):
    train_count_vect = CountVectorizer() #remove english stopwords
    X_train_counts = train_count_vect.fit_transform(file)
    vocabulary = list(train_count_vect.vocabulary_.keys())
    #print(vocabulary)

    tfidf_transformer = TfidfTransformer()
    
    X_train_tfidf = CountVectorizer(vocabulary = vocabulary)
    X_train_tfidf = X_train_tfidf.fit_transform(file)
    
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_tfidf)
    return X_train_tfidf


## get DTM, weighted by term frequency
def get_DTM_tf(file):
    train_count_vect = CountVectorizer() #remove english stopwords
    X_train_counts = train_count_vect.fit_transform(file)
    vocabulary = list(train_count_vect.vocabulary_.keys())

    X_train_tf = CountVectorizer(vocabulary = vocabulary)
    X_train_tf = X_train_tf.fit_transform(file)
    return X_train_tf


In [7]:
###Generate the query vector
# get_QueryVector_tfidf helps to get the tiidf weighted query vector, the sqrt of the query vector is 1
# therefore, in the retrive phase, 
# dot product of the doc vector and query vector can be used to represent the cosine similarity

def get_QueryVector_tfidf(queryFile, docFile):
    train_count_vect = CountVectorizer() #remove english stopwords
    X_train_counts = train_count_vect.fit_transform(docFile)
    vocabulary = list(train_count_vect.vocabulary_.keys())
    
    if type(queryFile) is str:
        query_vect = []
        query = queryFile
        query = query.split()
        frequency = defaultdict(int)
        for el in vocabulary:
            if el in query:
                frequency[el]+= 1
            else:
                frequency[el] = 0
        query_vect = list(dict(frequency).values())/np.linalg.norm(list(dict(frequency).values()))
        
    else:
        tfidf_transformer = TfidfTransformer()
        
        query_vect = CountVectorizer(analyzer = "word", vocabulary = vocabulary)
        query_vect = query_vect.fit_transform(queryFile)
        
        query_vect = tfidf_transformer.fit_transform(query_vect)
    return query_vect



###Generate the query vector, weighted by term frequency
def get_QueryVector(queryFile, docFile):
    train_count_vect = CountVectorizer() #remove english stopwords
    X_train_counts = train_count_vect.fit_transform(docFile)
    vocabulary = list(train_count_vect.vocabulary_.keys())
    
    if type(queryFile) is str:
        query_vect = []
        query = queryFile
        query = query.split()
        frequency = defaultdict(int)
        for el in vocabulary:
            if el in query:
                frequency[el]+= 1
            else:
                frequency[el] = 0
        query_vect = list(dict(frequency).values())
        
    else:
        query_vect = CountVectorizer(analyzer = "word", vocabulary = vocabulary)
        query_vect = query_vect.fit_transform(queryFile)
        
    return query_vect

# functions for Preclustering through randomly selected doc-leaders

In [8]:
### information retrieve 
# return1: the randomly selected docLeaders index list, 
# return2: doc_clustering for each docLeader 

# input1: DTM_tfidf: tfidf weighted Document-term-matrix
# input2: preprocessed document file: train_doc_file 
# input3: docLeaders number (int)
def preclusteringByRandomLeader(doc_File, leaderNumber):
    
    train_tfidf = get_DTM_tfidf(doc_File.text)
    documents_id = list(doc_File['id'])
    leaderIndex = []
    doc_clustering = []
    
    for i in range(leaderNumber):
        leaderIndex.append(random.randint(0,train_tfidf.shape[0])) # randomly select doc leaders index
        doc_clustering.append([]) # initiate topic clustering

    for i in range(train_tfidf.shape[0]):
        sims = []
        #if i not in index:
        for el in leaderIndex:
            sims.append(np.dot(train_tfidf[i,], train_tfidf[el,].transpose())[0,0])
            #print(sims)    
            #maxSim = max(sims)
        maxsimindex = sims.index(max(sims))
        doc_clustering[maxsimindex].append(documents_id[i]) 
    
    return leaderIndex,doc_clustering 


# Functions for IR within most similar doc-leader's clustering

In [9]:
### IR algorithm, return query results through the cosine similarity between 

def IRqueryByLeaders(leaderIndex, doc_File, doc_clustering, query_File):    
    ### get the similarity of query with each doc leader
    index = leaderIndex
    doc_clustering = doc_clustering
    
    train_tfidf = get_DTM_tfidf(doc_File.text)
    query_vector = get_QueryVector_tfidf(query_File.text, doc_File.text)
    
    documents_id = list(doc_File['id'])
    query_id = list(query_File['id'])
    r = []
    
    for q in range(query_vector.shape[0]):
        
        sims_leaders = []
        for el in index:
            sims_leaders.append(np.dot(query_vector[q], train_tfidf[el,].transpose())[0,0])

       
        maxsimindex = sims_leaders.index(max(sims_leaders)) # get the most similarity clustering index for each query
        #print(doc_clustering[maxsimindex])

        sims_docs = []
        #if len(doc_clustering[maxsimindex])> 3:
            #get_sims_docs = []
        for el in doc_clustering[maxsimindex]:
            sims_docs.append(np.dot(query_vector[q], train_tfidf[documents_id.index(el),].transpose())[0,0]) 
            # get the similarty of query&docs in the most similarity clustering index
        
        IR_doc_sims = []
        IR_doc = []
        for i in range(len(doc_clustering[maxsimindex])):
            if sims_docs[i]>0:
                IR_doc_sims.append(sims_docs[i]) #get the non-zero similarity
                IR_doc.append(doc_clustering[maxsimindex][i]) #get the index of the docs with non-zero similarity  


        #IR_doc= []
        #for el in d:
            #IR_doc.append(doc_clustering[maxsimindex][el])


        #print(sims_leaders, sims_docs, d, IR_doc, IR_doc_sims)

        #print("Doc", "Similarity")
        if len(IR_doc_sims) >0 and len(IR_doc)>0:
            IR_doc_sims, IR_doc= zip(*sorted(zip(IR_doc_sims, IR_doc), reverse=True))# rank the results
        
        for j in range(len(IR_doc)):
            
            r.append([str(query_id[q]) ,  str(IR_doc[j]), IR_doc_sims[j]])
          

    return r
    #t2 = datetime.datetime.now().time()
    
    #print("time:",  t1, t2 )
    #return sims_docs
    
    #sims_docs_normalized = sims_docs/total
           
        
    

# Part 2: Get Functions Run !

# 1. preprocessing

In [10]:
### preprocessing

train_doc_file["text"] = train_doc_file['text'].str.replace('/', ' or ')

train_doc_file["text"] = train_doc_file['text'].str.replace('-', ' and ')

train_query_file["text"] = train_query_file['text'].str.replace('/', ' or ')

train_query_file["text"] = train_query_file['text'].str.replace('-', ' and ')


### remove stopwords
removeStopwords(train_doc_file['text'])
removeStopwords(train_query_file['text'])

### stemming
stemming(train_doc_file['text'])
stemming(train_query_file['text'])

### remove punctuation
removePunctuation(train_doc_file['text'])
removePunctuation(train_query_file['text'])

# 2. get docs clustering for each randomly selected docLeaders
the number of docLeaders are configuarable

In [13]:
# get the leader docs index and the docs clusterings with clustering number =  int(math.sqrt(train_tfidf.shape[0]))
# stemmed
leaderIndex_stemmed,doc_clustering_stemmed =  preclusteringByRandomLeader(train_doc_file, int(math.sqrt(len(train_doc_file))))

In [14]:
# get the leader docs index and the docs clusterings with clustering number =  int(math.sqrt(train_tfidf.shape[0]))
#stemmed
leaderIndex_10_stemmed,doc_clustering_10_stemmed =  preclusteringByRandomLeader( train_doc_file, 10)

# 3. retrieve results and rank them for each query
the docs are ranked through its aimilarity with the query

In [15]:
################ IRByRandomLeaderPreClustering result3, need to get the performance result #########################

# stemmed

# test the IR on the whole query file, #docLeaders = squareroot of total docs number, "-" converted to "and"
t1 =  datetime.datetime.now()
IR_results = IRqueryByLeaders(leaderIndex_stemmed, train_doc_file, doc_clustering_stemmed, train_query_file )

t2 =  datetime.datetime.now()
t = t2-t1
print("running time:", t )

# save result
df = pd.DataFrame(IR_results, columns = ['QUERY_ID', 'DOC_ID', 'sim_results'])
df.to_csv('IRByRandomLeaderPreClustering result3.txt', header=None, index=None, sep=' ', mode='a')
df

running time: 0:01:36.926940


Unnamed: 0,QUERY_ID,DOC_ID,sim_results
0,PLAIN-10,MED-2494,0.374199
1,PLAIN-10,MED-2054,0.341844
2,PLAIN-10,MED-1985,0.335333
3,PLAIN-10,MED-2058,0.333037
4,PLAIN-10,MED-3601,0.320036
5,PLAIN-10,MED-2475,0.311803
6,PLAIN-10,MED-3774,0.303089
7,PLAIN-10,MED-3150,0.281433
8,PLAIN-10,MED-1760,0.279506
9,PLAIN-10,MED-5005,0.276854


In [16]:
################ IRByRandomLeaderPreClustering result4, need to get the performance result #########################

# stemmed

# test the IR on the whole query file, #docLeaders = 10, "-" converted to "and"
t1 =  datetime.datetime.now()
IR_results = IRqueryByLeaders(leaderIndex_10_stemmed, train_doc_file, doc_clustering_10_stemmed, train_query_file)
t2 =  datetime.datetime.now()
t = t2-t1
print("running time:", t )

# save result
df = pd.DataFrame(IR_results, columns = ['QUERY_ID', 'DOC_ID', 'sim_results'])
df.to_csv('IRByRandomLeaderPreClustering result4.txt', header=None, index=None, sep=' ', mode='a')
df

running time: 0:05:50.437656


Unnamed: 0,QUERY_ID,DOC_ID,sim_results
0,PLAIN-10,MED-2054,0.341844
1,PLAIN-10,MED-1985,0.335333
2,PLAIN-10,MED-3601,0.320036
3,PLAIN-10,MED-2475,0.311803
4,PLAIN-10,MED-3150,0.281433
5,PLAIN-10,MED-5005,0.276854
6,PLAIN-10,MED-3370,0.257234
7,PLAIN-10,MED-1164,0.251320
8,PLAIN-10,MED-2479,0.251185
9,PLAIN-10,MED-5062,0.248174


# --------------------------------------------------------------------------------------------------------------

# Don't Run Code Below 

In [10]:
### create query vector matrix for the train.nontopic-titles.queries file 
#query_vect = get_QueryVector(train_query_file.text, train_doc_file.text)
#query_vect

<1141x16322 sparse matrix of type '<class 'numpy.int64'>'
	with 4293 stored elements in Compressed Sparse Row format>

In [8]:
### create tfidf weighted DTM for the train.docs file
#train_tfidf = get_DTM_tfidf(train_doc_file.text)
#train_tfidf

<3612x16322 sparse matrix of type '<class 'numpy.float64'>'
	with 289370 stored elements in Compressed Sparse Row format>

In [11]:
# input: 1* n dims sparse matrix, or single vector from query_vect/ train_tfidf matrix
#def getSquareSum(vector):
 #   squaresum = 0
  #  for i in range(vector.shape[1]):
   #     squaresum += vector[0,i]* vector[0,i]
        
    #return squaresum 

In [12]:
# input: 1* n sparse matrix, or single vector from query_vect/ train_tfidf matrix
# return the cosine sim of two vectors

#def getCosineSimilarity(query_vector, doc_vector):
    #squaresum_query = getSquareSum(query_vector)
    
    #squaresum_doc= getSquareSum(doc_vector)
    #sim = np.dot(query_vector, doc_vector.transpose())/(math.sqrt(squaresum_query)*math.sqrt(squaresum_doc))
  

In [40]:
########################Dont run this ######################################

################ IRByRandomLeaderPreClustering result1, need to get the performance result #########################
# without being stemmed
# test the IR on the whole query file, #docLeaders = squareroot of total docs number, "-" converted to "and"
#t1 =  datetime.datetime.now()
#IR_results = IRqueryByLeaders(leaderIndex, train_doc_file, doc_clustering, train_query_file, query_vect )

#t2 =  datetime.datetime.now()
#t = t2-t1
#print("running time:", t )

# save result
#df = pd.DataFrame(IR_results)
#df.to_csv('IRByRandomLeaderPreClustering result1.csv', header=None, index=None, sep=' ', mode='a')
#df

running time: 0:00:54.682999


Unnamed: 0,0,1,2,3
0,PLAIN-100,0,MED-2697,0.072161
1,PLAIN-104,0,MED-1237,0.076772
2,PLAIN-105,0,MED-2697,0.072161
3,PLAIN-107,0,MED-2280,0.127111
4,PLAIN-107,0,MED-3531,0.116436
5,PLAIN-109,0,MED-982,0.063806
6,PLAIN-109,0,MED-1496,0.055761
7,PLAIN-109,0,MED-3216,0.486182
8,PLAIN-109,0,MED-4590,0.055085
9,PLAIN-110,0,MED-982,0.063806


In [22]:
########################Dont run this ######################################

################ IRByRandomLeaderPreClustering result2, need to get the performance result #########################
# without being stemmed
# test the IR on the whole query file, #docLeaders = 10, "-" converted to "and"
#t1 =  datetime.datetime.now()
#IR_results = IRqueryByLeaders(leaderIndex_10, train_doc_file, doc_clustering_10, train_query_file, query_vect)
#t2 =  datetime.datetime.now()
#t = t2-t1
#print("running time:", t )

PLAIN-100 0 MED-2697 0.0721609528169607
PLAIN-104 0 MED-928 0.0346921160302065
PLAIN-104 0 MED-1309 0.07911041267042357
PLAIN-104 0 MED-1492 0.06259065081488634
PLAIN-104 0 MED-1868 0.06684000800169654
PLAIN-105 0 MED-2697 0.0721609528169607
PLAIN-106 0 MED-3030 0.14298841548577074
PLAIN-107 0 MED-3531 0.11643606064904424
PLAIN-108 0 MED-1204 0.07847585430651721
PLAIN-109 0 MED-884 0.04969257514556273
PLAIN-109 0 MED-1447 0.11817220580820516
PLAIN-109 0 MED-2011 0.1161101722815622
PLAIN-109 0 MED-2108 0.057620152675702904
PLAIN-109 0 MED-2201 0.047072445679494306
PLAIN-109 0 MED-2294 0.0500620444287085
PLAIN-109 0 MED-2901 0.37736356688811784
PLAIN-109 0 MED-3215 0.10550012189816652
PLAIN-109 0 MED-3216 0.4861824915875047
PLAIN-109 0 MED-3220 0.05260126328503092
PLAIN-109 0 MED-3227 0.4202182000598841
PLAIN-109 0 MED-3229 0.36725064723544815
PLAIN-109 0 MED-3235 0.05260126328503092
PLAIN-109 0 MED-3900 0.04610064388600115
PLAIN-109 0 MED-4104 0.03924273732279929
PLAIN-109 0 MED-4163 0.

PLAIN-138 0 MED-2035 0.09851064012444066
PLAIN-138 0 MED-2149 0.05716501447504646
PLAIN-138 0 MED-3087 0.02694589227487393
PLAIN-138 0 MED-3532 0.05476299144149169
PLAIN-138 0 MED-4090 0.08257671736373665
PLAIN-138 0 MED-4193 0.051459241422570544
PLAIN-139 0 MED-2708 0.3110300955041829
PLAIN-139 0 MED-5310 0.6165892909004761
PLAIN-14 0 MED-1696 0.06019713969971562
PLAIN-14 0 MED-2076 0.08374343520271192
PLAIN-140 0 MED-1342 0.07368731074800818
PLAIN-140 0 MED-2697 0.0721609528169607
PLAIN-141 0 MED-820 0.05942578321696331
PLAIN-141 0 MED-825 0.03067071617908496
PLAIN-141 0 MED-866 0.028461215588583565
PLAIN-141 0 MED-878 0.03015844043022804
PLAIN-141 0 MED-897 0.07772060652055339
PLAIN-141 0 MED-1009 0.04244532581217782
PLAIN-141 0 MED-1011 0.020769364239512783
PLAIN-141 0 MED-1029 0.03431048456695516
PLAIN-141 0 MED-1069 0.026560270074309066
PLAIN-141 0 MED-1109 0.0366606764607214
PLAIN-141 0 MED-1192 0.02874037508735656
PLAIN-141 0 MED-1203 0.03570719778516272
PLAIN-141 0 MED-1246 0.

PLAIN-166 0 MED-2039 0.08861047651328864
PLAIN-166 0 MED-4159 0.027953034403309103
PLAIN-166 0 MED-4856 0.04629630708483317
PLAIN-168 0 MED-3680 0.0503072370437014
PLAIN-169 0 MED-872 0.03953106414441399
PLAIN-169 0 MED-1204 0.048941288211792895
PLAIN-169 0 MED-3193 0.031133165940763388
PLAIN-169 0 MED-3455 0.05773475384515607
PLAIN-169 0 MED-3787 0.031133165940763388
PLAIN-17 0 MED-1309 0.07911041267042357
PLAIN-17 0 MED-1868 0.06684000800169654
PLAIN-17 0 MED-4762 0.2515802323750719
PLAIN-170 0 MED-928 0.02728085980459467
PLAIN-170 0 MED-1109 0.05705144174349151
PLAIN-170 0 MED-1625 0.06712186034173181
PLAIN-170 0 MED-1828 0.05363350071886906
PLAIN-170 0 MED-1860 0.046672249042258036
PLAIN-170 0 MED-2016 0.10475080222324003
PLAIN-170 0 MED-2586 0.14632386684475965
PLAIN-170 0 MED-3476 0.0483062779212867
PLAIN-170 0 MED-5283 0.09949758561272219
PLAIN-173 0 MED-707 0.058800998646153936
PLAIN-173 0 MED-734 0.02870454206786816
PLAIN-173 0 MED-821 0.03504037501531205
PLAIN-173 0 MED-824 0

PLAIN-19 0 MED-2379 0.09485065252515475
PLAIN-19 0 MED-2925 0.10131011301647934
PLAIN-19 0 MED-3869 0.09267216360315163
PLAIN-190 0 MED-2039 0.08861047651328864
PLAIN-190 0 MED-2224 0.056750765668832796
PLAIN-190 0 MED-3250 0.05387317763103978
PLAIN-190 0 MED-4159 0.027953034403309103
PLAIN-190 0 MED-4856 0.04629630708483317
PLAIN-193 0 MED-1111 0.0755159393537383
PLAIN-194 0 MED-864 0.25449593692256006
PLAIN-194 0 MED-1648 0.19235154157915269
PLAIN-194 0 MED-2223 0.13827530772873647
PLAIN-197 0 MED-2039 0.08861047651328864
PLAIN-197 0 MED-4159 0.027953034403309103
PLAIN-197 0 MED-4856 0.04629630708483317
PLAIN-197 0 MED-5325 0.25399550581025443
PLAIN-198 0 MED-4669 0.18516930967436426
PLAIN-198 0 MED-5319 0.04339572182648471
PLAIN-199 0 MED-709 0.2071997413572165
PLAIN-199 0 MED-712 0.05447310826796234
PLAIN-199 0 MED-1673 0.07949239261023762
PLAIN-199 0 MED-1687 0.14019132570459145
PLAIN-199 0 MED-1799 0.17680809393798527
PLAIN-199 0 MED-2926 0.04034929550430257
PLAIN-199 0 MED-3168 

PLAIN-225 0 MED-897 0.04866264283642514
PLAIN-225 0 MED-1533 0.05005259781228019
PLAIN-225 0 MED-1828 0.06473673026968992
PLAIN-225 0 MED-1865 0.13160575854295456
PLAIN-225 0 MED-1866 0.11721360799264184
PLAIN-225 0 MED-1872 0.040606982179980974
PLAIN-225 0 MED-3198 0.10921147168355576
PLAIN-225 0 MED-5081 0.07078609522444086
PLAIN-225 0 MED-5153 0.05593424434212088
PLAIN-225 0 MED-5178 0.0694992443961933
PLAIN-225 0 MED-5179 0.08142610525437798
PLAIN-225 0 MED-5297 0.07067678630987946
PLAIN-229 0 MED-329 0.11081571231996319
PLAIN-229 0 MED-332 0.0510257288864777
PLAIN-229 0 MED-726 0.07702384341459834
PLAIN-229 0 MED-819 0.020132595246431125
PLAIN-229 0 MED-855 0.011670329600664405
PLAIN-229 0 MED-876 0.06788864494435715
PLAIN-229 0 MED-895 0.02580029923683462
PLAIN-229 0 MED-899 0.022142799972405112
PLAIN-229 0 MED-901 0.03274765668093116
PLAIN-229 0 MED-921 0.026356813568269417
PLAIN-229 0 MED-922 0.05258586919417793
PLAIN-229 0 MED-927 0.036924597041318834
PLAIN-229 0 MED-959 0.099

PLAIN-232 0 MED-2061 0.11475847690269776
PLAIN-232 0 MED-5267 0.07864178250904225
PLAIN-234 0 MED-1986 0.3106998409049158
PLAIN-234 0 MED-3030 0.12357959250376263
PLAIN-235 0 MED-2708 0.3110300955041829
PLAIN-236 0 MED-2708 0.3110300955041829
PLAIN-241 0 MED-4782 0.09593031432110179
PLAIN-242 0 MED-1669 0.053510797946427414
PLAIN-2427 0 MED-4007 0.07919655408112695
PLAIN-2428 0 MED-4839 0.0937929826651917
PLAIN-2431 0 MED-1192 0.17247076454397017
PLAIN-2431 0 MED-2708 0.3110300955041829
PLAIN-2432 0 MED-3201 0.05718108419462983
PLAIN-2432 0 MED-3530 0.06626247551985225
PLAIN-2432 0 MED-4736 0.3862461934773271
PLAIN-2434 0 MED-956 0.0647937355573553
PLAIN-2434 0 MED-1015 0.037158623062481994
PLAIN-2434 0 MED-1339 0.17069092557634244
PLAIN-2434 0 MED-1472 0.07419580798485849
PLAIN-2434 0 MED-2288 0.06480920942134127
PLAIN-2434 0 MED-2744 0.056378283639261596
PLAIN-2434 0 MED-2992 0.42159303720303526
PLAIN-2434 0 MED-3556 0.22644803434401745
PLAIN-2434 0 MED-3627 0.04474750087128291
PLAIN

PLAIN-246 0 MED-3030 0.12801948208231642
PLAIN-2461 0 MED-866 0.07846007832937926
PLAIN-2461 0 MED-3201 0.05718108419462983
PLAIN-2462 0 MED-880 0.07405944732449721
PLAIN-2462 0 MED-3250 0.12328091477911386
PLAIN-2462 0 MED-3466 0.06198327148003612
PLAIN-2462 0 MED-5029 0.06431353444929142
PLAIN-2464 0 MED-834 0.05019490050333165
PLAIN-2464 0 MED-853 0.0686894441441981
PLAIN-2464 0 MED-1851 0.05786734276180237
PLAIN-2464 0 MED-1915 0.13971309271844773
PLAIN-2464 0 MED-2257 0.0466827213333189
PLAIN-2464 0 MED-3773 0.06802981886813173
PLAIN-2464 0 MED-4027 0.05077012052364523
PLAIN-2464 0 MED-4107 0.07678455585705793
PLAIN-2464 0 MED-4533 0.042811182205311535
PLAIN-2464 0 MED-4655 0.06582154028535131
PLAIN-2466 0 MED-5325 0.25399550581025443
PLAIN-2467 0 MED-880 0.06946906477541263
PLAIN-2467 0 MED-1643 0.05070177171864054
PLAIN-2467 0 MED-5274 0.07046897793128533
PLAIN-2468 0 MED-880 0.06946906477541263
PLAIN-2468 0 MED-1643 0.05070177171864054
PLAIN-2468 0 MED-5274 0.07046897793128533


PLAIN-2516 0 MED-833 0.03811526500781941
PLAIN-2516 0 MED-854 0.047433536810311164
PLAIN-2516 0 MED-866 0.052428860901180534
PLAIN-2516 0 MED-867 0.02786925170313153
PLAIN-2516 0 MED-912 0.10842061614815332
PLAIN-2516 0 MED-949 0.02646880880332994
PLAIN-2516 0 MED-1011 0.019129789192070857
PLAIN-2516 0 MED-1111 0.029592681268458727
PLAIN-2516 0 MED-1198 0.037299696751842984
PLAIN-2516 0 MED-1203 0.03288840035701382
PLAIN-2516 0 MED-1300 0.027798845898286018
PLAIN-2516 0 MED-1309 0.03389698870420777
PLAIN-2516 0 MED-1342 0.024619792942156438
PLAIN-2516 0 MED-1348 0.14449367999918566
PLAIN-2516 0 MED-1349 0.1491332340111739
PLAIN-2516 0 MED-1358 0.07684160162242215
PLAIN-2516 0 MED-1360 0.03128626717879399
PLAIN-2516 0 MED-1393 0.021223722025507416
PLAIN-2516 0 MED-1522 0.04238914766805043
PLAIN-2516 0 MED-1648 0.04019589193361971
PLAIN-2516 0 MED-1709 0.05104773086783311
PLAIN-2516 0 MED-1837 0.027598124808313904
PLAIN-2516 0 MED-1862 0.05123503287438091
PLAIN-2516 0 MED-1863 0.05103066

PLAIN-2552 0 MED-1669 0.053510797946427414
PLAIN-2552 0 MED-2109 0.1759647647353537
PLAIN-2552 0 MED-2110 0.16582972419491268
PLAIN-2554 0 MED-846 0.11049333212755433
PLAIN-2554 0 MED-2056 0.07613111102498239
PLAIN-2554 0 MED-2521 0.22111068468045228
PLAIN-2557 0 MED-823 0.03780043759965936
PLAIN-2557 0 MED-1258 0.06908048363196748
PLAIN-2557 0 MED-1321 0.022297304263835048
PLAIN-2557 0 MED-1324 0.23496620781945474
PLAIN-2557 0 MED-1447 0.04009139278659466
PLAIN-2557 0 MED-1458 0.030292833317147334
PLAIN-2557 0 MED-1655 0.04036992083068842
PLAIN-2557 0 MED-1688 0.03655601955051034
PLAIN-2557 0 MED-1743 0.030454033403272567
PLAIN-2557 0 MED-1873 0.1421399859595246
PLAIN-2557 0 MED-2108 0.07819341807309349
PLAIN-2557 0 MED-2129 0.06672716449011055
PLAIN-2557 0 MED-2180 0.04445662205620486
PLAIN-2557 0 MED-2191 0.03430513741378818
PLAIN-2557 0 MED-2201 0.03193982707279229
PLAIN-2557 0 MED-2202 0.024218692203826286
PLAIN-2557 0 MED-2294 0.03396834430168431
PLAIN-2557 0 MED-2416 0.070491776

PLAIN-2591 0 MED-1639 0.054018718154662825
PLAIN-2593 0 MED-928 0.028709850172148464
PLAIN-2593 0 MED-1810 0.039689301991511425
PLAIN-2593 0 MED-2049 0.03950367148547045
PLAIN-2593 0 MED-2227 0.07344233381323888
PLAIN-2594 0 MED-745 0.12805600569628153
PLAIN-2594 0 MED-825 0.055321373523214035
PLAIN-2594 0 MED-867 0.027288355737869047
PLAIN-2594 0 MED-878 0.02719868584379468
PLAIN-2594 0 MED-949 0.025917103131324733
PLAIN-2594 0 MED-1005 0.028382879034884234
PLAIN-2594 0 MED-1354 0.029937374762491416
PLAIN-2594 0 MED-1360 0.030634148256984434
PLAIN-2594 0 MED-1393 0.02078134292527903
PLAIN-2594 0 MED-1456 0.035463331539397895
PLAIN-2594 0 MED-1492 0.028523947122933158
PLAIN-2594 0 MED-1522 0.020752802287491025
PLAIN-2594 0 MED-1638 0.024577958374864997
PLAIN-2594 0 MED-1671 0.023294832184368366
PLAIN-2594 0 MED-1765 0.026354753445177185
PLAIN-2594 0 MED-1806 0.024789387736133702
PLAIN-2594 0 MED-1810 0.021856140843340074
PLAIN-2594 0 MED-1862 0.025083554775897007
PLAIN-2594 0 MED-1863 

PLAIN-2603 0 MED-2708 0.3110300955041829
PLAIN-2604 0 MED-846 0.11049333212755433
PLAIN-2604 0 MED-2056 0.07613111102498239
PLAIN-2604 0 MED-2521 0.22111068468045228
PLAIN-2605 0 MED-846 0.11049333212755433
PLAIN-2605 0 MED-2056 0.07613111102498239
PLAIN-2605 0 MED-2521 0.22111068468045228
PLAIN-2607 0 MED-733 0.09319339213839756
PLAIN-2607 0 MED-1060 0.07277029934029747
PLAIN-2607 0 MED-1436 0.09656397779403604
PLAIN-2607 0 MED-1475 0.2926945020446159
PLAIN-2607 0 MED-2128 0.09181935969780113
PLAIN-2607 0 MED-2135 0.11591944968001645
PLAIN-2607 0 MED-2136 0.044085605432205334
PLAIN-2607 0 MED-2604 0.08228253553730822
PLAIN-2607 0 MED-2812 0.07750039615727863
PLAIN-2607 0 MED-2814 0.07741342458697029
PLAIN-2607 0 MED-2824 0.08770275276932955
PLAIN-2607 0 MED-4269 0.11967150516955846
PLAIN-2607 0 MED-4394 0.07208890096470566
PLAIN-2607 0 MED-4630 0.11490026079091123
PLAIN-2607 0 MED-5237 0.1301233058311142
PLAIN-2608 0 MED-1309 0.07911041267042357
PLAIN-2608 0 MED-1868 0.066840008001696

PLAIN-2628 0 MED-1348 0.033267013203060206
PLAIN-2628 0 MED-5278 0.14898543208875464
PLAIN-2631 0 MED-1109 0.07561618523975983
PLAIN-2632 0 MED-1669 0.053510797946427414
PLAIN-2632 0 MED-2061 0.11475847690269776
PLAIN-2634 0 MED-1360 0.06309288602709252
PLAIN-2634 0 MED-1533 0.04732352377041211
PLAIN-2634 0 MED-1616 0.11499515945108062
PLAIN-2634 0 MED-3111 0.12535854606106944
PLAIN-2634 0 MED-5150 0.048781067840700916
PLAIN-2635 0 MED-1360 0.06309288602709252
PLAIN-2635 0 MED-1533 0.04732352377041211
PLAIN-2635 0 MED-1616 0.11499515945108062
PLAIN-2635 0 MED-3111 0.12535854606106944
PLAIN-2635 0 MED-5150 0.048781067840700916
PLAIN-2636 0 MED-866 0.07846007832937926
PLAIN-2636 0 MED-5095 0.042308748409947666
PLAIN-2637 0 MED-2061 0.11475847690269776
PLAIN-2637 0 MED-3680 0.0503072370437014
PLAIN-2638 0 MED-843 0.060169931856398275
PLAIN-2638 0 MED-1008 0.037363890290346265
PLAIN-2638 0 MED-1348 0.017511145593813334
PLAIN-2638 0 MED-1473 0.047483520154947945
PLAIN-2638 0 MED-1638 0.0395

PLAIN-267 0 MED-912 0.06745874719984986
PLAIN-267 0 MED-1533 0.04380241814045042
PLAIN-267 0 MED-4782 0.09593031432110179
PLAIN-2674 0 MED-880 0.07405944732449721
PLAIN-2674 0 MED-3250 0.12328091477911386
PLAIN-2675 0 MED-843 0.21626277747025244
PLAIN-2675 0 MED-885 0.050457432405718684
PLAIN-2675 0 MED-897 0.027023600969087926
PLAIN-2675 0 MED-928 0.018286062389901155
PLAIN-2675 0 MED-1046 0.1424090019116742
PLAIN-2675 0 MED-1198 0.08448441638333483
PLAIN-2675 0 MED-1203 0.18623189676511812
PLAIN-2675 0 MED-1473 0.06826610990540312
PLAIN-2675 0 MED-1491 0.07540063980550535
PLAIN-2675 0 MED-1494 0.09790044780811252
PLAIN-2675 0 MED-1669 0.025327873777522426
PLAIN-2675 0 MED-1671 0.0808295372513979
PLAIN-2675 0 MED-1710 0.027404457407656253
PLAIN-2675 0 MED-1841 0.23243492870508475
PLAIN-2675 0 MED-1868 0.03243435557670841
PLAIN-2675 0 MED-2378 0.05997800762722398
PLAIN-2675 0 MED-2586 0.04903964496390656
PLAIN-2675 0 MED-2697 0.034155414883618916
PLAIN-2675 0 MED-2945 0.035413195253723

PLAIN-2712 0 MED-4347 0.1259759151389253
PLAIN-2713 0 MED-816 0.056941228342726066
PLAIN-2713 0 MED-825 0.04918182635223801
PLAIN-2713 0 MED-1765 0.046859823787739036
PLAIN-2713 0 MED-2708 0.1621292171820109
PLAIN-2713 0 MED-3794 0.23342052211998657
PLAIN-2713 0 MED-3796 0.03652682844679837
PLAIN-2713 0 MED-5095 0.20065442122399382
PLAIN-2713 0 MED-5269 0.3934818218139628
PLAIN-2715 0 MED-816 0.056941228342726066
PLAIN-2715 0 MED-825 0.04918182635223801
PLAIN-2715 0 MED-1765 0.046859823787739036
PLAIN-2715 0 MED-3728 0.12908059761101917
PLAIN-2715 0 MED-3730 0.0692587201451336
PLAIN-2715 0 MED-3794 0.23342052211998657
PLAIN-2715 0 MED-3796 0.03652682844679837
PLAIN-2716 0 MED-872 0.6403907354255276
PLAIN-2717 0 MED-4007 0.07919655408112695
PLAIN-2717 0 MED-4090 0.08988236447549547
PLAIN-2718 0 MED-1111 0.0755159393537383
PLAIN-2718 0 MED-1638 0.2514956702019052
PLAIN-272 0 MED-1669 0.053510797946427414
PLAIN-272 0 MED-2888 0.04928112504707134
PLAIN-272 0 MED-2925 0.06202362661559604
PL

PLAIN-2743 0 MED-1016 0.314840742463454
PLAIN-2743 0 MED-2061 0.06531262691849457
PLAIN-2743 0 MED-3904 0.05578145355077013
PLAIN-2743 0 MED-3925 0.06839140664111273
PLAIN-2743 0 MED-4829 0.14342223772530927
PLAIN-2743 0 MED-5056 0.20323061528884515
PLAIN-2744 0 MED-872 0.03953106414441399
PLAIN-2744 0 MED-1016 0.314840742463454
PLAIN-2744 0 MED-1204 0.048941288211792895
PLAIN-2744 0 MED-2061 0.06531262691849457
PLAIN-2744 0 MED-3193 0.031133165940763388
PLAIN-2744 0 MED-3455 0.05773475384515607
PLAIN-2744 0 MED-3787 0.031133165940763388
PLAIN-2744 0 MED-3904 0.05578145355077013
PLAIN-2744 0 MED-3925 0.06839140664111273
PLAIN-2744 0 MED-5056 0.20323061528884515
PLAIN-2745 0 MED-2109 0.2639471471030306
PLAIN-2745 0 MED-4352 0.1326665068755408
PLAIN-2746 0 MED-5288 0.050672190696985794
PLAIN-2747 0 MED-5095 0.042308748409947666
PLAIN-2747 0 MED-5283 0.09871416668438125
PLAIN-2748 0 MED-969 0.07341805067216253
PLAIN-2748 0 MED-1111 0.17084680167400768
PLAIN-2748 0 MED-2061 0.0883921100025

PLAIN-276 0 MED-1669 0.053510797946427414
PLAIN-2762 0 MED-928 0.02728085980459467
PLAIN-2762 0 MED-1109 0.05705144174349151
PLAIN-2762 0 MED-1625 0.06712186034173181
PLAIN-2762 0 MED-1828 0.05363350071886906
PLAIN-2762 0 MED-1860 0.046672249042258036
PLAIN-2762 0 MED-2016 0.10475080222324003
PLAIN-2762 0 MED-2586 0.14632386684475965
PLAIN-2762 0 MED-3476 0.0483062779212867
PLAIN-2762 0 MED-5283 0.09949758561272219
PLAIN-2763 0 MED-5283 0.1364699708603498
PLAIN-2766 0 MED-707 0.058800998646153936
PLAIN-2766 0 MED-734 0.02870454206786816
PLAIN-2766 0 MED-821 0.03504037501531205
PLAIN-2766 0 MED-824 0.07746297242820875
PLAIN-2766 0 MED-829 0.06001824847005372
PLAIN-2766 0 MED-832 0.1306393853507118
PLAIN-2766 0 MED-1015 0.06530827155363708
PLAIN-2766 0 MED-1124 0.11039529999622623
PLAIN-2766 0 MED-1224 0.08970334084671507
PLAIN-2766 0 MED-1244 0.1436309043297318
PLAIN-2766 0 MED-1339 0.4430067046642862
PLAIN-2766 0 MED-1394 0.14815660852334017
PLAIN-2766 0 MED-1521 0.11499271880829932
PL

PLAIN-2785 0 MED-2224 0.056750765668832796
PLAIN-2785 0 MED-3250 0.05387317763103978
PLAIN-2787 0 MED-4090 0.08988236447549547
PLAIN-2788 0 MED-1111 0.0755159393537383
PLAIN-2791 0 MED-5325 0.25399550581025443
PLAIN-2792 0 MED-872 0.6403907354255276
PLAIN-2792 0 MED-4669 0.18516930967436426
PLAIN-2793 0 MED-866 0.05675330038871016
PLAIN-2795 0 MED-1880 0.051668880569133505
PLAIN-2795 0 MED-2047 0.05609057691184558
PLAIN-2795 0 MED-3530 0.06626247551985225
PLAIN-2795 0 MED-3531 0.07439619169988175
PLAIN-2795 0 MED-4013 0.06372765042551173
PLAIN-2795 0 MED-4538 0.06524050602379526
PLAIN-2796 0 MED-3530 0.06626247551985225
PLAIN-2797 0 MED-4669 0.18516930967436426
PLAIN-2798 0 MED-4669 0.18516930967436426
PLAIN-2801 0 MED-3382 0.1203914470426231
PLAIN-2802 0 MED-2224 0.056750765668832796
PLAIN-2802 0 MED-3250 0.05387317763103978
PLAIN-2802 0 MED-3382 0.1203914470426231
PLAIN-2803 0 MED-870 0.04677899825763905
PLAIN-2803 0 MED-1437 0.11843206651169075
PLAIN-2803 0 MED-1750 0.06467538499957

PLAIN-2833 0 MED-897 0.04866264283642514
PLAIN-2833 0 MED-1828 0.06473673026968992
PLAIN-2833 0 MED-5081 0.07078609522444086
PLAIN-2833 0 MED-5153 0.05593424434212088
PLAIN-2833 0 MED-5297 0.07067678630987946
PLAIN-2834 0 MED-1533 0.05005259781228019
PLAIN-2834 0 MED-1865 0.13160575854295456
PLAIN-2834 0 MED-1866 0.11721360799264184
PLAIN-2834 0 MED-1872 0.040606982179980974
PLAIN-2834 0 MED-3198 0.10921147168355576
PLAIN-2834 0 MED-5178 0.0694992443961933
PLAIN-2834 0 MED-5179 0.08142610525437798
PLAIN-2835 0 MED-1533 0.46468307936268194
PLAIN-2835 0 MED-1534 0.21945304097685453
PLAIN-2835 0 MED-3143 0.058300845470789155
PLAIN-2835 0 MED-3896 0.21573586680648313
PLAIN-2835 0 MED-5095 0.04425253999483067
PLAIN-2837 0 MED-1669 0.053510797946427414
PLAIN-284 0 MED-918 0.03535209253588888
PLAIN-284 0 MED-1204 0.043855256363767546
PLAIN-284 0 MED-1516 0.053944937267971185
PLAIN-284 0 MED-1522 0.035450192785001014
PLAIN-284 0 MED-1669 0.0374068668973686
PLAIN-284 0 MED-2227 0.06908572849235

PLAIN-293 0 MED-872 0.046596657843077975
PLAIN-293 0 MED-877 0.10724241857794682
PLAIN-293 0 MED-880 0.05259982291178758
PLAIN-293 0 MED-1030 0.25052666261204903
PLAIN-293 0 MED-1204 0.04972758266154728
PLAIN-293 0 MED-1516 0.12233659312872229
PLAIN-293 0 MED-1683 0.06686869589300752
PLAIN-293 0 MED-1865 0.058511106647429464
PLAIN-293 0 MED-2061 0.11475847690269776
PLAIN-293 0 MED-2228 0.047907449967747616
PLAIN-293 0 MED-2372 0.07721396338562381
PLAIN-293 0 MED-2697 0.11439784825130737
PLAIN-293 0 MED-4421 0.045110699504379036
PLAIN-293 0 MED-4520 0.26137321260559165
PLAIN-293 0 MED-5013 0.04070263521325184
PLAIN-293 0 MED-5152 0.04781740971163185
PLAIN-293 0 MED-5179 0.07240308602254217
PLAIN-293 0 MED-5266 0.05494195457801151
PLAIN-293 0 MED-5330 0.10982302094601974
PLAIN-2932 0 MED-4090 0.08988236447549547
PLAIN-2933 0 MED-2224 0.056750765668832796
PLAIN-2933 0 MED-3250 0.05387317763103978
PLAIN-2934 0 MED-2224 0.056750765668832796
PLAIN-2934 0 MED-3250 0.05387317763103978
PLAIN-29

PLAIN-2974 0 MED-759 0.0400072746289203
PLAIN-2974 0 MED-1055 0.056601926513868424
PLAIN-2974 0 MED-2201 0.06624866237257963
PLAIN-2974 0 MED-2693 0.03162034862095492
PLAIN-2974 0 MED-2695 0.038680278183393355
PLAIN-2974 0 MED-4135 0.041742195673325516
PLAIN-2974 0 MED-4147 0.024222576251536978
PLAIN-2974 0 MED-4232 0.04742400692504562
PLAIN-2974 0 MED-4447 0.0252404308537943
PLAIN-2974 0 MED-4496 0.07610429224997914
PLAIN-2975 0 MED-969 0.07890412122956537
PLAIN-2975 0 MED-1198 0.05785816879934121
PLAIN-2975 0 MED-2040 0.044198462365542056
PLAIN-2975 0 MED-2076 0.04864568952809808
PLAIN-2975 0 MED-2224 0.03771886163813591
PLAIN-2975 0 MED-2372 0.07721396338562381
PLAIN-2975 0 MED-4335 0.08383575267808184
PLAIN-2975 0 MED-5039 0.08961351351614877
PLAIN-2975 0 MED-5041 0.08752451043900908
PLAIN-2975 0 MED-5147 0.03897846338895432
PLAIN-2975 0 MED-5153 0.042544495174526124
PLAIN-2975 0 MED-5319 0.06782026047075584
PLAIN-2976 0 MED-2372 0.07721396338562381
PLAIN-2976 0 MED-3492 0.04777507

PLAIN-3038 0 MED-3087 0.08665753045682624
PLAIN-3039 0 MED-2892 0.04672350191596686
PLAIN-3050 0 MED-3247 0.0785940403265431
PLAIN-3056 0 MED-1534 0.1613319101363406
PLAIN-3057 0 MED-3663 0.09649787342942166
PLAIN-3059 0 MED-846 0.11049333212755433
PLAIN-3059 0 MED-2056 0.07613111102498239
PLAIN-3059 0 MED-2521 0.22111068468045228
PLAIN-3060 0 MED-3247 0.0785940403265431
PLAIN-3060 0 MED-3422 0.0978621883704891
PLAIN-3061 0 MED-5178 0.18574322733399162
PLAIN-3064 0 MED-10 0.034614715590424824
PLAIN-3064 0 MED-852 0.10306371218798618
PLAIN-3064 0 MED-890 0.06123020092023349
PLAIN-3064 0 MED-978 0.027540872071794037
PLAIN-3064 0 MED-1028 0.007429606784884593
PLAIN-3064 0 MED-1139 0.038911728977309074
PLAIN-3064 0 MED-1146 0.185630176267588
PLAIN-3064 0 MED-1151 0.04356583380372153
PLAIN-3064 0 MED-1417 0.026683308556638106
PLAIN-3064 0 MED-1560 0.025808978839767592
PLAIN-3064 0 MED-1567 0.030058078087502712
PLAIN-3064 0 MED-1717 0.07196499777856698
PLAIN-3064 0 MED-1718 0.053270920761193

PLAIN-3076 0 MED-745 0.07321755262548506
PLAIN-3076 0 MED-1342 0.07368731074800818
PLAIN-3076 0 MED-5040 0.03967618100954434
PLAIN-3077 0 MED-1360 0.06309288602709252
PLAIN-3077 0 MED-1533 0.04732352377041211
PLAIN-3077 0 MED-1616 0.11499515945108062
PLAIN-3077 0 MED-3111 0.12535854606106944
PLAIN-3077 0 MED-5150 0.048781067840700916
PLAIN-3078 0 MED-1456 0.10323015331127756
PLAIN-3079 0 MED-1456 0.10323015331127756
PLAIN-3079 0 MED-3022 0.1749510588421455
PLAIN-308 0 MED-3512 0.06705741956272113
PLAIN-308 0 MED-4118 0.05436589026649232
PLAIN-308 0 MED-4335 0.14857009107774508
PLAIN-3080 0 MED-1456 0.10323015331127756
PLAIN-3080 0 MED-3459 0.06025207631582006
PLAIN-3080 0 MED-3534 0.14013158399133196
PLAIN-3081 0 MED-5290 0.06572368155537715
PLAIN-3083 0 MED-816 0.040770435863170654
PLAIN-3083 0 MED-877 0.03524351968372071
PLAIN-3083 0 MED-928 0.10063780740386293
PLAIN-3083 0 MED-1009 0.04873367125960066
PLAIN-3083 0 MED-1111 0.03688897541235057
PLAIN-3083 0 MED-1445 0.0492984415145031

PLAIN-3124 0 MED-3749 0.0723017906296279
PLAIN-3125 0 MED-1192 0.07730884954361363
PLAIN-3125 0 MED-4782 0.031201544976081304
PLAIN-3126 0 MED-1349 0.10371361684029519
PLAIN-3126 0 MED-1696 0.05553094574500243
PLAIN-3127 0 MED-3925 0.09596959357547519
PLAIN-3128 0 MED-820 0.16382123916934552
PLAIN-3128 0 MED-4762 0.09130657770725398
PLAIN-3129 0 MED-833 0.44477613780436565
PLAIN-313 0 MED-877 0.10724241857794682
PLAIN-313 0 MED-880 0.05259982291178758
PLAIN-313 0 MED-1030 0.25052666261204903
PLAIN-313 0 MED-1204 0.04972758266154728
PLAIN-313 0 MED-1360 0.06309288602709252
PLAIN-313 0 MED-1516 0.12233659312872229
PLAIN-313 0 MED-1533 0.04732352377041211
PLAIN-313 0 MED-1616 0.11499515945108062
PLAIN-313 0 MED-1669 0.053510797946427414
PLAIN-313 0 MED-1683 0.06686869589300752
PLAIN-313 0 MED-1865 0.058511106647429464
PLAIN-313 0 MED-2228 0.047907449967747616
PLAIN-313 0 MED-2697 0.11439784825130737
PLAIN-313 0 MED-3087 0.02694589227487393
PLAIN-313 0 MED-3111 0.12535854606106944
PLAIN-31

PLAIN-317 0 MED-2888 0.04928112504707134
PLAIN-317 0 MED-2925 0.06202362661559604
PLAIN-317 0 MED-2945 0.28648802549256136
PLAIN-317 0 MED-3680 0.07805249064619689
PLAIN-3172 0 MED-1437 0.11335247561801537
PLAIN-3172 0 MED-1705 0.15579795969077587
PLAIN-3172 0 MED-2240 0.1251805826904815
PLAIN-3172 0 MED-2801 0.08227638661824407
PLAIN-3172 0 MED-2812 0.07592121677265981
PLAIN-3172 0 MED-2814 0.07583601737020722
PLAIN-3172 0 MED-2815 0.08515243541081483
PLAIN-3172 0 MED-2817 0.08483850185246308
PLAIN-3172 0 MED-4281 0.09366979648525464
PLAIN-3173 0 MED-3456 0.0626003494408556
PLAIN-3175 0 MED-1837 0.0826014907432611
PLAIN-3176 0 MED-2888 0.054537561898459944
PLAIN-3178 0 MED-1111 0.0755159393537383
PLAIN-3179 0 MED-2224 0.056750765668832796
PLAIN-3179 0 MED-3250 0.05387317763103978
PLAIN-3182 0 MED-825 0.061159171611303256
PLAIN-3182 0 MED-1533 0.15241313001287793
PLAIN-3182 0 MED-2379 0.1537996060317101
PLAIN-3182 0 MED-3896 0.1415200173034665
PLAIN-3185 0 MED-1360 0.06309288602709252


PLAIN-3286 0 MED-1696 0.044749368057177635
PLAIN-3286 0 MED-2224 0.1448095789575462
PLAIN-3286 0 MED-2225 0.10664102752772557
PLAIN-3286 0 MED-3476 0.056754623832719585
PLAIN-3286 0 MED-3477 0.057386491011551036
PLAIN-3286 0 MED-3680 0.04395227142265071
PLAIN-3286 0 MED-3920 0.13494375855012208
PLAIN-3286 0 MED-5041 0.11200751166823598
PLAIN-3286 0 MED-5151 0.14641044214610607
PLAIN-3287 0 MED-843 0.21626277747025244
PLAIN-3287 0 MED-885 0.050457432405718684
PLAIN-3287 0 MED-897 0.027023600969087926
PLAIN-3287 0 MED-928 0.018286062389901155
PLAIN-3287 0 MED-1046 0.1424090019116742
PLAIN-3287 0 MED-1198 0.08448441638333483
PLAIN-3287 0 MED-1203 0.18623189676511812
PLAIN-3287 0 MED-1473 0.06826610990540312
PLAIN-3287 0 MED-1491 0.07540063980550535
PLAIN-3287 0 MED-1494 0.09790044780811252
PLAIN-3287 0 MED-1669 0.025327873777522426
PLAIN-3287 0 MED-1671 0.0808295372513979
PLAIN-3287 0 MED-1710 0.027404457407656253
PLAIN-3287 0 MED-1841 0.23243492870508475
PLAIN-3287 0 MED-1868 0.032434355

PLAIN-3335 0 MED-2228 0.06327180512785487
PLAIN-3336 0 MED-1360 0.1095762695071762
PLAIN-3336 0 MED-2109 0.05604888507288111
PLAIN-3336 0 MED-3531 0.1483504130266623
PLAIN-3336 0 MED-3663 0.05646017205017089
PLAIN-3336 0 MED-4522 0.05732314377367003
PLAIN-3336 0 MED-4736 0.04428077195146859
PLAIN-3336 0 MED-5176 0.05731760942916103
PLAIN-3338 0 MED-1360 0.06309288602709252
PLAIN-3338 0 MED-1533 0.04732352377041211
PLAIN-3338 0 MED-1616 0.11499515945108062
PLAIN-3338 0 MED-2225 0.04975936936134207
PLAIN-3338 0 MED-3059 0.08875982548297437
PLAIN-3338 0 MED-3111 0.12535854606106944
PLAIN-3338 0 MED-3193 0.06179637660799173
PLAIN-3338 0 MED-3456 0.07342276847318202
PLAIN-3338 0 MED-3520 0.0491636747466931
PLAIN-3338 0 MED-3787 0.06179637660799173
PLAIN-3338 0 MED-4564 0.04917066244303185
PLAIN-3338 0 MED-4588 0.10915942989149828
PLAIN-3338 0 MED-5150 0.048781067840700916
PLAIN-3339 0 MED-1360 0.06309288602709252
PLAIN-3339 0 MED-1533 0.04732352377041211
PLAIN-3339 0 MED-1616 0.114995159451

PLAIN-3349 0 MED-1069 0.15653696685128096
PLAIN-3349 0 MED-2378 0.225921160694889
PLAIN-3349 0 MED-4736 0.05393350059472091
PLAIN-3354 0 MED-3893 0.07844105668831462
PLAIN-3356 0 MED-897 0.042037227439465655
PLAIN-3356 0 MED-2225 0.09464106865917238
PLAIN-3356 0 MED-3476 0.15110464636553023
PLAIN-3356 0 MED-4336 0.05030880778086325
PLAIN-3356 0 MED-4588 0.05190462234426701
PLAIN-3356 0 MED-4902 0.051582590883292576
PLAIN-3356 0 MED-5041 0.04970184012612102
PLAIN-3356 0 MED-5042 0.08982208198334146
PLAIN-3356 0 MED-5151 0.1299353637976423
PLAIN-3358 0 MED-3904 0.06766031087507106
PLAIN-336 0 MED-1669 0.053510797946427414
PLAIN-336 0 MED-3925 0.09596959357547519
PLAIN-3363 0 MED-2224 0.056750765668832796
PLAIN-3363 0 MED-3250 0.05387317763103978
PLAIN-3364 0 MED-2224 0.056750765668832796
PLAIN-3364 0 MED-3250 0.05387317763103978
PLAIN-3365 0 MED-2224 0.056750765668832796
PLAIN-3365 0 MED-3250 0.05387317763103978
PLAIN-3366 0 MED-2224 0.056750765668832796
PLAIN-3366 0 MED-3250 0.053873177

PLAIN-3434 0 MED-3530 0.06626247551985225
PLAIN-3435 0 MED-820 0.16382123916934552
PLAIN-3435 0 MED-3751 0.2199252484580415
PLAIN-3436 0 MED-821 0.0313580806531732
PLAIN-3436 0 MED-824 0.034661303367569986
PLAIN-3436 0 MED-829 0.053711099706087805
PLAIN-3436 0 MED-974 0.05373248440922122
PLAIN-3436 0 MED-991 0.04322934602641774
PLAIN-3436 0 MED-1015 0.01948173638555878
PLAIN-3436 0 MED-1215 0.01950728600273672
PLAIN-3436 0 MED-1224 0.020069167324130922
PLAIN-3436 0 MED-1227 0.03516691340197805
PLAIN-3436 0 MED-1328 0.026215518969211534
PLAIN-3436 0 MED-1394 0.026517449526983237
PLAIN-3436 0 MED-1410 0.0685819179454686
PLAIN-3436 0 MED-1415 0.028188353796321692
PLAIN-3436 0 MED-1431 0.13376323529339892
PLAIN-3436 0 MED-1435 0.10016961870157827
PLAIN-3436 0 MED-1438 0.05593240541714869
PLAIN-3436 0 MED-1439 0.1455554088490287
PLAIN-3436 0 MED-1614 0.029396685820249947
PLAIN-3436 0 MED-1619 0.2039873592667158
PLAIN-3436 0 MED-1656 0.025018274217323694
PLAIN-3436 0 MED-1657 0.0591563610305

PLAIN-351 0 MED-1300 0.051691120814951586
PLAIN-351 0 MED-1625 0.08816390773621863
PLAIN-351 0 MED-2040 0.05298305067220111
PLAIN-351 0 MED-3489 0.061860499833407454
PLAIN-351 0 MED-3534 0.048789843720361346
PLAIN-351 0 MED-4007 0.09351910983069443
PLAIN-351 0 MED-4150 0.24365850762271604
PLAIN-351 0 MED-4159 0.028366481759127007
PLAIN-351 0 MED-4520 0.08541996664539618
PLAIN-351 0 MED-4856 0.04698106586530002
PLAIN-351 0 MED-5062 0.04614161096742625
PLAIN-351 0 MED-5334 0.05215803701456786
PLAIN-356 0 MED-3904 0.05578145355077013
PLAIN-356 0 MED-5056 0.20323061528884515
PLAIN-36 0 MED-1360 0.06309288602709252
PLAIN-36 0 MED-1533 0.04732352377041211
PLAIN-36 0 MED-1616 0.11499515945108062
PLAIN-36 0 MED-1710 0.03691897106587326
PLAIN-36 0 MED-1866 0.10699423576450726
PLAIN-36 0 MED-1872 0.02904652722776342
PLAIN-36 0 MED-2378 0.04040084237463446
PLAIN-36 0 MED-3111 0.12535854606106944
PLAIN-36 0 MED-3247 0.04490487756223482
PLAIN-36 0 MED-5150 0.048781067840700916
PLAIN-361 0 MED-843 0

PLAIN-403 0 MED-2061 0.060481416725860375
PLAIN-403 0 MED-3904 0.05578145355077013
PLAIN-403 0 MED-4829 0.04732375344260318
PLAIN-403 0 MED-4856 0.04990392190983157
PLAIN-403 0 MED-5056 0.20323061528884515
PLAIN-403 0 MED-5148 0.04704993106322485
PLAIN-405 0 MED-2372 0.07721396338562381
PLAIN-405 0 MED-3680 0.04395227142265071
PLAIN-408 0 MED-745 0.04776948671935752
PLAIN-408 0 MED-825 0.04127371619073586
PLAIN-408 0 MED-880 0.04052073835208907
PLAIN-408 0 MED-885 0.04625705278165993
PLAIN-408 0 MED-1246 0.040859984654197774
PLAIN-408 0 MED-1342 0.03597054939688808
PLAIN-408 0 MED-1613 0.044977895385685224
PLAIN-408 0 MED-1869 0.0924207225243792
PLAIN-408 0 MED-1986 0.043444893266079095
PLAIN-408 0 MED-2225 0.03924462271122726
PLAIN-408 0 MED-2697 0.088127393219691
PLAIN-408 0 MED-3250 0.03372583695961693
PLAIN-408 0 MED-3255 0.05309622942507173
PLAIN-408 0 MED-3819 0.06097770109393025
PLAIN-408 0 MED-4347 0.08918934520622467
PLAIN-408 0 MED-4352 0.04857239632658813
PLAIN-408 0 MED-485

PLAIN-432 0 MED-878 0.05019990191688639
PLAIN-432 0 MED-880 0.5012122572363878
PLAIN-432 0 MED-1620 0.04854389933941758
PLAIN-432 0 MED-1638 0.4989918327317765
PLAIN-432 0 MED-1639 0.25909227664196505
PLAIN-432 0 MED-1640 0.2768654343826074
PLAIN-432 0 MED-1642 0.06403068207832963
PLAIN-432 0 MED-1648 0.14528429607314428
PLAIN-432 0 MED-1841 0.078109011162168
PLAIN-432 0 MED-3489 0.06012169965083874
PLAIN-432 0 MED-4774 0.5163505455200632
PLAIN-432 0 MED-4902 0.06685952792622468
PLAIN-432 0 MED-5248 0.18284164872947334
PLAIN-433 0 MED-330 0.04207686162992428
PLAIN-433 0 MED-745 0.14135888833327376
PLAIN-433 0 MED-820 0.03944081373644221
PLAIN-433 0 MED-833 0.05493041880981778
PLAIN-433 0 MED-843 0.055040760625388006
PLAIN-433 0 MED-866 0.03777934229806225
PLAIN-433 0 MED-867 0.040164214197518554
PLAIN-433 0 MED-878 0.04003223406404083
PLAIN-433 0 MED-880 0.03996949322863899
PLAIN-433 0 MED-912 0.05208403029962541
PLAIN-433 0 MED-918 0.03046037033217464
PLAIN-433 0 MED-928 0.02326981079

PLAIN-49 0 MED-1008 0.053109270624582865
PLAIN-49 0 MED-1009 0.1750951662124589
PLAIN-5 0 MED-3422 0.0978621883704891
PLAIN-53 0 MED-843 0.21626277747025244
PLAIN-53 0 MED-885 0.050457432405718684
PLAIN-53 0 MED-897 0.08580323979831633
PLAIN-53 0 MED-928 0.018286062389901155
PLAIN-53 0 MED-1046 0.1424090019116742
PLAIN-53 0 MED-1198 0.08448441638333483
PLAIN-53 0 MED-1203 0.18623189676511812
PLAIN-53 0 MED-1473 0.06826610990540312
PLAIN-53 0 MED-1491 0.07540063980550535
PLAIN-53 0 MED-1494 0.09790044780811252
PLAIN-53 0 MED-1669 0.025327873777522426
PLAIN-53 0 MED-1671 0.0808295372513979
PLAIN-53 0 MED-1710 0.027404457407656253
PLAIN-53 0 MED-1841 0.23243492870508475
PLAIN-53 0 MED-1868 0.03243435557670841
PLAIN-53 0 MED-2378 0.05997800762722398
PLAIN-53 0 MED-2586 0.04903964496390656
PLAIN-53 0 MED-2697 0.034155414883618916
PLAIN-53 0 MED-2945 0.03541319525372321
PLAIN-53 0 MED-3455 0.07005852195855836
PLAIN-53 0 MED-3466 0.09801333263389655
PLAIN-53 0 MED-3476 0.09713751159819915
PLA

PLAIN-80 0 MED-2076 0.3349737408108477
PLAIN-80 0 MED-3422 0.0978621883704891
PLAIN-82 0 MED-5025 0.2718262173167027
PLAIN-84 0 MED-4782 0.09593031432110179
PLAIN-85 0 MED-884 0.04969257514556273
PLAIN-85 0 MED-1447 0.11817220580820516
PLAIN-85 0 MED-1685 0.057042306281712384
PLAIN-85 0 MED-2011 0.1161101722815622
PLAIN-85 0 MED-2108 0.057620152675702904
PLAIN-85 0 MED-2201 0.047072445679494306
PLAIN-85 0 MED-2294 0.0500620444287085
PLAIN-85 0 MED-2901 0.37736356688811784
PLAIN-85 0 MED-3215 0.10550012189816652
PLAIN-85 0 MED-3216 0.4861824915875047
PLAIN-85 0 MED-3220 0.05260126328503092
PLAIN-85 0 MED-3227 0.4202182000598841
PLAIN-85 0 MED-3229 0.36725064723544815
PLAIN-85 0 MED-3235 0.05260126328503092
PLAIN-85 0 MED-3900 0.04610064388600115
PLAIN-85 0 MED-4104 0.03924273732279929
PLAIN-85 0 MED-4163 0.09699062666083628
PLAIN-85 0 MED-4298 0.058118689919697805
PLAIN-85 0 MED-4313 0.045150227339603236
PLAIN-85 0 MED-4469 0.13200762554699333
PLAIN-85 0 MED-4682 0.4257831161775292
PLAI

In [56]:
################# Don't Run This #####################

# test the IR on the whole query file,cosine similarity
#t1 =  datetime.datetime.now()
#IR_results = IRqueryByLeaders(leaderIndex, train_doc_file, doc_clustering, train_query_file, query_vect )
#t2 =  datetime.datetime.now()
#t = t2-t1
#print(IR_results)
#print("running time:", t )

PLAIN-103 0 MED-923 0.013800359307706437
PLAIN-103 0 MED-1924 0.040803690906507505
PLAIN-103 0 MED-1941 0.017226203311271726
PLAIN-103 0 MED-3279 0.015230033641484383
PLAIN-103 0 MED-3949 0.01754612144794677
PLAIN-103 0 MED-5120 0.028419632590014274
PLAIN-107 0 MED-2777 0.022424543374402913
PLAIN-114 0 MED-988 0.03259181497510262
PLAIN-114 0 MED-1784 0.06025351114573294
PLAIN-114 0 MED-3161 0.04839545963402858
PLAIN-114 0 MED-3452 0.03478144919797345
PLAIN-114 0 MED-3531 0.041643514779335686
PLAIN-114 0 MED-4271 0.020371893655253746
PLAIN-114 0 MED-4522 0.032090979918216925
PLAIN-114 0 MED-4698 0.04667312311130312
PLAIN-114 0 MED-5365 0.03902755223048856
PLAIN-115 0 MED-758 0.08859839449209364
PLAIN-115 0 MED-1375 0.0706649646467008
PLAIN-115 0 MED-1377 0.08978640466395812
PLAIN-115 0 MED-1387 0.13162315919862985
PLAIN-115 0 MED-1388 0.11682892234269779
PLAIN-115 0 MED-1405 0.09256090137005252
PLAIN-115 0 MED-1406 0.045777315656227294
PLAIN-115 0 MED-1527 0.13107194488783008
PLAIN-115 

KeyboardInterrupt: 

In [None]:
#simsList = []
#for i in range(len(IR_results)):
    #simsList.append(IR_results[i][3])
    
#print(max(simsList), min(simsList),sum(simsList)/len(simsList) )  

In [44]:
################# Don't Run This #####################
#train_qrel_file = pd.read_csv("train.2-1-0.qrel", encoding = 'utf-8', sep='\t', header=None)
#train_qrel_file

Unnamed: 0,0,1,2,3
0,PLAIN-3,0,MED-2436,2
1,PLAIN-3,0,MED-2437,2
2,PLAIN-3,0,MED-2438,2
3,PLAIN-3,0,MED-2439,2
4,PLAIN-3,0,MED-2440,2
5,PLAIN-3,0,MED-2427,1
6,PLAIN-3,0,MED-2428,1
7,PLAIN-3,0,MED-2429,1
8,PLAIN-3,0,MED-2430,1
9,PLAIN-3,0,MED-2431,1


In [45]:
################# Don't Run This #####################
#dev_qrel_file = pd.read_csv("dev.2-1-0.qrel", encoding = 'utf-8', sep='\t', header=None)
#dev_qrel_file

Unnamed: 0,0,1,2,3
0,PLAIN-1,0,MED-2421,2
1,PLAIN-1,0,MED-2422,2
2,PLAIN-1,0,MED-2416,2
3,PLAIN-1,0,MED-2423,2
4,PLAIN-1,0,MED-2417,2
5,PLAIN-1,0,MED-2418,2
6,PLAIN-1,0,MED-4451,2
7,PLAIN-1,0,MED-2420,2
8,PLAIN-1,0,MED-2414,1
9,PLAIN-1,0,MED-4070,1


In [46]:
################# Don't Run This #####################
#test_qrel_file = pd.read_csv("test.2-1-0.qrel", encoding = 'utf-8', sep='\t', header=None)
#test_qrel_file

Unnamed: 0,0,1,2,3
0,PLAIN-2,0,MED-2427,2
1,PLAIN-2,0,MED-10,2
2,PLAIN-2,0,MED-2429,2
3,PLAIN-2,0,MED-2430,2
4,PLAIN-2,0,MED-2431,2
5,PLAIN-2,0,MED-14,2
6,PLAIN-2,0,MED-2432,2
7,PLAIN-2,0,MED-2428,1
8,PLAIN-2,0,MED-2440,1
9,PLAIN-2,0,MED-2434,1


In [49]:
#qrel_total = train_qrel_file.append(dev_qrel_file)
#qrel_total = qrel_total.append(test_qrel_file)
#qrel_total

Unnamed: 0,0,1,2,3
0,PLAIN-3,0,MED-2436,2
1,PLAIN-3,0,MED-2437,2
2,PLAIN-3,0,MED-2438,2
3,PLAIN-3,0,MED-2439,2
4,PLAIN-3,0,MED-2440,2
5,PLAIN-3,0,MED-2427,1
6,PLAIN-3,0,MED-2428,1
7,PLAIN-3,0,MED-2429,1
8,PLAIN-3,0,MED-2430,1
9,PLAIN-3,0,MED-2431,1


In [52]:
################# Don't Run This #####################
#qrel_total.to_csv('qrel_total.txt', header=None, index=None, sep=' ', mode='a')