In [284]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [285]:
pip install python-terrier==0.10.0 nltk scikit-learn lightgbm xgboost fastrank



In [286]:
!pip install --upgrade git+https://github.com/Georgetown-IR-Lab/OpenNIR

Collecting git+https://github.com/Georgetown-IR-Lab/OpenNIR
  Cloning https://github.com/Georgetown-IR-Lab/OpenNIR to /tmp/pip-req-build-snuo8j4v
  Running command git clone --filter=blob:none --quiet https://github.com/Georgetown-IR-Lab/OpenNIR /tmp/pip-req-build-snuo8j4v
  Resolved https://github.com/Georgetown-IR-Lab/OpenNIR to commit 88a4679372f471a04d284a99404ffce2b7a1dc49
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [287]:
import pandas as pd
import json
import pyterrier as pt
import random

In [288]:
#UNCOMMENT THE FOLLOWING LINE TO USE EITHER THE TVR DATASET OR THE QVH DATASET

dataset_choice = "TVR"
# dataset_choice = "QVH"

In [289]:
# Paths to JSONL files on Colab
if dataset_choice == "TVR":
    jsonl_train_path = '/content/drive/MyDrive/IR/text_data/tvr_train_release.jsonl'
    jsonl_val_path = '/content/drive/MyDrive/IR/text_data/tvr_val_release.jsonl'
    subs_path = '/content/drive/MyDrive/IR/text_data/tvqa_preprocessed_subtitles.jsonl'

elif dataset_choice == "QVH":
    jsonl_train_path = "/content/drive/MyDrive/IR/text_data_QVH/highlight_train_release.jsonl"
    subs_path = "/content/drive/MyDrive/IR/text_data_QVH/subs_train.jsonl"

In [290]:
# Paths to JSONL files
# if dataset_choice == "TVR":
#     jsonl_train_path = 'text_data/tvr_train_release.jsonl'
#     jsonl_val_path = 'text_data/tvr_val_release.jsonl'
#     subs_path = 'text_data/tvqa_preprocessed_subtitles.jsonl'

# elif dataset_choice == "QVH":
#     jsonl_train_path = "text_data_QVH/highlight_train_release.jsonl"
#     subs_path = "text_data_QVH/subs_train.jsonl"

In [291]:
# Load subtitles into a dictionary for quick access
subtitles_dict = {}
if dataset_choice == "TVR":
    with open(subs_path, 'r') as subs_file:
        for line in subs_file:
            sub_data = json.loads(line)
            subtitles_dict[sub_data['vid_name']] = sub_data['sub']
elif dataset_choice == "QVH":
    with open(subs_path, 'r') as subs_file:
        for line in subs_file:
            sub_data = json.loads(line)
            triple = sub_data['vid'].split("_")
            name = triple[0:-2]
            #turn the list name into a string
            name = "".join(name)
            if name not in subtitles_dict:
                subtitles_dict[name] = [(float(triple[-2]) + sub_data["relevant_windows"][0][0], float(triple[-2]) + sub_data["relevant_windows"][0][1], sub_data['query'])]
            else:
                subtitles_dict[name].append((float(triple[-2]) + sub_data["relevant_windows"][0][0], float(triple[-2]) + sub_data["relevant_windows"][0][1], sub_data['query']))

In [292]:
# Function to find matching subtitles in TVR case
def find_matching_subtitles(vid_name, ts_range, subtitles_dict):
    matching_subs = []
    if vid_name in subtitles_dict:
        for subtitle in subtitles_dict[vid_name]:
            if (ts_range[0] <= subtitle['start'] <= ts_range[1]) or (ts_range[0] <= subtitle['end'] <= ts_range[1]) or (subtitle['start'] <= ts_range[0] and subtitle['end'] >= ts_range[1]):
                matching_subs.append(subtitle['text'])
    return matching_subs

In [293]:
def parse_jsonl_TVR(jsonl_path, split_type):
    # Initialize empty lists for your data
    queries_data = []
    documents_data = []
    query_rankings_data = []

    with open(jsonl_path, 'r') as file:
        for idx, line in enumerate(file):
            data = json.loads(line)
            # drop non text-based queries
            if data['type'] not in ['t', 'vt']:
                continue

            # Find matching subtitles
            matching_subs = find_matching_subtitles(data['vid_name'], data['ts'], subtitles_dict)

            if matching_subs == []:
                continue

            # Extract data for the Query Set DataFrame
            queries_data.append({'qid': str(data['desc_id']), 'query': data['desc']})

            # Extract data for the Documents Set DataFrame, including matching subtitles
            documents_data.append({'docno': split_type + str(idx), 'vid_name': data['vid_name'], 'ts': data['ts'],
                                'duration': data['duration'], 'type': data['type'], 'text': "".join(matching_subs)})

            # Extract data for the Query Rankings DataFrame
            query_rankings_data.append({'qid': str(data["desc_id"]), 'query': data['desc'], 'docno': split_type + str(idx), 'rank': 1, 'score': 1.0})

    return queries_data, documents_data, query_rankings_data

def parse_jsonl_QVH(jsonl_path):
    queries_data = []
    documents_data = []
    query_rankings_data = []
    with open(jsonl_path, 'r') as file:
        for idx,line in enumerate(file):

            # Load the JSON object from the line
            data = json.loads(line)

            triple = data["vid"].split("_")
            document_name = triple[0:-2]
            document_name = "".join(document_name)
            start_time = float(triple[-2])
            end_time = float(triple[-1])

            if document_name not in subtitles_dict:
                #print("Document not found in subtitles: ", document_name)
                continue

            momentaneus_rank =[]
            count = 0
            for id,relevant_window in enumerate(data["relevant_windows"]):
                ts = [start_time+relevant_window[0], start_time+relevant_window[1]]
                subs = [sub for sub in subtitles_dict[document_name] if sub[0] <= ts[1] and ts[0] <= sub[1]]
                if len(subs) == 0:
                    #print("No subtitles found for ", document_name, " at time ", ts)
                    continue
                count += 1
                documents_data.append({"docno" : str(idx) +"_"+str(ts[0]) + "_" + str(ts[1]), "vid_name" : document_name, "ts": ts, "duration": data["duration"], "text": "".join([sub[2] for sub in subs])})
                scores = [data["saliency_scores"][i]  for i,clip_id in enumerate(data["relevant_clip_ids"]) if clip_id*2 >= relevant_window[0] and clip_id*2 <= relevant_window[1]]
                if len(scores) == 0:
                    #print("No scores found for ", document_name, " at time ", ts)
                    continue
                #each entry of scores is a triple of integers. Create a variable score which is the average of all the scores
                score = 0 if len(scores) ==0 else sum(sum(scores[i]) for i in range(len(scores)))/(3*len(scores))

                momentaneus_rank.append({"qid" : str(data["qid"]), "query": data["query"] , "docno" : str(idx) +"_"+str(ts[0]) + "_" + str(ts[1]), "score": score, "rank":1})

            if count == 0:
                #print("No relevant windows found for ", document_name)
                continue
            #adjust the rank of the momentaneus_rank based on the score
            momentaneus_rank = sorted(momentaneus_rank, key=lambda x: x["score"], reverse=True)
            for i in range(len(momentaneus_rank)):
                momentaneus_rank[i]["rank"] = i+1
            queries_data.append({"qid" : str(data["qid"]), "query": data["query"]})
            query_rankings_data.extend(momentaneus_rank)

    return queries_data, documents_data, query_rankings_data

In [294]:

if dataset_choice == "TVR":
    queries_data_train, documents_data_train, query_rankings_data_train = parse_jsonl_TVR(jsonl_train_path, "t")
    queries_data_val, documents_data_val, query_rankings_data_val = parse_jsonl_TVR(jsonl_val_path, "v")
    #have to create a test set; to do it, extract a random 10% of the train set
    random.seed(42)
    query_rankings_data_test = random.sample(query_rankings_data_train, int(len(query_rankings_data_train)*0.1))
    query_rankings_data_train = [query for query in query_rankings_data_train if query not in query_rankings_data_test]
    queries_data_test = [query for query in queries_data_train if query["qid"] in [query["qid"] for query in query_rankings_data_test]]
    queries_data_train = [query for query in queries_data_train if query not in queries_data_test]
    documents_data_test = [doc for doc in documents_data_train if doc["docno"] in [query["docno"] for query in query_rankings_data_test]]
    documents_data_train = [doc for doc in documents_data_train if doc not in documents_data_test]

elif dataset_choice == "QVH":
    queries_data_train, documents_data_train, query_rankings_data_train = parse_jsonl_QVH(jsonl_train_path)
    #have to create a val set; to do it, extract a random 10% of the train set
    random.seed(42)
    query_rankings_data_val = random.sample(query_rankings_data_train, int(len(query_rankings_data_train)*0.1))
    query_rankings_data_train = [query for query in query_rankings_data_train if query not in query_rankings_data_val]
    queries_data_val = [query for query in queries_data_train if query["qid"] in [query["qid"] for query in query_rankings_data_val]]
    queries_data_train = [query for query in queries_data_train if query not in queries_data_val]
    documents_data_val = [doc for doc in documents_data_train if doc["docno"] in [query["docno"] for query in query_rankings_data_val]]
    documents_data_train = [doc for doc in documents_data_train if doc not in documents_data_val]
    #have to create a test set; to do it, extract a random 10% of the train set
    random.seed(42)
    query_rankings_data_test = random.sample(query_rankings_data_train, int(len(query_rankings_data_train)*0.1))
    query_rankings_data_train = [query for query in query_rankings_data_train if query not in query_rankings_data_test]
    queries_data_test = [query for query in queries_data_train if query["qid"] in [query["qid"] for query in query_rankings_data_test]]
    queries_data_train = [query for query in queries_data_train if query not in queries_data_test]
    documents_data_test = [doc for doc in documents_data_train if doc["docno"] in [query["docno"] for query in query_rankings_data_test]]
    documents_data_train = [doc for doc in documents_data_train if doc not in documents_data_test]



In [295]:
# Create DataFrames for the Query Set, Documents Set, and Query Rankings
queries_train_df = pd.DataFrame(queries_data_train)
documents_train_df = pd.DataFrame(documents_data_train)

queries_val_df = pd.DataFrame(queries_data_val)
documents_val_df = pd.DataFrame(documents_data_val)

queries_test_df = pd.DataFrame(queries_data_test)
documents_test_df = pd.DataFrame(documents_data_test)

q_rels = pd.concat([pd.DataFrame(query_rankings_data_train), pd.DataFrame(query_rankings_data_val), pd.DataFrame(query_rankings_data_test)]).reset_index(drop=True)

#print length of the dataframes
print("Train set:")
print("Queries: ", len(queries_train_df))
print("Documents: ", len(documents_train_df))

print("Val set:")
print("Queries: ", len(queries_val_df))
print("Documents: ", len(documents_val_df))

print("Test set:")
print("Queries: ", len(queries_test_df))
print("Documents: ", len(documents_test_df))

print("Query Rankings: ", len(q_rels))



Train set:
Queries:  20062
Documents:  20062
Val set:
Queries:  2775
Documents:  2775
Test set:
Queries:  2229
Documents:  2229
Query Rankings:  25066


### First Stage Retrieval [TODO: BOX]
The following part of the code will define three different first stage retrieval pipelines as an input for the trained model.

In [296]:
if not pt.started():
    pt.init()

In [297]:
# Create an index
from pathlib import Path

indexer = pt.IterDictIndexer(
    "./index_path/",
    meta={
        "docno": 64,
        "vid_name": 64,
        "text": 131072,
    },
    stemmer="porter",
    stopwords="terrier",
    overwrite=True,
    # type=pt.index.IndexingType.MEMORY,
)

In [298]:
joint_documents_set_df = pd.concat([documents_train_df, documents_val_df, documents_test_df])

print("Length: ", len(joint_documents_set_df))

Length:  25066


In [299]:
indexed = indexer.index(
    joint_documents_set_df.to_dict(orient="records")
)

16:45:19.902 [ForkJoinPool-11-worker-3] WARN org.terrier.structures.indexing.Indexer - Indexed 38 empty documents


In [300]:
#These are some examples, not necessarily the best ones. Experiment with different models

# Initialize BatchRetrieve with the created index and specify BM25 as the weighting model
first_stage_bm25 = pt.BatchRetrieve(
    indexed,
    wmodel="BM25",
    num_results=5,
    metadata=["docno", "vid_name", "text"]
)

# Initialize BatchRetrieve with the created index and specify LemurTF_IDF as the weighting model
first_stage_lemurtfidf = pt.BatchRetrieve(
    indexed,
    wmodel="LemurTF_IDF",
    num_results=5,
    metadata=["docno", "vid_name", "text"]
)

# Initialize BatchRetrieve with the created index and specify Hiemstra_LM as the weighting model
first_stage_hiemstra_lm = pt.BatchRetrieve(
    indexed,
    wmodel="Hiemstra_LM",
    num_results=5,
    metadata=["docno", "vid_name", "text"]
)


# Computing feature
The weighting model can be use in [pt.weighting_model.package: http://terrier.org/docs/current/javadoc/org/terrier/matching/models/package-summary.html](http://terrier.org/docs/current/javadoc/org/terrier/matching/models/package-summary.html)

In [301]:
#We create features for the second stage using the first stage retrievers

#TF-IDF based features
lemur_tf_idf_retriever = pt.BatchRetrieve(indexed, wmodel="LemurTF_IDF")
bm25_retriever = pt.BatchRetrieve(indexed, wmodel="BM25")
tf_idf_retriever = pt.BatchRetrieve(indexed, wmodel="TF_IDF")

#Language model based features
hiem_retriever = pt.BatchRetrieve(indexed, wmodel="Hiemstra_LM")
dirichlet_retriever = pt.BatchRetrieve(indexed, wmodel="DirichletLM")

#Divergence from randomness based features
pl2_retriever = pt.BatchRetrieve(indexed, wmodel="PL2")
dph_retriever = pt.BatchRetrieve(indexed, wmodel="DPH")
dlh_retriever = pt.BatchRetrieve(indexed, wmodel="DLH")

#Can add more!


In [302]:
# PIPELINES WITH FSR AND FEATURES

#Can test many possibilities, i just put some examples here

bm25_pipeline = ~first_stage_bm25 >> (
   pl2_retriever ** dph_retriever ** tf_idf_retriever
)


lemurtf_idf_pipeline = ~first_stage_lemurtfidf >> (
    pl2_retriever ** dph_retriever ** hiem_retriever
)

hiem_lm_pipeline = ~hiem_retriever >> (
    pl2_retriever ** dph_retriever ** tf_idf_retriever
)

#NOTE: i believe we should not use the same first stage retriever for features AND for the first stage retrieval

In [303]:
# Prepare the queries for the pipeline, remove special characters and extra spaces
prepared_trainqueries = queries_train_df
prepared_trainqueries['query'] = prepared_trainqueries['query'].str.replace('[\'"?!]', ' ', regex=True)
prepared_trainqueries['query'] = prepared_trainqueries['query'].str.replace(r'[^\w\s]', ' ', regex=True)
prepared_trainqueries['query'] = prepared_trainqueries['query'].str.replace(r'\s+', ' ', regex=True).str.strip()

prepared_train_qrels = pd.DataFrame(query_rankings_data_train)
prepared_train_qrels['query'] = prepared_train_qrels['query'].str.replace('[\'"?!]', ' ', regex=True)
prepared_train_qrels['query'] = prepared_train_qrels['query'].str.replace(r'[^\w\s]', ' ', regex=True)
prepared_train_qrels['query'] = prepared_train_qrels['query'].str.replace(r'\s+', ' ', regex=True).str.strip()

prepared_train_qrels['label'] = prepared_train_qrels['score']

prepared_train_qrels['label'] = prepared_train_qrels['label'].astype(int)

prepared_val_qrels = pd.DataFrame(query_rankings_data_val)
prepared_val_qrels['query'] = prepared_val_qrels['query'].str.replace('[\'"?!]', ' ', regex=True)
prepared_val_qrels['query'] = prepared_val_qrels['query'].str.replace(r'[^\w\s]', ' ', regex=True)
prepared_val_qrels['query'] = prepared_val_qrels['query'].str.replace(r'\s+', ' ', regex=True).str.strip()

prepared_val_qrels['label'] = prepared_val_qrels['score']

prepared_val_qrels['label'] = prepared_val_qrels['label'].astype(int)

prepared_test_qrels = pd.DataFrame(query_rankings_data_test)
prepared_test_qrels['query'] = prepared_test_qrels['query'].str.replace('[\'"?!]', ' ', regex=True)
prepared_test_qrels['query'] = prepared_test_qrels['query'].str.replace(r'[^\w\s]', ' ', regex=True)
prepared_test_qrels['query'] = prepared_test_qrels['query'].str.replace(r'\s+', ' ', regex=True).str.strip()

prepared_test_qrels['label'] = prepared_test_qrels['score']

prepared_test_qrels['label'] = prepared_test_qrels['label'].astype(int)

prepared_qrels = q_rels
prepared_qrels['query'] = prepared_qrels['query'].str.replace('[\'"?!]', ' ', regex=True)
prepared_qrels['query'] = prepared_qrels['query'].str.replace(r'[^\w\s]', ' ', regex=True)
prepared_qrels['query'] = prepared_qrels['query'].str.replace(r'\s+', ' ', regex=True).str.strip()

prepared_qrels['label'] = prepared_qrels['score']

prepared_qrels['label'] = prepared_qrels['label'].astype(int)

prepared_valqueries = queries_val_df.reset_index()
prepared_valqueries['query'] = prepared_valqueries['query'].str.replace('[\'"?!]', ' ', regex=True)
prepared_valqueries['query'] = prepared_valqueries['query'].str.replace(r'[^\w\s]', ' ', regex=True)

prepared_valqueries['query'] = prepared_valqueries['query'].str.replace(r'\s+', ' ', regex=True).str.strip()

#test set
prepared_testqueries = queries_test_df.reset_index()
prepared_testqueries['query'] = prepared_testqueries['query'].str.replace('[\'"?!]', ' ', regex=True)
prepared_testqueries['query'] = prepared_testqueries['query'].str.replace(r'[^\w\s]', ' ', regex=True)
prepared_testqueries['query'] = prepared_testqueries['query'].str.replace(r'\s+', ' ', regex=True).str.strip()


Storage format: .csv or trec

trec(https://pyterrier.readthedocs.io/en/latest/io.html): The pt io format, but it doesn't contain feature.

In [304]:
from sklearn.svm import SVR
import xgboost as xgb
import fastrank

index = pt.IndexFactory.of("./index_path")

fsr_pipelines = [
    # {
    #     'pipe': lemurtf_idf_pipeline,
    #     'name': 'LemurTF_IDF'
    # },
    {
        'pipe': bm25_pipeline,
        'name': 'BM25'
    },
    # {
    #     'pipe': hiem_lm_pipeline,
    #     'name': 'Hiemstra LM'
    # }
]

learned_models = [
    {
        'model': SVR(),
        'form': 'reg',
        'name': 'SVR',
    },
    {
        'model': xgb.XGBRanker(tree_method="hist", objective="rank:ndcg"),
        'form': 'ltr',
        'name': 'XGBoost (NDCG)',
     },
    # {
    #     'model': xgb.XGBRanker(tree_method="hist", objective="rank:map"),
    #     'form': 'ltr',
    #     'name': 'XGBoost (MAP)',
    # },
    {
        'model': fastrank.TrainRequest.coordinate_ascent(),
        'form': 'fastrank',
        'name': 'FastRank Coordinate Ascent',
    },
    {
        'model': fastrank.TrainRequest.random_forest(),
        'form': 'fastrank',
        'name': 'FastRank Random Forest',
    }
  ]

In [305]:
from pyterrier.measures import nDCG, RR, MAP

trained_models = [first_stage_bm25, first_stage_lemurtfidf, first_stage_hiemstra_lm]
names = ['BM25', 'LemurTF_IDF', 'Hiemstra LM']
eval_metrics = [
    nDCG @ 1, nDCG @ 3, nDCG @ 5, nDCG @ 10,
    RR @ 1,   RR @ 3,   RR @ 5,   RR @ 10,
    MAP
]

for fsr in fsr_pipelines:
    for model in learned_models:
        names.append(f"{fsr['name']} >> {model['name']}")
        print(names[-1])
        if 'form' in model:
          pipe = fsr['pipe'] >> pt.ltr.apply_learned_model(model['model'], form=model['form'])
        else:
          pipe = fsr['pipe'] >> pt.ltr.apply_learned_model(model['model'])
        pipe.fit(
            prepared_trainqueries,
            prepared_train_qrels,
            prepared_valqueries,
            prepared_val_qrels
        )
        trained_models.append(pipe)

BM25 >> SVR
BM25 >> XGBoost (NDCG)
BM25 >> FastRank Coordinate Ascent
BM25 >> FastRank Random Forest


In [306]:
from pyterrier.measures import nDCG, RR, MAP

pt.Experiment(
    trained_models,
    prepared_testqueries,
    prepared_test_qrels,
    names=names,
    eval_metrics=eval_metrics,
)

Unnamed: 0,name,nDCG@1,nDCG@3,nDCG@5,nDCG@10,RR@1,RR@3,RR@5,RR@10,AP
0,BM25,0.148497,0.189519,0.207494,0.207494,0.149843,0.180051,0.189921,0.189921,0.189233
1,LemurTF_IDF,0.122925,0.164092,0.182974,0.182974,0.12472,0.154628,0.165014,0.165014,0.16393
2,Hiemstra LM,0.133244,0.171563,0.187259,0.187259,0.13459,0.162629,0.171467,0.171467,0.170734
3,BM25 >> SVR,0.05563,0.090639,0.113375,0.113375,0.05563,0.081726,0.093929,0.093929,0.093831
4,BM25 >> XGBoost (NDCG),0.089278,0.120767,0.133201,0.133201,0.091072,0.113728,0.120772,0.120772,0.119837
5,BM25 >> FastRank Coordinate Ascent,0.093764,0.125087,0.135881,0.135881,0.09511,0.117915,0.123972,0.123972,0.123351
6,BM25 >> FastRank Random Forest,0.086586,0.115586,0.130525,0.130525,0.087932,0.108943,0.117108,0.117108,0.116435


In [307]:
from pyterrier.measures import nDCG, RR, MAP

pt.Experiment(
    trained_models,
    prepared_valqueries,
    prepared_val_qrels,
    names=names,
    eval_metrics=eval_metrics,
)

Unnamed: 0,name,nDCG@1,nDCG@3,nDCG@5,nDCG@10,RR@1,RR@3,RR@5,RR@10,AP
0,BM25,0.134414,0.188128,0.201667,0.201667,0.134414,0.174114,0.18197,0.18197,0.182246
1,LemurTF_IDF,0.123604,0.169158,0.184775,0.184775,0.123964,0.157538,0.166348,0.166348,0.166462
2,Hiemstra LM,0.128649,0.171548,0.18597,0.18597,0.128649,0.160781,0.168871,0.168871,0.169099
3,BM25 >> SVR,0.056937,0.088639,0.112234,0.112234,0.055856,0.07976,0.092553,0.092553,0.093189
4,BM25 >> XGBoost (NDCG),0.08973,0.122453,0.132625,0.132625,0.08973,0.113634,0.119598,0.119598,0.119838
5,BM25 >> FastRank Coordinate Ascent,0.096937,0.125671,0.135766,0.135766,0.096937,0.117838,0.123874,0.123874,0.124042
6,BM25 >> FastRank Random Forest,0.083243,0.1161,0.128689,0.128689,0.083243,0.107387,0.114505,0.114505,0.114691


In [308]:
from pyterrier.measures import nDCG, RR, MAP

pt.Experiment(
    trained_models,
    prepared_trainqueries[:1000],
    prepared_train_qrels[:1000],
    names=names,
    eval_metrics=eval_metrics,
)

Unnamed: 0,name,nDCG@1,nDCG@3,nDCG@5,nDCG@10,RR@1,RR@3,RR@5,RR@10,AP
0,BM25,0.138,0.183701,0.195964,0.195964,0.138,0.173333,0.179833,0.179833,0.179583
1,LemurTF_IDF,0.121,0.162892,0.17787,0.17787,0.122,0.153833,0.161683,0.161683,0.1609
2,Hiemstra LM,0.128,0.168523,0.186895,0.186895,0.13,0.160167,0.170117,0.170117,0.168767
3,BM25 >> SVR,0.061,0.093428,0.112574,0.112574,0.061,0.085667,0.095667,0.095667,0.0955
4,BM25 >> XGBoost (NDCG),0.14,0.151833,0.153986,0.153986,0.142,0.15,0.15125,0.15125,0.15025
5,BM25 >> FastRank Coordinate Ascent,0.111,0.134011,0.139822,0.139822,0.111,0.1285,0.13175,0.13175,0.131417
6,BM25 >> FastRank Random Forest,0.093,0.123213,0.131863,0.131863,0.096,0.1175,0.1223,0.1223,0.1208
