# CSX Re-ranking

This notebook demonstrates how to re-rank search results using the CSX Elasticsearch (ES) index.

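It walks through each step of the re-ranking process:

 1. Embed the search query (`get_embeddings`).
 2. Query the document index (`get_es_results`).
 3. Query the embedding index with k-NN search (`get_knn_results`).
 4. Merge the two result lists with reciprocal rank fusion (`reciprocal_rank_fusion`).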

## Port-forward Elasticsearch to localhost

```
ssh -N -f -L localhost:9200:localhost:9200 csxesm03.ist.psu.edu
```

## Run the embedding service

```
python api/query_embedding_service.py
```

In [3]:
# Pin the visible GPU *before* importing torch / transformers / utils.
# CUDA reads CUDA_VISIBLE_DEVICES when it is first initialised, so the variable
# must be set before any import that might touch the GPU; setting it afterwards
# can be silently ignored.
import os
os.environ["CUDA_VISIBLE_DEVICES"]="2"

import pandas as pd
from elasticsearch import Elasticsearch
from transformers import BertAdapterModel
from transformers import AutoTokenizer, AutoModel
import torch
from tqdm import tqdm
# Project helpers used by the retrieval / fusion cells below.
# NOTE(review): the `es` name imported here is rebound by the next cell, which
# creates its own client — confirm which client the utils helpers actually use.
from utils import get_embeddings, get_es_results, get_knn_results, reciprocal_rank_fusion, retrieve_docs, es

# import bulk helper from elasticsearch
from elasticsearch.helpers import bulk

In [4]:
# Create the notebook's Elasticsearch client.
# The address is passed explicitly (the SSH tunnel described above exposes the
# cluster on localhost:9200): elasticsearch-py 8.x raises ValueError when no
# host is given, whereas 7.x silently defaulted to this same address.
# NOTE(review): this rebinds the `es` name imported from utils in the notebook
# namespace only; the utils helpers presumably keep using their own client —
# confirm that both point at the same cluster.
es = Elasticsearch("http://localhost:9200")

# ping() reports reachability as a boolean, so report the result either way.
if es.ping():
    print("Connected to Elasticsearch")
else:
    print("Could not connect to Elasticsearch")

Connected to Elasticsearch


In [8]:
# --- Indices queried by the retrieval cells below ---
# Passed to get_es_results.
INDEX_NAME = "csx_citeseer_docs_old_pubinfo"
# Passed to get_knn_results.
EMBEDDING_INDEX_NAME = "csx_citeseer_docs_old_pubinfo_embeddings"

# --- Query parameters ---
# Free-text query that is both embedded and sent to the document index.
SEARCH_QUERY = "machine learning"
# Third argument of both retrieval calls
# (presumably the number of hits requested from each index — confirm in utils).
K = 500

In [9]:
# Embed the search query: get_embeddings (from utils) is given a one-element
# list holding SEARCH_QUERY.
# NOTE(review): presumably requires the embedding service started above to be
# running — confirm in utils.
query_embedding = get_embeddings([SEARCH_QUERY])

In [10]:
# Query the document index INDEX_NAME with the raw query text via
# get_es_results (from utils); K is passed as the third argument
# (presumably the maximum number of hits — confirm in utils).
es_results = get_es_results(INDEX_NAME, SEARCH_QUERY, K)

In [11]:
# Query the embedding index EMBEDDING_INDEX_NAME with the query embedding via
# get_knn_results (from utils); K is passed as the third argument
# (presumably the number of nearest neighbours — confirm in utils).
knn_results = get_knn_results(EMBEDDING_INDEX_NAME, query_embedding, K)

In [12]:
# Combine the two result lists with reciprocal_rank_fusion (from utils).
# The saved output below shows each fused entry carrying `_id`, `_score` and
# `_score_normalized` keys.
fused_list = reciprocal_rank_fusion(es_results, knn_results)


In [15]:
# Preview only the head of the fused ranking instead of dumping the whole list
# into the notebook output.
fused_list[:10]

[{'_id': 'byaKvoMByhvPsGt1TOaa',
  '_score': 74.078674,
  '_score_normalized': 1.0},
 {'_id': '77bc234ce9bdaa85d1670fb519c9b91563eaa656',
  '_score': 1.8241123,
  '_score_normalized': 1.0},
 {'_id': 'b6882d4a73b417f76f942b7c664752954c0dd88c',
  '_score': 1.8223401,
  '_score_normalized': 0.99902845893863},
 {'_id': '1d47fd0a05a3fb942a7c851e34c72c9971aed0af',
  '_score': 1.81422,
  '_score_normalized': 0.9945769238001411},
 {'_id': 'bd6f92f0627dafabc0bdfd2c544a5b7c84bc3213',
  '_score': 1.8138039,
  '_score_normalized': 0.9943488128444724},
 {'_id': '8395ed44e3d2ee5636cf3d81a3d4283c37195df1',
  '_score': 1.8086896,
  '_score_normalized': 0.9915450929199918},
 {'_id': '85aeeae62622f3d0139b4b5b2162bbf04a180a19',
  '_score': 1.8081241,
  '_score_normalized': 0.9912350791121797},
 {'_id': '2e3955accf1e1719ab9b3a56742bf09bff1de660',
  '_score': 1.8074265,
  '_score_normalized': 0.9908526465174321},
 {'_id': '03ea82f015b86de55cb7636f650373b7f49f5fea',
  '_score': 1.8073379,
  '_score_normaliz