In [1]:
!pip install datasets transformers sentence-transformers
!pip install git+https://git@github.com/pinecone-io/pinecone-python-client.git

Collecting datasets
  Downloading datasets-2.14.5-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.6/519.6 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers
  Downloading transformers-4.34.0-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m83.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence-transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manyl

In [2]:
from datasets import load_dataset

# Fetch the train split of the `pqa_labeled` configuration of `pubmed_qa`.
pubmed = load_dataset(path="pubmed_qa", name="pqa_labeled", split="train")

pubmed

Downloading builder script:   0%|          | 0.00/11.1k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/12.7k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/4.59k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/709k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/152M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/533M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Dataset({
    features: ['pubid', 'question', 'context', 'long_answer', 'final_decision'],
    num_rows: 1000
})

In [3]:
# Peek at the first record: its `pubid` and its `context` field.
pubmed[0]["pubid"], pubmed[0]["context"]

(21645374,
 {'contexts': ['Programmed cell death (PCD) is the regulated death of cells within an organism. The lace plant (Aponogeton madagascariensis) produces perforations in its leaves through PCD. The leaves of the plant consist of a latticework of longitudinal and transverse veins enclosing areoles. PCD occurs in the cells at the center of these areoles and progresses outwards, stopping approximately five cells from the vasculature. The role of mitochondria during PCD has been recognized in animals; however, it has been less studied during PCD in plants.',
   'The following paper elucidates the role of mitochondrial dynamics during developmentally regulated PCD in vivo in A. madagascariensis. A single areole within a window stage leaf (PCD is occurring) was divided into three areas based on the progression of PCD; cells that will not undergo PCD (NPCD), cells in early stages of PCD (EPCD), and cells in late stages of PCD (LPCD). Window stage leaves were stained with the mitochondr

In [4]:
limit = 384  # a chunk is closed once its text is longer than this many characters


def chunker(contexts):
    """Split passages into overlapping, sentence-aligned text chunks.

    The passages are joined with spaces and split naively on "." (so
    abbreviations such as "A." also end a "sentence"). Sentences are
    accumulated until a chunk holds at least three of them and is longer
    than ``limit`` characters; the chunk is then emitted and its last two
    sentences are carried over as overlap into the next chunk.

    Args:
        contexts: iterable of passage strings belonging to one record.

    Returns:
        A list of chunk strings with no leading or trailing whitespace.
        Empty input (or input with no text) returns an empty list.
    """
    sentences = " ".join(contexts).split(".")
    chunks = []
    chunk = []
    for sentence in sentences:
        chunk.append(sentence)
        if len(chunk) >= 3 and len(".".join(chunk)) > limit:
            text = ".".join(chunk).strip()
            # The fragment after a final "." is empty, so the joined text may
            # already end with a period; do not add a second one.
            chunks.append(text if text.endswith(".") else text + ".")
            # Keep the last two sentences as overlap for the next chunk.
            chunk = chunk[-2:]
    # Emit whatever is left, but never an empty or whitespace-only chunk.
    tail = ".".join(chunk).strip()
    if tail:
        chunks.append(tail)
    return chunks

In [5]:
# Try the chunker on the context passages of the first record.
chunks = chunker(pubmed[0]["context"]["contexts"])
chunks

['Programmed cell death (PCD) is the regulated death of cells within an organism. The lace plant (Aponogeton madagascariensis) produces perforations in its leaves through PCD. The leaves of the plant consist of a latticework of longitudinal and transverse veins enclosing areoles. PCD occurs in the cells at the center of these areoles and progresses outwards, stopping approximately five cells from the vasculature.',
 'The leaves of the plant consist of a latticework of longitudinal and transverse veins enclosing areoles. PCD occurs in the cells at the center of these areoles and progresses outwards, stopping approximately five cells from the vasculature. The role of mitochondria during PCD has been recognized in animals; however, it has been less studied during PCD in plants. The following paper elucidates the role of mitochondrial dynamics during developmentally regulated PCD in vivo in A.',
 'The role of mitochondria during PCD has been recognized in animals; however, it has been less

In [6]:
# One id per chunk of the first record: "<pubid>-<chunk position>".
ids = [f"{pubmed[0]['pubid']}-{i}" for i in range(len(chunks))]
ids

['21645374-0',
 '21645374-1',
 '21645374-2',
 '21645374-3',
 '21645374-4',
 '21645374-5',
 '21645374-6']

In [7]:
# Chunk every record and collect {"id", "context"} dicts; each id is the
# record's pubid followed by the position of the chunk within that record.
data = []
for record in pubmed:
    chunks = chunker(record["context"]["contexts"])
    data.extend(
        {"id": f"{record['pubid']}-{i}", "context": context}
        for i, context in enumerate(chunks)
    )

data[:5]

[{'id': '21645374-0',
  'context': 'Programmed cell death (PCD) is the regulated death of cells within an organism. The lace plant (Aponogeton madagascariensis) produces perforations in its leaves through PCD. The leaves of the plant consist of a latticework of longitudinal and transverse veins enclosing areoles. PCD occurs in the cells at the center of these areoles and progresses outwards, stopping approximately five cells from the vasculature.'},
 {'id': '21645374-1',
  'context': 'The leaves of the plant consist of a latticework of longitudinal and transverse veins enclosing areoles. PCD occurs in the cells at the center of these areoles and progresses outwards, stopping approximately five cells from the vasculature. The role of mitochondria during PCD has been recognized in animals; however, it has been less studied during PCD in plants. The following paper elucidates the role of mitochondrial dynamics during developmentally regulated PCD in vivo in A.'},
 {'id': '21645374-2',
  '

In [8]:
from sentence_transformers import SentenceTransformer
import torch

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Dense sentence encoder, placed on the selected device.
dense_model = SentenceTransformer("msmarco-bert-base-dot-v5", device=device)
dense_model

Downloading (…)8df09/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)50dc78df09/README.md:   0%|          | 0.00/6.14k [00:00<?, ?B/s]

Downloading (…)dc78df09/config.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)8df09/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

Downloading (…)df09/train_script.py:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

Downloading (…)50dc78df09/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)c78df09/modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
)

In [9]:
# Encode one chunk to check the shape of a dense embedding.
emb = dense_model.encode(data[0]["context"])
emb.shape

(768,)

In [10]:
# Dense embedding dimensionality; reused later as the index dimension.
dim = dense_model.get_sentence_embedding_dimension()
dim

768

In [11]:
!pip install git+https://github.com/naver/splade.git
from splade.models.transformer_rep import Splade

sparse_model_id = "naver/splade-cocondenser-ensembledistil"

sparse_model = Splade(sparse_model_id, agg="max")
sparse_model.to(device)
sparse_model.eval()

Collecting git+https://github.com/naver/splade.git
  Cloning https://github.com/naver/splade.git to /tmp/pip-req-build-gr17gzzw
  Running command git clone --filter=blob:none --quiet https://github.com/naver/splade.git /tmp/pip-req-build-gr17gzzw
  Resolved https://github.com/naver/splade.git to commit 3781228d5f07e7a6ae14a479e469a715de79e976
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers==4.18.0 (from SPLADE==2.1)
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.0/4.0 MB[0m [31m37.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting omegaconf==2.1.2 (from SPLADE==2.1)
  Downloading omegaconf-2.1.2-py3-none-any.whl (74 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.7/74.7 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting antlr4-python3-runtime==4.8 (from omegaconf==2.1.2->SPLADE==2.1)
  Downloading antlr4-python3-runtime-4.8.tar.gz (112 kB)
[2K

Downloading (…)lve/main/config.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/466 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [12]:
from transformers import AutoTokenizer

# Load the tokenizer published under the same model id as the sparse model.
tokenizer = AutoTokenizer.from_pretrained(sparse_model_id)

# Tokenize one chunk into PyTorch tensors to inspect the model inputs.
tokens = tokenizer(data[0]["context"], return_tensors="pt")
tokens

{'input_ids': tensor([[  101, 16984,  3526,  2331,  1006,  7473,  2094,  1007,  2003,  1996,
         12222,  2331,  1997,  4442,  2306,  2019, 15923,  1012,  1996, 12922,
          3269,  1006,  9706, 17175, 18150,  2239, 11934, 27806,  1007,  7137,
          2566, 29278, 10708,  1999,  2049,  3727,  2083,  7473,  2094,  1012,
          1996,  3727,  1997,  1996,  3269,  8676,  1997,  1037, 17779,  6198,
          1997, 20134,  1998, 18323,  9607,  4372, 20464, 18606,  2024, 29111,
          1012,  7473,  2094,  5158,  1999,  1996,  4442,  2012,  1996,  2415,
          1997,  2122,  2024, 29111,  1998, 22901, 15436,  2015,  1010,  7458,
          3155,  2274,  4442,  2013,  1996, 12436, 28817, 20051,  5397,  1012,
           102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [13]:
# Inference only: no gradients are needed while computing the embedding.
with torch.no_grad():
    # A single text was tokenized, so .squeeze() drops the size-1 dimensions
    # and leaves a 1-D vector.
    sparse_emb = sparse_model(d_kwargs=tokens.to(device))["d_rep"].squeeze()

sparse_emb.shape

torch.Size([30522])

In [14]:
# Display the sparse embedding tensor.
sparse_emb

tensor([0., 0., 0.,  ..., 0., 0., 0.], device='cuda:0')

In [15]:
# Positions of the non-zero entries of the sparse embedding, as a Python list.
indices = sparse_emb.nonzero().squeeze().cpu().tolist()
print(len(indices))

173


In [16]:
# Weights at the non-zero positions, paired with their indices in one dict.
values = sparse_emb[indices].cpu().tolist()
sparse = {"indices": indices, "values": values}
sparse

{'indices': [1000,
  1039,
  1052,
  1997,
  1999,
  2003,
  2024,
  2049,
  2083,
  2094,
  2173,
  2239,
  2278,
  2290,
  2306,
  2331,
  2415,
  2427,
  2523,
  2537,
  2550,
  2565,
  2566,
  2597,
  2644,
  2754,
  2757,
  2832,
  2974,
  3030,
  3081,
  3102,
  3252,
  3269,
  3274,
  3280,
  3370,
  3392,
  3399,
  3508,
  3526,
  3571,
  3581,
  3628,
  3727,
  3740,
  3817,
  3965,
  3968,
  4264,
  4295,
  4372,
  4442,
  4456,
  4574,
  4649,
  4717,
  4730,
  4758,
  4775,
  4870,
  4962,
  4963,
  5080,
  5104,
  5258,
  5397,
  5701,
  5708,
  5920,
  5996,
  6198,
  6210,
  6215,
  6310,
  6418,
  6470,
  6531,
  6546,
  6580,
  6897,
  7053,
  7337,
  7366,
  7403,
  7473,
  7609,
  7691,
  7775,
  7816,
  8475,
  8676,
  8715,
  8761,
  8765,
  8872,
  8979,
  9007,
  9232,
  9448,
  9607,
  9706,
  9890,
  9895,
  9915,
  10012,
  10088,
  10244,
  10267,
  10327,
  10507,
  10708,
  10738,
  11503,
  11568,
  11704,
  11767,
  11798,
  11829,
  11934,
  12222,
  124

In [17]:
# Invert the tokenizer vocabulary (token -> id) into an id -> token lookup.
idx2token = {
    token_id: token_text
    for token_text, token_id in tokenizer.get_vocab().items()
}

In [18]:
# Translate each non-zero index to its token, round the weight to two
# decimals, and order the entries from the largest weight to the smallest.
sparse_dict_tokens = dict(
    sorted(
        ((idx2token[idx], round(weight, 2)) for idx, weight in zip(indices, values)),
        key=lambda pair: pair[1],
        reverse=True,
    )
)

sparse_dict_tokens

{'pc': 3.02,
 'lace': 2.95,
 'programmed': 2.36,
 '##for': 2.28,
 'madagascar': 2.26,
 'death': 1.96,
 '##d': 1.95,
 'lattice': 1.81,
 'cell': 1.69,
 '##iensis': 1.64,
 'malaga': 1.6,
 '##get': 1.56,
 'regulated': 1.53,
 'die': 1.51,
 'lacey': 1.5,
 '##ono': 1.46,
 '##ole': 1.45,
 '##oles': 1.45,
 '##scu': 1.39,
 'transverse': 1.38,
 'leaves': 1.34,
 'cells': 1.31,
 'longitudinal': 1.31,
 'plant': 1.21,
 'plants': 1.16,
 'leaf': 1.15,
 'ap': 1.14,
 'organism': 1.11,
 'per': 1.1,
 'regulation': 1.03,
 'veins': 1.02,
 'organisms': 1.0,
 '##work': 0.99,
 'are': 0.94,
 'modified': 0.93,
 'controlled': 0.92,
 'dead': 0.9,
 'occur': 0.9,
 'disorder': 0.87,
 'program': 0.82,
 '##lat': 0.81,
 'through': 0.76,
 '##cl': 0.74,
 'computer': 0.71,
 '##ations': 0.7,
 'abbreviation': 0.69,
 'produced': 0.67,
 'is': 0.65,
 'center': 0.63,
 '"': 0.62,
 'produce': 0.62,
 'technology': 0.61,
 'process': 0.6,
 '##osing': 0.59,
 'matt': 0.54,
 'cc': 0.54,
 '##ation': 0.53,
 'outward': 0.53,
 'gage': 0.52,


In [19]:
import pinecone

def builder(records):
    """Build upsert payloads (dense + sparse vectors) for a batch of records.

    Args:
        records: sequence of dicts, each with an "id" and a "context" key.

    Returns:
        A list with one dict per record holding "id", "values" (dense vector
        as a list), "sparse_values" ({"indices": [...], "values": [...]}) and
        "metadata" ({"context": <text>}). An empty input returns an empty list
        without calling any model.
    """
    if not records:
        return []

    ids = [x["id"] for x in records]
    contexts = [x["context"] for x in records]

    dense_vecs = dense_model.encode(contexts).tolist()

    input_ids = tokenizer(
        contexts,
        return_tensors="pt",
        padding=True,
        truncation=True
    )

    with torch.no_grad():
        sparse_vecs = sparse_model(d_kwargs=input_ids.to(device))["d_rep"]
    # Force a (batch, vocab) layout. An unqualified .squeeze() would also drop
    # the batch axis for a one-record batch and break the per-record zip below.
    sparse_vecs = sparse_vecs.reshape(len(contexts), -1)

    upserts = []
    for _id, dense_vec, sparse_vec, context in zip(ids, dense_vecs, sparse_vecs, contexts):
        # squeeze(-1) keeps a 1-D index tensor even when exactly one (or no)
        # entry is non-zero, so "indices" and "values" are always lists.
        nonzero_idx = sparse_vec.nonzero().squeeze(-1)
        sparse_values = {
            "indices": nonzero_idx.cpu().tolist(),
            "values": sparse_vec[nonzero_idx].cpu().tolist()
        }
        upserts.append({
            "id": _id,
            "values": dense_vec,
            "sparse_values": sparse_values,
            "metadata": {"context": context}
        })

    return upserts

In [20]:
# Smoke-test the builder on the first three chunks.
builder(data[:3])

[{'id': '21645374-0',
  'values': [-0.0860980972647667,
   -0.06404605507850647,
   -0.09067439287900925,
   -0.13883446156978607,
   0.40349075198173523,
   0.04510989040136337,
   0.17842265963554382,
   0.008637930266559124,
   0.39867380261421204,
   -0.12001233547925949,
   -0.055883314460515976,
   0.1040591150522232,
   -0.5984246730804443,
   0.4460744261741638,
   0.07607370615005493,
   0.718574583530426,
   0.13898858428001404,
   -0.03241853415966034,
   0.05966181308031082,
   0.05813855305314064,
   -0.14696815609931946,
   0.02058224566280842,
   0.7175166606903076,
   0.26266899704933167,
   0.18689090013504028,
   -0.27962222695350647,
   -0.4334171712398529,
   -0.36501309275627136,
   -0.4082491993904114,
   0.4922325313091278,
   -0.04993252828717232,
   -0.3248228430747986,
   0.14582324028015137,
   -0.21379928290843964,
   0.06254779547452927,
   -0.031296759843826294,
   -0.5419846177101135,
   -0.1686738133430481,
   -0.44803133606910706,
   -0.0754419788718223

In [21]:
import pinecone

import os

# Read the credentials from the environment instead of pasting them into the
# notebook, so they never end up in a saved or shared copy. Unset variables
# fall back to the empty string.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY", ""),
    environment=os.environ.get("PINECONE_ENVIRONMENT", "")
)

In [22]:
index_name = "pubmed-splade"

# Create the index only when it does not exist yet, so this cell can be
# re-run (e.g. after a kernel restart) without failing on an existing index.
if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        index_name,
        dimension=dim,
        metric="dotproduct",
        pod_type="s1"
    )

In [24]:
# Get a handle on the index and check its current statistics.
index = pinecone.Index(index_name)
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [25]:
# Trial upsert of the first five chunks before indexing everything.
index.upsert(builder(data[:5]))

{'upserted_count': 5}

In [26]:
from tqdm.auto import tqdm

batch_size = 32

# Encode and upsert the whole dataset in fixed-size batches.
for i in tqdm(range(0, len(data), batch_size)):
    # Slicing past the end of a list is safe, so the last (shorter) batch
    # needs no explicit end clamp.
    batch = data[i:i + batch_size]
    index.upsert(builder(batch))

  0%|          | 0/186 [00:00<?, ?it/s]

In [27]:
# Compare the number of prepared chunks with what the index reports.
len(data), index.describe_index_stats()

(5930,
 {'dimension': 768,
  'index_fullness': 0.1,
  'namespaces': {'': {'vector_count': 5930}},
  'total_vector_count': 5930})

In [28]:
def encode(text):
    """Encode one query string into a dense vector and a sparse vector.

    Args:
        text: the query string.

    Returns:
        A ``(dense_vec, sparse_dict)`` tuple: ``dense_vec`` is the dense
        embedding as a list of floats, ``sparse_dict`` is
        ``{"indices": [...], "values": [...]}`` holding the non-zero entries
        of the sparse embedding.
    """
    dense_vec = dense_model.encode(text).tolist()
    # Truncate over-long inputs, matching how `builder` tokenizes documents.
    input_ids = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        # Flatten the single-text output to a 1-D vector.
        sparse_vec = sparse_model(
            d_kwargs=input_ids.to(device)
        )["d_rep"].reshape(-1)
    # squeeze(-1) keeps a 1-D index tensor even when exactly one (or no) entry
    # is non-zero, so "indices" and "values" are always lists.
    nonzero_idx = sparse_vec.nonzero().squeeze(-1)
    sparse_dict = {
        "indices": nonzero_idx.cpu().tolist(),
        "values": sparse_vec[nonzero_idx].cpu().tolist()
    }
    return dense_vec, sparse_dict

In [29]:
# Encode the question and run a hybrid query using both vectors unscaled.
query = "Can clinicians use the PHQ-9 to assess depression in people with vision loss?"
dense, sparse = encode(query)

xc = index.query(
    vector=dense,
    sparse_vector=sparse,
    top_k=2,
    include_metadata=True
)
xc

{'matches': [{'id': '19156007-0',
              'metadata': {'context': 'To investigate whether the Patient '
                                      'Health Questionnaire-9 (PHQ-9) '
                                      'possesses the essential psychometric '
                                      'characteristics to measure depressive '
                                      'symptoms in people with visual '
                                      'impairment. The PHQ-9 scale was '
                                      'completed by 103 participants with low '
                                      'vision. These data were then assessed '
                                      'for fit to the Rasch model. The '
                                      "participants' mean +/- standard "
                                      'deviation (SD) age was 74.7 +/- 12.2 '
                                      'years.'},
              'score': 203.74826,
              'values': []},
             {'id': '

In [30]:
def hybrid_scale(dense, sparse, alpha: float):
    """Weight a dense/sparse vector pair for hybrid search.

    Dense values are multiplied by ``alpha`` and sparse values by
    ``1 - alpha``; the sparse indices are passed through unchanged.

    Args:
        dense: iterable of dense vector values.
        sparse: dict with "indices" and "values" keys.
        alpha: weight in [0, 1]; 1 keeps only the dense part, 0 only the
            sparse part.

    Returns:
        A ``(hdense, hsparse)`` tuple with the scaled vectors.

    Raises:
        ValueError: if ``alpha`` is below 0 or above 1.
    """
    if alpha < 0 or alpha > 1:
        raise ValueError("Alpha must be between 0 and 1")

    sparse_indices = sparse["indices"]
    scaled_sparse = []
    for weight in sparse["values"]:
        scaled_sparse.append(weight * (1 - alpha))
    hsparse = {"indices": sparse_indices, "values": scaled_sparse}

    hdense = []
    for component in dense:
        hdense.append(component * alpha)

    return hdense, hsparse

In [31]:
# alpha=1.0 multiplies the sparse values by zero: dense-only scoring.
hdense, hsparse = hybrid_scale(dense, sparse, alpha=1.0)

xc = index.query(
    vector=hdense,
    sparse_vector=hsparse,
    top_k=2,
    include_metadata=True
)
xc

{'matches': [{'id': '19156007-0',
              'metadata': {'context': 'To investigate whether the Patient '
                                      'Health Questionnaire-9 (PHQ-9) '
                                      'possesses the essential psychometric '
                                      'characteristics to measure depressive '
                                      'symptoms in people with visual '
                                      'impairment. The PHQ-9 scale was '
                                      'completed by 103 participants with low '
                                      'vision. These data were then assessed '
                                      'for fit to the Rasch model. The '
                                      "participants' mean +/- standard "
                                      'deviation (SD) age was 74.7 +/- 12.2 '
                                      'years.'},
              'score': 181.907089,
              'values': []},
             {'id': 

In [32]:
# alpha=0.0 multiplies the dense values by zero: sparse-only scoring.
hdense, hsparse = hybrid_scale(dense, sparse, alpha=0.0)

xc = index.query(
    vector=hdense,
    sparse_vector=hsparse,
    top_k=2,
    include_metadata=True
)
xc

{'matches': [{'id': '19156007-0',
              'metadata': {'context': 'To investigate whether the Patient '
                                      'Health Questionnaire-9 (PHQ-9) '
                                      'possesses the essential psychometric '
                                      'characteristics to measure depressive '
                                      'symptoms in people with visual '
                                      'impairment. The PHQ-9 scale was '
                                      'completed by 103 participants with low '
                                      'vision. These data were then assessed '
                                      'for fit to the Rasch model. The '
                                      "participants' mean +/- standard "
                                      'deviation (SD) age was 74.7 +/- 12.2 '
                                      'years.'},
              'score': 21.8411713,
              'values': []},
             {'id': 

In [33]:
# Second example question, scored with the sparse vector only (alpha=0.0).
query = "Does ibuprofen increase perioperative blood loss during hip arthroplasty?"

dense, sparse = encode(query)
hdense, hsparse = hybrid_scale(dense, sparse, alpha=0.0)

xc = index.query(
    vector=hdense,
    sparse_vector=hsparse,
    top_k=2,
    include_metadata=True
)
xc

{'matches': [{'id': '12442934-0',
              'metadata': {'context': 'To determine whether prior exposure of '
                                      'non-steroidal anti-inflammatory drugs '
                                      'increases perioperative blood loss '
                                      'associated with major orthopaedic '
                                      'surgery. Fifty patients scheduled for '
                                      'total hip replacement were allocated to '
                                      'two groups (double blind, randomized '
                                      'manner). All patients were pretreated '
                                      'for 2 weeks before surgery: Group 1 '
                                      'with placebo drug, Group 2 with '
                                      'ibuprofen. All patients were injected '
                                      'intrathecally with bupivacaine 20mg '
                                 

In [34]:
# Same question, scored with the dense vector only (alpha=1.0), for comparison.
query = "Does ibuprofen increase perioperative blood loss during hip arthroplasty?"

dense, sparse = encode(query)
hdense, hsparse = hybrid_scale(dense, sparse, alpha=1.0)

xc = index.query(
    vector=hdense,
    sparse_vector=hsparse,
    top_k=2,
    include_metadata=True
)
xc

{'matches': [{'id': '12442934-3',
              'metadata': {'context': ' The perioperative blood loss increased '
                                      'by 45% in the ibuprofen group compared '
                                      'with placebo. The total (+/-SD) blood '
                                      'loss in the ibuprofen group was 1161 '
                                      '(+/-472) mL versus 796 (+/-337) mL in '
                                      'the placebo group.'},
              'score': 177.406326,
              'values': []},
             {'id': '12442934-0',
              'metadata': {'context': 'To determine whether prior exposure of '
                                      'non-steroidal anti-inflammatory drugs '
                                      'increases perioperative blood loss '
                                      'associated with major orthopaedic '
                                      'surgery. Fifty patients scheduled for '
                     

In [35]:
# Clean up: delete the index created for this notebook.
pinecone.delete_index(index_name)