In [1]:
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Number of texts collected from the input file before they are embedded and
# added to the index in one go.
BATCH_SIZE = 4096
# Model name handed to SentenceTransformer().
EMBED_MODEL = "intfloat/e5-base-v2"
# JSONL input, read one record per line by the indexing loop.
INPUT_FILE = "data/parsed_pmc_2_chunked.jsonl"
# Destination of the serialized FAISS index (rewritten on every checkpoint).
FAISS_INDEX_PATH = "data/faiss_index.index"
# JSONL file that receives the metadata records (appended on every checkpoint).
METADATA_PATH = "data/faiss_metadata.jsonl"
# A checkpoint is written each time this many full batches have been indexed.
SAVE_N_BATCHES = 10

In [2]:
# Embedding model shared by the indexing loop and the query experiment.
model = SentenceTransformer(EMBED_MODEL)

# Mutable state filled in by the indexing loop:
#   index       - FAISS index; stays None until the first batch has been embedded
#   metadata    - records indexed since the last checkpoint was written
#   batch_count - number of full batches indexed so far
index, metadata, batch_count = None, [], 0

In [None]:
def embed_texts(texts):
    """Encode ``texts`` with the notebook-level ``model`` and rescale each row.

    Args:
        texts: Sequence of strings passed straight to ``model.encode``.

    Returns:
        A float32 array with one row per text, each row divided by its
        Euclidean norm (plus a tiny epsilon), so that inner products between
        rows behave like cosine similarities.
    """
    raw_vectors = model.encode(texts, batch_size=128, show_progress_bar=False)
    vectors = raw_vectors.astype('float32')
    # keepdims=True keeps a trailing axis so the division broadcasts per row.
    row_lengths = np.linalg.norm(vectors, axis=1, keepdims=True)
    # The epsilon avoids a division by zero for an all-zero embedding.
    return vectors / (row_lengths + 1e-10)


In [None]:
def save_checkpoint(index, metadata, index_path, metadata_path):
    """Write the FAISS index to disk and append metadata records as JSONL.

    Args:
        index: FAISS index handed to ``faiss.write_index``.
        metadata: Iterable of JSON-serialisable records; each one becomes one
            line of the metadata file.
        index_path: Target path of the serialized index.
        metadata_path: JSONL file opened in append mode, so lines already in
            the file are kept and the new records are added after them.
    """
    faiss.write_index(index, index_path)
    # Lazily serialise one record per line; nothing is built up in memory.
    json_lines = (json.dumps(record) + "\n" for record in metadata)
    with open(metadata_path, "a") as sink:
        sink.writelines(json_lines)

def _add_batch_to_index(index, texts):
    """Embed ``texts`` and append the vectors to ``index``; return the index.

    The index is created on first use because its dimensionality is only
    known once the first batch of embeddings exists.
    """
    embs = embed_texts(texts)
    if index is None:
        index = faiss.IndexFlatIP(embs.shape[1])
    index.add(embs)
    return index


# Start every run from a clean slate. Without this, re-running the cell in a
# live kernel keeps adding vectors to the previous in-memory index, and since
# save_checkpoint() appends to METADATA_PATH while the index file is
# overwritten, rows left over from an earlier run would no longer line up
# with the vector ids of the freshly written index.
index = None
metadata = []
batch_count = 0
open(METADATA_PATH, "w").close()

with open(INPUT_FILE, "r", encoding="utf-8") as fin:
    batch_texts = []
    batch_meta = []
    for line in tqdm(fin, desc="Processing JSONL"):
        # Tolerate blank lines (e.g. a trailing newline) instead of letting
        # json.loads raise on them.
        if not line.strip():
            continue
        item = json.loads(line)
        # Records whose section is 'id' are not embedded.
        if item['section'] == 'id':
            continue
        batch_texts.append(item['text'])
        batch_meta.append(item)
        if len(batch_texts) < BATCH_SIZE:
            continue

        # A full batch is ready: embed it and queue its metadata in the same
        # order so that line i of the metadata file matches vector id i.
        index = _add_batch_to_index(index, batch_texts)
        metadata.extend(batch_meta)
        batch_texts, batch_meta = [], []
        batch_count += 1
        # Save after every N batches; the metadata buffer is cleared because
        # save_checkpoint() appends only the rows added since the last save.
        if batch_count % SAVE_N_BATCHES == 0:
            save_checkpoint(index, metadata, FAISS_INDEX_PATH, METADATA_PATH)
            metadata = []

    # Leftovers: the final, partially filled batch.
    if batch_texts:
        index = _add_batch_to_index(index, batch_texts)
        metadata.extend(batch_meta)

    # Final save, needed only when rows were indexed after the last checkpoint.
    if metadata:
        save_checkpoint(index, metadata, FAISS_INDEX_PATH, METADATA_PATH)

Processing JSONL: 8797731it [19:43:43, 108.61it/s]

The full run was done yesterday and took about 19 h 44 min: `Processing JSONL: 8797731it [19:43:43, 108.61it/s]`

## Experiment

In [4]:
# Load the persisted index and its metadata from the paths set in the config
# cell, so this cell always reads what the indexing loop wrote.
index = faiss.read_index(FAISS_INDEX_PATH)
with open(METADATA_PATH, encoding="utf-8") as f:
    meta = [json.loads(line) for line in f]

# Vector id i is looked up as meta[i], so both sides must have the same length.
if index.ntotal != len(meta):
    print(
        f"WARNING: index holds {index.ntotal} vectors but {len(meta)} metadata "
        "rows were loaded; printed metadata may not match the hits."
    )

query = "What are the comparative outcomes of Tenon duplication versus dura mater covering techniques in Ahmed glaucoma valve implantation?"
# Embed the query with the same helper used for the indexed texts so that it
# is normalised identically and the inner-product scores are cosine values.
# NOTE(review): the E5 model card recommends "query: " / "passage: " prefixes;
# they are not used here or at indexing time - adding them would require
# rebuilding the index. Confirm whether that is wanted.
query_emb = embed_texts([query])
D, I = index.search(query_emb, k=5)
# Scores and indices of the nearest neighbors
print("Nearest neighbors:")
print("Distances:", D[0])
print("Indices:", I[0])
# Print the metadata for the nearest neighbors
print("Metadata for nearest neighbors:")
for idx in I[0]:
    # FAISS pads the result with -1 when fewer than k neighbours exist;
    # meta[-1] would silently print the last record instead.
    if idx < 0:
        continue
    if idx >= len(meta):
        print(f"<no metadata row for vector id {idx}>")
        continue
    print(meta[idx]['section'], meta[idx]['text'])



Nearest neighbors:
Distances: [0.97176313 0.9260005  0.9045711  0.90162516 0.89384025]
Indices: [    620     621  750243 1133490 2288789]
Metadata for nearest neighbors:
title Comparison of Tenon duplication with dura mater covering technique for Ahmed glaucoma valve implantation
abstract To compare the efficacy and complications of Tenon duplication with dura mater covering technique for Ahmed glaucoma valve (AGV) implantation. This retrospective study included 44 refractory glaucoma patients (44 eyes) who underwent AGV implantation from 2017 to 2020 in the Ophthalmology Clinic of Eskişehir Osmangazi University Hospital and attended regular postoperative follow-ups. The patients were divided based on whether they underwent Tenon duplication technique (group 1: n = 20) or dura mater covering technique (group 2: n = 24) during surgery. The patients’ age, gender, systemic diseases, glaucoma type, pre-op intraocular pressure (IOP), and ocular surgeries were recorded. The groups were compa

#### L2 Distance Output
Nearest neighbors:
Distances: [0.9354083  0.7911593  0.65694475 0.6328492  0.48136634]
Indices: [620 621 626 625 623]

#### Cosine Similarity with parsed_pmc_2_s_chunked » 200.000

Nearest neighbors:
Distances: [0.971763   0.9260006  0.8907722  0.88272965 0.8749901 ]
Indices: [620 621 625 626 624]
Metadata for nearest neighbors:
title Comparison of Tenon duplication with dura mater covering technique for Ahmed glaucoma valve implantation
abstract To compare the efficacy and complications of Tenon duplication with dura mater covering technique for Ahmed glaucoma valve (AGV) implantation. This retrospective study included 44 refractory glaucoma patients (44 eyes) who underwent AGV implantation from 2017 to 2020 in the Ophthalmology Clinic of Eskişehir Osmangazi University Hospital and attended regular postoperative follow-ups. The patients were divided based on whether they underwent Tenon duplication technique (group 1: n = 20) or dura mater covering technique (group 2: n = 24) during surgery. The patients’ age, gender, systemic diseases, glaucoma type, pre-op intraocular pressure (IOP), and ocular surgeries were recorded. The groups were compared for IOP level control, early and late complications, postoperative antiglaucomatous medication requirements, glaucoma surgery requirements, presence of postoperative hypertensive phase (HP), and surgical success which was defined as an IOP ≥5 and ≤21 mmHg, with or without antiglaucoma medication. By the end of the mean follow-up (22.6 ± 10.6 months), the success rates were 95% (group 1) and 96% (group 2). The groups showed no differences in postoperative complications, postoperative antiglaucomatous drugs’ onset time, additional glaucoma surgery, need for needling, presence of HP at 6 months postoperatively, and relationship between the glaucoma type and success rates (P values: 0.86, 0.9, 0.48, 0.12, 0.36, and 0.8, respectively). The IOP values at the last follow-up were 15.2 ± 4.1 in group 1 and 14.7 ± 4.8 in group 2. The IOP reduction rates showed no significant differences. Since success and complications are similar in both Tenon duplication and dura mater covering technique, unique grafting materials may not be needed in AGV implantation surgery, except in special cases.
discussion Gedar Totuk et al. tried to prevent AGV tube exposure by creating a long scleral flap augmented with Tenon advancement and duplication; they had no exposure in the 2-year follow-up. Since an exposure case was seen in the Tenon duplication group in our study, it can be thought that adding the long scleral flap technique to the Tenon duplication technique may have an effect on reducing exposure. However, long scleral tunnel or scleral flap techniques are not ideal for all cases, such as in patients who have previously undergone multiple ocular surgeries and therefore have very thin scleral tissue. In those patients, the graft material would be combined with Tenon duplication technique to prevent tube exposure. Our study showed no significant difference between the two techniques in terms of surgical success and complications. The finding that there was no statistically significant difference in the IOP-lowering effect of both techniques and in the incidence of HP in our study is a valuable new result. There are over 1000 articles about AGV in the literature, yet there is no clear consensus on the failure criteria of AGV implantation. Although some authors consider the requirement of needling as a failure, there are some who consider the lack of regulation of IOP despite needling as a failure. For example, in the review of Riva et al. and the study of Eibschitz-Tsimhoni et al., cases whose IOP could not be controlled despite medical treatment and 5-FU injection together with needling were considered unsuccessful. We do not consider needling as a failure, as all of the patients in our study who needed needling achieved the desired IOP values after needling and the values were found to be within normal ranges in their follow-up. The limitations of our study include its retrospective nature, nonrandomized design, and follow-up loss at later time points. The selection bias due to the retrospective nature of the study could affect the reliability and validity.
discussion The limitations of our study include its retrospective nature, nonrandomized design, and follow-up loss at later time points. The selection bias due to the retrospective nature of the study could affect the reliability and validity. Although differences between the groups, especially in terms of gender, follow-up time, systemic disease, and glaucoma subtype, may affect the outcome of the study, we think that the acceptability of the study continues because there was no significant difference between the two groups. A possibility of overestimating surgical failure exists, as patients with more severe pathologies may continue to visit the ophthalmologist for longer periods. An underestimation of surgical failure is also possible due to the short follow-up time. The study’s strengths include the fact that all the operations were performed at a single hospital by two qualified surgeons. Comparison of the Tenon duplication technique and the dura mater covering technique also had not been done before. In conclusion, the use of special allograft materials, which can prolong the operation time and increase the patient’s economic burden, may not be necessary in AGV implantation surgery, except in special cases. Not applicable. This study was approved by the ethics committee of Eskişehir Osmangazi University, and the study complied with the tenets of the Declaration of Helsinki. Nil. There are no conflicts of interest.
discussion Filtering surgery is usually the first surgical option for glaucoma treatment. GDDs, such as AGV, are often an option available to patients with glaucoma refractory to filtering surgeries. Although GDDs have been used frequently for years to reduce eye pressure, complications of AGV, such as tube exposure, choroidal detachment, hyphema, iris–lens touch, and encapsulation, can occur. Ou et al. reported that the most common complication after AGV implantation in patients with primary congenital glaucoma was tube–corneal touch. Tube exposure is also a significant AGV implantation problem, as it leads to both mechanical damage and inflammation-mediated melting of the self-tissue or the donor graft. Inferiorly placed implants, diabetes, presence of prior inflammation, previous ocular surgery, and younger age are well-known risk factors for tube exposure associated with GDDs. In addition, there are studies claiming that previous eye surgery is not a risk factor for exposure. Untreated cases of tube exposure are well known to lead to possible endophthalmitis, with a poor prognosis. Recognizing these complications of AGV tube, we investigated the methods and modifications that could potentially prevent exposure and maintain IOP reduction during the postoperative period. Although a number of surgical methods have been described to prevent tube exposure, such as placement of patch graft (e.g., fascia lata, pericardium, donor sclera, or lyophilized dura mater), lengthy scleral tunnel, and/or duplication and advancement of Tenon’s tissue, there is no consensus on the best approach. Tamcelik et al. divided their patients into three groups according to the covering techniques used during AGV implantation. They did not see any exposure in the Tenon duplication group and claimed it to be the safest method. Gedar Totuk et al. 
tried to prevent AGV tube exposure by creating a long scleral flap augmented with Tenon advancement and duplication; they had no exposure in the 2-year follow-up.

Latest result, from the newest index (not yet fully embedded when this query was run):
Nearest neighbors:
Distances: [0.97176313 0.9260005  0.9045711  0.90162516 0.89384025]
Indices: [    620     621  750243 1133490 2288789]
Metadata for nearest neighbors:
title Comparison of Tenon duplication with dura mater covering technique for Ahmed glaucoma valve implantation
abstract To compare the efficacy and complications of Tenon duplication with dura mater covering technique for Ahmed glaucoma valve (AGV) implantation. This retrospective study included 44 refractory glaucoma patients (44 eyes) who underwent AGV implantation from 2017 to 2020 in the Ophthalmology Clinic of Eskişehir Osmangazi University Hospital and attended regular postoperative follow-ups. The patients were divided based on whether they underwent Tenon duplication technique (group 1: n = 20) or dura mater covering technique (group 2: n = 24) during surgery. The patients’ age, gender, systemic diseases, glaucoma type, pre-op intraocular pressure (IOP), and ocular surgeries were recorded. The groups were compared for IOP level control, early and late complications, postoperative antiglaucomatous medication requirements, glaucoma surgery requirements, presence of postoperative hypertensive phase (HP), and surgical success which was defined as an IOP ≥5 and ≤21 mmHg, with or without antiglaucoma medication. By the end of the mean follow-up (22.6 ± 10.6 months), the success rates were 95% (group 1) and 96% (group 2). The groups showed no differences in postoperative complications, postoperative antiglaucomatous drugs’ onset time, additional glaucoma surgery, need for needling, presence of HP at 6 months postoperatively, and relationship between the glaucoma type and success rates (P values: 0.86, 0.9, 0.48, 0.12, 0.36, and 0.8, respectively). The IOP values at the last follow-up were 15.2 ± 4.1 in group 1 and 14.7 ± 4.8 in group 2. The IOP reduction rates showed no significant differences. Since success and complications are similar in both Tenon duplication and dura mater covering technique, unique grafting materials may not be needed in AGV implantation surgery, except in special cases.
title Surgical Outcomes of Different Ahmed Glaucoma Valve Implantation Methods between Scleral Graft and Scleral Flap
title Clinical Outcomes after Combined Ahmed Glaucoma Valve Implantation and Penetrating Keratoplasty or Pars Plana Vitrectomy
title Outcomes of Ahmed glaucoma valve implantation in advanced primary congenital glaucoma with previous surgical failure