#### Importing libraries

In [2]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle
import os


  from .autonotebook import tqdm as notebook_tqdm


#### Loading the .parquet file

In [4]:
# Read the pre-chunked medical book table from disk; the trailing expression
# renders the first rows so the column layout can be inspected.
parquet_path = 'vectorized_medical_book_chunks.parquet'
df = pd.read_parquet(parquet_path)
df.head()


Unnamed: 0,book_name,chunk_id,chunk_text,chunk_length,embedding
0,C:/Users/Ayush Jindal/Downloads/ROUTINE BLOOD ...,C:/Users/Ayush_Jindal/Downloads/ROUTINE_BLOOD_...,Routine Blood Results Explained Routine Blood ...,495,"[-0.033440527, 0.016502352, 0.006819937, 0.039..."
1,C:/Users/Ayush Jindal/Downloads/ROUTINE BLOOD ...,C:/Users/Ayush_Jindal/Downloads/ROUTINE_BLOOD_...,book is available from the British Library Cop...,128,"[-0.009590683, -0.026081773, -0.042963974, 0.0..."
2,C:/Users/Ayush Jindal/Downloads/ROUTINE BLOOD ...,C:/Users/Ayush_Jindal/Downloads/ROUTINE_BLOOD_...,"No pa rt of this book may be reproduced, store...",440,"[0.007997773, 0.04616743, -0.07028714, -0.0291..."
3,C:/Users/Ayush Jindal/Downloads/ROUTINE BLOOD ...,C:/Users/Ayush_Jindal/Downloads/ROUTINE_BLOOD_...,5 Part 2: Immunology Objectives and Scope .......,487,"[-0.025172405, -0.029858258, -0.05281477, -0.0..."
4,C:/Users/Ayush Jindal/Downloads/ROUTINE BLOOD ...,C:/Users/Ayush_Jindal/Downloads/ROUTINE_BLOOD_...,"92 : Calcium, Bone, and Mu sculo-Skeletal Dise...",485,"[-0.062301837, 0.054388817, -0.044433784, -0.0..."


#### Extracting the `chunk_text` column as a list of strings

In [5]:
# Extract the chunk text as a plain Python list of strings; every value is
# cast to str so the encoder receives string input only.
texts = (
    df['chunk_text']
    .astype(str)
    .tolist()
)
print(f"Loaded {len(texts)} text chunks.")

Loaded 58762 text chunks.


#### Loading the embedding model

In [6]:
# Sentence-embedding model used for both the corpus chunks and, later, the
# search queries; both must come from the same model to be comparable.
embedding_model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(embedding_model_name)

#### Generating the embeddings

In [7]:
# Encode every chunk into a dense vector. The result is requested as a NumPy
# array (one row per entry in `texts`) so it can be handed to FAISS.
embeddings = model.encode(
    texts,
    convert_to_numpy=True,
    show_progress_bar=True,
)
print("Generated embeddings of shape:", embeddings.shape)

Batches: 100%|██████████| 1837/1837 [01:15<00:00, 24.28it/s]


Generated embeddings of shape: (58762, 384)


#### Creating a FAISS index from embeddings

In [8]:
# Build a flat L2 index over the chunk embeddings.
# The vector width is taken from the embedding matrix itself rather than
# hard-coded, so swapping the embedding model does not require editing this cell.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)

# FAISS only accepts C-contiguous float32 input. Coerce explicitly instead of
# relying on the encoder's default output dtype/layout (no copy is made when
# the array already satisfies both).
index.add(np.ascontiguousarray(embeddings, dtype=np.float32))

# Search hits are mapped back to chunks by position (texts[idx]), so the index
# must hold exactly one vector per entry in `texts`.
assert index.ntotal == len(texts), "index size does not match number of text chunks"

print(f"FAISS index built with {index.ntotal} vectors.")

FAISS index built with 58762 vectors.


#### Saving the FAISS index and metadata

In [11]:
# Persist the FAISS index together with the metadata needed to map search
# hits back to their source chunks.

# Create the output directory first: neither faiss.write_index nor
# open(..., "wb") creates missing parent directories, so on a fresh checkout
# this cell would otherwise fail. exist_ok=True keeps the cell re-runnable.
os.makedirs("db_faiss", exist_ok=True)

# Save FAISS index
faiss.write_index(index, "db_faiss/faiss_index_chunk_text.faiss")

# Save metadata (texts, and optionally chunk_length if needed later).
# Both lists are in the same order as the vectors added to the index, so a
# FAISS result id can be used directly as a list position.
with open("db_faiss/chunk_texts.pkl", "wb") as f:
    pickle.dump(texts, f)

with open("db_faiss/chunk_lengths.pkl", "wb") as f:
    pickle.dump(df['chunk_length'].tolist(), f)

print("Saved FAISS index and metadata (texts + lengths).")

Saved FAISS index and metadata (texts + lengths).


#### Test Search

In [12]:
# Smoke test of the persisted artifacts: reload them from disk, embed a single
# query with the same model used for the corpus, and print the nearest chunks.

# Load index
index = faiss.read_index("db_faiss/faiss_index_chunk_text.faiss")

# Load texts
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# files that this notebook (or another trusted source) produced.
with open("db_faiss/chunk_texts.pkl", "rb") as f:
    texts = pickle.load(f)

# Embed and search
query = "what is heart attack"
query_embedding = model.encode([query])

# FAISS expects a 2-D float32 array of query vectors; np.asarray avoids a copy
# when the encoder output already has that dtype.
# D holds the distances and I the matching vector ids, one row per query.
D, I = index.search(np.asarray(query_embedding, dtype=np.float32), k=5)

# Show top 5 matched chunks
for idx in I[0]:
    # FAISS pads the result with -1 when fewer than k neighbours exist;
    # texts[-1] would silently print the last chunk, so skip those slots.
    if idx < 0:
        continue
    print(texts[idx])


Etiology Cardiac sources of chest pain: Ischemic / coronary heart disease —ACSs (STEMI, NSTEMI, UA), stable angina pectoris Ischemic/nonatherosclerotic—aortic stenosis, hypertrophic cardiomyopathy (HCM), severe systemic hypertension, right ventricular hypertension, aortic regurgitation, severe anemia, coronary vasospasm, anatomical abnormalities Inflammatory —pericarditis, infectious and autoimmune vasculitis Hyperadrenergic states —stress cardiomyopathy, severe hypertension, pheochromocytoma
MYOCARDITIS Definition Myocarditis is an inflammatory disease of heart muscle due to infectious and noninfectious etiologies with potential long-term sequela of DCM.

Most common etiology in developed countries is viral infection (coxsackie, echovirus, adenovirus, HIV, CMV, parvovirus B19), with rheumatic carditis, Trypanosoma cruzi (more likely to present as chronic cardiomyopathy), and bacterial infections still contributing substantially to cases in the developing world.
Chest pain, dyspnea, sy