# Vector Index Construction — implemented based on src/index_builder.py

In [None]:
# 1. Environment Setup
# Install the GPU build of FAISS and sentence-transformers (-q keeps pip output quiet).
# The leading `!` is IPython/Colab shell syntax, so this cell only runs inside a notebook.
!pip install -q faiss-gpu sentence-transformers
# Mount Google Drive so the project directory below is reachable from the Colab VM.
from google.colab import drive
drive.mount('/content/drive')

import os
# Switch the working directory to the project root so the relative paths used by
# the later cells (models/, data/processed/, indices/) resolve against it.
PROJECT_PATH = "/content/drive/MyDrive/CS6120_project"
os.chdir(PROJECT_PATH)

# GPU detection
# Reports whether PyTorch can see a CUDA device; later cells branch on the same check.
import torch
print(f"Available GPU: {torch.cuda.is_available()}")

In [None]:
# 2. Load fine-tuned SBERT model
from sentence_transformers import SentenceTransformer

# Load the checkpoint from the project-relative models/ directory.
model = SentenceTransformer("models/sbert_model")

# Run on CUDA when PyTorch reports a GPU, otherwise stay on the CPU.
target_device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(target_device)

print("Model loaded successfully")

In [None]:
# 3. Build FAISS index (based on src/index_builder.py)
# Encodes the "train" entries of the combined dataset with the SBERT model loaded
# above and adds the resulting vectors to a flat inner-product FAISS index
# (moved to GPU 0 when CUDA is available).
import json

import faiss
import numpy as np
from tqdm import tqdm

# Load data
# Explicit encoding keeps decoding independent of the platform default.
with open("data/processed/combined.json", encoding="utf-8") as f:
    # assumes "train" holds a sliceable list of texts accepted by model.encode — TODO confirm
    data = json.load(f)["train"]

# Fail early with a clear message: with no batches, np.vstack would raise an
# opaque "need at least one array to concatenate" ValueError further down.
if not data:
    raise ValueError(
        'No entries under "train" in data/processed/combined.json; nothing to index'
    )

# Batch encoding
# Encode in slices of batch_size so tqdm can report progress per slice; the
# encoder's own progress bar is disabled to avoid nested bars.
batch_size = 512
embeddings = []
for i in tqdm(range(0, len(data), batch_size)):
    batch = data[i:i+batch_size]
    emb = model.encode(batch, show_progress_bar=False)
    embeddings.append(emb)

# FAISS expects a C-contiguous float32 matrix of shape (n_vectors, dimension).
embeddings = np.ascontiguousarray(np.vstack(embeddings), dtype=np.float32)
dimension = embeddings.shape[1]

# Create FAISS index
# NOTE(review): IndexFlatIP ranks by raw inner product, which equals cosine
# similarity only if the embeddings are unit-normalised — confirm whether the
# fine-tuned model normalises its output or whether that is intended.
index = faiss.IndexFlatIP(dimension)
if torch.cuda.is_available():
    # Move the (still empty) index to GPU device 0 before adding vectors.
    res = faiss.StandardGpuResources()
    index = faiss.index_cpu_to_gpu(res, 0, index)

index.add(embeddings)
print(f"Index built with {index.ntotal} vectors")

In [None]:
# 4. Save index
# Serialises the index built in the previous cell to indices/sbert_faiss.index
# (relative to the project root set during environment setup).
os.makedirs("indices", exist_ok=True)

# faiss.write_index needs a CPU index. The build cell only moves the index to
# the GPU when CUDA is available, so convert back under the same condition and
# write the index as-is otherwise (no reliance on the GPU-only conversion API
# on the CPU path).
if torch.cuda.is_available():
    cpu_index = faiss.index_gpu_to_cpu(index)
else:
    cpu_index = index

faiss.write_index(cpu_index, "indices/sbert_faiss.index")
print("Index saved to indices/sbert_faiss.index")