In [1]:
from sentence_transformers import SentenceTransformer
from genshin_data import GenshinData
import nltk
import torch
import faiss
import numpy as np

In [2]:
# Build the project's GenshinData helper from the character and lore JSON files.
# NOTE(review): loading/parsing semantics live in genshin_data.py — not visible here.
genshin_data = GenshinData("characters_data.json", "lore_data.json")
# Collect every chunk the helper exposes; later cells embed and index these.
# (`get_all_chuncks` is spelled this way at the call site — presumably matching
# the method name in the project module; verify before renaming.)
chunks = genshin_data.get_all_chuncks()
# Last expression of the cell: shows how many chunks were produced.
len(chunks)

In [3]:
class FaissVectorDB:
    """Minimal in-memory vector store: a FAISS flat L2 index plus its source texts.

    ``documents[i]`` is the text whose embedding sits at row ``i`` of the
    index, so the two containers must always grow in lock-step.
    """

    def __init__(self, dimension):
        """Create an empty store for embedding vectors of length ``dimension``."""
        self.index = faiss.IndexFlatL2(dimension)
        self.documents = []

    def add_documents(self, texts, embeddings):
        """Append ``texts`` and their ``embeddings`` to the store.

        Args:
            texts: Iterable of documents, one per embedding row.
            embeddings: 2-D array-like of shape ``(len(texts), dimension)``.

        Raises:
            ValueError: If ``embeddings`` is not 2-D or its row count differs
                from the number of texts. Nothing is added in that case, so
                the index and ``documents`` never drift out of alignment.
        """
        texts = list(texts)
        embeddings = np.ascontiguousarray(embeddings, dtype="float32")

        # An empty batch would otherwise reach FAISS as a 1-D array and fail.
        if not texts and embeddings.size == 0:
            return

        if embeddings.ndim != 2 or embeddings.shape[0] != len(texts):
            raise ValueError(
                f"Expected a 2-D embeddings array with {len(texts)} rows, "
                f"got shape {embeddings.shape}"
            )

        self.index.add(embeddings)
        self.documents.extend(texts)
        print("Added documents to the FAISS index")

    def search(self, query_embedding, top_k=5):
        """Return the stored documents nearest to ``query_embedding``.

        Args:
            query_embedding: A single query vector (1-D, or already shaped
                ``(1, dimension)``).
            top_k: Maximum number of neighbours to return.

        Returns:
            ``(documents, distances)``: a list of matching texts and a NumPy
            array of the distances reported by the index, in the same order.
            Fewer than ``top_k`` entries are returned when the index holds
            fewer vectors.
        """
        # reshape(1, -1) accepts both a flat vector and a (1, d) row.
        query = np.ascontiguousarray(query_embedding, dtype="float32").reshape(1, -1)
        distances, indices = self.index.search(query, top_k)

        # FAISS pads missing neighbours with index -1; without this filter
        # documents[-1] would silently return the last document as a bogus hit.
        found = indices[0] >= 0
        hits = [self.documents[idx] for idx in indices[0][found]]
        return hits, distances[0][found]

In [5]:
# Sentence-embedding model shared by the corpus-indexing and query cells,
# so documents and queries are embedded by the same encoder.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [6]:
# Ask the model for its embedding width instead of hard-coding 384, so the
# index dimension stays correct if a different SentenceTransformer is loaded.
dimension = model.get_sentence_embedding_dimension()

# Rebuilt from scratch on every run of this cell, which keeps the cell idempotent.
faiss_db = FaissVectorDB(dimension)

# Encode the whole corpus in one batched call: the model handles batching
# internally and returns a single 2-D NumPy array, avoiding one forward pass
# per chunk and the round-trip through Python lists.
embeddings = model.encode(list(chunks))
faiss_db.add_documents(chunks, embeddings)

In [13]:
# Free-text question to look up in the indexed chunks.
query = "Give me story of Li Yue"
# Embed the query with the same model that embedded the corpus.
query_embedding = model.encode(query)
# Retrieve up to 10 nearest chunks and their distances from the index.
results, distances = faiss_db.search(query_embedding, top_k=10)

In [14]:
# Show each retrieved chunk with its 1-based rank and its distance to the query.
for rank, (result, distance) in enumerate(zip(results, distances), start=1):
    report = f"Rank {rank}\n{result}\nDistance: {distance}\n"
    print(report)