# Initialize Embedding Model

In [1]:
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
import torch
from langchain.schema import Document

# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the SBERT checkpoint and place it on the selected device.
# `model` is kept as a second name bound to the loaded instance.
model = SentenceTransformer('naufalihsan/indonesian-sbert-large')
sbert_model = model.to(device)

# Custom embeddings class for SBERT
class SBERTEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter around the module-level ``sbert_model``.

    Both methods return plain Python lists of floats, which is the format the
    ``Embeddings`` interface declares in its signatures.
    """

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed a batch of texts.

        Args:
            texts: The strings to embed.

        Returns:
            One embedding (a list of floats) per input text, in input order.
            An empty input yields an empty list without calling the model.
        """
        if not texts:
            return []
        vectors = sbert_model.encode(
            texts, convert_to_tensor=True, show_progress_bar=True
        )
        # The result only has to reach host memory for list conversion, so it
        # is moved to the CPU once, with no extra device transfer beforehand.
        return vectors.cpu().numpy().tolist()

    def embed_query(self, query: str) -> list[float]:
        """Embed a single query string.

        Args:
            query: The text to embed.

        Returns:
            The embedding of ``query`` as a list of floats.
        """
        vector = sbert_model.encode(query, convert_to_tensor=True)
        # Single move to the CPU, needed for the NumPy/list conversion.
        return vector.cpu().numpy().tolist()

# Create the SBERT embeddings adapter instance.
sbert_embeddings = SBERTEmbeddings()



  from .autonotebook import tqdm as notebook_tqdm


# Initialize Vector Store

In [2]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Embed a throwaway query once to discover the embedding width, then create
# an empty flat L2 FAISS index of that width.
_probe_vector = sbert_embeddings.embed_query("hello world")
embedding_dim = len(_probe_vector)
index = faiss.IndexFlatL2(embedding_dim)

# Wrap the empty index in a LangChain FAISS store backed by an in-memory
# docstore; the index-to-docstore-id mapping starts out empty.
vector_store = FAISS(
    index=index,
    embedding_function=sbert_embeddings,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)

# Load Document and Embed

In [4]:
import pickle
from langchain.vectorstores import FAISS
from langchain.schema import Document


# Pickle files to ingest; each one is expected to contain a list[Document].
list_of_docs = [
    'Data/Docs/docs3.pkl',
    'Data/Docs/docs4.pkl',
]

for doc_path in list_of_docs:
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file, so only load pickles that come from a trusted source.
    with open(doc_path, 'rb') as f:
        loaded_docs = pickle.load(f)

    # The file is closed before embedding starts, so the handle is not held
    # open during the slow encoding step. Empty files are skipped because
    # there is nothing to embed.
    if loaded_docs:
        vector_store.add_documents(loaded_docs)

# Persist the populated vector store once every file has been added.
vector_store.save_local("Embeddings")


Batches: 100%|██████████| 15/15 [00:23<00:00,  1.57s/it]
Batches: 100%|██████████| 3/3 [00:03<00:00,  1.15s/it]
