In [4]:
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List

import fitz
import networkx as nx
import numpy as np
from dotenv import load_dotenv
from langchain.document_loaders import  PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_nomic import NomicEmbeddings
from langchain_openai import ChatOpenAI
# from openai.error import RateLimitError

from knowledge_graph import KnowledgeGraph
from query_engine import QueryEngine

# Load environment variables (including NOMIC_API_KEY) from a local .env file.
load_dotenv()

api_key = os.getenv("NOMIC_API_KEY")

# The PDF location can be overridden with the RAG_PDF_PATH environment variable;
# when it is unset the original hard-coded location is used.
path = os.getenv(
    "RAG_PDF_PATH",
    "/home/name-1/AI-Agent/RAG_Project/RAG_Project/data/Understanding_Climate_Change.pdf",
)
loader = PyPDFLoader(path)
# Keep only the first 10 loaded Document objects to keep the experiment small.
documents = loader.load()[:10]

# DocumentProcessor's process_documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# Hand the key over explicitly so the dependency on NOMIC_API_KEY is visible here;
# a value of None leaves the library to fall back to its own lookup.
embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5", nomic_api_key=api_key)
# `splits` and `vector_store` are intentionally NOT built in this cell: the next
# cell builds both, and doing it here as well embedded every chunk twice.
graph = nx.Graph()

knowledge_graph = KnowledgeGraph()
# Chat model served by an OpenAI-compatible endpoint.
llm = ChatOpenAI(
    model="lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
    base_url="http://10.2.125.37:1234/v1",
    api_key="lm-studio",
)

In [5]:
# Cut the loaded documents into overlapping chunks, then embed the chunks
# into a FAISS index.
splits = text_splitter.split_documents(documents=documents)
vector_store = FAISS.from_documents(documents=splits, embedding=embeddings)

In [6]:
# Register one graph node per chunk, keyed by the chunk's position in `splits`,
# with the chunk text stored under the "content" node attribute.
graph.add_nodes_from(
    (idx, {"content": chunk.page_content}) for idx, chunk in enumerate(splits)
)

In [7]:
# Collect the raw text of every chunk and pass the whole list to the
# embedding model in a single embed_documents call.
texts = list(map(lambda chunk: chunk.page_content, splits))
create_embedding = embeddings.embed_documents(texts)

In [None]:
# Extract concepts/entities for every chunk concurrently and store the result on
# the graph node with the same index.
# ThreadPoolExecutor and as_completed come from concurrent.futures (imported in
# the notebook's import cell).
# NOTE(review): `tqdm` and `_extract_concepts_and_entities` are not imported or
# defined in any visible cell, so a fresh-kernel run still stops with NameError
# until they are. The helper presumably belongs to KnowledgeGraph — confirm
# before wiring it up.
with ThreadPoolExecutor() as executor:
    # Map each submitted future back to the index of the node it belongs to.
    future_to_node = {
        executor.submit(_extract_concepts_and_entities, split.page_content, llm): i
        for i, split in enumerate(splits)
    }

    progress = tqdm(
        as_completed(future_to_node),
        total=len(future_to_node),  # one future was submitted per chunk
        desc="Extracting concepts and entities",
    )
    for future in progress:
        node = future_to_node[future]
        # result() re-raises any exception raised inside the worker.
        concepts = future.result()
        graph.nodes[node]['concepts'] = concepts