In [6]:
import os
import glob
from dotenv import load_dotenv

In [2]:
# imports for langchain

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
# from langchain_chroma import Chroma
from langchain.vectorstores import FAISS
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [3]:
# imports for langchain, plotly and Chroma

from langchain.prompts import ChatPromptTemplate
from langchain_community.llms.ollama import Ollama
from langchain_community.vectorstores import Chroma

In [4]:
#MODEL = Ollama(model="llama3.2")
MODEL = "gpt-4o-mini"
db_name = "vector-database"

In [7]:
folders = glob.glob("example/*")

In [8]:
text_loader_kwargs = {'encoding': 'utf-8'}

In [40]:
import jsonlines
with jsonlines.open("articles-Copy1.json","r") as documents:
    print(next(iter(documents)))

{'url': 'https://arxiv.org/pdf/2405.10825', 'title': 'Large Language Model (LLM) for Telecommunications: A Comprehensive Survey on Principles, Key Techniques, and Opportunities', 'time': '2023-10-17T14:00:32.000Z', 'tags': ['Large Language Model', 'Telecom', 'Article', '5G'], 'authors': ['Hao Zhou', 'Chengming Hu', 'Ye Yuan', 'Yufei Cui', 'Yili Jin']}


In [41]:
from langchain.document_loaders import PyPDFLoader
from langchain_core.documents import Document
folders = glob.glob("example/*")

In [58]:
langchain_documents = []

with jsonlines.open("articles-Copy1.json", "r") as metadata_file:
    for file_path, metadata in zip(folders, metadata_file):
        try:
            pdf_loader = PyPDFLoader(file_path)
            pdf_documents = pdf_loader.load()

            for pdf_doc in pdf_documents:
            # Metadata'yı dökümana ekle
                pdf_doc.metadata.update({
                    "url": metadata.get("url"),
                    "tags":", ".join(metadata.get("tags")),
                    "title": metadata.get("title"),
                    "authors":", ".join(metadata.get("authors"))
                }
                )

            # Dökümanı listeye ekle
                langchain_documents.append( Document(
                        page_content=pdf_doc.page_content,
                        metadata=pdf_doc.metadata
                    )
                )
        except Exception as e:
            print(f"Error loading file {file_path}: {e}")

            


In [59]:
len(langchain_documents), langchain_documents[:1]

(29,
 [Document(metadata={'source': 'example/5G Core Network_Study Paper_v8.pdf', 'page': 0, 'url': 'https://arxiv.org/pdf/2405.10825', 'tags': 'Large Language Model, Telecom, Article, 5G', 'title': 'Large Language Model (LLM) for Telecommunications: A Comprehensive Survey on Principles, Key Techniques, and Opportunities', 'authors': 'Hao Zhou, Chengming Hu, Ye Yuan, Yufei Cui, Yili Jin'}, page_content='MOBILE  DIVISION  \n \n \n \n \n \n \n \n \n \n \n \n \n \nSTUDY PAPER ON \n \n5G CORE NETWORK \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n©TEC \n \n \n \n \nTELECOMMUNICATION ENGINEERING CENTRE \nKHURSHID LAL BHAWAN, JANPATH \nNEW DELHI - 110001 \nINDIA \n \n ')])

In [60]:
from langchain.vectorstores import Qdrant
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

In [61]:
'''from langchain_ollama import OllamaEmbeddings

# Load environment variables in a file called .env

load_dotenv()
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
embeddings = OpenAIEmbeddings()
'''

"from langchain_ollama import OllamaEmbeddings\n\n# Load environment variables in a file called .env\n\nload_dotenv()\nos.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\nembeddings = OpenAIEmbeddings()\n"

In [62]:
#text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 200, chunk_overlap = 20, separators=[])
chunks = text_splitter.split_documents(langchain_documents)
len(chunks)

366

In [63]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3.2")

if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)

print(f"Vectorstore created with {vectorstore._collection.count()} documents")
#vectorstore = FAISS.from_documents(chunks, embedding=embeddings)
# Convert loaded documents into strings by concatenating their content
# and ignoring metadata

"""def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

chain = {"docs": format_docs} | prompt | model | StrOutputParser()

question = "Summarize this document"

docs = vectorstore.similarity_search(question)

chain.invoke(docs)
"""



Vectorstore created with 366 documents


'def format_docs(docs):\n    return "\n\n".join(doc.page_content for doc in docs)\n\nchain = {"docs": format_docs} | prompt | model | StrOutputParser()\n\nquestion = "Summarize this document"\n\ndocs = vectorstore.similarity_search(question)\n\nchain.invoke(docs)\n'

In [64]:
# Get one vector and find how many dimensions it has

collection = vectorstore._collection
sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
dimensions = len(sample_embedding)
print(f"The vectors have {dimensions:,} dimensions")
retriever = vectorstore.as_retriever()

The vectors have 3,072 dimensions


In [65]:
from langchain_ollama import ChatOllama

In [66]:
"""llm = ChatOpenAI(temperature=0.7, model_name=MODEL)
retriever = vectorstore.as_retriever
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)
"""
from langchain_ollama import OllamaLLM
from langchain_core.output_parsers import JsonOutputParser
from langchain.prompts import PromptTemplate

#PROMPT_TEMPLATE = """
#Answer the question based only on the following context:

#{context}

#---

#Answer the question based on the above context: {question}
#"""
#query_text = "What is Time Sensitive Networking architecture"
#results = vectorstore.similarity_search_with_score(query_text,k = 9) # The most important part
retriever = vectorstore.as_retriever()
#context_text = "\n\n\n\n".join([doc.page_content for doc, _score in results])
llm = ChatOllama(model="llama3.2", format="json", temperature=0)
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance 
    of a retrieved document to a user question. If the document contains keywords related to the user question, 
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a float number score  between 0 to 1 to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.
     <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "document"],
)
retrieval_grader = prompt | llm | JsonOutputParser()
question = "What is Network Function?"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))
#prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
#prompt = prompt_template.format(context = context_text,question=query_text)



#response = model.invoke(prompt)
#docs = retriever.invoke(question)

"""
model = "ollama-3.2"
# create a new Chat 
llm = ChatOllama(temperature=0.7, model_name=MODEL)

# set up the conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever()

# putting it together: set up the conversation chain with the GPT 4o-mini LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)
"""


{'score': 0.2}


'\nmodel = "ollama-3.2"\n# create a new Chat \nllm = ChatOllama(temperature=0.7, model_name=MODEL)\n\n# set up the conversation memory for the chat\nmemory = ConversationBufferMemory(memory_key=\'chat_history\', return_messages=True)\n\n# the retriever is an abstraction over the VectorStore that will be used during RAG\nretriever = vectorstore.as_retriever()\n\n# putting it together: set up the conversation chain with the GPT 4o-mini LLM, the vector store and memory\nconversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)\n'

In [29]:
response

'There is no mention of "Time Sensitive Networking" in the provided context. The context only discusses 4G, NF, VNF, Network Function, Service-Based Interfaces, Management & Maintenance, Load analytics, Non-Public Network (NPN), mobile edge computing, and spectrum usage in non-standalone mode, but it does not mention Time Sensitive Networking.'

In [46]:
"""store = Qdrant.from_documents(
    langchain_documents,
    embeddings,
    path="/tmp/ai_qdrant",
    collection_name="AI-Embeddings",
)
"""

In [25]:
vectorstore.similarity_search_with_score(
    query="AI and authors",
    k=1
)

[(Document(metadata={'authors': 'Hao Zhou, Chengming Hu, Ye Yuan, Yufei Cui, Yili Jin', 'page': 7, 'source': 'example/Rapor-RAGEvaluation.pdf', 'tags': 'Large Language Model, Telecom, Article, 5G', 'title': 'Large Language Model (LLM) for Telecommunications: A Comprehensive Survey on Principles, Key Techniques, and Opportunities', 'url': 'https://arxiv.org/pdf/2405.10825'}, page_content='Step 2: For each GT Statement, check if it’s can be attributed to the retrieved \ncontext.\nStatement 1: No\nStatement 2: Yes\nStep 3: Use the formula to compute the metric:\nContext recall : 1/2 = 0.5\nRAG EVALUATION TOOLS\nRAGAS\nRagas provides a set of evaluation metrics that can be used to measure the performance \nof your LLM application. These metrics are designed to help you objectively measure the \nperformance of your application. Metrics are available for different applications and tasks, \nsuch as RAG and Agentic workflows.\nEach metric are essentially paradigms that are designed to evaluate

In [90]:
vectorstore.similarity_search_with_score(
    query="authors",
    filter={"authors": "Yili Jin"},
    k=1
)  

[]

In [1]:
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

llm = Ollama(
    model="llama3.2",
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
) 

  llm = Ollama(
  llm = Ollama(


In [50]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.prompts import PromptTemplate

In [51]:
def retrieval_chain_with_filter(llm, filter={}):
    template = """You are a bot that answers user questions using only the context provided.
    If you don't know the answer, simply state that you don't know.
    {context}
    Question: {input}"""

    prompt = PromptTemplate(template=template, input_variables=["context", "input"])
    retriever = store.as_retriever(search_kwargs={'filter': filter})
    llm_with_prompt = create_stuff_documents_chain(llm, prompt)
    return create_retrieval_chain(retriever, llm_with_prompt)  

In [None]:
result = retrieval_chain_with_filter(llm).invoke({
    "input": ""
})

In [54]:
!pip uninstall fastembed

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Found existing installation: fastembed 0.4.2
Uninstalling fastembed-0.4.2:
  Would remove:
    /home/yasar/anaconda3/envs/llms/lib/python3.11/site-packages/fastembed-0.4.2.dist-info/*
    /home/yasar/anaconda3/envs/llms/lib/python3.11/site-packages/fastembed/*
Proceed (Y/n)? ^C
[31mERROR: Operation cancelled by user[0m[31m
[0m

In [None]:
%pip install transformers --quiet
%pip install intel-extension-for-transformers

# Intel Weight-Only Quantization (Vector Compressing)
## Weight-Only Quantization for Huggingface Models with Intel Extension for Transformers Pipelines


In [116]:
from intel_extension_for_transformers.transformers import WeightOnlyQuantConfig
from langchain_community.llms.weight_only_quantization import WeightOnlyQuantPipeline

conf = WeightOnlyQuantConfig(weight_dtype="nf4")
hf = WeightOnlyQuantPipeline.from_model_id(
    model_id="google/flan-t5-large",
    task="text2text-generation",
    quantization_config=conf,
    pipeline_kwargs={"max_new_tokens": 10},
)

ModuleNotFoundError: No module named 'neural_compressor.conf'

In [None]:
from intel_extension_for_transformers.transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer, pipeline


model_id = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
pipe = pipeline(
    "text2text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10
)
hf = WeightOnlyQuantPipeline(pipeline=pipe)

# RAG CACHING

In [None]:
langchain-mongodb: Python package to use MongoDB as a vector store, semantic cache, chat history store etc. in LangChain

In [None]:
from langchain_core.prompts import PromptTemplate

template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)

chain = prompt | hf

question = "What is electroencephalography?"

print(chain.invoke({"question": question}))

In [10]:
# Load environment variables in a file called .env

load_dotenv()
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')

In [11]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI

question = "What are the approaches to Task Decomposition?"
llm = ChatOpenAI(temperature=0)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vectordb.as_retriever(), llm=llm
)

NameError: name 'vectordb' is not defined