In [3]:
# استيراد المكتبات الأساسية
from langchain_community.document_loaders import TextLoader, WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.tools.retriever import create_retriever_tool
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.chat_models import ChatOllama
from langchain.agents import create_openai_tools_agent, AgentExecutor
from langchain import hub

# 🔹 **1. تحميل البيانات**
# يمكنك استبدال "your_file.txt" باسم الملف الذي تريد تلخيصه

from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("attention.pdf")

docs = loader.load()
# 🔹 **2. تقسيم النص إلى أجزاء مناسبة**
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_documents(docs)

# 🔹 **3. تحويل النصوص إلى تمثيلات رقمية باستخدام Embeddings**
embedding_model = OllamaEmbeddings(model="llama3.2:3b")

# 🔹 **4. تخزين البيانات في قاعدة بيانات متجهية (FAISS)**
vector_db = FAISS.from_documents(split_docs, embedding_model)

# 🔹 **5. إنشاء RAG Retriever**
retriever = vector_db.as_retriever()
retriever_tool = create_retriever_tool(
    retriever,
    "document_search",
    "Search for relevant information from the uploaded document."
)

# 🔹 **6. إعداد مصادر أخرى مثل Wikipedia و Arxiv**
wiki_api = WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=300)
wiki_tool = WikipediaQueryRun(api_wrapper=wiki_api)

arxiv_api = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv_tool = ArxivQueryRun(api_wrapper=arxiv_api)

# 🔹 **7. تجهيز Agent لاستخدام RAG والمصادر الخارجية**
tools = [retriever_tool, wiki_tool, arxiv_tool]

# استخدام LLM من Ollama
llm = ChatOllama(model="llama3.2:3b")

# تحميل الـ Prompt Template من LangChain
prompt = hub.pull("hwchase17/openai-functions-agent")

# إنشاء الـ Agent باستخدام RAG والمصادر الخارجية
agent = create_openai_tools_agent(llm, tools, prompt)

# 🔹 **8. تنفيذ الاستعلام باستخدام Agent و RAG**
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

response = agent_executor.invoke({"input": "Summarize the uploaded document and find relevant Wikipedia and Arxiv references."})

print("📌 **الإجابة النهائية:**")
print(response)


  embedding_model = OllamaEmbeddings(model="llama3.2:3b")
  llm = ChatOllama(model="llama3.2:3b")




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI don't see an uploaded document. This conversation has just started, and I'm ready to assist you with any questions or tasks you'd like to discuss. Could you please provide the document or information you'd like me to summarize?

If you'd like, we can also explore specific topics or areas of research that might be relevant to your needs. Just let me know what's on your mind![0m

[1m> Finished chain.[0m
📌 **الإجابة النهائية:**
{'input': 'Summarize the uploaded document and find relevant Wikipedia and Arxiv references.', 'output': "I don't see an uploaded document. This conversation has just started, and I'm ready to assist you with any questions or tasks you'd like to discuss. Could you please provide the document or information you'd like me to summarize?\n\nIf you'd like, we can also explore specific topics or areas of research that might be relevant to your needs. Just let me know what's on your mind!"}


In [8]:
# 🔹 تحديث استيراد مكتبة Ollama
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.tools.retriever import create_retriever_tool
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain.agents import create_openai_tools_agent, AgentExecutor
from langchain import hub

# ✅ **1. تحميل الملف النصي**
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("attention.pdf")
docs = loader.load()

# ✅ **2. تقسيم النص إلى أجزاء**
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_documents(docs)
print(f"🔹 عدد المستندات المقسمة: {len(split_docs)}")  # التحقق من وجود بيانات

# ✅ **3. تحويل النصوص إلى Embeddings وتخزينها**
embedding_model = OllamaEmbeddings(model="llama3.2:3b")
vector_db = FAISS.from_documents(split_docs, embedding_model)

# ✅ **4. إنشاء RAG Retriever**
retriever = vector_db.as_retriever()
retriever_tool = create_retriever_tool(
    retriever,
    "document_search",
    "Search for relevant information from the uploaded document."
)

# ✅ **5. إضافة Wikipedia و Arxiv كمصادر إضافية**
wiki_api = WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=300)
wiki_tool = WikipediaQueryRun(api_wrapper=wiki_api)

arxiv_api = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv_tool = ArxivQueryRun(api_wrapper=arxiv_api)

# ✅ **6. تجهيز Agent**
tools = [retriever_tool, wiki_tool, arxiv_tool]
llm = ChatOllama(model="llama3.2:3b")
prompt = hub.pull("hwchase17/openai-functions-agent")
agent = create_openai_tools_agent(llm, tools, prompt)

# ✅ **7. اختبار الاسترجاع من قاعدة البيانات قبل تشغيل Agent**
retrieval_test = retriever.invoke("summarize")
print("🔹 اختبار استرجاع البيانات من RAG:", retrieval_test)

# ✅ **8. تنفيذ الاستعلام باستخدام Agent**
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

response = agent_executor.invoke({"input": "Summarize the uploaded document and find relevant Wikipedia and Arxiv references."})

print("📌 **الإجابة النهائية:**")
print(response)


🔹 عدد المستندات المقسمة: 52




🔹 اختبار استرجاع البيانات من RAG: [Document(metadata={'source': 'attention.pdf', 'page': 10}, page_content='2017.\n[19] Yoon Kim, Carl Denton, Luong Hoang, and Alexander M. Rush. Structured attention networks.\nIn International Conference on Learning Representations, 2017.\n[20] Diederik Kingma and Jimmy Ba. Adam: A method for stochastic optimization. In ICLR, 2015.\n[21] Oleksii Kuchaiev and Boris Ginsburg. Factorization tricks for LSTM networks. arXiv preprint\narXiv:1703.10722, 2017.\n[22] Zhouhan Lin, Minwei Feng, Cicero Nogueira dos Santos, Mo Yu, Bing Xiang, Bowen\nZhou, and Yoshua Bengio. A structured self-attentive sentence embedding. arXiv preprint\narXiv:1703.03130, 2017.\n[23] Minh-Thang Luong, Quoc V . Le, Ilya Sutskever, Oriol Vinyals, and Lukasz Kaiser. Multi-task\nsequence to sequence learning. arXiv preprint arXiv:1511.06114, 2015.\n[24] Minh-Thang Luong, Hieu Pham, and Christopher D Manning. Effective approaches to attention-\nbased neural machine translation. arXiv pr

In [30]:
import pandas as pd
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import create_openai_tools_agent, AgentExecutor
from langchain import hub
from Main import analysis_data
# ✅ **1. تحميل ملف CSV وتحويله إلى نصوص**
file_path = "Regions.csv"  # ضع اسم ملفك هنا
df = pd.read_csv(file_path)  # قراءة ملف CSV

# ✅ **2. تحويل كل صف إلى نص**
documents = []
for index, row in df.iterrows():
    text = " | ".join([f"{col}: {str(row[col])}" for col in df.columns])  # تحويل كل صف إلى نص
    documents.append(text)

# ✅ **3. تقسيم البيانات إلى أجزاء صغيرة**
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_text("\n".join(documents))

# ✅ **4. تحويل البيانات إلى Embeddings وتخزينها**
embedding_model = OllamaEmbeddings(model="llama3.2:3b")
vector_db = FAISS.from_texts(split_docs, embedding_model)

# ✅ **5. إنشاء RAG Retriever**
retriever = vector_db.as_retriever()
retriever_tool = create_retriever_tool(
    retriever,
    "data_analysis",
    "Analyze CSV data and provide insights."
)

# ✅ **6. تجهيز Agent**
tools = [retriever_tool]
llm = ChatOllama(model="llama3.2:3b")
prompt = hub.pull("hwchase17/openai-functions-agent")
agent = create_openai_tools_agent(llm, tools, prompt)

# ✅ **7. تنفيذ الاستعلام باستخدام Agent**
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

response = agent_executor.invoke({"input": "Analyze the dataset "})

response_text = response["output"]  # استخراج النص الصحيح من القاموس
print("\n📌 **الإجابة النهائية:**\n")
for line in response_text.split("\n"):
    print(line.strip()) 






[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `data_analysis` with `{'query': ''}`


[0m[36;1m[1;3mregion_id: 70 | sales_district: Salem | sales_region: North West
region_id: 71 | sales_district: Salem | sales_region: North West
region_id: 72 | sales_district: Salem | sales_region: North West
region_id: 73 | sales_district: Seattle | sales_region: North West
region_id: 74 | sales_district: Seattle | sales_region: North West
region_id: 75 | sales_district: Seattle | sales_region: North West
region_id: 76 | sales_district: Bremerton | sales_region: North West
region_id: 77 | sales_district: Bremerton | sales_region: North West
region_id: 78 | sales_district: Bellingham | sales_region: North West
region_id: 79 | sales_district: Bellingham | sales_region: North West
region_id: 80 | sales_district: Bellingham | sales_region: North West
region_id: 81 | sales_district: Seattle | sales_region: North West
region_id: 82 | sales_district: Seattle | sales_region: North

In [None]:
from langchain_community.tools from langchain_community.tools import WikipediaQueryRun  ## execute a query and retrieve the results
from langchain_community.utilities import WikipediaAPIWrapper   ## set up the connection or configuration for interacting with Wikipedia.import WikipediaQueryRun  ## execute a query and retrieve the results
from langchain_community.utilities import WikipediaAPIWrapper   ## set up the connection or configuration for interacting with Wikipedia.

In [None]:
api_wrapper=WikipediaAPIWrapper(top_k_results=3, doc_content_chars_max=500)  #يجيب اول تلت نتائج  وميزيدش عن 500 حرف
wiki=WikipediaQueryRun(api_wrapper=api_wrapper)

In [None]:
wiki.name

'wikipedia'

In [None]:
#load
#chunk 
#embedding
#vector db
#retriever 
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load documents from the web
load_web = WebBaseLoader("https://docs.smith.langchain.com/")
docs_web = load_web.load()

# Initialize the text splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Split the documents into chunks
split_web = text_splitter.split_documents(docs_web)

# Initialize the embedding model
embedding_model = OllamaEmbeddings(model="llama3.2:3b")

# Initialize the vector database
vector_db = FAISS.from_documents(split_web, embedding_model)


# Initialize the retriever
retriever_web = vector_db.as_retriever()

retriever_web

USER_AGENT environment variable not set, consider setting it to identify your requests.
  embedding_model = OllamaEmbeddings(model="llama3.2:3b")


VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000028BD175D010>, search_kwargs={})

In [None]:
# Create a retriever tool for the vector database
from langchain.tools.retriever import create_retriever_tool         ## used to search for information in a vector database or other data source

# Create the retriever tool with the vector database and a description
retriever_web_tool = create_retriever_tool(retriever_web,
                    "langsmith_search",  ## used when integrating the tool into an agent or pipeline 
                    "Search for information about LangSmith.For any questions about LangSmith, you must use this tool!")

retriever_web_tool.name

'langsmith_search'

In [None]:
# Arxiv Tool and Utilities for Creating and Reading Applications
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun   ##  query the Arxiv database and retrieve relevant information

# Set up the connection to Arxiv API
arxiv_api = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)

# Create an ArxivQueryRun tool
arxiv_query_run = ArxivQueryRun(arxiv_api=arxiv_api)

arxiv_query_run.name

'arxiv'

In [None]:
# Define the tools list
tools = [wiki, retriever_web_tool, arxiv_query_run]
tools

[WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\Users\\oa196\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=3, lang='en', load_all_available_meta=False, doc_content_chars_max=500)),
 Tool(name='langsmith_search', description='Search for information about LangSmith.For any questions about LangSmith, you must use this tool!', args_schema=<class 'langchain_core.tools.retriever.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x0000028BBA2BB420>, retriever=VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000028BD175D010>, search_kwargs={}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), document_separator='\n\n'), coroutine=functools.partial(<function _aget_relevant_documents at 0x0000028BBA5A6FC0>,

In [None]:
# Initialize the Ollama LLM
from langchain_community.llms import Ollama

llm = Ollama(model="llama3.2:3b")

llm

  llm = Ollama(model="llama3.2:3b")


Ollama(model='llama3.2:3b')

In [None]:
from langchain import hub

# Get the prompt to use - you can modify this!
prompt = hub.pull("hwchase17/openai-functions-agent")

prompt.messages



[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful assistant'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [None]:
## agent
from langchain.agents import create_openai_tools_agent
agent=create_openai_tools_agent(llm,tools,prompt)

In [None]:
## Agent Executer
from langchain.agents import AgentExecutor
agent_executor=AgentExecutor(agent=agent,tools=tools,verbose=True)
agent_executor

AgentExecutor(verbose=True, agent=RunnableMultiActionAgent(runnable=RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag

In [None]:
response = agent_executor.invoke({"input": "Tell me about LangSmith"})
response



[1m> Entering new AgentExecutor chain...[0m


ValueError: This output parser only works on ChatGeneration output

In [None]:
# used ollama
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings  # Use local embeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun
from langchain.agents import create_openai_tools_agent
from langchain.agents import AgentExecutor
from langchain.tools.retriever import create_retriever_tool
from langchain import hub
from langchain_community.chat_models import ChatOllama  # Use Ollama instead of OpenAI

# Set up Wikipedia search
api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
wiki = WikipediaQueryRun(api_wrapper=api_wrapper)

# Load and process LangSmith documentation
loader = WebBaseLoader("https://docs.smith.langchain.com/")
docs = loader.load()

# Initialize the text splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Split the documents into chunks
split_web = text_splitter.split_documents(docs_web)

# Initialize the embedding model
embedding_model = OllamaEmbeddings(model="llama3.2:3b")

# Initialize the vector database
vector_db = FAISS.from_documents(split_web, embedding_model)

# Initialize the retriever
retriever_web = vector_db.as_retriever()

retriever_tool = create_retriever_tool(
    retriever_web,
    "langsmith_search",
    "Search for information about LangSmith. For any questions about LangSmith, you must use this tool!"
)

# Set up Arxiv search
arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv = ArxivQueryRun(api_wrapper=arxiv_wrapper)

# Define tools
tools = [wiki, arxiv, retriever_tool]

# Replace OpenAI with Ollama
llm = ChatOllama(model="llama3.2:3b")  # Change to your desired Ollama model

# Get the prompt template
prompt = hub.pull("hwchase17/openai-functions-agent")

# Create the agent
agent = create_openai_tools_agent(llm, tools, prompt)
agent_executor=AgentExecutor(agent=agent,tools=tools,verbose=True)
response = agent_executor.invoke({"input": "Tell me about Langsmith"})
print(response)





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLangsmith is an American poet, songwriter, and satirist. He was born in 1961 in Philadelphia, Pennsylvania.

Langsmith is best known for his humorous and insightful poetry, which often focuses on politics, social issues, and pop culture. His work has been widely published and has appeared in various media outlets, including The New Yorker, The Nation, and NPR.

Some of Langsmith's notable works include:

1. "The Poet Laureate of America's Grown-Up Children": This is a satirical poem that pokes fun at the role of poetry in American culture.
2. "America's Favorite Pastime: Football": This poem explores the cultural significance of football in America and the ways in which it shapes our society.
3. "The Great Unraveling": This poem examines the complexities of politics and social change, using humor to highlight the absurdities of modern life.

Langsmith has also written several books of poetry, including " Langsmith's America: 

In [3]:
import fitz  # PyMuPDF for PDF processing
import pandas as pd
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain.agents import create_openai_tools_agent, AgentExecutor
from langchain.tools.retriever import create_retriever_tool

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = "\n".join([page.get_text("text") for page in doc])
    return text

# Load and process PDF
pdf_path = "storying.pdf"  # Change to your PDF file
pdf_text = extract_text_from_pdf(pdf_path)

# Split PDF text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
pdf_chunks = text_splitter.create_documents([pdf_text])

# Initialize the embedding model
embedding_model = HuggingFaceEmbeddings()

# Store PDF knowledge in a vector database
vector_db = FAISS.from_documents(pdf_chunks, embedding_model)
retriever = vector_db.as_retriever()
retriever_tool = create_retriever_tool(retriever, "pdf_guidelines", "Retrieve insights from the PDF guidelines.")

# Load CSV
def load_csv(csv_path):
    return pd.read_csv(csv_path)

csv_path = "Regions.csv"  # Change to your CSV file
df = load_csv(csv_path)

# Set up AI model
llm = ChatOllama(model="llama3.2:3b")

# Define agent
tools = [retriever_tool]
prompt = "Analyze the CSV file based on PDF guidelines."
agent = create_openai_tools_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Run analysis
response = agent_executor.invoke({"input": "Analyze the provided CSV file using the PDF instructions."})
print(response)


  embedding_model = HuggingFaceEmbeddings()
  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
  llm = ChatOllama(model="llama3.2:3b")


AttributeError: 'str' object has no attribute 'input_variables'