In [3]:
# استيراد المكتبات الأساسية
from langchain_community.document_loaders import TextLoader, WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.tools.retriever import create_retriever_tool
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.chat_models import ChatOllama
from langchain.agents import create_openai_tools_agent, AgentExecutor
from langchain import hub

# 🔹 **1. تحميل البيانات**
# يمكنك استبدال "your_file.txt" باسم الملف الذي تريد تلخيصه

from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("attention.pdf")

docs = loader.load()
# 🔹 **2. تقسيم النص إلى أجزاء مناسبة**
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_documents(docs)

# 🔹 **3. تحويل النصوص إلى تمثيلات رقمية باستخدام Embeddings**
embedding_model = OllamaEmbeddings(model="llama3.2:3b")

# 🔹 **4. تخزين البيانات في قاعدة بيانات متجهية (FAISS)**
vector_db = FAISS.from_documents(split_docs, embedding_model)

# 🔹 **5. إنشاء RAG Retriever**
retriever = vector_db.as_retriever()
retriever_tool = create_retriever_tool(
    retriever,
    "document_search",
    "Search for relevant information from the uploaded document."
)

# 🔹 **6. إعداد مصادر أخرى مثل Wikipedia و Arxiv**
wiki_api = WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=300)
wiki_tool = WikipediaQueryRun(api_wrapper=wiki_api)

arxiv_api = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv_tool = ArxivQueryRun(api_wrapper=arxiv_api)

# 🔹 **7. تجهيز Agent لاستخدام RAG والمصادر الخارجية**
tools = [retriever_tool, wiki_tool, arxiv_tool]

# استخدام LLM من Ollama
llm = ChatOllama(model="llama3.2:3b")

# تحميل الـ Prompt Template من LangChain
prompt = hub.pull("hwchase17/openai-functions-agent")

# إنشاء الـ Agent باستخدام RAG والمصادر الخارجية
agent = create_openai_tools_agent(llm, tools, prompt)

# 🔹 **8. تنفيذ الاستعلام باستخدام Agent و RAG**
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

response = agent_executor.invoke({"input": "Summarize the uploaded document and find relevant Wikipedia and Arxiv references."})

print("📌 **الإجابة النهائية:**")
print(response)


  embedding_model = OllamaEmbeddings(model="llama3.2:3b")
  llm = ChatOllama(model="llama3.2:3b")




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI don't see an uploaded document. This conversation has just started, and I'm ready to assist you with any questions or tasks you'd like to discuss. Could you please provide the document or information you'd like me to summarize?

If you'd like, we can also explore specific topics or areas of research that might be relevant to your needs. Just let me know what's on your mind![0m

[1m> Finished chain.[0m
📌 **الإجابة النهائية:**
{'input': 'Summarize the uploaded document and find relevant Wikipedia and Arxiv references.', 'output': "I don't see an uploaded document. This conversation has just started, and I'm ready to assist you with any questions or tasks you'd like to discuss. Could you please provide the document or information you'd like me to summarize?\n\nIf you'd like, we can also explore specific topics or areas of research that might be relevant to your needs. Just let me know what's on your mind!"}


In [8]:
# 🔹 تحديث استيراد مكتبة Ollama
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.tools.retriever import create_retriever_tool
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain.agents import create_openai_tools_agent, AgentExecutor
from langchain import hub

# ✅ **1. تحميل الملف النصي**
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("attention.pdf")
docs = loader.load()

# ✅ **2. تقسيم النص إلى أجزاء**
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_documents(docs)
print(f"🔹 عدد المستندات المقسمة: {len(split_docs)}")  # التحقق من وجود بيانات

# ✅ **3. تحويل النصوص إلى Embeddings وتخزينها**
embedding_model = OllamaEmbeddings(model="llama3.2:3b")
vector_db = FAISS.from_documents(split_docs, embedding_model)

# ✅ **4. إنشاء RAG Retriever**
retriever = vector_db.as_retriever()
retriever_tool = create_retriever_tool(
    retriever,
    "document_search",
    "Search for relevant information from the uploaded document."
)

# ✅ **5. إضافة Wikipedia و Arxiv كمصادر إضافية**
wiki_api = WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=300)
wiki_tool = WikipediaQueryRun(api_wrapper=wiki_api)

arxiv_api = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv_tool = ArxivQueryRun(api_wrapper=arxiv_api)

# ✅ **6. تجهيز Agent**
tools = [retriever_tool, wiki_tool, arxiv_tool]
llm = ChatOllama(model="llama3.2:3b")
prompt = hub.pull("hwchase17/openai-functions-agent")
agent = create_openai_tools_agent(llm, tools, prompt)

# ✅ **7. اختبار الاسترجاع من قاعدة البيانات قبل تشغيل Agent**
retrieval_test = retriever.invoke("summarize")
print("🔹 اختبار استرجاع البيانات من RAG:", retrieval_test)

# ✅ **8. تنفيذ الاستعلام باستخدام Agent**
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

response = agent_executor.invoke({"input": "Summarize the uploaded document and find relevant Wikipedia and Arxiv references."})

print("📌 **الإجابة النهائية:**")
print(response)


🔹 عدد المستندات المقسمة: 52




🔹 اختبار استرجاع البيانات من RAG: [Document(metadata={'source': 'attention.pdf', 'page': 10}, page_content='2017.\n[19] Yoon Kim, Carl Denton, Luong Hoang, and Alexander M. Rush. Structured attention networks.\nIn International Conference on Learning Representations, 2017.\n[20] Diederik Kingma and Jimmy Ba. Adam: A method for stochastic optimization. In ICLR, 2015.\n[21] Oleksii Kuchaiev and Boris Ginsburg. Factorization tricks for LSTM networks. arXiv preprint\narXiv:1703.10722, 2017.\n[22] Zhouhan Lin, Minwei Feng, Cicero Nogueira dos Santos, Mo Yu, Bing Xiang, Bowen\nZhou, and Yoshua Bengio. A structured self-attentive sentence embedding. arXiv preprint\narXiv:1703.03130, 2017.\n[23] Minh-Thang Luong, Quoc V . Le, Ilya Sutskever, Oriol Vinyals, and Lukasz Kaiser. Multi-task\nsequence to sequence learning. arXiv preprint arXiv:1511.06114, 2015.\n[24] Minh-Thang Luong, Hieu Pham, and Christopher D Manning. Effective approaches to attention-\nbased neural machine translation. arXiv pr

In [30]:
import pandas as pd
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import create_openai_tools_agent, AgentExecutor
from langchain import hub
from Main import analysis_data
# ✅ **1. تحميل ملف CSV وتحويله إلى نصوص**
file_path = "Regions.csv"  # ضع اسم ملفك هنا
df = pd.read_csv(file_path)  # قراءة ملف CSV

# ✅ **2. تحويل كل صف إلى نص**
documents = []
for index, row in df.iterrows():
    text = " | ".join([f"{col}: {str(row[col])}" for col in df.columns])  # تحويل كل صف إلى نص
    documents.append(text)

# ✅ **3. تقسيم البيانات إلى أجزاء صغيرة**
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_text("\n".join(documents))

# ✅ **4. تحويل البيانات إلى Embeddings وتخزينها**
embedding_model = OllamaEmbeddings(model="llama3.2:3b")
vector_db = FAISS.from_texts(split_docs, embedding_model)

# ✅ **5. إنشاء RAG Retriever**
retriever = vector_db.as_retriever()
retriever_tool = create_retriever_tool(
    retriever,
    "data_analysis",
    "Analyze CSV data and provide insights."
)

# ✅ **6. تجهيز Agent**
tools = [retriever_tool]
llm = ChatOllama(model="llama3.2:3b")
prompt = hub.pull("hwchase17/openai-functions-agent")
agent = create_openai_tools_agent(llm, tools, prompt)

# ✅ **7. تنفيذ الاستعلام باستخدام Agent**
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

response = agent_executor.invoke({"input": "Analyze the dataset "})

response_text = response["output"]  # استخراج النص الصحيح من القاموس
print("\n📌 **الإجابة النهائية:**\n")
for line in response_text.split("\n"):
    print(line.strip()) 






[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `data_analysis` with `{'query': ''}`


[0m[36;1m[1;3mregion_id: 70 | sales_district: Salem | sales_region: North West
region_id: 71 | sales_district: Salem | sales_region: North West
region_id: 72 | sales_district: Salem | sales_region: North West
region_id: 73 | sales_district: Seattle | sales_region: North West
region_id: 74 | sales_district: Seattle | sales_region: North West
region_id: 75 | sales_district: Seattle | sales_region: North West
region_id: 76 | sales_district: Bremerton | sales_region: North West
region_id: 77 | sales_district: Bremerton | sales_region: North West
region_id: 78 | sales_district: Bellingham | sales_region: North West
region_id: 79 | sales_district: Bellingham | sales_region: North West
region_id: 80 | sales_district: Bellingham | sales_region: North West
region_id: 81 | sales_district: Seattle | sales_region: North West
region_id: 82 | sales_district: Seattle | sales_region: North