In [1]:
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import Ollama
import pandas as pd
import fitz  # PyMuPDF for reading PDF files
import os  # For file path checking
from DataAnalyzer import DataAnalyzer  # استيراد DataAnalyzer

# Define FAISS database path
FAISS_DB_PATH = "faiss_index"

# 1️⃣ Load rules from PDF memory
def load_analysis_rules_from_memory(pdf_content):
    """Extract the text of every page of an in-memory PDF.

    Args:
        pdf_content: Raw bytes of a PDF file (passed to PyMuPDF as a stream).

    Returns:
        A list with one LangChain ``Document`` per PDF page, in page order,
        whose ``page_content`` is the text returned by ``page.get_text()``.
    """
    # Open through a context manager so the PyMuPDF document is closed as
    # soon as the page text has been extracted, even if extraction fails.
    with fitz.open(stream=pdf_content, filetype="pdf") as doc:
        return [Document(page_content=page.get_text()) for page in doc]

# 2️⃣ Train RAG system with FAISS
def train_rag_system(documents):
    """Build a RetrievalQA chain on top of a FAISS index, reusing a saved index.

    If a directory exists at ``FAISS_DB_PATH`` the index stored there is
    loaded and ``documents`` is ignored; otherwise ``documents`` are split,
    embedded, indexed and the index is saved to ``FAISS_DB_PATH``.

    Args:
        documents: LangChain ``Document`` objects to index when no saved
            index is available.

    Returns:
        A ``(retrievalQA, llm)`` tuple: the RetrievalQA chain (configured to
        return its source documents) and the Ollama LLM it uses.

    Raises:
        ValueError: If a new index has to be built but splitting
            ``documents`` produces no text chunks (for example a PDF with
            no extractable text).
    """
    embedding_model = OllamaEmbeddings(model="llama2")

    if os.path.exists(FAISS_DB_PATH):
        print("\n🔄 Loading existing FAISS index...")
        # SECURITY NOTE: this flag opts in to pickle-based deserialization of
        # the stored index. Only load an index that this script created
        # itself; never point FAISS_DB_PATH at files from an untrusted source.
        vector_db = FAISS.load_local(
            FAISS_DB_PATH,
            embedding_model,
            allow_dangerous_deserialization=True,
        )
    else:
        print("\n🛠️ Generating new embeddings and saving FAISS index...")
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
        texts = text_splitter.split_documents(documents)
        # Fail with a clear message instead of an opaque error from deep
        # inside FAISS when there is nothing to embed.
        if not texts:
            raise ValueError(
                "Cannot build the FAISS index: the supplied documents contain no text."
            )
        vector_db = FAISS.from_documents(texts, embedding_model)
        vector_db.save_local(FAISS_DB_PATH)

    # MMR search returning 5 chunks per query.
    retriever = vector_db.as_retriever(search_type="mmr", search_kwargs={"k": 5})
    llm = Ollama(model="llama2")

    retrievalQA = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    return retrievalQA, llm

# 3️⃣ Load CSV file
def load_csv(file_path):
    """Read the CSV file at ``file_path`` and return it as a pandas DataFrame."""
    frame = pd.read_csv(filepath_or_buffer=file_path)
    return frame

# 🚀 Main execution
if __name__ == "__main__":
    # Step 1: the rules PDF must exist; read it fully into memory as bytes.
    pdf_file_path = "storying.pdf"
    pdf_is_present = os.path.exists(pdf_file_path)
    if not pdf_is_present:
        raise FileNotFoundError(f"🚨 Error: The file '{pdf_file_path}' was not found!")

    with open(pdf_file_path, "rb") as file:
        pdf_content = file.read()

    # Step 2: turn the PDF pages into documents and build the RAG chain.
    documents = load_analysis_rules_from_memory(pdf_content)
    rag_components = train_rag_system(documents)
    retrievalQA, llm = rag_components

    # Step 3: the CSV must exist as well; load it into a DataFrame.
    csv_file_path = "Regions.csv"
    csv_is_present = os.path.exists(csv_file_path)
    if not csv_is_present:
        raise FileNotFoundError(f"🚨 Error: The file '{csv_file_path}' was not found!")

    df = load_csv(csv_file_path)

    # Step 4: analyse the data; the analyzer returns the analysis text and
    # the questions to forward to the retrieval chain.
    analyzer = DataAnalyzer(df, llm=llm)
    analysis_result, questions = analyzer.analysis_data()
    print("\n📊 Analysis Result:")
    print(analysis_result)

    # Step 5: answer each question and show the chunks the answer came from.
    for question in questions:
        print(f"\n❓ Question: {question}")
        payload = {"query": question}
        result = retrievalQA.invoke(payload)
        print("🔍 Answer:")
        print(result['result'])
        print("\n📚 Source Documents:")
        for source_doc in result['source_documents']:
            print(source_doc.page_content)

  embedding_model = OllamaEmbeddings(model="llama2")
  llm = Ollama(model="llama2")
  analysis_chain = LLMChain(



🔄 Loading existing FAISS index...


ValidationError: 1 validation error for LLMChain
config
  Extra inputs are not permitted [type=extra_forbidden, input_value={'temperature': 0.3, 'max_tokens': 500}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden

In [6]:
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOllama
import pandas as pd 
from DataAnalyzer import DataAnalyzer

import os  # needed for the index-existence check below

FAISS_DB_PATH = "faiss_index"

embedding_model = OllamaEmbeddings(model="llama2")

# Load the FAISS index if it was already built; otherwise build it from the
# rules PDF. The PDF is only parsed when the index is missing, so the split
# chunks are actually used instead of being computed and discarded.
if os.path.exists(FAISS_DB_PATH):
    # SECURITY NOTE: this flag opts in to pickle-based deserialization; only
    # load an index that was created locally by this notebook.
    vector_db = FAISS.load_local(
        FAISS_DB_PATH,
        embedding_model,
        allow_dangerous_deserialization=True,
    )
else:
    # Load the document and split it into chunks
    pdf_loader = PyPDFLoader("storying.pdf")
    documents = pdf_loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    docs = text_splitter.split_documents(documents)
    vector_db = FAISS.from_documents(docs, embedding_model)
    vector_db.save_local(FAISS_DB_PATH)
retriever = vector_db.as_retriever()

# Create the chat model
llm = ChatOllama(model="mistral")

# Turn the RAG pipeline into an agent using ConversationalRetrievalChain
rag_agent = ConversationalRetrievalChain.from_llm(llm, retriever=retriever)

df = pd.read_csv("Regions.csv")

# Load and analyse the data.
# NOTE(review): the other cells construct DataAnalyzer as (df, llm=llm) and
# unpack analysis_data() into (analysis_result, questions); this cell now
# follows the same convention and hands the analyzer the chat model rather
# than the retrieval chain. Confirm against the DataAnalyzer module.
analyzer = DataAnalyzer(df, llm=llm)
data_insights, _questions = analyzer.analysis_data()

# Ask the agent to interpret the data insights in the context of the retrieved rules
query_with_context = f"Based on the following data insights: {data_insights}, apply the rules from the document to generate insights."
result = rag_agent.invoke({"question": query_with_context, "chat_history": []})

# Print the result
print("\n🔍 Final Analysis Result:")
print(f"📊 Data Insights: \n{data_insights}\n")
print(f"📖 Rule-based Insights: \n{result['answer']}")

ValidationError: 1 validation error for LLMChain
config
  Extra inputs are not permitted [type=extra_forbidden, input_value={'temperature': 0.3, 'max_tokens': 500}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden

In [None]:
# Use ConversationalRetrievalChain
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain_community.llms import Ollama
import pandas as pd
import fitz  # PyMuPDF for reading PDF files
import os  # For file path checking
from DataAnalyzer import DataAnalyzer  # Importing DataAnalyzer

# 🔹 Define FAISS database path
FAISS_DB_PATH = "faiss_index"

# 1️⃣ Load analysis rules from PDF memory
def load_analysis_rules_from_memory(pdf_content):
    """Extract the text of every page of an in-memory PDF.

    Args:
        pdf_content: Raw bytes of a PDF file (passed to PyMuPDF as a stream).

    Returns:
        A list with one LangChain ``Document`` per PDF page, in page order,
        whose ``page_content`` is the text returned by ``page.get_text()``.
    """
    # Open through a context manager so the PyMuPDF document is closed as
    # soon as the page text has been extracted, even if extraction fails.
    with fitz.open(stream=pdf_content, filetype="pdf") as doc:
        return [Document(page_content=page.get_text()) for page in doc]

# 2️⃣ Train RAG system and set up FAISS
def train_rag_system(documents):
    """Build a conversational RAG chain on top of a FAISS index, reusing a saved index.

    If a directory exists at ``FAISS_DB_PATH`` the index stored there is
    loaded and ``documents`` is ignored; otherwise ``documents`` are split,
    embedded, indexed and the index is saved to ``FAISS_DB_PATH``.

    Args:
        documents: LangChain ``Document`` objects to index when no saved
            index is available.

    Returns:
        A ``(rag_agent, llm)`` tuple: the ``ConversationalRetrievalChain``
        and the Ollama LLM it uses.

    Raises:
        ValueError: If a new index has to be built but splitting
            ``documents`` produces no text chunks (for example a PDF with
            no extractable text).
    """
    embedding_model = OllamaEmbeddings(model="llama2")

    if os.path.exists(FAISS_DB_PATH):
        print("\n🔄 Loading existing FAISS index...")
        # SECURITY NOTE: this flag opts in to pickle-based deserialization of
        # the stored index. Only load an index that this script created
        # itself; never point FAISS_DB_PATH at files from an untrusted source.
        vector_db = FAISS.load_local(
            FAISS_DB_PATH,
            embedding_model,
            allow_dangerous_deserialization=True,
        )
    else:
        print("\n🛠️ Generating new embeddings and saving FAISS index...")
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
        texts = text_splitter.split_documents(documents)
        # Fail with a clear message instead of an opaque error from deep
        # inside FAISS when there is nothing to embed.
        if not texts:
            raise ValueError(
                "Cannot build the FAISS index: the supplied documents contain no text."
            )
        vector_db = FAISS.from_documents(texts, embedding_model)
        vector_db.save_local(FAISS_DB_PATH)

    # MMR search returning 5 chunks per query.
    retriever = vector_db.as_retriever(search_type="mmr", search_kwargs={"k": 5})
    llm = Ollama(model="llama2")

    # 🔹 Create an interactive Agent using ConversationalRetrievalChain
    rag_agent = ConversationalRetrievalChain.from_llm(llm, retriever=retriever)

    return rag_agent, llm

# 3️⃣ Load CSV file
def load_csv(file_path):
    """Read the CSV file at ``file_path`` and return it as a pandas DataFrame."""
    frame = pd.read_csv(filepath_or_buffer=file_path)
    return frame

# 🚀 Main execution
if __name__ == "__main__":
    # 🔹 Load analysis rules from PDF file
    pdf_file_path = "storying.pdf"
    if not os.path.exists(pdf_file_path):
        raise FileNotFoundError(f"🚨 Error: The file '{pdf_file_path}' was not found!")

    with open(pdf_file_path, "rb") as file:
        pdf_content = file.read()

    documents = load_analysis_rules_from_memory(pdf_content)

    # 🔹 Train RAG model and create an Agent
    rag_agent, llm = train_rag_system(documents)

    # 🔹 Load CSV data
    csv_file_path = "Regions.csv"
    if not os.path.exists(csv_file_path):
        raise FileNotFoundError(f"🚨 Error: The file '{csv_file_path}' was not found!")

    df = load_csv(csv_file_path)

    # 🔹 Create a DataAnalyzer instance and analyze data
    analyzer = DataAnalyzer(df, llm=llm)
    analysis_result, questions = analyzer.analysis_data()

    print("\n📊 Analysis Result:")
    print(analysis_result)

    # 🔹 Pass extracted questions to the Agent for answers
    for question in questions:
        print(f"\n❓ Question: {question}")
        # ConversationalRetrievalChain requires a "chat_history" input in
        # addition to "question"; omitting it fails input validation. Each
        # generated question is independent, so an empty history is passed.
        result = rag_agent.invoke({"question": question, "chat_history": []})
        print("🔍 Answer:")
        print(result['answer'])
