In [4]:
import os
import gradio as gr
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_parse import LlamaParse
import json

  from .autonotebook import tqdm as notebook_tqdm


Set Up the LLM with Ollama

In [5]:
# Hosted-service keys are read from the environment; each is None when unset.
# NOTE(review): neither key is referenced again in the cells visible here.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
LLAMACLOUD_API_KEY = os.environ.get("LLAMACLOUD_API_KEY")

# LLM served through Ollama.
# NOTE(review): `api_key` receives the literal text "OLLAMA_API_KEY", not an
# environment value - looks like a placeholder; confirm it is needed at all.
llm = Ollama(
    api_key="OLLAMA_API_KEY",
    model="phi3.5:3.8b-mini-instruct-q8_0",
)

Set Up the Embedding Model

In [6]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Hugging Face embedding model, identified by its model name.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

Global Settings Config

In [7]:
from llama_index.core import Settings

# Register the embedding model and the LLM on the global LlamaIndex Settings
# object so later cells can rely on them without passing them explicitly.
Settings.embed_model = embed_model
Settings.llm = llm

Upload and Import Ebooks

In [1]:
import os
import tkinter as tk
from tkinter import filedialog
from ebooklib import epub
import fitz  # PyMuPDF

def upload_and_import_ebook(file_path=None, save_directory="data"):
    """Extract the content of an EPUB or PDF and save it as a UTF-8 ``.txt`` file.

    Args:
        file_path: Path to an ``.epub`` or ``.pdf`` file. When falsy, a Tkinter
            file dialog is opened so the user can pick one.
        save_directory: Directory that receives the extracted content; it is
            created if it does not exist yet.

    Returns:
        The path of the written file (``<save_directory>/<source name>.txt``),
        or ``None`` when no file was chosen or the file extension is neither
        ``.epub`` nor ``.pdf``.
    """
    if not file_path:
        # Create the hidden Tk root only when a dialog is really needed, so a
        # call with an explicit path never opens a GUI connection.
        root = tk.Tk()
        root.withdraw()
        try:
            file_path = filedialog.askopenfilename(
                title="Select an Ebook or PDF",
                filetypes=[("Ebook and PDF Files", "*.epub *.pdf")],
            )
        finally:
            # Tear the root down instead of leaking one Tk instance per call.
            root.destroy()

    if not file_path:
        # Dialog was cancelled (or an empty path was passed in).
        return None

    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == ".epub":
        # ITEM_DOCUMENT is defined on the top-level package; the notebook only
        # imports `epub` from it, so bring the package into scope here.
        import ebooklib

        book = epub.read_epub(file_path)
        content = "".join(
            item.get_body_content().decode("utf-8")
            for item in book.get_items()
            if item.get_type() == ebooklib.ITEM_DOCUMENT
        )
    elif file_extension == ".pdf":
        # The context manager closes the document once every page is read.
        with fitz.open(file_path) as pdf_document:
            content = "".join(page.get_text() for page in pdf_document)
    else:
        # Unsupported type: report failure through the existing None channel
        # rather than writing an empty .txt that looks like a successful import.
        return None

    os.makedirs(save_directory, exist_ok=True)

    # The original file name (extension included) is kept and ".txt" appended.
    save_path = os.path.join(save_directory, os.path.basename(file_path) + ".txt")
    with open(save_path, "w", encoding="utf-8") as f:
        f.write(content)

    return save_path

# Interactive run: pick the ebook/PDF through the file dialog, then report
# where the extracted text was written (or that nothing was chosen).
ebook_path = upload_and_import_ebook()
print(
    f"Ebook content imported successfully to {ebook_path}."
    if ebook_path
    else "No ebook or PDF selected."
)

# Example usage with file path
# ebook_path = upload_and_import_ebook("path/to/your/ebook_or_pdf.epub")
# if ebook_path:
#     print(f"Ebook content imported successfully to {ebook_path}.")
# else:
#     print("Invalid file path.")

2024-11-27 15:36:57.737 python[86799:21872050] +[IMKClient subclass]: chose IMKClient_Modern
2024-11-27 15:36:58.002 python[86799:21872050] The class 'NSOpenPanel' overrides the method identifier.  This method is implemented by class 'NSWindow'


Ebook content imported successfully to data/Linear-Algebra.pdf.txt.


Document Loader

In [8]:
# Load the text file produced by the import step (its path is `ebook_path`).
docs = SimpleDirectoryReader(
    input_files=[ebook_path],
).load_data()

Advanced RAG (Routing)

In [9]:
from llama_index.core import VectorStoreIndex

# Build a vector index over the loaded documents.
index = VectorStoreIndex.from_documents(
    docs,
)

# NOTE(review): `query_engine` is rebound to a RouterQueryEngine in a later
# cell before any query is issued, so this top-5 engine is never used as
# written - confirm whether the router's tools were meant to use it.
query_engine = index.as_query_engine(
    similarity_top_k=5,
)

In [10]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# Tool 1: a default query engine over the index, described to the router as
# the choice for looking up specific facts.
vector_tool = QueryEngineTool(
    query_engine=index.as_query_engine(),
    metadata=ToolMetadata(
        description="Useful for searching for specific facts.",
        name="vector_search",
    ),
)

# Tool 2: the same index queried with the "tree_summarize" response mode,
# described to the router as the choice for whole-document summaries.
summary_tool = QueryEngineTool(
    query_engine=index.as_query_engine(response_mode="tree_summarize"),
    metadata=ToolMetadata(
        description="Useful for summarizing an entire document.",
        name="summary",
    ),
)

In [11]:
from llama_index.core.query_engine import RouterQueryEngine

# Router that asks the LLM to pick exactly one of the two tools per query
# (select_multi=False) and logs its choice (verbose=True).
# NOTE(review): this rebinds `query_engine`, replacing the engine created in
# the indexing cell.
query_engine = RouterQueryEngine.from_defaults(
    [vector_tool, summary_tool],
    llm=llm,
    select_multi=False,
    verbose=True,
)

In [12]:
# Send a fact-style question through the router query engine.
response = query_engine.query("Tell me about the specific details about Elementary Matrixes")

[1;3;38;5;200mSelecting query engine 0: When looking to understand specific details about Elementary Matrixes, using a resource that is useful for searching for specific facts would be most relevant. This allows one to find precise information and data points regarding elementary matrix properties, operations, or examples..
[0m

In [13]:
# Show the answer returned by the query above as plain text.
print(response)

Elementary matrices play a crucial role in linear algebra when it comes to manipulating other types of matrices through row operations. These particular kinds of square matrices are derived from standard identity matrices by performing one elementary operation, such as swapping two rows or multiplying a row by a non-zero scalar and adding it to another row.

Elementary matrix transformations form the foundation for more complex processes like Gaussian elimination—an essential method used in solving systems of linear equations represented within rectangular arrays known as matrices. These transformation procedures not only aid problem solvers but are also pivotal when programming computers, which excel at handling numerical data stored or processed through such array structures.

In the context provided earlier, there is a reference to elementary matrix operations being part of finding an inverse for given linear equations and their associated systems. This ties into broader concepts li

Add Chat History to RAG

In [None]:
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.chat_engine import CondensePlusContextChatEngine

# Conversation memory, capped by the token limit given here.
memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

# Chat engine over the ebook index: retrieves from `index` and keeps the
# conversation in `memory`.
chat_engine = CondensePlusContextChatEngine.from_defaults(
    index.as_retriever(),
    memory=memory,
    llm=llm,
    # The prompt now describes the indexed material (the imported ebook/PDF)
    # instead of an unrelated tutorial topic, and the first sentence is
    # separated from the next one with a newline.
    context_prompt=(
        "You are a chatbot, able to have normal interactions, as well as talk"
        " about the ebook or PDF the user has imported.\n"
        "Here are the relevant documents for the context:\n"
        "{context_str}"
        "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
    ),
    verbose=True,
)