In [None]:
%%capture
!pip install llama-index==0.10.37 cohere==5.5.0 openai==1.30.1 llama-index-embeddings-openai==0.1.9 llama-index-llms-cohere==0.2.0 qdrant-client==1.9.1 llama-index-vector-stores-qdrant==0.2.8

In [None]:
import os

from getpass import getpass
import nest_asyncio

from dotenv import load_dotenv

# Patch asyncio so code that starts its own event loop can run inside the
# notebook's already-running loop.
nest_asyncio.apply()

# Load variables from a local .env file (if present) into the environment.
# Kept as the last expression, so its return value is what the cell displays.
load_dotenv()

In [35]:
import os
from getpass import getpass


def _require_setting(name):
    """Return the setting called ``name``, prompting for it when it is unset.

    The value is looked up in the environment first; if it is missing or
    empty the user is asked for it without echoing the input. The result is
    written back to ``os.environ`` so that clients which discover their
    credentials from the environment also see interactively entered values.
    """
    value = os.environ.get(name) or getpass(f"Enter {name}: ")
    os.environ[name] = value
    return value


CO_API_KEY = _require_setting('CO_API_KEY')
OPENAI_API_KEY = _require_setting('OPENAI_API_KEY')
QDRANT_URL = _require_setting('QDRANT_URL')
QDRANT_API_KEY = _require_setting('QDRANT_API_KEY')


In [None]:
!pip install requests

In [None]:
import requests
from pathlib import Path

def create_directory(directory_name):
    """Make sure ``directory_name`` exists, then report it on stdout.

    Missing parent directories are created too, and a directory that is
    already present is accepted without raising.
    """
    Path(directory_name).mkdir(exist_ok=True, parents=True)
    print(f"Directory '{directory_name}' created successfully.")


# Folder the PDFs are downloaded into.
create_directory("rag_articles")

In [None]:
def download_pdf(url, directory, timeout=30):
    """Download the PDF at ``url`` into ``directory`` and return the saved path.

    The filename is the last path segment of the URL (query string and
    fragment are ignored). ``.pdf`` is appended only when that segment does
    not already end with it, so ``https://arxiv.org/pdf/2502.20964`` is saved
    as ``2502.20964.pdf`` and ``.../report.pdf`` stays ``report.pdf``.

    Args:
        url: Address of the PDF to fetch.
        directory: Target folder; it is created when it does not exist yet.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The path of the written file, as a string.

    Raises:
        ValueError: If no filename can be derived from ``url``.
        requests.HTTPError: If the server answers with an error status.
    """
    from urllib.parse import urlsplit

    # Use only the path component so "?query" / "#fragment" never end up in
    # the filename, and strip a trailing slash so the last segment is not empty.
    stem = urlsplit(url).path.rstrip("/").rsplit("/", 1)[-1]
    if not stem:
        raise ValueError(f"Cannot derive a filename from URL: {url!r}")
    filename = stem if stem.lower().endswith(".pdf") else stem + ".pdf"

    target_dir = Path(directory)
    target_dir.mkdir(parents=True, exist_ok=True)
    pdf_path = target_dir / filename

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # fail before anything is written to disk
    pdf_path.write_bytes(response.content)
    print(f"PDF downloaded and saved to {pdf_path}")
    return str(pdf_path)

In [None]:
# arXiv papers to index, each addressed through its PDF endpoint
pdf_links = [
    f"https://arxiv.org/pdf/{arxiv_id}"
    for arxiv_id in (
        "2502.20964",
        "2502.20969",
        "2502.20995",
        "2502.21087",
        "2502.21263",
    )
]

In [None]:
# Download every linked paper into "rag_articles" and keep the local paths, in link order.
downloaded_files = [download_pdf(link, "rag_articles") for link in pdf_links]

In [None]:
# Read the downloaded PDFs into LlamaIndex documents
from llama_index.core import SimpleDirectoryReader

# The reader is given the explicit list of downloaded file paths.
pdf_reader = SimpleDirectoryReader(
    input_files=downloaded_files,
    filename_as_id=True,
)
documents = pdf_reader.load_data()

print(f"Loaded {len(documents)} documents.")

In [None]:
# Node parser used to cut documents into chunks
from llama_index.core.node_parser import SentenceSplitter

# Chunking parameters gathered in one place so they are easy to tune.
splitter_settings = dict(
    chunk_size=512,
    chunk_overlap=16,
    paragraph_separator="\n\n\n\n",
)
sentence_splitter = SentenceSplitter(**splitter_settings)

In [None]:
# Instantiate embedding model
from llama_index.embeddings.openai import OpenAIEmbedding

# Pass the key collected earlier explicitly instead of relying on the client
# finding OPENAI_API_KEY in the environment (it may have been typed at the prompt).
embed_model = OpenAIEmbedding(
    model_name="text-embedding-3-small",
    api_key=OPENAI_API_KEY,
)

In [None]:
import qdrant_client
from llama_index.vector_stores.qdrant import QdrantVectorStore

# Connect to the Qdrant instance using the URL / API key gathered earlier.
client = qdrant_client.QdrantClient(api_key=QDRANT_API_KEY, url=QDRANT_URL)

# Vector store bound to the "rag_articles" collection on that instance.
# NOTE(review): embed_model does not appear to be a documented QdrantVectorStore
# argument — confirm it has any effect here.
vector_store = QdrantVectorStore(
    collection_name="rag_articles",
    client=client,
    embed_model=embed_model,
)

In [None]:
from llama_index.core import StorageContext

# Storage context whose vector store is the Qdrant-backed one built above it.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [None]:
from llama_index.core import VectorStoreIndex

# Build the index from the loaded documents, using the configured splitter,
# embedding model and storage context.
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
    store_nodes_override=True,
    # The keyword is `transformations` (plural). The misspelled singular form is
    # swallowed as an unknown extra keyword, so the custom splitter was never applied.
    transformations=[sentence_splitter],
    embed_model=embed_model,
    storage_context=storage_context,
)

In [None]:
# Retriever over the index, asked for the 5 most similar chunks per query.
# NOTE(review): similarity_threshold is kept exactly as passed originally, but it
# does not appear to be a documented as_retriever() option — confirm it filters
# anything. A score cutoff is normally applied at query time with a
# SimilarityPostprocessor(similarity_cutoff=...).
retriever = index.as_retriever(
    similarity_top_k=5,
    similarity_threshold=0.75)

# Backward-compatible alias for the original, misspelled variable name.
retirever = retriever

In [None]:
# Rebuild the index from vectors already stored in Qdrant. The cell repeats its
# own imports and client setup, so it does not need the indexing cells to have
# run — only the credential variables.
import qdrant_client
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import StorageContext

# Pass the key collected earlier explicitly instead of relying on the client
# finding OPENAI_API_KEY in the environment (it may have been typed at the prompt).
embed_model = OpenAIEmbedding(
    model_name="text-embedding-3-small",
    api_key=OPENAI_API_KEY,
)

# initialize qdrant client
client = qdrant_client.QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name="rag_articles",
    embed_model=embed_model,
)

# assign qdrant vector store to storage context
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    )

# load your index from stored vectors
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model,
    storage_context=storage_context
)

In [None]:
from llama_index.llms.cohere import Cohere

# Pass the Cohere key collected earlier explicitly instead of relying on the
# client finding CO_API_KEY in the environment (it may have been typed at the prompt).
llm = Cohere(model="command-r-plus", api_key=CO_API_KEY)

# Query engine over the index; streaming=True so the answer can be printed as it arrives.
query_engine = index.as_query_engine(llm=llm, streaming=True)

response = query_engine.query(
    "How I can build an AI agent?"
)

# Write the streamed answer to the cell output.
response.print_response_stream()

In [None]:
# Show the text of the first source node attached to the response.
first_source = response.source_nodes[0]
first_source.get_text()

In [34]:
# Chat engine over the same index, using the LLM configured earlier.
chat_engine = index.as_chat_engine(llm=llm)

# Start the interactive streaming chat loop in the cell output; type "exit" to leave it.
chat_engine.streaming_chat_repl()

===== Entering Chat REPL =====
Type "exit" to exit.

Assistant:  AI agents can be used in a variety of specialized domains, including legal systems, research, recommender systems, cybersecurity, and global security. These domains benefit from the advanced capabilities that AI agents offer, such as natural language processing, machine learning, and automation. AI agents can process and analyze large amounts of data, identify patterns, make recommendations, and support decision-making processes in these domains.

Assistant:  AI can significantly enhance the legal system by facilitating complex connection identification and analysis within case law, statutes, and legal precedents. This capability can be leveraged to predict legal trends and uncover hidden relationships, ultimately contributing to improved efficiency and the delivery of justice. Additionally, AI proves valuable in legal research, streamlining the process of collecting, understanding, and retrieving relevant legal documents

KeyboardInterrupt: Interrupted by user