### Requirements

In [None]:
!pip install langchain langchain-core langchain-community langchain-chroma langchain-openai pypdf

### Setup

In [7]:
import getpass

openai_api_key = getpass.getpass("Enter your OpenAI API key: ")

# Load and spit a document

In [8]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

loader = PyPDFLoader("./ieee-5g-roadmap-white-paper.pdf")
chunks = loader.load_and_split(
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ". ", " ", ""],
        chunk_size=1000,
        chunk_overlap=200
))

chunks

[Document(metadata={'source': './ieee-5g-roadmap-white-paper.pdf', 'page': 0}, page_content='IEEE 5G AND BEYOND TECHNOLOGY ROADMAP WHITE PAPER \n \n \n \n \n \n \n \n \nIEEE 5G AND BEYOND  \nTECHNOLOGY ROADMAP  \nWHITE PAPER'),
 Document(metadata={'source': './ieee-5g-roadmap-white-paper.pdf', 'page': 1}, page_content='IEEE 5G AND BEYOND TECHNOLOGY ROADMAP WHITE PAPER \n \n3GPP™ and LTE™ are Trade Marks of ETSI registered for the benefit of its Members and of the 3GPP Organizational \nPartners. \nGSM™, the Global System for Mobile communication, is a registered Trade Mark of the GSM Association. \nAT&T, the AT&T logo, AT&T slogans and other AT&T product/service names and logos are trademarks and service \nmarks of AT&T Intellectual Property or AT&T affiliated company ("AT&T Marks"). \nIETF, Internet Engineering Task Force is a trademark of the IETF Trust. \nLinux® is a registered trademark owned by Linus Torvalds, owner of the mark on a world-wide basis.  \nThe OpenStack® Word Mark and

# Upload to the vector database

In [10]:
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma


db = Chroma(embedding_function=OpenAIEmbeddings(api_key=openai_api_key))
db.add_documents(chunks)

['4a2dfc32-3878-423c-a574-e727f64c6caf',
 '26f4b5ed-8bcd-45a3-b038-296bbd02ce9d',
 '2785b9a0-84e5-428b-b765-11d7e4ba3362',
 'd108a5e7-27df-46e6-a0ff-028534233e19',
 'f4931dea-157c-41b3-a31a-c5ee6ab4fcd2',
 '60a812f6-a431-4c98-8991-ac34421fc6c0',
 '5429cb44-76b1-4e44-a1d4-54801a735481',
 '750a91ef-673a-4d5a-a9fb-9932e3350cd4',
 'f32096f1-4375-432c-9442-56eac9656f83',
 'fda4bfc1-e016-407b-9feb-7adb8bd9d1d8',
 '7a58110f-12a7-4474-901b-63711942146d',
 'af4a5c15-88cd-40bf-8424-332d8afdcdda',
 '0a8ee3c1-195d-469a-92e3-6482770c85be',
 '53e91e2b-9dab-452d-8526-c2827152aef4',
 'f57aa0da-79ad-44e0-8546-a846c3ba524f',
 '050544e6-63ff-4972-9c6b-92fa25827a22',
 '9258a467-9b71-40be-9e91-0e0565345a57',
 '265205b9-0f95-44b8-95e2-d51d7182924c',
 '26950504-4252-4754-8e74-79d9d03efa4a',
 '496403d8-9021-4e62-aade-29268d7e052c',
 'e08ab42d-f4ed-441a-a1d0-c650ab9a356d',
 'be76476e-8f89-4e1f-bbb9-446f1f7831e8',
 '33922c1d-66cf-41c4-bb94-73a2f60155a2',
 '7fcb0131-4928-4f33-aa28-578400922408',
 '5c30f5e6-77e9-

# Search

In [11]:
db.similarity_search("Why is 5G needed?")

[Document(metadata={'page': 7, 'source': './ieee-5g-roadmap-white-paper.pdf'}, page_content='\uf0b7 1 Gigabit per second (Gb/s) simultaneously to many workers on the same office floor \n\uf0b7 Several hundreds of thousands of simultaneous connections for wireless sensors \n\uf0b7 Significantly enhanced spectral efficiency compared to 4G \n\uf0b7 Improved coverage  \n\uf0b7 Enhanced signaling efficiency  \n\uf0b7 Significantly reduced latency compared to Long Term Evolution (LTE)  \nIt is the purpose of this white paper to stimulate an industry-wide dialogue to synchronously address \nall the facets of the development and deployment of 5G by the year 2020. \n \n1. INTRODUCTION  \nThe mobile and fixed wireless industry has enjoyed tremendous g rowth over the past decades. Indeed, \nmobile has evolved from a niche technology, embodied by an anal og 1G voice system, to a full-fledged \ninternet on the move and end-to-end (E2E) digital 4G system. No w 5G communities—with many R&D, \nstandar

# RAG Chat

In [18]:
from langchain_openai import ChatOpenAI

# Initialize the OpenAI model
llm = ChatOpenAI(api_key=openai_api_key)

def rag_chat(query):
    # Search for the most similar document
    sources = db.similarity_search(query)

    prompt = f"""\
Answer any use questions based solely on the context below:

<context>
{" ".join([doc.page_content for doc in sources])}
</context>

Question: {query}
"""
    answer = llm.invoke(prompt)


    # Format the sources
    sources_text = "\n\nSources:\n" + "\n".join([f"- {doc.metadata['source']} (Page {doc.metadata['page']})" for doc in sources])
    
    return answer.content + sources_text

# Example usage
query = "What are the key benefits of 5G?"
print(rag_chat(query))

The key benefits of 5G include:
- Data rates of 10s of Mb/s for 10s of thousands of users
- Data rates of 100 Mb/s for metropolitan areas
- 1 Gigabit per second (Gb/s) simultaneously to many workers on the same office floor
- Several hundreds of thousands of simultaneous connections for wireless sensors
- Significantly enhanced spectral efficiency compared to 4G
- Improved coverage
- Enhanced signaling efficiency
- Significantly reduced latency compared to Long Term Evolution (LTE)

Sources:
- ./ieee-5g-roadmap-white-paper.pdf (Page 18)
- ./ieee-5g-roadmap-white-paper.pdf (Page 7)
- ./ieee-5g-roadmap-white-paper.pdf (Page 7)
- ./ieee-5g-roadmap-white-paper.pdf (Page 31)
