In [45]:
import cassio
from langchain.vectorstores.cassandra import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from datasets import load_dataset
import os


In [46]:
from PyPDF2 import PdfReader

In [47]:
pdf=PdfReader("data/rise-of-llm.pdf")

In [48]:
from typing_extensions import Concatenate

In [49]:
raw=''

for i,page in enumerate(pdf.pages):
    content=page.extract_text()
    if content:
        raw+=content


In [50]:
print(raw)

R&D www.managementsolutions.comThe rise of Large Language Models: 
from fundamentals to application Auge LLM-Eng- Vdef_Maquetación 1  30/05/2024  23:48  Página 1Design and Layout  
Marketing and Communication Department  
Management Solutions  
Photographs  
Photographic archive of Management Solutions  
AdobeStock  
Midjourney  
 
 
© Management Solutions 2024  
All rights reserved. Cannot be reproduced, distributed, publicly disclosed, converted, totally or partially, freely or with a c harge, in any way or procedure, without the  
express written authorization of Management Solutions. The information contained in this publication is merely to be used as a guideline. Management Solutions shall  
not be held responsible for the use which could be made of this information by third parties. Nobody is entitled to use this ma terial except by express authorization of  
Management Solutions.Auge LLM-Eng- Vdef_Maquetación 1  30/05/2024  23:48  Página 2Introduction
104
Index
Executive summar

## Initialize the connection to database


In [52]:
cassio.init(token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],database_id=os.environ["ASTRA_DB_ID"])

In [53]:
llm=OpenAI(openai_api_key=os.environ["OPEN_API_KEY"])
embeddings=OpenAIEmbeddings(openai_api_key=os.environ["OPEN_API_KEY"])

## Creating LandChain Vector Store-backed by AstraDB

In [54]:
astra_vector_store=Cassandra(
    embedding=embeddings,
    table_name='qa_mini_demo',
    session=None,
    keyspace=None
)

## Splitting the text

In [55]:
from langchain_text_splitters import CharacterTextSplitter

In [56]:
text_split=CharacterTextSplitter(
    separator='\n',
    chunk_size=800,
    chunk_overlap=200,
    length_function=len
)

text=text_split.split_text(raw)

In [57]:
text[:50]

['R&D www.managementsolutions.comThe rise of Large Language Models: \nfrom fundamentals to application Auge LLM-Eng- Vdef_Maquetación 1  30/05/2024  23:48  Página 1Design and Layout  \nMarketing and Communication Department  \nManagement Solutions  \nPhotographs  \nPhotographic archive of Management Solutions  \nAdobeStock  \nMidjourney  \n \n \n© Management Solutions 2024  \nAll rights reserved. Cannot be reproduced, distributed, publicly disclosed, converted, totally or partially, freely or with a c harge, in any way or procedure, without the  \nexpress written authorization of Management Solutions. The information contained in this publication is merely to be used as a guideline. Management Solutions shall',
 'express written authorization of Management Solutions. The information contained in this publication is merely to be used as a guideline. Management Solutions shall  \nnot be held responsible for the use which could be made of this information by third parties. Nobody is entit

In [58]:
astra_vector_store.add_texts(text)
print('Inserted %i headlines' % len(text))
astra_vector_index=VectorStoreIndexWrapper(vectorstore=astra_vector_store)

Inserted 297 headlines


In [59]:
first_question=True
while True:
    if first_question:
        query_text=input("\n Enter your question (or type quit to exit):").strip()
    else:
        query_text=input("\n What's your next question (or type quit to exit)").strip()
    if query_text=='quit':
        break
    if query_text=='':
        continue

    first_question=False

    print("\n Question: \"%s\"" % query_text)
    answer=astra_vector_index.query(query_text,llm=llm).strip()
    print("\n Answer: \"%s\"" % answer)

    print("First documents by relevance")
    for doc, score in astra_vector_store.similarity_search_with_score(query_text,k=4):
        print("  [%0.4f] \"%s\"" %(score,doc.page_content[:100]))

 


 Question: "what is llm"

 Answer: "LLM stands for "Legal and Legislative Material" and refers to a type of information system used in organizations to ensure compliance with ethical, security, and regulatory standards."
First documents by relevance
  [0.8999] "systems, from design to deployment and maintenance, to 
ensure that they conform to ethical, securit"
  [0.8999] "systems, from design to deployment and maintenance, to 
ensure that they conform to ethical, securit"
  [0.8999] "systems, from design to deployment and maintenance, to 
ensure that they conform to ethical, securit"
  [0.8999] "systems, from design to deployment and maintenance, to 
ensure that they conform to ethical, securit"
