In [16]:
# LangChain Components
from langchain.vectorstores.cassandra import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter

# Support For Dataset Retrieval With Hugging Face
from datasets import load_dataset

# With CassIO, the engine powering the Astra DB integration in LangChain,
# we can will also initialize the DB connection:
import cassio

from PyPDF2 import PdfReader
import os
from dotenv import load_dotenv, find_dotenv

from typing_extensions import Concatenate

## SetUp

### Provide Your Secrets

In [7]:
load_dotenv(find_dotenv(), override=True)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_ID = os.environ.get("ASTRA_DB_ID")

In [8]:
pdfreader= PdfReader(r'C:\github_repos\gemini-pdfs\Chat With PDF Using Langchain And Astradb\Sridhar Alla, Suman Kalyan Adari - Beginning MLOps with MLFlow_ Deploy Models in AWS SageMaker, Google Cloud, and Microsoft Azure-Apress (2021).pdf')

In [10]:
# Read Text From PDFfile
raw_text= ''
for i, page in enumerate(pdfreader.pages):
    content= page.extract_text()
    if content:
        raw_text += content

In [13]:
print(raw_text)

Beginning MLOps 
with MLFlow      
Deploy Models in AWS SageMaker,  
Google Cloud, and Microsoft Azure
—
Sridhar Alla
Suman Kalyan AdariBeginning MLOps 
with MLFlow
Deploy Models in AWS 
SageMaker, Google Cloud, 
and Microsoft Azure
Sridhar Alla
Suman Kalyan AdariBeginning MLOps with MLFlow
ISBN-13 (pbk): 978-1-4842-6548-2   ISBN-13 (electronic): 978-1-4842-6549-9
https://doi.org/10.1007/978-1-4842-6549-9
Copyright © 2021 by Sridhar Alla, Suman Kalyan Adari 
This work is subject to copyright. All rights are reserved by the Publisher, whether the whole or 
part of the material is concerned, specifically the rights of translation, reprinting, reuse of 
illustrations, recitation, broadcasting, reproduction on microfilms or in any other physical way, 
and transmission or information storage and retrieval, electronic adaptation, computer software, 
or by similar or dissimilar methodology now known or hereafter developed.
Trademarked names, logos, and images may appear in this book. Rather t

### Initialize the Connection To DataBase

In [12]:
cassio.init(token=ASTRA_DB_APPLICATION_TOKEN, database_id=ASTRA_DB_ID)

### Create The LangChain Embedding and LLM Objects For Later Usage

In [14]:
llm= OpenAI(openai_api_key=OPENAI_API_KEY)
embedding= OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

  warn_deprecated(
  warn_deprecated(


### Create Your LangChain Vector Store (Backed By Astra DB)

In [15]:
astra_vector_store= Cassandra(
    embedding=embedding,
    table_name='qa_mini_demo',
    session=None,
    keyspace=None
)

In [17]:
text_splitter= CharacterTextSplitter(
    separator='\n',
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,

)
texts= text_splitter.split_text(raw_text)

In [19]:
texts[:500]

['Beginning MLOps \nwith MLFlow      \nDeploy Models in AWS SageMaker,  \nGoogle Cloud, and Microsoft Azure\n—\nSridhar Alla\nSuman Kalyan AdariBeginning MLOps \nwith MLFlow\nDeploy Models in\xa0AWS \nSageMaker, Google Cloud, \nand\xa0Microsoft Azure\nSridhar\xa0Alla\nSuman\xa0Kalyan\xa0AdariBeginning MLOps with MLFlow\nISBN-13 (pbk): 978-1-4842-6548-2   ISBN-13 (electronic): 978-1-4842-6549-9\nhttps://doi.org/10.1007/978-1-4842-6549-9\nCopyright © 2021 by Sridhar Alla, Suman Kalyan Adari \nThis work is subject to copyright. All rights are reserved by the Publisher, whether the whole or \npart of the material is concerned, specifically the rights of translation, reprinting, reuse of \nillustrations, recitation, broadcasting, reproduction on microfilms or in any other physical way,',
 'part of the material is concerned, specifically the rights of translation, reprinting, reuse of \nillustrations, recitation, broadcasting, reproduction on microfilms or in any other physical way, \nand tr

### Load The Dataset Into The Vector Store

In [20]:
astra_vector_store.add_texts(texts[:500])
print('Inserted %i Headlines.' % len(texts[:500]))
astra_vector_index= VectorStoreIndexWrapper(vectorstore=astra_vector_store)

Inserted 500 Headlines.


### Run Q&A Cycle

In [22]:
first_question= True
while True:
    if first_question:
        query_text= input("\nEnter your question (or type 'quit' to exit): ").strip()
    else:
        query_text= input("\nWhat's your next question (or type 'quit' to exit): ").strip()

    if query_text.lower()=="quit":
        break

    if query_text=="":
        continue

    first_question= False

    print("\nQUESTION: \"%s\"" % query_text)
    answer= astra_vector_index.query(query_text, llm=llm).strip()
    print("ANSWER: \"%s\"n" % answer)

    print("FIRST DOCUMENTS BY RELEVANCE:")
    for doc, score in astra_vector_store.similarity_search_with_score(query_text, k=4):
        print("   [%0.4f] \"%s ...\"" % (score, doc.page_content[:150]))


QUESTION: "What is Model Validation in MLOps?"
ANSWER: "Model Validation in MLOps is a stage where the model goes through a process to seek the best hyperparameters. This can be done by using a script to iterate through various configurations of hyperparameter values and utilizing k-fold cross-validation. The goal of this stage is to help tune the model’s hyperparameters and it can even be automated to save time and resources in the long run."n
FIRST DOCUMENTS BY RELEVANCE:
   [0.9343] "other mechanism that the team has implemented, and the process moves 
on to the validation stage.
 Model Validation
In this stage, the model begins th ..."
   [0.9319] "new model or just update the current model.Chapter 3  What Is MLOps?97 3. Automated model building and analysis:  In this 
step, data scientists and m ..."
   [0.9257] "can allow for deployment on a simple click of a button. Usually, the 
deployment is to a staging environment first, where the functionality can 
be te ..."
   [0.9241] "