In [1]:
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS 
import openai
import import_ipynb
from Config import *
import os
import warnings
warnings.filterwarnings("ignore")

#### Environment Variables

In [2]:
# Export the Azure OpenAI connection settings as environment variables.
# Each pair is (environment variable name, key looked up in key_value_dict);
# key_value_dict is presumably supplied by the Config star-import.
for env_name, config_key in (
    ("OPENAI_API_TYPE", "api_type"),
    ("OPENAI_API_VERSION", "api_version"),
    ("OPENAI_API_BASE", "api_base"),
    ("OPENAI_API_KEY", "api_key"),
):
    os.environ[env_name] = key_value_dict[config_key]

In [3]:
# Open the source PDF. The path is relative, so 'knn.pdf' is looked up in the
# kernel's current working directory.
doc_reader = PdfReader('knn.pdf')

In [4]:
# Gather the extracted text of every page, in page order, into one string
# called raw_text. Pages whose extract_text() result is falsy (None or '')
# contribute nothing. The pieces are joined in a single pass rather than
# appended to the string one page at a time.
page_texts = (page.extract_text() for page in doc_reader.pages)
raw_text = ''.join(text for text in page_texts if text)

In [5]:
# Cut raw_text into smaller pieces for indexing. chunk_size caps the length
# of each piece at 800 and chunk_overlap lets consecutive pieces share up to
# 200, so text near a boundary appears in both neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=200)
texts = text_splitter.split_text(raw_text)

In [6]:
# Preview the first two chunks as a sanity check on the split.
texts[:2]

['K-Nearest Neighbour\nTushar B. Kute,\nhttp://tusharkute.com\nWhat sort of Machine Learning?\n•An idea that can be used for machine learning—\nas does another maxim involving poultry: "birds \nof a feather flock together." \n•In other words, things that are alike are likely to \nhave properties that are alike. \n•We can use this principle to classify data by \nplacing it in the category with the most similar, \nor "nearest" neighbors.\nNearest Neighbor Classification\n•In a single sentence, nearest neighbor classifiers are defined \nby their characteristic of classifying unlabeled examples by \nassigning them the class of the most similar labeled examples. \nDespite the simplicity of this idea, nearest neighbor methods \nare extremely powerful. They have been used successfully for:',
 'assigning them the class of the most similar labeled examples. \nDespite the simplicity of this idea, nearest neighbor methods \nare extremely powerful. They have been used successfully for:\n–Computer 

In [7]:
# Report how many chunks the splitter produced.
print(f"You have a total of {len(texts)} chunks")

You have a total of 11 chunks


In [8]:
from langchain.embeddings import AzureOpenAIEmbeddings

In [9]:
# Embedding client; the deployment and model names are read from key_value_dict.
# NOTE(review): chunk_size=1 presumably limits each embedding request to a
# single text -- confirm this is still required for the target deployment.
embed_deployment = key_value_dict["embed_eng_dep_nm"]
embed_model = key_value_dict["embedding_model"]
embeddings = AzureOpenAIEmbeddings(
    deployment=embed_deployment, model=embed_model, chunk_size=1
)

In [10]:
# Build a FAISS vector store from the text chunks, using the embedding client
# above to vectorise them. This is the object queried later via
# db.similarity_search().
db = FAISS.from_texts(texts, embeddings)

In [11]:
from langchain.prompts import PromptTemplate

In [12]:
# Prompt text for the question-answering chain. It exposes exactly two
# placeholders, {context} and {question}, asks for answers as numbered steps
# drawn only from the supplied context, and tells the model to reply
# "Out of Context" when it does not know the answer.
prompt_template = """ You are an AI Chatbot assistant trained on the context provided. Use the following pieces of context to
answer the question asked:
Provide me all the steps involved in resolving/guiding with clear explanation, 
paraphrase the steps to fluent english so that it feels like chatting with a human
Always answer in way that you are creator and holds responsible always in providing steps with proper formatting,
give me answer in clear number wise steps always,
Strictly, Don't provide answers if any questions which is not from the context provided,
If you don't know the answer, just say that Out of Context.

{context}
Question: {question}

Answer: """

In [13]:
# Wrap the raw template string in a PromptTemplate, declaring the two
# placeholders it contains.
prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

In [14]:
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import AzureChatOpenAI

In [15]:
 llm = AzureChatOpenAI(
            deployment_name=key_value_dict["comp_eng_dep_nm"],
            temperature=0,
            openai_api_version=key_value_dict["api_version"])

In [16]:
# Build the question-answering chain of type "stuff" around llm, using the
# custom prompt defined above instead of the library default.
chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)

In [17]:
from langchain.memory import ConversationBufferMemory
# Conversation buffer keyed as "chat_history", returning message objects.
# NOTE(review): the prompt template in this notebook declares only {context}
# and {question}, and load_qa_chain() is called without a memory argument, so
# this buffer does not appear to reach the model -- confirm whether chat
# history is actually wanted and wire it in if so.
memory = ConversationBufferMemory(
            memory_key="chat_history", return_messages=True
        )

In [18]:
# First test question: on-topic for the indexed KNN document.
query = "Explain about KNN classifier?"

In [19]:
# Retrieve the 3 chunks most similar to the query and let the chain answer
# from them. Only the inputs the chain consumes are passed: the retrieved
# documents and the question, matching the prompt's {context} and {question}.
docs = db.similarity_search(query, k=3)
result = chain.run(input_documents=docs, question=query)

In [20]:
# Show the chain's answer as plain text.
print(result)

Sure, I can help you with that. Here are the steps involved in understanding the KNN classifier:

1. Understand the concept: KNN is a machine learning algorithm that classifies data by placing it in the category with the most similar, or "nearest" neighbors. This means that it looks at the properties of the data and compares them to other data points to determine which category it belongs to.

2. Install necessary packages: To use KNN, you will need to install some Python packages such as pandas, numpy, matplotlib.pyplot, and sklearn. These packages are used for data analytics, numerical computing, plotting graphs, and classification and regression classes.

3. Choose a sample application: A good project to start with on KNN is the classification of iris flowers. This is a well-understood project that allows you to practice with supervised learning algorithms. It is a multi-class classification problem that may require some specialized handling.

4. Prepare the dataset: The dataset is 

In [23]:
# Control question that the indexed document cannot answer; the prompt
# instructs the model to reply "Out of Context" in that case.
query = "Who is donald Trump?"
# Retrieve the 3 most similar chunks and run the chain with exactly the
# inputs it consumes (documents and question).
docs = db.similarity_search(query, k=3)
result = chain.run(input_documents=docs, question=query)
print(result)

Out of Context.
