In [1]:
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import GooglePalm
from langchain.vectorstores import FAISS
from dotenv import load_dotenv
import pandas as pd
import os

# Load variables from a local .env file into the process environment.
load_dotenv()

# Read the Google API key from the environment; a missing variable raises KeyError here.
api = os.environ["GOOGLE_API_KEY"]

In [2]:
# Build the Google PaLM LLM client, authenticated with the key held in `api`.
llm = GooglePalm(
    google_api_key=api,
    temperature=0.7,
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Ask the bare LLM a free-form question and print its text answer.
# NOTE(review): the prompt contains the typo "instrucatorembeddings"; it is kept verbatim here.
print(
    llm('what is the use of huggingface instrucatorembeddings')
)

The use of huggingface instrucator embeddings is to provide a way to represent text data in a way that can be used for machine learning tasks. This can be done by using a language model to learn the embeddings of words and phrases in a text corpus. The embeddings can then be used as features for a variety of machine learning tasks, such as text classification, natural language inference, and question answering.

Huggingface instrucator embeddings are particularly useful for tasks that require understanding the meaning of text. This is because the embeddings are learned from a large corpus of text, which means that they capture the semantic relationships between words and phrases. This makes them more effective than other types of text representations, such as bag-of-words or TF-IDF, for tasks that require understanding the meaning of text.

Here are some of the benefits of using huggingface instrucator embeddings:

* They are learned from a large corpus of text, which means that they c

### We are using CSVLoader to load our CSV file

In [4]:
from langchain.document_loaders import CSVLoader

In [5]:
# Read the FAQ CSV into documents; `source_column='prompt'` sets each document's
# "source" metadata to the value of its "prompt" column.
loader = CSVLoader(
    file_path='codebasics_faqs.csv',
    source_column='prompt',
)
data = loader.load()

### Using HuggingFaceInstructEmbeddings

In [6]:
# Create the instructor embedding model with its default settings.
embedding = HuggingFaceInstructEmbeddings()

load INSTRUCTOR_Transformer
max_seq_length  512


In [7]:
# Embed one sample sentence; the resulting vector `e` is not used by the later cells shown here.
e = embedding.embed_query(
    'Upendra Is Hero I dont Know How '
)

In [8]:
# e

### FAISS

In [9]:
# Build the FAISS index from the loaded documents using the embedding model.
# This assignment must run on a fresh kernel: later cells call methods on
# `vectorstore`, which is otherwise undefined after Restart & Run All.
vectorstore = FAISS.from_documents(documents=data, embedding=embedding)

In [10]:
# Save the vector store to the local 'faiss_index' folder.
vectorstore.save_local('faiss_index')

In [28]:
# Wrap the vector store in a retriever, using its default search settings.
retriever = vectorstore.as_retriever()

##### As you can see below, the retriever returns similar entries from the CSV, but the result does not read like a human, interactive answer, so we will use the LLM to address that

In [52]:
# Query the retriever directly to see which raw documents it returns for this question.
retriever.get_relevant_documents(
    "what is the duration of data analytics course and how about internship?"
)

[Document(page_content='prompt: What is different in this course from thousands of other Power BI courses available online?\nresponse: Most of the courses available on the internet teach you how to build x & y without any business context and do not prepare you for the real business world. This course is rather an experience in which you will learn how to use Power BI & other non-technical skills to solve a real-life business problem using analytics. Here you focus on solving a business problem and in that process learn how Power BI can be used as a tool. This is how you will do the work when you start working as a data analyst/ Business analyst/ Power BI developer in the industry. This course will prepare you for not just fetching the job but, shine in it & grow further.', metadata={'source': 'What is different in this course from thousands of other Power BI courses available online?', 'row': 36}),
 Document(page_content='prompt: I have never done programming and belong to a non-techn

In [45]:
from langchain.chains import RetrievalQA

In [47]:
# Retrieval QA chain built from the LLM and the retriever. The question is passed
# under the "query" key, and the source documents are returned alongside the result.
qa = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    input_key="query",
    return_source_documents=True,
)

In [55]:
# Run the chain and show only the value stored under 'result' in its output.
qa(
    "what is the duration of data analytics course and how about internship?"
).get('result')

'3 months internship after course'

#### Notice that here the LLM answers a query containing two different questions without changing the meaning

Now we will give it a prompt, because instead of using only the given information the LLM also uses its own general knowledge.

In [56]:
from langchain.prompts import PromptTemplate

In [58]:
# Instruction prompt for the QA chain. It tells the model to answer only from the
# supplied context, to reuse the "response" text where possible, and to reply
# "I don't know." when the context has no answer. `{context}` and `{question}`
# are the two placeholders filled in when the template is formatted.
prompt_template = (
    'Given the following context and a question, generate an answer based on this context only.\n'
    'In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.\n'
    "If the answer is not found in the context, kindly state \"I don't know.\" Don't try to make up an answer.\n"
    '\n'
    'CONTEXT: {context}\n'
    '\n'
    'QUESTION: {question}'
)

In [59]:
# Wrap the raw template string in a PromptTemplate, declaring its two placeholders.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=prompt_template,
)

In [60]:
# Rebuild the retrieval QA chain, this time passing the custom prompt through
# `chain_type_kwargs`. The other settings match the earlier chain.
qa = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    input_key="query",
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)

## Now we are deploying it using Streamlit 

In [72]:
# Ask a question that is presumably outside the FAQ data and show only the 'result' value.
qa(
    'can i have some cola'
).get('result')

"? response: I don't have any cola."