In [3]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.document_loaders.csv_loader import CSVLoader 
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

In [3]:
def load_pdf(data):
    """Load the CSV files matching ``*.csv`` under the directory ``data``.

    Despite the name, no PDF handling happens here: the directory is
    scanned with the ``*.csv`` glob and every match is read with
    ``CSVLoader``.

    Args:
        data: Path of the directory to scan.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    csv_directory_loader = DirectoryLoader(data, glob='*.csv', loader_cls=CSVLoader)
    return csv_directory_loader.load()

In [4]:
# Load the CSV-backed documents from the local data directory.
# NOTE(review): absolute, machine-specific Windows path — the notebook will
# not run on another machine without editing it.
extracted_data = load_pdf(r"E:\projects\EduBotIQ\Data Prepration")

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter 

def text_split(extracted_data, chunk_size=300, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    The chunk parameters used to be hard-coded. With the default size of
    300, a single CSV row can end up in more than one chunk (the
    similarity-search output further down shows one row's question and
    answer as separate documents), so both values can now be tuned by the
    caller without editing this function.

    Args:
        extracted_data: Documents to split (as returned by ``load_pdf``).
        chunk_size: ``chunk_size`` forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 300.
        chunk_overlap: ``chunk_overlap`` forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 20.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [6]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 2847


In [4]:
from langchain.embeddings import HuggingFaceEmbeddings

def download_hugging_face_embeddings():
    """Build the embedding model that the rest of the notebook passes around.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [5]:
# Instantiate the embedding model once; the index-building and index-loading
# cells below both receive this same object.
embeddings = download_hugging_face_embeddings()

In [6]:
# Display the embedding object's repr to confirm which model was loaded.
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False)

In [10]:
# Sanity check: embed a short string and print the length of the resulting vector.
query_result = embeddings.embed_query("Hello World")
print("Length", len(query_result))

Length 384


In [16]:
def create_db(text_chunks, embeddings, db_path='vectorstore/db_faiss'):
    """Embed the chunks into a FAISS index and save it to disk.

    Args:
        text_chunks: Documents to index.
        embeddings: Embedding model passed to ``FAISS.from_documents``.
        db_path: Folder the index is saved to. Defaults to
            ``'vectorstore/db_faiss'``, the folder the later cells load
            the index from.

    Returns:
        None. The index is only persisted; it is not returned.
    """
    db = FAISS.from_documents(text_chunks, embeddings)
    db.save_local(db_path)

In [17]:
# Build the FAISS index from the chunks and persist it under vectorstore/db_faiss.
create_db(text_chunks, embeddings)

In [18]:
from langchain_community.vectorstores import FAISS
# Load the FAISS database saved by create_db, using the same embeddings object.
# NOTE(review): newer langchain_community releases may require an explicit
# allow_dangerous_deserialization=True argument here — confirm against the
# installed version before upgrading.
db = FAISS.load_local("vectorstore/db_faiss", embeddings=embeddings)

# Query string handed to the similarity search below
query_text = "What is Object Detection"

# Smoke test: fetch the 3 closest chunks and print them
docs=db.similarity_search(query_text, k=3)
print(docs)

[Document(page_content=': 4\nquestion: What is object recognition in Computer Vision?', metadata={'source': 'E:\\projects\\EduBotIQ\\Data Prepration\\new_train.csv', 'row': 4}), Document(page_content=': 18\nquestion: How are object detection models evaluated in Computer Vision?', metadata={'source': 'E:\\projects\\EduBotIQ\\Data Prepration\\new_train.csv', 'row': 18}), Document(page_content='answer: Object recognition is a Computer Vision task that involves identifying and classifying objects in images or videos. Convolutional Neural Networks (CNNs) are often used for object recognition, enabling machines to recognize and categorize objects within a visual scene.', metadata={'source': 'E:\\projects\\EduBotIQ\\Data Prepration\\new_train.csv', 'row': 4})]


In [19]:
# Prompt text for the QA chain. {context} and {question} are the two
# placeholders declared as input variables when the PromptTemplate is built.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [20]:
# Wrap the raw template in a PromptTemplate and package it in the dict that is
# later passed as chain_type_kwargs to RetrievalQA.from_chain_type.
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs = {"prompt": PROMPT}

In [21]:
# Local GGUF model loaded through CTransformers.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
# NOTE(review): the sample response at the end of the notebook stops mid-sentence,
# presumably because of the max_new_tokens limit set here — confirm.
llm=CTransformers(model=r"E:\projects\EduBotIQ\tiny_model\tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
                  model_type="llama",
                  config={'max_new_tokens':256,
                          'temperature':0.5})

In [7]:
from langchain_community.vectorstores import FAISS
# Load the FAISS database with the embeddings
# (same folder as the earlier similarity-search cell; this rebinds `db`).
db = FAISS.load_local("vectorstore/db_faiss", embeddings=embeddings)


# Retriever over the index, configured with search_kwargs k=2
retriever=db.as_retriever(search_kwargs={'k': 2})

In [8]:
# Inspect which search type the retriever is using.
retriever.search_type

'similarity'

In [9]:
# Confirm the search kwargs the retriever was configured with.
retriever.search_kwargs

{'k': 2}

In [22]:
# Assemble the retrieval QA chain from the pieces built above:
#   - llm: the local CTransformers model
#   - chain_type_kwargs: carries the custom PROMPT
#   - retriever: the `retriever` object created and inspected earlier, reused
#     here instead of building a second identical one inline, so the k=2
#     setting lives in exactly one place
#   - return_source_documents=True is passed through unchanged
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

In [23]:
# Interactive question/answer loop.
# Type "exit" or "quit" (or submit an empty line) to stop. EOF or a kernel
# interrupt while waiting for input also ends the loop cleanly instead of
# surfacing a traceback; previously the loop had no exit path at all.
while True:
    try:
        user_input = input("Input Prompt:")
    except (EOFError, KeyboardInterrupt):
        break

    if user_input.strip().lower() in {"", "exit", "quit"}:
        break

    # Use invoke() rather than calling the chain object directly; the direct
    # call is presumably what triggers the deprecation warning in the output.
    result = qa.invoke({"query": user_input})
    print("Response : ", result["result"])

  warn_deprecated(


Response :  Object Detection Models Evaluated in Computer Vision

Object detection is a fundamental task in computer vision, where an algorithm is trained to identify objects of interest in images or videos. Object detection models are typically trained on large datasets containing labeled images with their corresponding object labels. The goal of object detection is to accurately predict the location and size of each object in the image or video.

Different types of object detection models can be categorized based on their architecture, training approach, and performance metrics. Some common types include:

1. Fully Convolutional Networks (FCN)
2. ResNet
3. Inception-v4
4. MobileNetV2
5. ShuffleNet
6. YOLOv3
7. SSD
8. RetinaNet
9. Faster R-CNN
10. PASCAL VOC

In this context, the question asks about object detection models evaluated in computer vision. Object detection models are evaluated using various metrics such as precision, recall, accuracy, and F1 score. These metrics help to d