In [49]:

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredWordDocumentLoader
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader
from pypdf import PdfReader
import re

In [5]:
def count_paragraphs(corpus):
    """Split ``corpus`` into paragraphs and report simple size statistics.

    Paragraphs are assumed to be separated by a blank line, i.e. two
    consecutive newline characters. Segments that are empty or contain only
    whitespace are discarded, so an empty corpus yields zero paragraphs
    rather than a single empty one.

    Args:
        corpus: The full text to analyse.

    Returns:
        A tuple ``(num_paragraphs, avg_paragraph_length, paragraphs)``:
        the number of non-blank paragraphs, their mean length in characters
        (``0`` when there are none), and the list of paragraph strings.
    """
    # str.split never returns an empty list, so blank segments have to be
    # filtered explicitly for the "no paragraphs" case to be reachable.
    paragraphs = [segment for segment in corpus.split('\n\n') if segment.strip()]
    num_paragraphs = len(paragraphs)

    if num_paragraphs:
        avg_paragraph_length = sum(len(segment) for segment in paragraphs) / num_paragraphs
    else:
        avg_paragraph_length = 0

    return num_paragraphs, avg_paragraph_length, paragraphs

In [14]:
# Read the repair guide and concatenate the extracted text of every page.
# The result starts with a single space and each page is preceded by a blank
# line ("\n\n"), which is the paragraph separator count_paragraphs splits on.
pdf_data = PdfReader("./Car Repair Guide.pdf")
page_texts = [page.extract_text() for page in pdf_data.pages]
text_data_pdf = " " + "".join("\n\n" + page_text for page_text in page_texts)

In [19]:
def remove_url_from_str(input_text):
    """Return ``input_text`` with every URL replaced by a single space.

    A URL is any run of non-whitespace characters that starts with
    ``http://`` or ``https://``, or that starts with ``www.``.
    """
    return re.sub(r'https?://\S+|www\.\S+', " ", input_text)

In [21]:
# Split the extracted PDF text into paragraphs and collect basic statistics,
# then strip URLs from every paragraph.
(
    total_number_of_paragraph,
    avg_single_paragraph_length,
    list_of_paragraph,
) = count_paragraphs(text_data_pdf)
list_of_paragraph = list(map(remove_url_from_str, list_of_paragraph))

In [26]:
# Persist the extracted PDF text so it can be loaded back through TextLoader.
# The context manager guarantees the handle is closed even if write() raises.
# NOTE(review): this saves the raw text_data_pdf; the URL-stripped
# list_of_paragraph is not what gets written — confirm that is intended.
with open("./car_info.txt", "w") as text_file:
    text_file.write(text_data_pdf)

In [27]:
# Point a LangChain TextLoader at the exported text file and load it once so
# the resulting documents are shown as the cell output.
langchain_loader = TextLoader(file_path="./car_info.txt")
langchain_loader.load()



In [28]:
# Break the loaded document(s) into overlapping chunks (chunk_size=1200,
# chunk_overlap=300) that will later be embedded and indexed.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1200,
    chunk_overlap=300,
)
docs = text_splitter.split_documents(langchain_loader.load())

In [36]:
# Load the text embedding model (BAAI/bge-small-en-v1.5) on the CPU.
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

model_name = "BAAI/bge-small-en-v1.5"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
hf_embedding_model = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)



In [37]:
# Display the embedding model's repr to inspect its configuration.
hf_embedding_model

HuggingFaceBgeEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='BAAI/bge-small-en-v1.5', cache_folder=None, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': True}, query_instruction='Represent this question for searching relevant passages: ', embed_instruction='')

In [38]:
# Create the vector database: embed every chunk in `docs`, build a FAISS
# index from the embeddings, and save it to the "car_repair_guide" folder.
from langchain_community.vectorstores import FAISS

vec_database = FAISS.from_documents(documents=docs, embedding=hf_embedding_model)
vec_database.save_local(folder_path="car_repair_guide")


In [45]:
# Reload the saved FAISS index and expose it as a retriever with k=5.
# NOTE: allow_dangerous_deserialization=True opts in to pickle-based loading;
# only use it on index folders you created yourself.
load_saved_db = FAISS.load_local(
    "./car_repair_guide/",
    hf_embedding_model,
    allow_dangerous_deserialization=True,
)
text_retriver = load_saved_db.as_retriever(search_kwargs={"k": 5})

In [46]:
# Retrieve the chunks most relevant to the clothing/hair safety question.
# `invoke` is the supported replacement for the deprecated
# `get_relevant_documents` retriever method.
query = 'What precautions needs to be taken in terms of clothing and hair'
context = text_retriver.invoke(query)
context

  warn_deprecated(


[Document(page_content='loose strands of hair. Bundle your hair up if it\x01s long. Do not w ear necklaces, rings or \nother jewelry. Some may remember Mr. T, the Mohawk-bearing muscle man from the \n1980s sitcom The A-Team . He often wore about 50 lbs. of jewelry and frequently \nworked on motor vehicles. This was a blatant violation of mechanic safety rules. Only a \nperson who can get thrown out of an airplane, flip a jeep, and come out of machine gun fire without a scratch can work on engines that way. \nIdeally the best clothing for performing automotive maintenance and repairs is a \nmechanic\x01s jumpsuit. It\x01s durable, comfortable, there are pockets for tools, and there is nothing loose that can get caught and pulled into moving parts. \nAvoid slips, falls and hazardous chemicals: Hazardous chemicals can include gasoline, oil, coolant, and other vehicle fluids. Avoid \ncontact with eyes and mouth. If you ever have contact with eyes then flush with cool water for several minu

In [47]:
# Retrieve the chunks most relevant to the engine-oil question.
# `invoke` is the supported replacement for the deprecated
# `get_relevant_documents` retriever method.
query = 'When and how to check engine oil level'
context = text_retriver.invoke(query)
context

[Document(page_content='Crawford\x01s Guide to Beginners Auto Maintenance & Repair  www.CrawfordsAutoService.com \n17 Engine Oil . Engine oil should be checked each time the vehicle is refueled (i.e. each \ntime you go to the gas station). Most engines, but not all, have a dipstick to indicate the \noil level. Typically the handle is yellow. Follow these steps to check the oil: \n\uf0b7Turn the engine off. \n\uf0b7Remove the dipstick. \n\uf0b7Wipe off the end of the dipstick with a rag or paper towel. \n\uf0b7Put the dipstick back in. \n\uf0b7Take it out to look at the level at the tip \nThe stick will have marks on it. The \x02add\x03 mark typically indicates one quart low. If an \nengine is leaking oil then the price of repairs can vary depending on which repair is needed and the make and model of the vehicle. \nOil dipstick (above) and oil level on a dipstick (below) \nImage credits: both CC-BY-SA-Dvortygirl on Wikipedia', metadata={'source': './car_info.txt'}),
 Document(page_conte

In [48]:
# Retrieve the chunks most relevant to the washer-fluid question.
# `invoke` is the supported replacement for the deprecated
# `get_relevant_documents` retriever method.
query = 'Below what temperature we need antifreeze washer fluid'
context = text_retriver.invoke(query)
context

[Document(page_content='Crawford\x01s Guide to Beginners Auto Maintenance & Repair  www.CrawfordsAutoService.com \n20 Windshield Washer Fluid . There may be a clear reservoir or a dipstick for windshield \nwasher fluid. Refer to the owner\x01s manual for the location. If you live or travel in cold \nclimates, below 32 degrees, you need to make sure that you use washer fluid with \nantifreeze. \nWindshield washer fluid \nImage credit: CC-BY-SA-Hamedog on Wikipedia \nTransmission Fluid . Refer to the owner\x01s manual for the location of the transmission \nfluid. Some vehicles have a dipstick but many new vehicles do not have one. In some \nvehicles you have to go underneath the vehicle to check this fluid. If the fluid is low, then there is a leak and it should be investigated. The fluid should also be red. If it is black and/or has a burnt smell then there is a problem with the transmission. \nNew transmission fluid, red in color', metadata={'source': './car_info.txt'}),
 Document(page