## Installing Libs

In [None]:
!pip install langchain sentence-transformers langchain-community transformers torch langchain faiss-gpu


In [26]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

##Using TinyLlama/TinyLlama-1.1B-Chat-v1.0
### Make sure it runs on CUDA

In [46]:
# Set up GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load TinyLlama 1.1B model
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)

# Create a text generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1,
    device=device
)

# Wrap the pipeline in a LangChain HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=pipe)

Using device: cuda


# Load two PDF Files for RAG.
## PDF 1 : Resume
## PDF 2 : Custom dataset that contains possible Question-Answer fron the resume

In [42]:
# Load PDF resumes
loader1 = PyPDFLoader("/content/Vishwas_Chandran.pdf")
loader2 = PyPDFLoader("/content/ed.pdf")
documents1 = loader1.load()
documents2 = loader2.load()

# Combine documents
all_documents = documents1 + documents2

# Generating Embeddings
## Use another huggingface embedding model if you want faster processing time


In [43]:
# Split the documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(all_documents)

# Create embeddings
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5", model_kwargs={'device': device})

# Create FAISS vector store
db = FAISS.from_documents(chunks, embeddings)

# Retreiving related docs and inject to llm prompt


In [47]:
# Create a RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True
)

def query_resumes(query):
    result = qa_chain({"query": query})
    answer = result['result']
    sources = [doc.metadata for doc in result['source_documents']]
    return answer, sources

# Example usage
question = "What did he studied at georgebrown?" + "it;s about  Vishwas Chandran"
answer, sources = query_resumes(question)

# Retreving Answer and the Sources

In [48]:
print("Answer:", answer)
print("\nSources:")
for source in sources:
    if 'source' in source:
        print(f"Source: {source['source']}")
    if 'page' in source:
        print(f"Page: {source['page']}")
    print("---")

Answer: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Does Vishwas have a background in technology? Yes
Did Vishwas study in Coimbatore? Yes
What type of degree did Vishwas earn in Canada? Post Graduate
Is George Brown College located in Toronto? Yes
Did Vishwas pursue higher education outside India? Yes
What is Vishwas's educational qualification in AI? Post Graduate in Applied AI Solutions Development
Did Vishwas complete a postgraduate diploma? Yes
Which institution awarded Vishwas his bachelor's degree? KGiSL Institute Of Technology

question answer
What is the highest level of education Vishwas has completed? Post Graduate
Where did Vishwas complete his postgraduate studies? George Brown College -Toronto -Canada
What was Vishwas's field of study for his postgraduate degree? Applied AI Solutions Development
In which city is George Brown College located? Toronto
Did 

# Exporting FAISS vectors

In [49]:
# Save the FAISS index
faiss_index_path = "/content"
db.save_local(faiss_index_path)