In [None]:
# Step 1: Import Libraries and Load Models
from langchain_community.llms.ctransformers import CTransformers
from transformers import AutoTokenizer, AutoModel
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.docstore.in_memory import InMemoryDocstore
import faiss
import torch
import numpy as np

In [None]:
# Llama-2 7B GGML checkpoint loaded through LangChain's CTransformers wrapper.
llm = CTransformers(
    model="TheBloke/Llama-2-7b-GGML",
    model_type="llama",
)

In [None]:
# Tokenizer for the sentence-transformers/all-MiniLM-L6-v2 checkpoint; embed_texts
# uses it to turn raw strings into model inputs.
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# Encoder for the sentence-transformers/all-MiniLM-L6-v2 checkpoint; embed_texts
# pools its last hidden state into one vector per text.
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

In [None]:
def embed_texts(texts):
    """Embed a batch of texts into fixed-size sentence vectors.

    Each text is tokenized (padded to the longest item in the batch and
    truncated to the tokenizer's limit), run through the encoder, and its
    token embeddings are averaged into a single vector.

    The average is weighted by the attention mask so that padding tokens do
    not contribute. Without the mask, a short text's embedding would depend
    on how much padding the rest of the batch forced onto it, so the same
    text would embed differently when encoded alone (as queries are) versus
    inside a batch (as documents are).

    Args:
        texts: List of strings to embed.

    Returns:
        numpy.ndarray with one row per input text and one column per hidden
        dimension of the encoder.
    """
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)

    token_embeddings = outputs.last_hidden_state
    # (batch, seq) -> (batch, seq, 1) so the mask broadcasts over the hidden dimension.
    mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp guards against division by zero for a row with no real tokens.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return (summed / token_counts).cpu().numpy()

In [None]:
# Embed a throwaway string once so the vector width is read from the encoder
# rather than hard-coded.
embedding_example = embed_texts(["test"])
embedding_dim = embedding_example.shape[1]

# Flat L2-distance FAISS index sized to that width.
index = faiss.IndexFlatL2(embedding_dim)

# Empty docstore, and the (initially empty) lookup from FAISS row position
# to document entry that later cells populate.
docstore = InMemoryDocstore()
index_to_docstore_id = {}

# LangChain FAISS wrapper bundling the embedder, index, docstore and lookup.
# NOTE(review): the later visible cells work on `index` and
# `index_to_docstore_id` directly and never call `vectorstore`.
vectorstore = FAISS(
    embedding_function=embed_texts,
    index=index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id,
)

In [None]:
# Step 2: Prepare your documents
# Each Document wraps one free-text passage; later cells embed `page_content`
# and return it as retrieval context.
documents = [
    Document(page_content="I am Moguloju Sai, a Data Science graduate from Hyderabad, Telangana, with a solid foundation in Machine Learning, Data Science, and hands-on project work. With 4 months of internship experience, I have developed a strong technical background, eager to apply my analytical and technical skills to solve real-world problems and contribute to impactful data-driven solutions."),
    Document(page_content="I have completed internships as a Junior Data Scientist at Coapps.ai, where I worked extensively with the Streamlit framework, focusing on a project aimed at detecting fake news using Machine Learning and NLP. This project allowed me to gain valuable experience in deploying sophisticated algorithms to address the challenge of information authenticity."),
    Document(page_content="I have technical proficiency in Python, Java, R, MySQL, and various tools like Excel and Tableau. Additionally, I have certifications in Data Science with Python (NPTEL), Python Programming (Internshala), AWS Fundamentals (Coursera), and Machine Learning and Artificial Intelligence (EducateNXT).")
]

In [None]:
# Embed the documents and add them to the FAISS index
texts = [doc.page_content for doc in documents]
embeddings = embed_texts(texts)

# FAISS assigns row positions sequentially, continuing from whatever the index
# already holds. Offsetting by `index.ntotal` keeps the lookup dict aligned
# with those positions even if this cell is executed more than once; on a
# fresh index the offset is 0.
first_id = index.ntotal

# FAISS expects a C-contiguous float32 matrix; add the whole batch in one call
# instead of one vector at a time.
index.add(np.ascontiguousarray(embeddings, dtype=np.float32))

for i, document in enumerate(documents):
    index_to_docstore_id[first_id + i] = document.page_content

In [None]:
# Define a Simple Retriever
def simple_retriever(query):
    """Return the stored text of the single nearest neighbour of ``query``.

    The query is embedded with ``embed_texts`` and searched against ``index``
    with ``k=1``. The resulting position is looked up in
    ``index_to_docstore_id``.

    Args:
        query: The question text to search with.

    Returns:
        The matching entry from ``index_to_docstore_id``, or the fallback
        message when the search yields no rows or the position has no entry.
    """
    query_vector = embed_texts([query])
    _distances, neighbour_ids = index.search(query_vector, k=1)

    if len(neighbour_ids) == 0:
        return " No matching document found"

    best_id = neighbour_ids[0][0]
    if best_id not in index_to_docstore_id:
        return " No matching document found"

    return index_to_docstore_id[best_id]

In [None]:
# Create the RAG Chain
class SimpleRetrievalQA:
    """Minimal retrieval-augmented QA chain: fetch context, then prompt the LLM.

    Args:
        llm: Either a plain callable taking a prompt string, or an object
            exposing ``invoke(prompt)``.
        retriever: Callable mapping a query string to the context text to
            place in the prompt.
    """

    def __init__(self, llm, retriever):
        self.llm = llm
        self.retriever = retriever

    def run(self, query):
        """Answer ``query`` using context obtained from the retriever.

        Args:
            query: The user's question.

        Returns:
            Whatever the LLM produces for the assembled prompt.
        """
        context = self.retriever(query)
        prompt = f'Context: {context}\n\nQuestion: {query}\n\nAnswer:'

        # Prefer `.invoke` when the LLM offers it (direct `__call__` on
        # LangChain LLMs is deprecated); fall back to calling it directly so
        # plain callables keep working.
        invoke = getattr(self.llm, "invoke", None)
        if callable(invoke):
            return invoke(prompt)
        return self.llm(prompt)
        
# Wire the LLM and the nearest-neighbour retriever into one QA chain.
qa_chain = SimpleRetrievalQA(
    llm=llm,
    retriever=simple_retriever,
)

In [None]:
# Ask a question using the RAG chain
question = "Who is Moguloju Sai?"

# Pass the question to the chain: it retrieves context and prompts the LLM
answer = qa_chain.run(question)

# Show whatever qa_chain.run returned
print(answer)