In [1]:
import os

curr_dir = os.listdir(".")

extensions = (".pdf", ".docx", ".pptx")

curr_dir_files = [file for file in curr_dir if file.endswith(extensions)]
print(curr_dir_files)

MODEL = "llama3.2"

['Cover Letter.docx', 'EP-1.pdf', 'EP-2.pdf', 'samplepptx.pptx']


In [2]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import Docx2txtLoader
from langchain_community.document_loaders import UnstructuredPowerPointLoader


pages = []

for file in curr_dir_files:
    if file.endswith(".pdf"):
        pdfLoader = PyPDFLoader(file)
        pages += pdfLoader.load()
    elif file.endswith(".docx"):
        docxLoader = Docx2txtLoader(file)
        pages += docxLoader.load()
    elif file.endswith(".pptx"):
        pptxLoader = UnstructuredPowerPointLoader(file)
        pages += pptxLoader.load()

print(pages)




[Document(metadata={'source': 'Cover Letter.docx'}, page_content='Dear Hiring Manager,\n\nI am excited to apply for the Associate Full Stack Developer position at ABBYY. With hands-on experience in full-stack development and a background at Iron Mountain, a competitor in the industry, I bring a unique perspective and a proven ability to contribute to innovative solutions.\n\nAt Iron Mountain, I developed and optimized applications using technologies such as ReactJS, NodeJS, ExpressJS, and MongoDB. A notable project was DevTinder, a full-stack application where I created RESTful APIs, implemented JWT-based authentication, and ensured database integrity through robust data validation. Leveraging tools like Redux for state management and Tailwind CSS for responsive designs, I focused on delivering efficient and user-centric solutions. Additionally, my experience with real-time problem-solving in cloud-based environments has sharpened my ability to tackle challenges effectively.\n\nI admir

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=120)
chunks = splitter.split_documents(pages)



In [4]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings

# Pull the model first

# Create the embedding and vectorstore
embedding = OllamaEmbeddings(model="llama3.2:3b")
vectorstore2 = FAISS.from_documents(chunks, embedding)

  embedding = OllamaEmbeddings(model="llama3.2:3b")


In [5]:
retriever = vectorstore2.as_retriever()

In [6]:
from langchain_ollama import ChatOllama

model = ChatOllama(model="llama3.2:3b", temperature=0)

In [7]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

chain = model | parser


In [8]:
from langchain.prompts import PromptTemplate

template ="""
You are an assistant that provides answers to questions based on
a given context. 

Answer the question based on the context. If you can't answer the
question, reply "I don't know".

Be as concise as possible and go straight to the point.

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)



In [9]:
chain = prompt | model | parser





In [10]:
from operator import itemgetter

chain = (
    {
    "context" : itemgetter("question") | retriever,
    "question" : itemgetter("question"),
    }
    | prompt
    | model
    | parser
    
)

In [13]:
question = input("Enter your question: ")

print(f"Question: {question}")
print(f"Ansewer : {chain.invoke({'question': question})}")
print("************************")

Question: Do we have any ppt file in the context?
Ansewer : I don't know.
************************
