## PATHS

In [1]:
# Folder the source PDFs are read from (prefixed to the PDF file name).
directory_path = "./pdfs/"

# Folder the FAISS index is saved to and loaded from.
embeddings_directory = "./embeddings/"

#### DATA INGESTION

In [63]:
from langchain_community.document_loaders import PyPDFLoader

# Load the paper from the PDF folder and preview the first loaded document
# as a quick check that text extraction worked.
file_name = "VT-ADL.pdf"

loader = PyPDFLoader(f"{directory_path}{file_name}")
docs = loader.load()
print(docs[0])


page_content='VT-ADL: A Vision Transformer Network for Image
Anomaly Detection and Localization
Pankaj Mishra
University of Udine, Italy
Email: mishra.pankaj@spes.uniud.itRiccardo Verk
University of Udine, Italy
Email: verk.riccardo@spes.uniud.itDaniele Fornasier
beanTech srl, Italy
Email: daniele.fornasier@beantech.it
Claudio Piciarelli
University of Udine, Italy
Email: claudio.piciarelli@uniud.itGian Luca Foresti
University of Udine, Italy
Email: gianluca.foresti@uniud.it
Abstract —We present a transformer-based image anomaly
detection and localization network. Our proposed model is
a combination of a reconstruction-based approach and patch
embedding. The use of transformer networks helps preserving
the spatial information of the embedded patches, which is later
processed by a Gaussian mixture density network to localize the
anomalous areas. In addition, we also publish BTAD, a real-world
industrial anomaly dataset. Our results are compared with other
state-of-the-art algorithms usin

#### TEXT SPLITTING

In [64]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks (chunk_size=300, chunk_overlap=30)
# and report how many chunks were produced.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=30,
)
final_docs = text_splitter.split_documents(docs)
print(len(final_docs))


111


### OLLAMA EMBEDDINGS AND CREATING THE VECTOR STORE DB (FAISS)

In [65]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings

# Embed the chunks with the Ollama "llama3.2" model and index them in FAISS.
embedding = OllamaEmbeddings(model="llama3.2")
db = FAISS.from_documents(documents=final_docs, embedding=embedding)


In [66]:
## SAVING THE EMBEDDINGS
# Persist the index under the embeddings folder so it can be reloaded later
# without re-embedding the PDF.
# NOTE(review): the index name is spelled "VIT-ADL" while the PDF is
# "VT-ADL.pdf"; harmless as long as the loading cell uses the same spelling.
db.save_local(f"{embeddings_directory}VIT-ADL")


In [2]:
## LOADING THE EMBEDDINGS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

# Use the same embedding model ("llama3.2") that the index was built with.
embedding = OllamaEmbeddings(model="llama3.2")

# Load the FAISS index from the local directory.
# allow_dangerous_deserialization=True opts in to loading serialized
# (presumably pickled) data; acceptable here only because the index was
# written by this notebook. Do not point this at an untrusted index.
db = FAISS.load_local(
    f"{embeddings_directory}VIT-ADL",
    embedding,
    allow_dangerous_deserialization=True,
)


In [3]:
# Retrieval sanity check: look up the chunks closest to a short probe query
# and display the text of the top-ranked one.
query = "vision transformer"
res = db.similarity_search(query=query)
res[0].page_content

'Fig. 2. Left image: model overview. Image is split into patches, which are augmented with positional embedding. The resulting sequence is fed to the'

In [4]:
# `langchain.llms` is a deprecated re-export path; import the Ollama wrapper
# from langchain_community, as the other cells of this notebook already do
# for the loader, embeddings and vector store.
from langchain_community.llms import Ollama

# LLM handle for the "llama3.2" Ollama model, passed to the chains built in
# the following cells.
llama = Ollama(model="llama3.2")

# Display the configured LLM object.
llama

Ollama(model='llama3.2')

In [5]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain

# Prompt with two template variables: {context} receives the formatted
# documents, {query} names the concept to explain.
prompt = ChatPromptTemplate.from_template("""
    Based on the following context, explain the concept of '{query}' if it is mentioned in the context.
    <context>
    {context}
    </context>
    If the context includes a section heading related to '{query}', focus on that part.
""")

# Chain that stuffs the documents into the prompt and sends it to the LLM.
document_chain = create_stuff_documents_chain(llama, prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'query'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'query'], input_types={}, partial_variables={}, template="\n    Based on the following context, explain the concept of '{query}' if it is mentioned in the context.\n    <context>\n    {context}\n    </context>\n    If the context includes a section heading related to '{query}', focus on that part.\n"), additional_kwargs={})])
| Ollama(model='llama3.2')
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [14]:
## CREATING A QA CHAIN
from langchain.chains import RetrievalQA

# Retriever over the FAISS index, configured with k=10 results per question.
retriever = db.as_retriever(search_kwargs={"k": 10})

# "stuff" chain type: the retrieved chunks are placed into a single prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm=llama,
    retriever=retriever,
    chain_type="stuff",
)

qa_chain



RetrievalQA(verbose=False, combine_documents_chain=StuffDocumentsChain(verbose=False, llm_chain=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:"), llm=Ollama(model='llama3.2'), output_parser=StrOutputParser(), llm_kwargs={}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), document_variable_name='context'), retriever=VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000018E6C54F4D0>, search_kwargs={'k': 10}))

In [16]:
# Ask the QA chain a question about the paper and print only the answer text.
response = qa_chain.invoke(
    {"query": "What is the method used in this paper to detect and remove anomaly. Explain it as well"}
)
print(response["result"])

The method used in this paper for detecting and removing anomalies is a transformer-based framework that combines reconstruction and patch-based learning for image anomaly detection and localization.

Here's a breakdown of how it works:

1. The input images are first encoded using a transformer encoder, which produces encoded features.
2. These encoded features are then summed into a reconstruction vector, which is used as the input to the decoder.
3. The reconstructed output is compared with the original image to compute two loss functions:
	* Reconstruction loss (-LL): measures how well the model can reconstruct the original image from the encoded features.
	* Anomaly score (MSE or SSIM): measures the difference between the original image and the reconstructed output, which is used as a score for anomaly detection.
4. The combined loss function is weighted to balance between the reconstruction loss and the anomaly score.

To detect anomalies, the model uses a combination of two strat