In [None]:
import os
import getpass

# Ask for the OpenAI key only when it is missing (or empty) in the process
# environment, so re-running this cell -- or running the notebook with the key
# already exported -- does not block on an interactive prompt.
# getpass keeps the typed key out of the notebook's saved output.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')

In [7]:
# !pdftotext "Ballistic impact performance of UHMWP fabric impregnated with shear.pdf" "output.txt"
!pip install pyPdf
!pip install pdfminer.six
!pdf2txt.py -o output.txt "Ballistic impact performance of UHMWP fabric impregnated with shear.pdf"

In [None]:
!pip install langchain
!pip install openai
!pip install tiktoken
!pip install chromadb

In [11]:
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Load the extracted text, split it into chunks, embed each chunk and index the
# embeddings in a Chroma vector store that the QA chain can query.
#
# pdf2txt.py writes UTF-8 by default, so read the file back as UTF-8 explicitly
# instead of relying on the platform's default encoding.
raw_documents = TextLoader('output.txt', encoding='utf-8').load()

# chunk_size=1000 with no overlap between consecutive chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

# OpenAIEmbeddings is constructed with no arguments here; the notebook sets
# OPENAI_API_KEY in the environment in its first cell.
db = Chroma.from_documents(documents, OpenAIEmbeddings())
retriever = db.as_retriever()



In [12]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough


# Prompt text with two placeholders: {context} and {question}.
# Built from adjacent string literals; the resulting value is the same
# text as a triple-quoted block with one trailing newline.
template = (
    "Answer the question based only on the following context:\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

# Chat prompt built from the template above.
prompt = ChatPromptTemplate.from_template(template)

# Chat model constructed with the library's default settings.
model = ChatOpenAI()


def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents in input order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    page_texts = (doc.page_content for doc in docs)
    return "\n\n".join(page_texts)


# Input mapping for the prompt:
#   "context"  -> the retriever piped into format_docs
#   "question" -> RunnablePassthrough()
retrieval_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: input mapping -> prompt -> chat model -> StrOutputParser.
chain = retrieval_inputs | prompt | model | StrOutputParser()

In [15]:
# Ask the chain one question about the indexed paper; the answer is left as
# the cell's last expression so the notebook displays it.
question = "can you explain me the work of this document in detail"
answer = chain.invoke(question)
answer

'Based on the provided context, this document appears to be a report or study that investigates the ballistic performance of a projectile made of aluminium. The document describes the geometry and composition of the projectile and discusses the use of an O-ring sealing to reduce friction and gas leakage during firing.\n\nThe study measures the pre-impact and post-impact velocities of the projectile using a high-speed camera. The impact velocity is controlled by adjusting the compressor pressure and using overhead projector sheets and butter paper sheets. Ballistic tests are conducted on panels made of UD-UHMWP (unidirectional ultra-high molecular weight polyethylene) with varying numbers of layers.\n\nThe target panels are placed in a pendulum fixture and impacted by the projectile. The study measures the energy absorption, ballistic limit, and back face signature of the panels. Multiple tests are conducted for each velocity to account for measurement variability, and average values wi