In [None]:
# Install pciutils via apt, then install Ollama with the install script from ollama.com.
# pciutils is presumably needed so the installer can detect the GPU — TODO confirm.
# NOTE(review): the script is piped straight from the network into `sh`; confirm the source is trusted.
!sudo apt-get install -y pciutils
!curl -fsSL https://ollama.com/install.sh | sh # download ollama api
from IPython.display import clear_output

# Start the Ollama API server as a background process, launched from a separate thread
import os
import threading
import subprocess
import requests
import json

def ollama():
  """Export the Ollama host/origin settings and spawn `ollama serve`.

  The two settings are written into this process's environment before the
  server is launched. The child process is started without waiting for it,
  and its handle is not kept. Returns None.
  """
  os.environ.update({
    "OLLAMA_HOST": "127.0.0.1:11434",
    "OLLAMA_ORIGINS": "*",
  })
  serve_command = ["ollama", "serve"]
  subprocess.Popen(serve_command)

# Launch the server from a background thread, then wait until the API answers so
# that later cells (e.g. `ollama pull`) do not race the server start-up.
import time

ollama_thread = threading.Thread(target=ollama)
ollama_thread.start()
ollama_thread.join()  # ollama() only spawns the process, so this returns at once

# Poll the local endpoint for up to 60 s. Any HTTP response means the server is
# listening. Warn instead of raising so the cell stays best-effort.
_ready_deadline = time.monotonic() + 60
while time.monotonic() < _ready_deadline:
    try:
        requests.get("http://127.0.0.1:11434", timeout=2)
        break
    except requests.exceptions.RequestException:
        time.sleep(0.5)  # server not listening yet; retry until the deadline
else:
    print("Warning: Ollama server did not respond within 60 s")

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
pciutils is already the newest version (1:3.7.0-6).
0 upgraded, 0 newly installed, 0 to remove and 20 not upgraded.
>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> NVIDIA GPU installed.
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


Bringing in the documents

In [None]:
# Work from the policy folder and show the PDF files it contains.
os.chdir("/content/sample_data/AI_Policy")
list_dir = os.listdir()
# Sorted so the displayed listing is stable; os.listdir() returns entries in arbitrary order.
sorted(name for name in list_dir if name.endswith(".pdf"))


['(HAF) TAB 3 - Promote a Healthy and Vibrant DAF Data and AI Ecosystem LOE6 2.pdf',
 'Strategy Considerations for GenAI at USAFA 1.pdf',
 '(HAF) TAB 2 - Enhance the Total Force Through Training, Education, and Professional Development (LOE3) (1) 2.pdf',
 'USAFA AI Controls and Approvals Matrix cao 10 Sept 24 1.pdf',
 'USAFA AI Lab Acq Strat cao 16 Sep 24 1.pdf',
 'Department-of-Defense-Responsible-Artificial-Intelligence-Strategy-and-Implementation-Pathway 1.pdf',
 'Executive Order on the Safe, Secure, an.. 2.pdf',
 'DoD Autonomous Weapon Systems 300009p 1.pdf',
 'M-24-10-Advancing-Governance-Innovation-and-Risk-Management-for-Agency-Use-of-Artificial-Intelligence 2.pdf']

In [None]:
#!pip install langchain-community
#!pip install pypdf
#!pip install langchain-ollama
#!pip install tiktoken
#!pip install faiss-cpu
!ollama pull llama3.3:70B

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
pulling 4824460d29f2... 100% ▕▏  42 GB                         
pulling 948af2743fc7... 100% ▕▏ 1.5 KB                         
pulling bc371a43ce90... 100% ▕▏ 7.6 KB                         
pulling 53a87df39647... 100% ▕▏ 5.6 KB                         
pulling 56bb8bd477a5... 100% ▕▏   96 B                         
pulling c7091aa45e9b... 100% ▕▏  562 B                         
verifying sha256 digest ⠋ [?25h[?25l[2K[1G[A[2K[1G[A[2K[1G[A[2K[1G[A[2K[1G[A[2K[1G[A[2K[1G[A[2K[1Gpulling manifest 
pulling 4824460d29f2... 100% ▕▏  42 GB                         
pulling 948af2743fc7... 100% ▕▏ 1.5 KB                         
pulling bc371a43ce90... 100% ▕▏ 7.6 KB                         
pulling 53a87df39647... 100% ▕▏ 5.6 KB                         
pulling 56bb8bd477a5... 100% ▕▏   96 B                         
pulling c7091aa45e9b... 100% ▕▏  562 B                         
verifying sha256 digest ⠙ 

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
import langchain
import langchain_community
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Load the PDFs found in the policy folder into a list of documents
loader = PyPDFDirectoryLoader("/content/sample_data/AI_Policy")
docs_list = loader.load()
# Fail here with a clear message rather than later, when the index would be built from nothing
if not docs_list:
    raise ValueError("No documents were loaded from /content/sample_data/AI_Policy")


In [None]:
# Token-based splitter (tiktoken length function): chunk size 250, no overlap
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=0,
    chunk_size=250,
)
# Break the loaded documents into chunks for embedding
doc_splits = text_splitter.split_documents(docs_list)

In [None]:
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
# Prompt for the question-answering chain: it receives the user's question
# and the text of the retrieved documents.
prompt = PromptTemplate(
    input_variables=["question", "documents"],
    template="""You are an assistant for question-answering tasks.
    Use the following documents to answer the question.
    If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise:
    Question: {question}
    Documents: {documents}
    Answer:
    """,
)

In [None]:
# Chat model served by the local Ollama instance (Llama 3.1), temperature 0
llm = ChatOllama(temperature=0, model="llama3.1")

In [None]:
# Pipeline: fill the prompt, send it to the LLM, return the reply as a plain string
rag_chain = prompt.pipe(llm, StrOutputParser())

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
# Embed the chunks with the default HuggingFace embedding model and index them in FAISS
embeddings = HuggingFaceEmbeddings()
vectorstore = FAISS.from_documents(documents=doc_splits, embedding=embeddings)

# Expose the index through the retriever interface used by the RAG application
retriever = vectorstore.as_retriever()
# Define the RAG application class
class RAGApplication:
    """Minimal retrieval-augmented QA pipeline: retrieve, build context, answer.

    Both collaborators are stored exactly as given:
      retriever -- object whose ``invoke(question)`` returns an iterable of
                   documents exposing a ``page_content`` attribute.
      rag_chain -- object whose ``invoke(mapping)`` accepts the keys
                   "question" and "documents".
    """

    def __init__(self, retriever, rag_chain):
        self.retriever = retriever
        self.rag_chain = rag_chain

    def run(self, question):
        """Answer ``question`` using the documents retrieved for it.

        Returns whatever ``rag_chain.invoke`` returns.
        """
        # Retrieve relevant documents
        documents = self.retriever.invoke(question)
        # Separate chunks with a real newline; the previous "\\n" literal put a
        # visible backslash-n between chunks in the prompt.
        doc_texts = "\n".join(doc.page_content for doc in documents)
        # Get the answer from the language model
        return self.rag_chain.invoke({"question": question, "documents": doc_texts})
# Wire the retriever and the chain into the application object
rag_application = RAGApplication(retriever=retriever, rag_chain=rag_chain)
# First sample query: print the question followed by the model's answer
question = "What is prompt engineering"
answer = rag_application.run(question)
print(f"Question: {question}", f"Answer: {answer}", sep="\n")


  embeddings = HuggingFaceEmbeddings()


Question: What is prompt engineering
Answer: I don't know the answer to your question about "prompt engineering". The provided documents appear to be related to procurement and program management for an AI Lab, but do not mention prompt engineering.


In [None]:
# Ask about the USAFA AI policy and print the question followed by the answer
question = "What is USAFA AI policy"
answer = rag_application.run(question)
print(f"Question: {question}", f"Answer: {answer}", sep="\n")

Question: What is USAFA AI policy
Answer: The USAFA AI policy requires compliance with DoD Information Enterprise policies and architecture (DoDI 5000.82) and implementation of core principles such as tailored cybersecurity and CCA considerations. The AI Lab does not require strict compliance, but will implement many of these principles in a scaled and escalated manner. A Cybersecurity Strategy consistent with DoD policies is also required for the program.


In [None]:
# Ask whether a policy template can be developed and print the question followed by the answer
question = "Can an USAFA AI policy template be developed"
answer = rag_application.run(question)
print(f"Question: {question}", f"Answer: {answer}", sep="\n")

Question: Can an USAFA AI policy template be developed
Answer: Based on the provided documents, a USAFA AI policy template can be developed as part of the USAFA AI Strategy initiative, which is currently in its inception stage. The strategy outlines key focus areas, including establishing trust in AI/ML through training and guidance, determining necessary infrastructure, and acquiring expertise to upskill and scale AI/ML advancement. This effort will also align with DoD and AF policies on responsible AI use.
