<a href="https://colab.research.google.com/github/Colsai/AI_RAG_Modeling/blob/main/RAG_modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# OpenAI-Based Basic RAG

In [33]:
#Install Dependencies
!pip install langchain_community langchain openai chromadb tiktoken

Collecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tiktoken
Successfully installed tiktoken-0.7.0


In [72]:
import os
import pandas as pd
from google.colab import userdata
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from openai import OpenAI

#Use Google Colab to Retrieve Secrets (read once and reused for every client below)
openai_api_key = userdata.get('openai_key')

#Chat model used for every completion request
model_select = "gpt-3.5-turbo-16k"

#OpenAI client; the key is passed explicitly
client = OpenAI(
    # defaults to os.environ.get("OPENAI_API_KEY") when api_key is omitted
    api_key=openai_api_key,
)

#Data Load Functions
def return_oig_work_plans(site:str = "https://oig.hhs.gov/reports-and-publications/workplan/active-item-table.asp") -> pd.DataFrame:
    '''
    Load the first HTML table found at `site` into a DataFrame.

    Args:
        site: Location of the page holding the table, passed straight to
            pandas.read_html. Defaults to the HHS OIG active work plan
            item table.

    Returns:
        The first table on the page with its last row removed.

    Raises:
        RuntimeError: If the page cannot be read or parsed. The message is
            "Error: <original message>" and the original exception is
            attached as the cause.
    '''
    try:
        tables = pd.read_html(site)

        # Keep every row except the last one of the first table
        # (presumably a non-data footer row -- TODO confirm against the page)
        temp_df = tables[0].iloc[:-1]

    except Exception as e:
        # RuntimeError is still an Exception, so existing `except Exception`
        # callers keep working; `from e` keeps the root cause in the traceback.
        raise RuntimeError(f"Error: {e}") from e

    return temp_df

#Response Functions
def user_query_similarity_search(query:str = '', k:int = 5) -> str:
    '''
    Retrieve the stored chunks most similar to `query` and join them into one context string.

    Relies on the module-level `db_chroma` vector store, which is created in
    the `__main__` section; calling this before the store exists raises
    NameError.

    Args:
        query: Text to search the vector store with.
        k: Number of chunks to retrieve (defaults to 5).

    Returns:
        The page contents of the retrieved chunks, separated by blank lines.
    '''
    # The search returns (document, score) pairs; only the text is used here.
    scored_docs = db_chroma.similarity_search_with_score(query, k = k)

    return "\n\n".join(doc.page_content for doc, _score in scored_docs)

def generate_response(user_query, temperature=.1, max_tokens=2500) -> str:
    '''
    Send `user_query` to the chat model and return the text of its reply.

    Uses the module-level `client` and `model_select`.

    Args:
        user_query: Content of the user message.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens forwarded to the API.

    Returns:
        The message content of the first returned choice.
    '''
    # Fixed persona for every request
    system_message = {
        "role": "system",
        "content": "Take the role of a federal expert at HHS OIG. Provide insight into a specific question to the public.",
    }
    user_message = {"role": "user", "content": user_query}

    completion = client.chat.completions.create(
        model=model_select,
        messages=[system_message, user_message],
        temperature=temperature,
        top_p=1,
        max_tokens=max_tokens,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

def generate_rag_response(question: str = 'Tell me about HHS OIG work occurring on covid-19.') -> str:
    '''
    Answer `question` with the chat model, grounded in retrieved context.

    The context comes from user_query_similarity_search(question) and is
    embedded, together with the question, in a fixed instruction prompt that
    is sent through generate_response.

    Args:
        question: The user's question.

    Returns:
        The model's answer text.
    '''
    context = user_query_similarity_search(query = question)

    # The prompt is built with a plain f-string only. It is deliberately NOT
    # passed through ChatPromptTemplate afterwards: the text is already fully
    # interpolated, and any curly braces in the retrieved context or the
    # question would be parsed as template variables.
    rag_prompt = \
    f""" Please use your expertise to answer this question: {question}.
    To answer the question in more detail, use the following context {context}.
    Please provide a detailed answer, use bullet points when applicable, and use quotations and sources from the context where appropriate.
    Do not make assumptions or guesses about current work.
    """

    return generate_response(rag_prompt)

#Run Main
if __name__ == '__main__':

    from langchain.schema import Document

    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # Rows with a missing Summary are skipped: a missing value is not text
    # and cannot be handed to the text splitter.
    oig_summaries = list(return_oig_work_plans()['Summary'].dropna())

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)

    # One list of text chunks per summary
    chunks = [text_splitter.split_text(summary) for summary in oig_summaries]

    # Flatten the per-summary lists and wrap every chunk as a Document object
    documents = [Document(page_content=chunk) for sublist in chunks for chunk in sublist]

    # Embed the chunks as vectors and load them into the database.
    # NOTE(review): the store is persisted into the current working directory;
    # re-running against the same directory may add the same chunks again --
    # confirm whether a dedicated directory / fresh collection is wanted.
    db_chroma = Chroma.from_documents(documents, embeddings, persist_directory=os.getcwd())

    user_question = input("Please ask a question: ")

    ai_response = generate_rag_response(user_question)

    print(ai_response)

Please ask a question: Tell me about the current evaluations at HHS OIG
As a federal expert at HHS OIG, I can provide insight into the current evaluations being conducted at HHS. Based on the provided context, here is an overview of the ongoing evaluations:

1. Evaluation of HHS Information Security Program:
   - HHS OIG conducts an annual, independent evaluation to assess the effectiveness of HHS's information security program and practices.
   - The evaluation aims to determine compliance with the Federal Information Security Modernization Act (FISMA).
   - HHS and selected HHS operating divisions' compliance with FISMA will be reviewed.

2. Evaluation of Mobile App Development and Vetting:
   - HHS OIG is performing an evaluation specifically focused on the development and vetting of mobile apps within HHS and its operating divisions (OpDivs).
   - The evaluation is prompted by the increasing use of mobile apps by HHS and OpDivs to provide access to health services.
   - The objecti