In [1]:
import os
from langchain.chat_models import ChatOpenAI
import json

from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)

from langchain.embeddings.openai import OpenAIEmbeddings

In [2]:
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

In [3]:
from langchain.document_loaders import PyPDFDirectoryLoader

In [4]:
# Parse the local secrets file first, then pull out the API key and publish
# it through the environment variable that the chat model cell reads.
with open('secret.json', 'r') as json_file:
    secret_payload = json.load(json_file)

key = secret_payload['key']
os.environ["OPENAI_API_KEY"] = key

In [5]:
def augment_prompt(query: str, db, k: int = 3) -> str:
    """Build a retrieval-augmented prompt for ``query``.

    The ``k`` most similar results are fetched from ``db`` and their text is
    joined with newlines and placed ahead of the query in the prompt.

    Args:
        query: The user's question.
        db: Knowledge base exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.
        k: Number of results to retrieve. Defaults to 3, the value that was
            previously hard-coded.

    Returns:
        The prompt text containing the retrieved contexts and the query.
    """
    # get the top-k results from the knowledge base
    results = db.similarity_search(query, k=k)
    # get the text from the results
    source_knowledge = "\n".join(doc.page_content for doc in results)
    # feed into an augmented prompt; the indentation inside the literal is
    # part of the text that gets sent, so it is kept exactly as it was
    augmented_prompt = f"""Using the contexts below, answer the query.

    Contexts:
    {source_knowledge}

    Query: {query}"""
    return augmented_prompt

In [6]:
# function that takes the chat history and the query, and returns a response

def get_response(db, chat, query, chat_history=None):
    """Answer ``query`` with ``chat``, using context retrieved from ``db``.

    Args:
        db: Knowledge base passed through to ``augment_prompt``.
        chat: Callable chat model; it is called with the list of messages and
            the ``content`` attribute of its result is returned.
        query: The user's question.
        chat_history: Messages from earlier turns. The new prompt is appended
            to this list in place. When omitted, a new empty history is
            started for this call.

    Returns:
        A ``(response_text, chat_history)`` tuple. The history holds the
        augmented prompts only; the model's reply is not appended to it.
    """
    # A literal ``[]`` default would be created once and shared by every call
    # that omits the argument, leaking prompts between unrelated conversations.
    if chat_history is None:
        chat_history = []

    prompt = HumanMessage(content=augment_prompt(query, db=db))

    # get the response from the model
    chat_history.append(prompt)

    response = chat(chat_history)

    return response.content, chat_history

In [7]:
# Chat model used for every query in this notebook; the API key is read
# from the environment variable set earlier.
chat = ChatOpenAI(model='gpt-4', openai_api_key=os.environ["OPENAI_API_KEY"])

## Docs - PDF Material

In [8]:
# The PDF material is read from <base_docs>/<subject>/<sub>/.
base_docs, subject, sub = "documents", "aipi530", "material"
doc_path = f"{base_docs}/{subject}/{sub}/"

# Loader pointed at that directory.
loader = PyPDFDirectoryLoader(doc_path)

In [9]:
# Load the documents, split them with a 1000-character chunk size and no
# overlap, then embed the pieces into a Chroma store used for retrieval.
docs = loader.load()
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
documents = text_splitter.split_documents(docs)
db = Chroma.from_documents(
    documents,
    OpenAIEmbeddings(model="text-embedding-ada-002"),
)

In [10]:
query = "Enumrate the different use cases"

# Start this conversation from an explicit empty history so that re-running
# the cell never re-sends prompts left over from an earlier conversation.
response, messages = get_response(db=db, chat=chat, query=query, chat_history=[])

print(response)

1. Backpack packing
2. Production Optimization
3. Network Optimization
4. Media Advertisement
5. Hospital Management
6. Portfolio Management
7. Price Optimization
8. Personnel Assignment
9. Transportation Scheduling
10. Inventory Management
11. Facility Location


In [11]:
# Follow-up question: hand back the history returned by the previous call
# so the model also sees the earlier prompt.
query = "Describe the first use case in detail"
response, messages = get_response(
    query=query,
    chat_history=messages,
    db=db,
    chat=chat,
)
print(response)

The first use case is "Backpack packing". It uses the Mixed Integer Linear Programming (MILP) method. The type of problem it addresses is the "Knapsack" problem. It does not involve Monte Carlo simulations (MC).


## Docs - Python Script

In [24]:
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import Language

from langchain.document_loaders.directory import DirectoryLoader
from langchain.document_loaders import PythonLoader

# Collect every .py file under python_files/ (searched recursively via the
# glob pattern) and load each one with PythonLoader.
loader = DirectoryLoader('python_files', loader_cls=PythonLoader, glob="**/*.py")
docs = loader.load()

# Number of documents that were loaded.
len(docs)

1

In [25]:
# Split the loaded Python documents with a 1000-character chunk size and no
# overlap, then rebuild ``db`` as a Chroma store over those pieces.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
documents = text_splitter.split_documents(docs)
db = Chroma.from_documents(
    documents,
    OpenAIEmbeddings(model="text-embedding-ada-002"),
)

In [26]:
query = "Summarize the code in the file"

# This question is about a different document set, so start from an explicit
# empty history rather than carrying over prompts from the PDF conversation.
response, messages = get_response(db=db, chat=chat, query=query, chat_history=[])

print(response)

The code in the file primarily performs the following tasks:

1. Clones a repository (LangChain, in this case) from GitHub to a local path.
2. Loads the python files from the cloned repository using a GenericLoader.
3. Splits the loaded documents into chunks using a RecursiveCharacterTextSplitter.
4. Adds the split texts to a Chroma vectorstore and creates a retriever.
5. Defines a prompt template for a Retrieval Augmented Generation (RAG) model. 
6. Defines a concise summary prompt template. 

The code appears to be part of a larger system for processing and analyzing codebases, possibly for the purpose of question answering or text generation using advanced language models.
