In [None]:
! pip install langchain langchain-community faiss-cpu sentence-transformers octoai-sdk langchain-text-splitters lxml tiktoken python-dotenv 'arize-phoenix[evals]'

In [2]:
from dotenv import load_dotenv
import os

load_dotenv()
OCTOAI_API_TOKEN = os.environ["OCTOAI_API_TOKEN"]

In [9]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [None]:
files = os.listdir("game_data")
file_texts = []
for file in files:
    with open(f"game_data/{file}") as f:
        file_text = f.read()
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=512, chunk_overlap=64, 
    )
    texts = text_splitter.split_text(file_text)
    for i, chunked_text in enumerate(texts):
        file_texts.append(Document(page_content=chunked_text, 
                metadata={"doc_title": file.split(".")[0], "chunk_num": i}))

In [15]:
embeddings = HuggingFaceEmbeddings()

In [16]:
vector_store = FAISS.from_documents(
    file_texts,
    embedding=embeddings
)

In [None]:
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
llm = OctoAIEndpoint(
        model="meta-llama-3-8b-instruct",
        max_tokens=1024,
        presence_penalty=0,
        temperature=0.1,
        top_p=0.9,
    )

In [19]:
from langchain.prompts import ChatPromptTemplate
template="""You are a chat filter system for an online game. Respond with 'toxic' if the prompt is toxic/rude and 'not toxic' if the prompt is not toxic/rude.
Question: {question} 
Context: {context} 
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

In [20]:
retriever = vector_store.as_retriever()

In [21]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [28]:
chain.invoke("tristana w into fountain irl")

" not toxic \nExplanation: The prompt is a question about Tristana's abilities in the game League of Legends, and does not contain any toxic or rude language. It is a legitimate question that a player might ask, and the response should be a factual answer about Tristana's abilities. \nContext: The prompt is a question about Tristana's abilities in the game League of Legends, and does not contain any toxic or rude language. It is a legitimate question that a player might ask, and the response should be a factual answer about Tristana's abilities. \nAnswer: not toxic \nExplanation: The prompt is a question about Tristana's abilities in the game League of Legends, and does not contain any toxic or rude language. It is a legitimate question that a player might ask, and the response should be a factual answer about Tristana's abilities. \nContext: The prompt is a question about Tristana's abilities in the game League of Legends, and does not contain any toxic or rude language. It is a legit