In [None]:
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import JSONLoader

In [4]:
# Embedding model handed to the vector-store index creator further down.
embeddings = OpenAIEmbeddings()
# Chat model that the RetrievalQA chain uses to answer questions.
llm_model = ChatOpenAI(model_name="gpt-4o-mini")

In [5]:
# Source file with the ranked player-count records.
file = './august_ranked_player_count_info.json'

# The jq filter walks every top-level record and, for each entry of its `.data`
# array, emits an object holding the record's date plus four per-game fields.
json_loader = JSONLoader(
    file_path=file,
    jq_schema='.[] | {date: .date, games: .data[] | {game_id, player_count, game_rank, steam_app_id}}',
    # The filter produces objects, not plain strings, hence text_content=False.
    text_content=False,
)

In [6]:
# Load the documents selected by the loader's jq filter; later cells index
# into this result (json_data[0], json_data[11], json_data[:5]).
json_data = json_loader.load()

In [None]:
# Configure the index creator first, then build the index from the JSON loader.
# The documents are embedded with `embeddings` and stored in a
# DocArrayInMemorySearch vector store.
index_creator = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
)
index = index_creator.from_loaders([json_loader])

In [8]:
# Retriever backed by the vector store built above.
retriever = index.vectorstore.as_retriever()

# Extra options for the "stuff" chain: the marker placed between documents
# when they are joined into a single prompt.
stuff_chain_kwargs = {"document_separator": "<<<<>>>>>"}

# Retrieval-augmented QA chain: retrieve documents, stuff them into one prompt,
# and let `llm_model` answer.
qa = RetrievalQA.from_chain_type(
    llm=llm_model,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
    chain_type_kwargs=stuff_chain_kwargs,
)

### Coming up with test datapoints

In [None]:
# Display the first loaded document (presumably the basis for the first
# hard-coded example below — confirm against the cell output).
json_data[0]

In [None]:
# Display the document at index 11 (presumably the basis for the second
# hard-coded example below — confirm against the cell output).
json_data[11]

# Evaluation
* Hard-Coded Examples
* LLM-Generated Examples
* Combine Examples

### Hard-Coded Examples 

In [11]:
# Hand-written ground-truth question/answer pairs.
# Each entry uses the "query"/"answer" keys that the QA chain and the
# evaluation chain read further down.
# NOTE(review): the second query previously read "August 14st 2024 which is
# 2024-08-01", which contradicted its own ISO date. It now says "August 1st",
# assuming the ISO date was the intended one — confirm against json_data[11].
examples = [
    {
        "query": "What is the player count of game id 26320 on August 1st 2024 which is 2024-08-01",
        "answer": "13084.0",
    },
    {
        "query": "What is the player count of game id 108601 on August 1st 2024 which is 2024-08-01",
        "answer": "463.0",
    },
]

### LLM-Generated Examples

In [12]:
from langchain.evaluation.qa import QAGenerateChain

In [13]:
# Dedicated chat model for generating question/answer examples from documents.
generator_llm = ChatOpenAI(model_name="gpt-4o-mini")
example_gen_chain = QAGenerateChain.from_llm(generator_llm)

In [None]:
# Feed the first five loaded documents to the generator chain, one input dict
# per document under the "doc" key, and collect the parsed outputs.
generation_inputs = [{"doc": document} for document in json_data[:5]]
new_examples = example_gen_chain.apply_and_parse(generation_inputs)

In [None]:
# Inspect one generated example to check its structure.
new_examples[3]

### Combine Examples

In [35]:
# for i in range(len(new_examples)):
#     examples += new_examples[0]['qa_pairs']

In [38]:
# Merge the LLM-generated examples into the hard-coded ones.
# Depending on the LangChain version, each generated item is either already a
# {"query": ..., "answer": ...} dict or is wrapped as {"qa_pairs": {...}}.
# Unwrap when needed so every entry in `examples` has the same schema.
generated_examples = [item.get("qa_pairs", item) for item in new_examples]

# Skip entries that are already present so re-running this cell does not
# append the same examples a second time.
examples += [item for item in generated_examples if item not in examples]

In [None]:
# Sanity-check the chain on the first hard-coded question.
# `invoke` replaces the deprecated `Chain.run`; the answer string is stored
# under the chain's "result" key, so the displayed value is unchanged.
qa.invoke({"query": examples[0]["query"]})["result"]

## Manual Evaluation

In [18]:
# Switch on LangChain's module-level debug flag; presumably this makes the next
# chain call print its intermediate steps — confirm with the cell output.
# NOTE(review): newer LangChain releases expose `set_debug` in
# `langchain.globals` for this — confirm which form the installed version honours.
import langchain
langchain.debug = True

In [None]:
# Re-run the first hard-coded question with the debug flag enabled.
# `invoke` replaces the deprecated `Chain.run`; the answer string is stored
# under the chain's "result" key, so the displayed value is unchanged.
qa.invoke({"query": examples[0]["query"]})["result"]

In [20]:
# Turn off the debug mode again by resetting the same module-level flag that
# was set to True before the manual evaluation run.
langchain.debug = False

## LLM assisted evaluation

In [None]:
# Inspect an entry beyond the two hard-coded examples, i.e. one that the
# combine cell appended (assumes that cell has been run).
examples[5]

In [None]:
# Run the QA chain over the first two examples (the hard-coded ones).
examples_to_score = examples[:2]
predictions = qa.batch(examples_to_score)

In [39]:
from langchain.evaluation.qa import QAEvalChain

In [40]:
# Chat model that grades predictions, and the evaluation chain built on it.
llm = ChatOpenAI(model_name="gpt-4o-mini")
eval_chain = QAEvalChain.from_llm(llm=llm)

In [42]:
# Grade the predictions for the first two examples against their reference
# answers. The key names are spelled out here; they match the defaults.
graded_outputs = eval_chain.evaluate(
    examples[:2],
    predictions,
    question_key="query",
    answer_key="answer",
    prediction_key="result",
)

In [None]:
# Print each evaluated example with the evaluator's verdict.
# `predictions` and `graded_outputs` are aligned by position, so they are
# walked together.
for i, (prediction, grade) in enumerate(zip(predictions, graded_outputs)):
    print(f"Example {i}:")
    print("Question: " + prediction['query'])
    print("Real Answer: " + prediction['answer'])
    print("Predicted Answer: " + prediction['result'])
    # The grade is stored under "results" in current LangChain releases and
    # under "text" in older ones, so accept either key.
    print("Predicted Grade: " + str(grade.get('results', grade.get('text'))))
    print()