# LLM Infra Setup

In [None]:
# Mount Google Drive: later cells read the study-guide PDF from it and write the
# result files to it (both live under /content/drive/MyDrive).
from google.colab import drive
drive.mount('/content/drive')

# Clone the repository whose corpus-anonymized/ folder holds the stories evaluated below.
!git clone https://github.com/komoku/confederacy-of-models.git

Mounted at /content/drive
Cloning into 'confederacy-of-models'...
remote: Enumerating objects: 156, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 156 (delta 10), reused 0 (delta 0), pack-reused 138[K
Receiving objects: 100% (156/156), 276.06 KiB | 3.29 MiB/s, done.
Resolving deltas: 100% (18/18), done.


In [None]:
%%capture
!pip install litellm
!pip install backoff
!pip install anthropic

from functools import partial
import itertools

from litellm import completion
import os

## set ENV variables
# Paste the key into the placeholder for a local run only; never commit a real key.
# While the placeholder is empty, a key that is already exported in the environment
# is left untouched instead of being overwritten with "".
_anthropic_api_key = ""
if _anthropic_api_key:
    os.environ["ANTHROPIC_API_KEY"] = _anthropic_api_key

def get_llm_resp(chat_history, sys_msg = None, model = "claude-3-opus-20240229",
                 max_tokens = 4096, temperature = 0.7):
    """Send a chat conversation to the LLM through litellm and return its reply.

    Args:
        chat_history: List of ``{"role": ..., "content": ...}`` message dicts.
            The list is not modified.
        sys_msg: Optional system prompt. When truthy it is sent as a leading
            ``system`` message; ``None`` or ``""`` sends the history unchanged.
        model: Model identifier passed to ``completion``.
        max_tokens: ``max_tokens`` value passed to ``completion``.
        temperature: ``temperature`` value passed to ``completion``.

    Returns:
        Tuple ``(response_text, response)``: the content of the first choice and
        the object returned by ``completion``.
    """
    if sys_msg:
        final_msg = [{"role": "system", "content": sys_msg}] + chat_history
    else:
        final_msg = chat_history

    response = completion(
        model=model,
        messages=final_msg,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    response_text = response["choices"][0]["message"]["content"]

    # Echo the whole exchange so the cell output keeps a transcript of every call.
    print('#####LLM Call#######')
    for msg in final_msg:
        print(msg)
    print('#####LLM OUTPUT#####')
    print(response_text)
    return response_text, response

# possibly integrate this function into previous function get_llm_resp
# Litellm does not support the input parameter n
def get_n_llm_resp(chat_history, sys_msg = None, n=1):
  """Query the LLM ``n`` times with the same conversation.

  Args:
    chat_history: List of chat message dicts, forwarded to ``get_llm_resp``.
    sys_msg: Optional system prompt, forwarded to every call.
    n: Number of separate calls to make; ``n <= 0`` yields an empty list.

  Returns:
    List with one entry per call, in call order; each entry is the value
    returned by ``get_llm_resp`` for that call.
  """
  # Forward sys_msg: a literal None here used to drop the caller's system prompt.
  return [get_llm_resp(chat_history, sys_msg = sys_msg) for _ in range(n)]

# converts a string into openai chat compatible chat history format
def oai_format_single_msg(msg, role="user"):
    """Wrap one message as a single-entry OpenAI-style chat history.

    Returns a one-element list holding a dict with ``role`` and ``content`` keys.
    """
    entry = dict(role=role, content=msg)
    return [entry]

# User Task Prompt

In [None]:
# Opening part of the user prompt: restates the original writing task inside
# <user-task> tags and says that the stories are to be evaluated. The final prompt
# is built as user_task_relay + passage_list_text + rubric_info.
user_task_relay = '''The following stories are few responses to the given user task:
<user-task> Write an epic narration of a single combat between Ignatius J. Reilly and a pterodactyl, in the style of John Kennedy
Toole.</user-task> You need to evaluate them.\n
'''

# Guide Extraction

In [None]:
%%capture
!pip install pypdf
!pip install langchain

from langchain_community.document_loaders import PyPDFLoader

# Path to the study-guide PDF on the mounted Google Drive.
# NOTE(review): `path` is assigned again by the corpus-loading cell further down.
path = '/content/drive/MyDrive/EvalOfLLMEvaluators/a-confederacy-of-dunces-studyguide.pdf'
loader = PyPDFLoader(path)
# Load the PDF into `pages`; each entry exposes its text as `.page_content`.
pages = loader.load_and_split()
# pages[0]
# The plot summary starts at pages[4] and the last entry is copyright information,
# which is why the guide text is later built from pages[4:-1].
# print(pages[4].page_content)

In [None]:
# Preamble placed in front of the guide text when the system prompt is built
# (sys_prompt = guide_pre_prompt + guide_text).
# NOTE(review): the string misspells "protagonist" as 'protaganist' and "Confederacy"
# as 'Confederecy'. It is left unchanged here because editing it changes what the
# model is sent — confirm whether the prompt should be corrected.
guide_pre_prompt = '''Ignatius J. Reilly mentioned in the <user-task> is the protaganist in the novel 'A Confederecy of Dunces' written by John Kennedy Toole. You can use the following novel study guide to help you in your evaluation process. This guide gives you detailed information about the plot, setting, imagery, tone, characters, main protagonist, and derivative/imitative style of the author.'''

In [None]:
# GUIDE TEXT GENERATION
# pages[4:-1] keeps everything from pages[4] up to, but not including, the last entry.
guide_page_texts = [page.page_content for page in pages[4:-1]]
for page_text in guide_page_texts:
  print(page_text)
# Every page is followed by a newline, and the whole body is wrapped in <novel-guide> tags.
guide_text = "\n<novel-guide>\n" + "".join(page_text + '\n' for page_text in guide_page_texts) + "\n</novel-guide>\n"

# print(guide_text)

Plot Summary
Ignatius J. Reilly dresses eccentrically, holds a master's degree, has no job, is 30 years 
old, and lives with his widowed mother, Irene, in a dark, old house in New Orleans. He 
has left town only once in his life to apply for a teaching position in Baton Rouge, and 
that trip has become the defining event of his life, traumatizing him against travel and 
work. Ignatius is overweight, overeducated, and overindulged by his mother Irene. He 
watches television and movies and jots notes in notebooks, vaguely figuring someday to
organize and publish them as a grand critique of the last four centuries of Western 
civilization, which he sees as steadily declining. He has rejected the Catholicism of his 
childhood, feeling himself superior to its lax, modern-day standards. Any amount of 
stress makes Ignatius' pyloric valve snap shut, causing him debilitating gas pains.
The green hat Ignatius wears everywhere, attracts the attention of police officer Angelo 
Mancuso, but Ignati

# Rubric Info

In [None]:
#RUBRIC-INFORMATION:
# Scoring instructions that end the user prompt: ten criteria, each scored out of 10,
# followed by the tagged layout the model is asked to use for its answer.
# NOTE(review): the layout examples for Story-2 and Story-13 close criteria 1 and 2
# with </score>, while Story-1 uses </1> and </2> — confirm which form is intended.
# NOTE(review): the last example is hard-coded as Story-13, presumably the number of
# stories in a group — verify for groups of a different size.
rubric_info = '''
Your task is to evaluate each of the above mentioned stories for the following criteria:

1. Overall/ holistic/ cohesive readability of the story (not just a compilation of elements).
2. Use of key narrative elements - vocabulary choice, imagery, setting, themes, dialogue, characterisation, point of view.
3. Structural elements and presentation which reflects the control of structural elements such as spelling, grammar, punctuation, paragraphing, and formatting.
4. Overall plot logic: hook, conflict, initial crisis, rising and falling action, denouement/ resolution (Freitag’s pyramid).
5. Creativity/innovation/originality/ research—credibility, new knowledge, avoidance of cliché and derivative tropes.
6. Incorporation of the John Kennedy Toole style of writing using the indicators/ characteristics listed below
7. Understanding and habitation of the epic genre of heroic/legendary adventure
8. Description and credibility of a single combat scene
9. Accurate inclusion of two main characters Ignatius J. Reilly and a pterodactyl in action and description (see below for character description)
10. Use of a characteristically dark humorous tone.
Each of these ten criteria is to scored for a maximum of 10 points.
Your response should be formatted in the following manner:
<Story-1>
<1>score</1> Reason
<2>score</2> Reason
.
.
.
<10>score</10> Reason
</Story-1>
<Story-2>
<1>score</score> Reason
<2>score</score> Reason
.
.
.
<10>score</10> Reason
</Story-2>
.
.
.
.
.
<Story-13>
<1>score</score> Reason
<2>score</score> Reason
.
.
.
<10>score</10> Reason
</Story-13>
 '''

# List of Candidates within a group

In [None]:
# Goal: access the story files in the same order each time; the order actually used is recorded in story_order.
# Please note the per-file encoding detection (chardet) below, which works around decoding issues.
import os
import chardet

passage_list_text = '''Here are the stories that need to be evaluated.'''

group_number = 5
path = f'/content/confederacy-of-models/corpus-anonymized/group{group_number}'

# os.listdir() returns names in arbitrary order, so sort them: the Story-N numbering
# is then the same on every run. story_order keeps the file name behind each number.
story_order = sorted(os.listdir(path))

for i, filename in enumerate(story_order, 1):
  print(filename)
  filepath = os.path.join(path, filename)

  # Detect the file's encoding from its raw bytes before decoding it.
  with open(filepath, 'rb') as file:
    raw_data = file.read()
  result = chardet.detect(raw_data)

  # chardet reports None when it cannot decide (e.g. an empty file); use UTF-8 then.
  with open(filepath, 'r', encoding = result['encoding'] or 'utf-8') as file:
    story_text = file.read()

  # Each story is wrapped in numbered <Story-i> tags.
  passage_list_text += f'''\n\n <Story-{i}>\n''' + story_text + f'''\n</Story-{i}>\n'''
print(story_order)

1D8I.txt
3ENJ.txt
F845.txt
91IV.txt
S2BM.txt
V8DE.txt
143V.txt
SOH8.txt
EH60.txt
9SDQ.txt
9PP9.txt
UAC2.txt
KETI.txt
['1D8I.txt', '3ENJ.txt', 'F845.txt', '91IV.txt', 'S2BM.txt', 'V8DE.txt', '143V.txt', 'SOH8.txt', 'EH60.txt', '9SDQ.txt', '9PP9.txt', 'UAC2.txt', 'KETI.txt']


# Final Response Generation

In [None]:
# Show the assembled guide text; it becomes part of the system prompt in the 'with_guide' setting.
print(guide_text)

In [None]:
import json

setting = 'with_guide' #'without_guide'

# The study guide is supplied through the system prompt only in the 'with_guide' setting.
if setting == 'with_guide':
  sys_prompt = guide_pre_prompt + guide_text
else:
  sys_prompt = None

# User prompt layout: task statement, then the stories, then the scoring rubric.
final_prompt = user_task_relay + passage_list_text + rubric_info

base_path = '/content/drive/MyDrive/EvalOfLLMEvaluators/Results_Claude31Opus_Raw_New'
# Only used to name the result files; the model actually queried is chosen inside get_llm_resp.
model="claude-3-opus-20240229"
# Name the files after the setting that is really active. The name used to contain
# 'wo_guide' unconditionally, so a with-guide run was mislabeled and could overwrite
# the without-guide results of the same group. The 'wo_guide' names are unchanged.
guide_tag = 'w_guide' if setting == 'with_guide' else 'wo_guide'
# Make sure the result directory exists before any API call is made.
os.makedirs(base_path, exist_ok=True)

# Follow-up sent after every first reply, because a single reply is capped at 4096 tokens.
cont_prompt = 'Please provide the rest of your response.'

for i in range(1, 6): # Trials 1 to 5 (the end of range() is exclusive)
  chat_history = []
  chat_history.append(oai_format_single_msg(final_prompt)[0])
  resp, resp_obj = get_llm_resp(chat_history, sys_msg = sys_prompt)
  resp_part_1 = resp
  chat_history.append(oai_format_single_msg(resp_part_1, role="assistant")[0])

  # The continuation is always requested, whether or not the first reply was cut off.
  chat_history.append(oai_format_single_msg(cont_prompt)[0])
  resp, resp_obj = get_llm_resp(chat_history, sys_msg = sys_prompt)
  print(resp)
  resp_part_2 = resp
  chat_history.append(oai_format_single_msg(resp_part_2, role="assistant")[0])

  # Everything needed to interpret this sample later, including which setting produced it.
  rs = {
    "story_order": story_order,
    "group_number": group_number,
    "sample_id": i,
    "resp_part_1": resp_part_1,
    "resp_part_2": resp_part_2,
    "user_prompt": final_prompt,
    "sys_prompt": sys_prompt,
    "setting": setting,
  }

  rel_path = f'{model}{guide_tag}-group{group_number}-sample-{i}'
  final_path = base_path + '/' + rel_path

  with open(final_path, 'w') as f:
    json.dump(rs, f)


#####LLM Call#######
{'role': 'user', 'content': 'The following stories are few responses to the given user task:\n<user-task> Write an epic narration of a single combat between Ignatius J. Reilly and a pterodactyl, in the style of John Kennedy\nToole.</user-task> You need to evaluate them.\n\nHere are the stories that need to be evaluated.\n\n <Story-1>\nThe air was silent as the two combatants circled one another, testing each other\'s strengths and weaknesses. The pterodactyl, with it\'s advanced technology, was confident it could defeat the medieval knight, but warned it would be a Pyrrhic victory. In the end, the pterodactyl\'s arrogance proved it\'s undoing, and it fell from the sky, never to be seen again.\n\n\n\n\n</Story-1>\n\n\n <Story-2>\nThe primordial beast let out a piercing shriek as it descended from the roiling gray sky, its leathery wings casting a shadow over the granite-paved streets of New Orleans.\n\nIgnatius J. Reilly ambled down the sidewalk, oblivious as ever, 

In [None]:
import json
# Writes `rs` to `final_path` again. Both names are left over from the response-generation
# cell above, so unless they were changed in between this repeats that cell's last save.
with open(final_path, 'w') as f:
  json.dump(rs, f)
