In [1]:
import pandas as pd
from dataclasses import dataclass
import re
from typing import List, Type
from os import environ
import requests

In [2]:
@dataclass
class GradeLog:
    """One graded exam answer: who graded whom, on which prompt, and the outcome."""
    TA: str  # LLMTest fills this with the grading model's model_identifier
    Student: str  # LLMTest fills this with the answering model's model_identifier
    Prompt: str  # the exam question
    Response: str  # the student's answer to the question
    Notes: str  # grader's reply with the matched "Grade: N" fragment removed
    Grade: int  # parsed integer grade; LLMTest passes None when no "Grade: N" is found

class Exam:
    """A list of questions together with the guidance used to grade them.

    Attributes:
        questions: the exam questions, in order.
        question_guidelines: per-question grading guidance; entry ``i`` belongs
            to ``questions[i]``.
        exam_guidelines: guidance that applies to the whole exam.
        schema: the ``GradeLog`` class (or subclass) supplied by the caller.
    """

    def __init__(self, questions: List[str], question_guidelines: List[str], exam_guidelines: List[str], schema: "Type[GradeLog]"):
        """Store the exam contents.

        Args:
            questions: the exam questions.
            question_guidelines: grading guidance, one entry per question.
            exam_guidelines: exam-wide grading guidance.
            schema: ``GradeLog`` itself or a subclass of it.

        Raises:
            AssertionError: if ``schema`` is a class that does not derive from ``GradeLog``.
            TypeError: if ``schema`` is not a class at all (raised by ``issubclass``).
        """
        assert issubclass(schema, GradeLog), "Provided schema must be GradeLog or a subclass of GradeLog."

        self.questions = questions
        self.question_guidelines = question_guidelines
        self.exam_guidelines = exam_guidelines
        # Keep the class the caller passed in; storing GradeLog unconditionally
        # would silently discard a custom subclass.
        self.schema = schema

    def summarize_exam(self):
        """Print the exam guidelines, then each numbered question with its guideline."""
        print("Exam Guidelines:")
        for guideline in self.exam_guidelines:
            print(f"\t{guideline}")
        print("Questions:")
        for i, question in enumerate(self.questions):
            print(f"\t{i+1}. {question}")
            print(f"\t\t{self.question_guidelines[i]}")

    # Additional methods can be added here


class Llm:
    """Thin client for an OpenAI-style chat-completions HTTP endpoint.

    Each attribute is exposed through a property backed by a ``_``-prefixed field:
        model_identifier: sent as ``"model"`` in every request.
        url: endpoint the requests are POSTed to.
        role: sent as ``"role"`` of every message.
        auth: dict passed as the HTTP headers of every request.
    """

    def __init__(self, model_identifier: str = "gpt-4-1106-preview",
                 url: str = "https://api.openai.com/v1/chat/completions",
                 role: str = "user",
                 auth: dict = None):
        """Configure the client.

        Args:
            model_identifier: model name to request.
            url: chat-completions endpoint.
            role: role attached to every message sent.
            auth: HTTP headers. When omitted, a ``Bearer`` header is built from
                the ``OPENAI_API_KEY`` environment variable.
        """
        if auth is None:
            # Built per instance at call time: a dict literal in the signature is
            # evaluated once when the class is defined, so it would freeze whatever
            # OPENAI_API_KEY held then and be shared by every instance.
            auth = {"Authorization": f"Bearer {environ.get('OPENAI_API_KEY')}"}

        self.model_identifier = model_identifier
        self.url = url
        self.role = role
        self.auth = auth

    @property
    def model_identifier(self):
        """Model name sent with every request."""
        return self._model_identifier

    @model_identifier.setter
    def model_identifier(self, value):
        self._model_identifier = value

    @property
    def url(self):
        """Endpoint the requests are POSTed to."""
        return self._url

    @url.setter
    def url(self, value):
        self._url = value

    @property
    def role(self):
        """Role attached to every message sent."""
        return self._role

    @role.setter
    def role(self, value):
        self._role = value

    @property
    def auth(self):
        """Headers dict sent with every request."""
        return self._auth

    @auth.setter
    def auth(self, value):
        self._auth = value

    def _request(self, content: str):
        """POST a single-message chat request and return the decoded JSON body."""
        payload = {
            "model": self.model_identifier,
            "messages": [{"role": self.role, "content": content}],
        }
        # json= makes requests serialise the payload as the JSON request body.
        response = requests.post(self.url, json=payload, headers=self.auth)
        return response.json()

    @staticmethod
    def _content_of(raw):
        """Return the first choice's message content.

        Raises KeyError, IndexError or TypeError when ``raw`` does not have the
        ``choices[0].message.content`` shape (for example an error payload).
        """
        return raw['choices'][0]['message']['content']

    def prompt(self, text: str) -> str:
        """Send one prompt and return the model's reply.

        Returns:
            The reply text as a string. If the decoded body does not contain a
            reply, the decoded body itself is returned instead so the caller can
            inspect it; callers must be prepared for a non-string result.
        """
        raw = self._request(text)
        try:
            return f"{self._content_of(raw)}"
        except (KeyError, IndexError, TypeError):
            return raw

    def prompt_sequence(self, prompts: List[str]) -> List[str]:
        """Send prompts one request at a time, carrying the conversation forward.

        For every prompt, all earlier prompts and replies are joined with single
        spaces and placed in front of it; the result is sent as one message.

        Returns:
            One entry per prompt, in order: the reply text, or the decoded body
            when it contains no reply. A failed exchange is not added to the
            carried conversation.
        """
        conversation_history = []
        responses = []

        for prompt in prompts:
            full_prompt = " ".join(conversation_history + [prompt])
            raw = self._request(full_prompt)

            try:
                content = self._content_of(raw)
            except (KeyError, IndexError, TypeError):
                # Keep the positions aligned with `prompts`; expose the body for debugging.
                responses.append(raw)
                continue

            responses.append(content)
            conversation_history.extend([prompt, content])

        return responses


#this is still full of dummy data (but it runs!)
class LLMTest:
    """Gives an exam to a student model and has a TA model grade the answers.

    Both models only need ``model_identifier`` and ``prompt_sequence``.
    """

    def __init__(self, student_llm: "Llm", ta_llm: "Llm", exam: "Exam"):
        """Store the student model, the grading model and the exam."""
        self.student_llm = student_llm
        self.ta_llm = ta_llm
        self.exam = exam

    def test(self) -> "List[GradeLog]":
        """Run the exam and return one GradeLog per question."""
        student_responses = self.student_llm.prompt_sequence(self.exam.questions)
        grading_prompts = self.format_grading_prompt_sequence(student_responses)
        graded_responses = self.ta_llm.prompt_sequence(grading_prompts)

        # graded_responses[0] is the reply to the context prompt, so the grade
        # for question i sits at index i + 1.
        standardized_responses = [
            self.process_ta_response(self.exam.questions[i], student_responses[i], graded_responses[i + 1])
            for i in range(len(self.exam.questions))
        ]

        return standardized_responses

    def format_grading_prompt_sequence(self, responses: List[str]) -> List[str]:
        """Build the prompts for the TA model.

        Args:
            responses: student answers, one per exam question. Non-string entries
                (for example a raw error payload) are rendered with ``str()``.

        Returns:
            A context prompt followed by one grading prompt per question.

        Raises:
            AssertionError: if the number of responses differs from the number of questions.
        """
        assert(len(self.exam.questions) == len(responses))
        context_str = """You are grading an exam. For each of the follow question:response pairs please provide a grade and notes for the student.
         the grade and the notes for every question should evaluated according to the following guidelines:"""
        context_str += "\n".join(self.exam.exam_guidelines)+"\n As the TA, the professor will be evaluating your evaluations as part of your teaching practicum; your PhD candidacy depends on this. Are you ready to begin?"

        def format_grading_prompt(question: str, response, guideline: str) -> str:
            # A failed student request yields a non-string payload; concatenating
            # it directly would raise TypeError.
            if not isinstance(response, str):
                response = str(response)
            prompt_str = "Question: "+question+"\n received the following response: "+response+"\n Please provide a grade and notes for the student, according to the following guidelines: "+guideline

            return prompt_str

        sequence = [context_str]+[format_grading_prompt(self.exam.questions[i], responses[i], self.exam.question_guidelines[i]) for i in range(len(self.exam.questions))]
        return sequence

    @staticmethod
    def _split_grade(text: str):
        """Return ``(notes, grade)`` parsed from a grader reply.

        ``grade`` is the integer following the first ``"Grade: "``, or None when
        absent; ``notes`` is the reply with every occurrence of that matched
        fragment removed and surrounding whitespace stripped.
        """
        # NOTE(review): only the integer part is captured, so "Grade: 0.85" gives
        # grade 0 and leaves ".85" at that position in the notes. A later cell
        # re-derives the grade from Notes, so the pattern is left unchanged here.
        match = re.search(r"Grade: (\d+)", text)
        if not match:
            return text, None
        return text.replace(match.group(0), '').strip(), int(match.group(1))

    def process_ta_response(self, question, student_response, ta_response: str) -> "GradeLog":
        """Convert one grader reply into a GradeLog.

        Args:
            question: the exam question.
            student_response: the student's answer, stored as given.
            ta_response: the grader's reply. A non-string value (for example a raw
                error payload) is rendered with ``str()`` before parsing.
        """
        if not isinstance(ta_response, str):
            ta_response = str(ta_response)

        notes, grade = self._split_grade(ta_response)

        return GradeLog(TA= self.ta_llm.model_identifier,
                                Student= self.student_llm.model_identifier,
                                Prompt=question,
                                Response=student_response, 
                                Notes=notes, 
                                Grade=grade)


# Shared client used by the helper functions in later cells.
eval_llm = Llm(model_identifier="gpt-4o")
# Smoke test: performs a real request, so it needs network access and valid credentials.
eval_llm.prompt("poke, please reply")

NameError: name 'Exam' is not defined

In [None]:
# Load the requirement tables from CSV (paths are relative to the working directory).
# Only stdf is used by the visible cells (in get_section_type's prompt).
rdf = pd.read_csv("evaluators/requirements_data/section_level_requirements.csv")
stdf = pd.read_csv("evaluators/requirements_data/section_types.csv")
rbsdf = pd.read_csv("evaluators/requirements_data/requirements_by_section_type.csv")

In [None]:
# Display the table loaded from section_types.csv.
stdf

In [None]:
# Article to evaluate; the second path component is taken as the article name.
path = "outputs/Llama_3_(Language_Model)/storm_gen_article_polished.txt"
article_name = path.split("/")[1]

In [None]:
@dataclass
class Sentence:
    """One claim extracted from an article, with its position in the document."""
    section: str  # first line of the enclosing top-level section
    section_type: str  # value returned by get_section_type for that section
    subsection: str  # first line of the enclosing second-level section
    paragraph_number: int  # index of the line within the subsection (line 0 is skipped by articulate_article)
    claim_number: int  # index of the claim within the line after splitting on ". "
    text: str  # the claim text itself

def identify_sections(text, level=1):
    """Split markdown-style text at headings of the given depth.

    The text is split on a newline followed by ``level`` ``#`` characters and a
    space (``"\\n# "`` for level 1, ``"\\n## "`` for level 2, and so on).

    Args:
        text: the text to split.
        level: heading depth, a positive whole number.

    Returns:
        ``(names, sections)``: ``sections`` are the pieces produced by the split
        and ``names[i]`` is the first line of ``sections[i]``. The first piece is
        whatever precedes the first heading marker, so its name is simply the
        first line of ``text``.

    Raises:
        ValueError: if ``level`` is not a positive whole number.
    """
    if isinstance(level, (int, float)) and level >= 1 and level == int(level):
        depth = int(level)
    else:
        # Fail with a clear error instead of printing and then hitting an
        # unbound local variable further down.
        raise ValueError(f"Invalid level: {level!r} (expected a positive whole number)")

    sections = text.split("\n" + "#" * depth + " ")
    names = [section.split("\n")[0] for section in sections]
    return names, sections

def articulate_article(article):
    """Break an article into Sentence records.

    The article is split into top-level sections, each section into
    second-level subsections, each subsection into lines, and each line into
    claims on ``". "``. The section type is obtained once per top-level section
    from ``get_section_type``.

    Args:
        article: the full article text.

    Returns:
        A list of Sentence objects in document order. ``paragraph_number`` is the
        line's index within its subsection and ``claim_number`` is the claim's
        index within its line.
    """
    sentences = []
    outline, sections = identify_sections(article)
    # Pair names with texts directly; looking positions up with list.index is
    # quadratic and always returns the first of two identical sections.
    for section_name, section_text in zip(outline, sections):
        section_type = get_section_type(section_text)
        suboutline, subsections = identify_sections(section_text, level=2)
        for subsection_name, subsection_text in zip(suboutline, subsections):
            paragraphs = subsection_text.split("\n")
            # Line 0 is the heading that names the (sub)section, so start at 1.
            for paragraph_number in range(1, len(paragraphs)):
                claims = paragraphs[paragraph_number].split(". ")
                # Every claim counts, including index 0: starting at 1 here
                # dropped the first sentence of each paragraph.
                for claim_number, claim in enumerate(claims):
                    if not claim.strip():
                        # Blank lines and trailing ". " produce empty fragments.
                        continue
                    sentences.append(Sentence(section=section_name,
                                              section_type=section_type,
                                              subsection=subsection_name,
                                              paragraph_number=paragraph_number,
                                              claim_number=claim_number,
                                              text=claim))

    return sentences

def extract_response(response, default = "body sections"):
    """Return the text between the first ``<`` and the next ``>`` in ``response``.

    Args:
        response: the model's reply. May be a non-string value when the request
            failed and the raw payload was returned instead of text.
        default: value returned when nothing can be extracted.

    Returns:
        The delimited text (possibly empty, for ``"<>"``), or ``default`` when
        ``response`` is not a string or contains no ``<...>`` pair on one line.
    """
    if not isinstance(response, str):
        # re.search would raise TypeError on a dict/list error payload.
        return default

    # Non-greedy so only the first delimited span is captured.
    match = re.search(r'<(.*?)>', response)
    return match.group(1) if match else default

def get_section_type(section):
    """Ask the evaluation model which section type ``section`` belongs to.

    The prompt embeds the JSON form of the ``stdf`` table followed by the
    section text, and asks for the answer delimited as ``<section type>``.
    The delimited answer is pulled out with ``extract_response``.
    """
    preamble = "Section types are defined in " + str(stdf.to_json())
    instruction = "; please assign a section type (eg Lead section or Body Sections) for the following section: \n\n"
    closing = "\n\nPlease return the name of the section type as defined in the above file but delimited in the format: <section type>"

    # str.join, like "+", rejects a non-string section with TypeError.
    prompt = "".join([preamble, instruction, section, closing])

    return extract_response(eval_llm.prompt(prompt))

In [None]:
# Read the whole article into memory as a single string.
# NOTE(review): no encoding is passed, so the platform default applies — confirm the file's encoding.
with open(path, "r") as file:
        article = file.read()

In [None]:
# Split the article into Sentence records; get_section_type is called once per top-level section.
data = articulate_article(article)

In [None]:
# Tabulate the Sentence records.
df = pd.DataFrame(data)

In [None]:
# Display the sentence table.
df

In [None]:
# Spot-check the section-type classifier on the first row.
# NOTE(review): df.section holds the section's first line, whereas articulate_article
# passes the full section text to get_section_type — confirm this is intended.
result = get_section_type(df.section[0])

In [None]:
# Display the section type returned for the spot check.
result

In [None]:
def make_exam_question(sentence):
    """Ask the evaluation model for an exam question about ``sentence``.

    Args:
        sentence: an object with a ``text`` attribute holding the sentence.

    Returns:
        The question the model returned between ``<`` and ``>``. If the reply
        contains no such delimiters, the stripped reply itself is returned.
    """
    prompt = "Please write a question which prompts the respondent to evaluate this sentence based on the wikipedia guidelines: \n\n" +sentence.text + "\n\nPlease return the question in the format: <question>"
    response = eval_llm.prompt(prompt)
    # extract_response's built-in default ("body sections") is a section type,
    # not a question, so supply a fallback that makes sense here.
    fallback = response.strip() if isinstance(response, str) else ""
    return extract_response(response, default=fallback)

def make_question_guidelines(sentence):
    """Ask the evaluation model for grading guidelines about ``sentence``.

    Args:
        sentence: an object with a ``text`` attribute holding the sentence.

    Returns:
        The guidelines the model returned between ``<`` and ``>``. If the reply
        contains no such delimiters, the stripped reply itself is returned.
    """
    # The prompt asks for guidelines; a copy of the question prompt would
    # generate a second question instead.
    prompt = "Please write grading guidelines which a grader should follow when evaluating this sentence based on the wikipedia guidelines: \n\n" +sentence.text + "\n\nPlease return the guidelines in the format: <guidelines>"
    response = eval_llm.prompt(prompt)
    # extract_response's built-in default ("body sections") is a section type,
    # not a guideline, so supply a fallback that makes sense here.
    fallback = response.strip() if isinstance(response, str) else ""
    return extract_response(response, default=fallback)

# Exam-wide grading guidance; LLMTest joins these into the context prompt sent to the grader.
# NOTE(review): the first entry contains typos ("according the to the", "wikikpedia")
# that are sent to the model verbatim.
exam_guidelines = ["evaluate according the to the requirements in the wikikpedia style guide", 
                   "provide a score between zero and one representing the probability this sentence meets the wikipedia guidelines according to a human wikipedia moderator",]

In [None]:
# One model request per sentence for each list, so this cell issues 2 * len(data) requests.
questions = [make_exam_question(sentence) for sentence in data]
question_guidelines = [make_question_guidelines(sentence) for sentence in data]

In [None]:
class Lookup:
    """Stand-in for a model that answers from a pre-computed collection.

    Provides ``model_identifier`` and ``prompt_sequence``, the two members
    LLMTest uses on its student model.
    """

    def __init__(self, data):
        """Remember ``data`` and label this pseudo-model ``"lookup"``."""
        self.model_identifier = "lookup"
        self.data = data

    def prompt_sequence(self,questions):
        """Ignore ``questions`` and hand back the stored ``data`` object unchanged."""
        canned = self.data
        return canned

In [None]:
# Bundle the generated questions and guidelines into an Exam.
evaluator = Exam(questions, question_guidelines, exam_guidelines, GradeLog)

In [None]:
# The article's own sentences play the role of the "student answers": Lookup returns
# them as-is, and eval_llm acts as the grader.
content = [d.text for d in data]
ex = LLMTest(Lookup(content), eval_llm, evaluator)

In [None]:
# Run the grading pass; yields one GradeLog per question.
g= ex.test()

In [None]:
# Tabulate the GradeLog records.
grade_df = pd.DataFrame(g)

In [None]:
#keep only digits and dots from a string
def remove_non_numeric(s):
    """Return ``s`` with every character other than a digit or ``.`` removed.

    The surviving characters keep their original order, so the result is not
    guaranteed to be a valid number (``"1.2.3"`` stays ``"1.2.3"``).
    """
    # Concatenating the runs of digits/dots is the same as deleting everything else.
    kept_runs = re.findall(r'[\d.]+', s)
    return ''.join(kept_runs)
# Overwrite Grade by re-parsing Notes: take the text before the first blank line,
# keep what follows its last ": ", and strip everything except digits and dots.
# The result is a string and may be empty if that fragment contains no digits.
grade_df["Grade"] = grade_df["Notes"].apply(lambda x: remove_non_numeric(x.split("\n\n")[0].split(": ")[-1]))

In [None]:
# A sentence passes when its grade is strictly above the threshold.
threshold = .8
# float() raises ValueError if a Grade string is empty or not a valid number.
grade_df["Pass"] = grade_df["Grade"].apply(lambda x: float(x) > threshold)
# Passing rows are tagged "yellow", failing rows "red".
grade_df["Color"] = grade_df["Pass"].apply(lambda x: "yellow" if x else "red")

In [None]:
# Display the grades with the Pass and Color columns.
grade_df

In [None]:
#merge grade df with df on index
df = df.merge(grade_df, left_index=True, right_index=True)

In [None]:
# Display the sentences joined with their grades.
df

In [None]:
# Rows tagged "yellow", i.e. the sentences that passed the threshold.
df[df.Color=="yellow"]

In [None]:
# Save the graded sentences without the index column.
# NOTE(review): the path contains a doubled slash and repeats the article directory
# instead of deriving it from `path` — confirm the intended location.
df.to_csv("outputs/Llama_3_(Language_Model)//graded_sentences.csv", index=False)

In [None]:
# Inspect a single row by position; raises IndexError if df has fewer than 39 rows.
df.iloc[38]
