In [1]:
import pandas as pd
from src.kaggle_submission import SubmissionBase, test_submission
from src.client import openai_client
from pydantic import BaseModel
from typing import Literal
from src.python_agent import PythonAgent

# Load the question sets from CSV files under the relative `data/` directory.
# `test_questions` is what every strategy below is instantiated with.
test_questions = pd.read_csv("data/test.csv")
train_questions = pd.read_csv("data/train.csv")


  from .autonotebook import tqdm as notebook_tqdm


**V1 - Simple Context Improvement**



In [2]:
# System prompt for the single-letter strategies: it states the task, pins the
# output format to one bare letter, and shows one worked example.
context = """
You are an AI expert in reliability engineering. Your task is to answer multiple-choice questions (MCQs) accurately and concisely. Each question will have exactly one correct answer.

Instructions:
- Read the question and the possible answers.
- Identify the single correct answer based on your expertise in reliability engineering.
- Respond only with the letter of the correct answer (e.g., a, b, c, or d). Do not provide any explanations or additional text.
For example, if you want to say that answer [d] is the right one, you should only return "d".


Example usage:
Question: Which metric measures the average time between system failures?
a. MTTR
b. MTBF
c. Availability
d. Failure Rate

Expected response:
b
"""


class SimpleContext(SubmissionBase):
    """Single-turn strategy: send the question with the `context` system prompt.

    The model is instructed to answer with one bare letter, and that letter is
    returned as the prediction.
    """

    def get_1_answer(self, q):
        """Return the model's answer letter for one question.

        Args:
            q: The question text (including its answer options) sent as the
                user message.

        Returns:
            The completion text with surrounding whitespace removed and
            lower-cased, or ``None`` if the completion has no text content.
        """
        response = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": context},
                {"role": "user", "content": q},
            ],
        )

        raw_answer = response.choices[0].message.content
        if raw_answer is None:
            return raw_answer
        # This is free text, unlike the structured strategies that return a
        # lowercase Literal letter: normalise so "B" or "b\n" still counts as "b".
        return raw_answer.strip().lower()
    
# Instantiate the V1 strategy on the test questions.
v1 = SimpleContext(test_questions, openai_client)

In [None]:
# Evaluate the V1 strategy with the project's submission helper.
# NOTE(review): the meaning of `fake_multiple_attempts` is defined in
# src.kaggle_submission and is not visible here — confirm before changing it.
test_submission(v1, fake_multiple_attempts=True)

**Double Prompting**


In [3]:
# Structured-output schema passed as `response_format` for the double-check
# turn of DoublePrompting.
class FullReasoning(BaseModel):
    # Reasoning steps, as a list of strings.
    steps: list[str]
    # Final pick, restricted to the four option letters.
    final_answer: Literal["a", "b", "c", "d"]


# Reuse the single-letter instructions in `context` as the system prompt here.
# NOTE: `SYSTEM_PROMPT` is assigned again further down for the multiway
# strategy, so its value depends on which cell ran last.
SYSTEM_PROMPT = context

# Follow-up user turn that challenges the model's first answer and asks for a
# short, two-step justification. The leading and trailing newlines are part of
# the prompt text.
DOUBT_PROMPT = (
    "\n"
    "I have a doubt. Are you totally sure ? "
    "Double-check your answer and explain briefly in 2 steps."
    "\n"
)


class DoublePrompting(SubmissionBase):
    """Two-turn strategy: answer first, then get challenged and re-check.

    Turn 1 asks the question under the system prompt. Turn 2 appends the
    model's own answer plus `DOUBT_PROMPT` and parses the reply into
    `FullReasoning`.
    """

    # Captured when the class is defined. `SYSTEM_PROMPT` is a notebook-level
    # name that a later cell reassigns; looking it up inside `get_1_answer`
    # would use whichever assignment ran last instead of the prompt meant for
    # this strategy.
    _system_prompt = SYSTEM_PROMPT

    def get_1_answer(self, q):
        """Return the re-checked answer letter for one question.

        Args:
            q: The question text (including its answer options) sent as the
                first user message.

        Returns:
            The `final_answer` field of the parsed `FullReasoning` reply.
        """
        messages = [
            {"role": "system", "content": self._system_prompt},
            {"role": "user", "content": q},
        ]

        first_response = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            temperature=0.61,
        )

        # Replay the model's own first answer, then challenge it.
        messages += [
            {"role": "assistant", "content": first_response.choices[0].message.content},
            {"role": "user", "content": DOUBT_PROMPT},
        ]

        response = openai_client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            messages=messages,
            temperature=0.61,
            response_format=FullReasoning,
        )
        # NOTE(review): `parsed` is presumably None when the model refuses;
        # that would surface here as an AttributeError — confirm desired handling.
        answer = response.choices[0].message.parsed
        return answer.final_answer
    
# Instantiate the double-prompting strategy on the test questions.
double_prompting = DoublePrompting(test_questions, openai_client)

In [None]:
# Evaluate the double-prompting strategy with the project's submission helper.
# NOTE(review): `fake_multiple_attempts` semantics are not visible here — confirm.
test_submission(double_prompting, fake_multiple_attempts=True)

**Multiway prompting**

This time we initially withhold the answer choices from the model. We first let it reason about the question on its own, and only then provide the multiple-choice options so that it can pick the best answer.

In [2]:
# System prompt for the multiway strategy: unlike the single-letter prompt, it
# asks the model to explain its reasoning.
# NOTE: this reassigns the `SYSTEM_PROMPT` name that is also set earlier in the notebook.
SYSTEM_PROMPT = """You are a reliability expert. You will be asked to answer to several questions based on your knowledge and the definitions you know.
You will need to explain your reasoning and explain the steps that allowed you to choose your answers.
"""

# Structured-output schema passed as `response_format` for the final selection
# turn of MultiPrompting.
class SimpleAnswer(BaseModel):
    # Final pick, restricted to the four option letters.
    choice: Literal["a", "b", "c", "d"]


def provide_choices(choices):
    """Build the user message that reveals the answer options.

    Args:
        choices: Text of the answer options; it is interpolated as-is on the
            line(s) following the instruction sentence.

    Returns:
        The instruction sentence, a newline, then `choices`.
    """
    instruction = "Based on your previous answer, you should now assess the veracity of each of the following possible answers one by one:"
    return f"{instruction}\n{choices}"

# Final user turn of the multiway strategy: forces a single pick, even when
# the model found none of the options fully acceptable.
SELECTION_PROMPT = """Now please select the 1 possibility that fits best the initial question.
It is possible that none of the possible answers seems acceptable to you. In this case, please choose the one that is the closest to your opinion."""


class MultiPrompting(SubmissionBase):
    """Three-turn strategy that hides the answer options until the model has reasoned.

    Conversation flow:
      1. Ask only the question text (everything before the ``[Choices]`` marker).
      2. Reveal the options via `provide_choices` and ask for an assessment of each.
      3. Send `SELECTION_PROMPT` and parse the single final pick into `SimpleAnswer`.
    """

    # Captured when the class is defined. `SYSTEM_PROMPT` is a notebook-level
    # name that more than one cell assigns; looking it up inside
    # `get_1_answer` would use whichever cell ran last instead of the prompt
    # written for this strategy.
    _system_prompt = SYSTEM_PROMPT

    def get_1_answer(self, q):
        """Return the chosen answer letter for one question.

        Args:
            q: Question text containing a ``[Choices]`` marker that separates
                the question from its answer options.

        Returns:
            The `choice` field of the parsed `SimpleAnswer` reply.

        Raises:
            ValueError: If `q` does not contain the ``[Choices]`` marker.
        """
        # Split on the first marker only, so a repeated "[Choices]" inside the
        # options cannot break the unpacking; a missing marker gets a clear error.
        question, marker, choices = q.partition("[Choices]")
        if not marker:
            raise ValueError("Question is missing the '[Choices]' marker.")

        messages = [
            {"role": "system", "content": self._system_prompt},
            {"role": "user", "content": question},
        ]

        # Turn 1: free reasoning on the question alone.
        first_response = (
            openai_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
                temperature=0.61,
            )
            .choices[0]
            .message.content
        )

        messages += [
            {"role": "assistant", "content": first_response},
            {"role": "user", "content": provide_choices(choices)},
        ]

        # Turn 2: assess each option against the earlier reasoning.
        chat_opinion = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            temperature=0.61,
        ).choices[0].message.content

        messages += [
            {"role": "assistant", "content": chat_opinion},
            {"role": "user", "content": SELECTION_PROMPT},
        ]

        # Turn 3: structured final pick.
        # NOTE(review): `parsed` is presumably None when the model refuses;
        # that would surface here as an AttributeError — confirm desired handling.
        return openai_client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            messages=messages,
            temperature=0.61,
            response_format=SimpleAnswer,
        ).choices[0].message.parsed.choice
    
# Instantiate the multiway-prompting strategy on the test questions.
mp = MultiPrompting(test_questions, openai_client)

In [None]:
# Evaluate the multiway-prompting strategy with the project's submission helper.
# NOTE(review): `fake_multiple_attempts` semantics are not visible here — confirm.
test_submission(mp, fake_multiple_attempts=True)

**Agentic system**

Now we will give the model the ability to write and execute Python scripts.

In [6]:
# Enable the Python agent on the shared client, then re-run the evaluation.
# NOTE(review): presumably `inject_python` modifies `openai_client` in place so
# that existing strategies pick up the tool — confirm in src.python_agent.
PythonAgent.inject_python(openai_client=openai_client)
# This evaluates the existing `double_prompting` instance, not a new
# agent-specific strategy.
test_submission(double_prompting, fake_multiple_attempts=True)

 --> Prediction 1 for question 1 : d


KeyboardInterrupt: 

**RAG prompting**

We now augment the knowledge of our model using Retrieval Augmented Generation. We built a database containing specific information about reliability engineering, and will use it to augment our prompts.