In [1]:
import os
from openai import OpenAI
import pandas as pd

# OpenAI client used by the chat-completion calls in this notebook. The key is
# read from the environment so that no credential is stored in the notebook.
client = OpenAI(
    api_key=os.environ.get('OPENAI_API_KEY'),  # This is the default and can be omitted
)

# Project-local base class that the strategy classes in this notebook subclass.
from kaggle_submission import SubmissionBase

# Question set read from test.csv; it is passed to each strategy's constructor.
questions = pd.read_csv('test.csv')


**V1 - Simple Context Improvement**



In [7]:
# Baseline system prompt, kept under its own name for reference. It used to be
# assigned to `context` and was overwritten by the next statement, so it never
# reached the model.
baseline_context = 'You are a reliability professor. You will see a multiple choice question. Please output one or several correct answer. You should only return the letter of the correct answer. For example, if you want to say that answer [d] is the right one, you should only retun "d". Only one response is admissible'

# System prompt read by SimpleContext.get_1_answer. The wording is left verbatim
# (including the "retun" typo): editing it changes what is sent to the model.
context = """
You are an AI expert in reliability engineering. Your task is to answer multiple-choice questions (MCQs) accurately and concisely. Each question will have exactly one correct answer.

Instructions:
- Read the question and the possible answers.
- Identify the single correct answer based on your expertise in reliability engineering.
- Respond only with the letter of the correct answer (e.g., a, b, c, or d). Do not provide any explanations or additional text.
For example, if you want to say that answer [d] is the right one, you should only retun "d".


Example usage:
Question: Which metric measures the average time between system failures?
a. MTTR
b. MTBF
c. Availability
d. Failure Rate

Expected response:
b
"""

def sendRequestToGPT(prompt,
                     context="You are a reliability professor. You will see a multiple choice question. Please output one or several correct answer.",
                     print_=False,
                     model="gpt-4o-mini"):
    """Send one system + user exchange to the chat-completions API.

    Args:
        prompt: Text sent as the user message.
        context: Text sent as the system message.
        print_: When True, echo the context, the prompt and the reply to stdout.
        model: Name of the chat model to query. Defaults to "gpt-4o-mini", the
            value that was previously hard-coded in the request.

    Returns:
        The text content of the first returned choice, or None when anything
        inside the request/handling raised (the error is printed, not raised).
    """
    try:
        # Send a chat completion request to the selected model
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": context},
                {"role": "user", "content": prompt}
            ]
        )

        # Only the first choice is used.
        response_text = response.choices[0].message.content

        if print_:
            print('Context:')
            print(context)
            print('')
            print('Prompt:')
            print(prompt)
            print('')
            print('GPT-4 Response:')
            print(response_text)
        return response_text
    except Exception as e:
        # Best-effort by design: report the failure and hand back an explicit
        # None so callers can tell "no answer" apart from a real reply.
        print(f"An error occurred: {e}")
        return None

class SimpleContext(SubmissionBase):
    """Strategy that answers a question with a single request using the module-level ``context`` prompt."""

    def get_1_answer(self, q):
        """Return whatever ``sendRequestToGPT`` yields for question text ``q``."""
        system_prompt = context
        return sendRequestToGPT(prompt=q, context=system_prompt)
    
# Run the V1 strategy on the loaded questions and write its predictions to
# submission1.csv. score() is the last expression so that its return value is
# displayed as the cell output.
v1 = SimpleContext(questions)
v1.submission_to_csv('submission1.csv')
v1.score()

 --> Prediction 1, question 1 : a
 --> Prediction 1, question 2 : a
 --> Prediction 1, question 3 : c
 --> Prediction 1, question 4 : a
 --> Prediction 1, question 5 : a
 --> Prediction 1, question 6 : b
 --> Prediction 1, question 7 : d
 --> Prediction 1, question 8 : a
 --> Prediction 1, question 9 : b
 --> Prediction 1, question 10 : a
 --> Prediction 1, question 11 : c
 --> Prediction 1, question 12 : a
 --> Prediction 1, question 13 : a
 --> Prediction 1, question 14 : d
 --> Prediction 1, question 15 : a
 --> Prediction 1, question 16 : c
 --> Prediction 1, question 17 : b
 --> Prediction 1, question 18 : b
 --> Prediction 1, question 19 : a
 --> Prediction 1, question 20 : c
 --> Prediction 1, question 21 : c
 --> Prediction 1, question 22 : a
 --> Prediction 1, question 23 : a
 --> Prediction 1, question 24 : d
 --> Prediction 2, question 1 : c
 --> Prediction 2, question 2 : a
 --> Prediction 2, question 3 : c
 --> Prediction 2, question 4 : b
 --> Prediction 2, question 5 : a

0.4833333333333333

**Double Prompting**


In [2]:
from pydantic import BaseModel
from typing import Literal

# Structured-output schema handed to the API as `response_format` for the
# follow-up reply in DoublePrompting.
# NOTE(review): documented with `#` comments rather than a docstring on purpose.
# Pydantic copies a class docstring into the generated JSON schema, which would
# change what is sent with the request.
class FullReasoning(BaseModel):
    # Presumably the explanation steps requested by the follow-up prompt.
    steps: list[str]
    # Selected option, restricted to one of the four choice letters.
    final_answer: Literal["a", "b", "c", "d"]

# System message that opens each DoublePrompting conversation.
SYSTEM_PROMPT = """
You are a reliability expert. Respond with a, b, c, or d.
"""

# Follow-up user message sent after the first reply, asking the model to
# re-check its answer.
DOUBT_PROMPT = """
I have a doubt. Are you totally sure ? Double-check your answer and explain briefly in 2 steps.
"""

class DoublePrompting(SubmissionBase):
    """Two-turn strategy: ask the question, challenge the reply, keep the re-checked answer."""

    def get_1_answer(self, q):
        """Return the choice letter given after the model was asked to double-check.

        The question ``q`` is sent with SYSTEM_PROMPT, the first reply is fed
        back as an assistant turn followed by DOUBT_PROMPT, and the second
        reply is parsed into a FullReasoning object whose ``final_answer`` is
        returned.
        """
        # Settings common to both requests.
        shared = {"model": "gpt-4o-mini", "temperature": 0.61}

        opening_turns = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": q},
        ]

        # First pass: short free-text answer, capped at 50 completion tokens.
        draft = client.chat.completions.create(
            messages=opening_turns,
            max_completion_tokens=50,
            **shared,
        )
        draft_text = draft.choices[0].message.content

        # Second pass: replay the draft, then challenge it. No token cap here.
        challenged_turns = opening_turns + [
            {"role": "assistant", "content": draft_text},
            {"role": "user", "content": DOUBT_PROMPT},
        ]
        verdict = client.beta.chat.completions.parse(
            messages=challenged_turns,
            response_format=FullReasoning,
            **shared,
        )

        return verdict.choices[0].message.parsed.final_answer

In [5]:
# Run the double-prompting strategy on the loaded questions and write its
# predictions to submission.csv. score() is the last expression so that its
# return value is displayed as the cell output.
double_prompting = DoublePrompting(questions)
double_prompting.submission_to_csv('submission.csv')
double_prompting.score()

Mean accuracy: 0.2666666666666667


0.2666666666666667

**Multiway Prompting**

In [2]:
from kaggle_submission import SequentialQuestions
class MultiPrompting(SequentialQuestions):
    """Three-turn strategy: open question, per-choice assessment, then a final letter."""

    def get_1_answer(self, q, return_log=False):
        """Ask the question in three successive turns and return the last reply.

        Args:
            q: Question text with the question on its first line and the
                choices on its third line (one blank line in between).
            return_log: When True, return the whole conversation as produced
                by ``_ask_questions_in_a_row`` instead of only the last reply.

        Returns:
            The conversation when ``return_log`` is True, otherwise the text
            of its last entry.

        Raises:
            IndexError: if ``q`` has fewer than three lines.
        """
        # Local names deliberately differ from the notebook-level `context`
        # and `questions` so that those globals are not shadowed in here.
        system_prompt = """You are a reliability expert. You will be asked to answer to several questions based on your knowledge and the definitions you know.
You will need to explain your reasonning and explain the steps that allowed you to choose your answers."""

        def extract_question(q):
            # Line 0 holds the question, line 2 the choices (line 1 is blank).
            split = q.split('\n')
            return split[0],split[2]
        question,choices = extract_question(q)

        prompts = [
            question,
            f"""Based on your previous answer, you should now assess the veracity of each of the following possible answers one by one:
{choices}""",
            f"""Now please select the 1 possibility that fits bests the initial question.
It is possible that none of the possible answer seems acceptable to you. In this case, please choose the one that is the closest to your opinion.
Please note that only one answer is acceptable and that only the letter of the correct answer is expected. If you want to say that answer [d] is the right one, you should only retun "d"."""
        ]

        conversation = self._ask_questions_in_a_row(system_prompt, prompts)

        if return_log:
            return conversation

        last_turn = conversation[-1]
        # Mapping-style entries keep the original behaviour (first value).
        values = getattr(last_turn, "values", None)
        if callable(values):
            return list(values())[0]
        # NOTE(review): the logged conversation appears to hold (label, text)
        # pairs, which have no .values(); take the text element in that case.
        # Confirm against SequentialQuestions._ask_questions_in_a_row.
        return last_turn[-1]
     

# Manual check on one hand-written question: the question is on the first line
# and the choices on the third, which is the layout get_1_answer splits on.
mp = MultiPrompting(questions)
q = """[Question]: 2. In general, reliability testing is performed for which of the following reasons?I. To detect unanticipated failure modes.II. To compare estimated failure rates to actual failure rates.III. To monitor reliability growth over time.IV. To meet or exceed customer expectations.

[Choices]: [a] I and III only | [b] II and IV only | [c] I, II and III only  | [d] I, II, III and IV"""
# Second argument is return_log=True, so the full conversation is displayed.
mp.get_1_answer(q, True)

[('0-system',
  'You are a reliability expert. You will be asked to answer to several questions based on your knowledge and the definitions you know.\nYou will need to explain your reasonning and explain the steps that allowed you to choose your answers.'),
 ('1-user',
  '[Question]: 2. In general, reliability testing is performed for which of the following reasons?I. To detect unanticipated failure modes.II. To compare estimated failure rates to actual failure rates.III. To monitor reliability growth over time.IV. To meet or exceed customer expectations.'),
 ('2-assistant',
  "Reliability testing is carried out for several important reasons related to the performance and longevity of a system or product. Let's evaluate the provided options.\n\nI. **To detect unanticipated failure modes**: \nThis is a valid reason for reliability testing. During the testing phase, many unanticipated failure modes can surface, which might not have been identified during the design phase. This helps in i

In [3]:
# Fresh strategy instance, then a call to test() with n=2. test() is not
# defined in MultiPrompting, so it comes from a base class. The returned
# object is shown with display().
mp = MultiPrompting(questions)
df_test = mp.test(n=2)
display(df_test)

1
('0-system', 'You are a reliability expert. You will be asked to answer to several questions based on your knowledge and the definitions you know.\nYou will need to explain your reasonning and explain the steps that allowed you to choose your answers.')
('1-user', '[Question]: 2. In general, reliability testing is performed for which of the following reasons?I. To detect unanticipated failure modes.II. To compare estimated failure rates to actual failure rates.III. To monitor reliability growth over time.IV. To meet or exceed customer expectations.')
('2-assistant', "Reliability testing is an essential part of the product development process aimed at ensuring that a product will perform its intended function without failure over a specified period. Let's analyze the provided options one by one:\n\nI. **To detect unanticipated failure modes**: \nThis is indeed a primary reason for reliability testing. Reliability testing can help uncover potential weak points or unforeseen failure mod

Unnamed: 0,question_id,question,answer,0-system,1-user,2-assistant,3-user,4-assistant,5-user,6-assistant
0,1,"[Question]: 2. In general, reliability testing...",d,You are a reliability expert. You will be aske...,"[Question]: 2. In general, reliability testing...",Reliability testing is an essential part of th...,"Based on your previous answer, you should now ...",Let's evaluate each choice in light of the pre...,Now please select the 1 possibility that fits ...,d
1,2,"[Question]: 7. Five items were placed on test,...",c,You are a reliability expert. You will be aske...,"[Question]: 7. Five items were placed on test,...",To find the 95% lower confidence limit for the...,"Based on your previous answer, you should now ...",To assess the veracity of each possible answer...,Now please select the 1 possibility that fits ...,d


In [None]:
#mp.submission_to_csv('submission_multi_discussion.csv')
#mp.score()