In [1]:
import os
from openai import OpenAI
import pandas as pd

# Module-level OpenAI client used by the request code in the cells below.
# The API key is taken from the OPENAI_API_KEY environment variable rather than
# being written into the notebook; os.environ.get yields None if it is unset.
client = OpenAI(
    api_key=os.environ.get('OPENAI_API_KEY'),  # This is the default and can be omitted
)

from kaggle_submission import SubmissionBase

# Load the question set from test.csv (path is relative to the working
# directory). The column layout is not shown in this notebook; it is whatever
# SubmissionBase expects — TODO confirm against kaggle_submission.
questions = pd.read_csv('test.csv')


**V1 - Simple Context Improvement**



In [4]:
# System prompt for the V1 strategy: asks the model to reply with only the
# letter of the answer. This module-level name is read by
# SimpleContext.get_1_answer and has the same name as the `context` parameter
# of sendRequestToGPT.
# NOTE(review): the text contains the typo "retun" and asks for "one or several
# correct answer" while also stating "Only one response is admissible". It is
# left untouched here because editing the prompt changes what the model
# receives and therefore the recorded score.
context = 'You are a reliability professor. You will see a multiple choice question. Please output one or several correct answer. You should only return the letter of the correct answer. For example, if you want to say that answer [d] is the right one, you should only retun "d". Only one response is admissible'

def sendRequestToGPT(prompt,
                     context="You are a reliability professor. You will see a multiple choice question. Please output one or several correct answer.",
                     print_=False):
    """Send one question to the gpt-4o-mini chat model and return its reply text.

    Args:
        prompt: Text sent as the user message.
        context: Text sent as the system message.
        print_: When true, echo the context, the prompt and the reply to stdout.

    Returns:
        The content of the first choice in the completion, or None when any
        exception is raised while requesting or reading the reply (the error
        is printed instead of being propagated).
    """
    chat_messages = [
        dict(role="system", content=context),
        dict(role="user", content=prompt),
    ]

    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=chat_messages,
        )
        reply = completion.choices[0].message.content

        if print_:
            # Empty strings reproduce the blank separator lines between sections.
            report = ('Context:', context, '',
                      'Prompt:', prompt, '',
                      'GPT-4 Response:', reply)
            for line in report:
                print(line)
    except Exception as e:
        # Best-effort: report the failure and hand None back to the caller.
        print(f"An error occurred: {e}")
        return None

    return reply

class SimpleContext(SubmissionBase):
    """V1 strategy: a single request per question using the module-level `context` prompt."""

    def get_1_answer(self, q):
        """Return the model's reply for question text `q` (None if the request failed)."""
        answer = sendRequestToGPT(prompt=q, context=context)
        return answer
    
# Run the V1 strategy on the loaded questions. submission_to_csv and score come
# from SubmissionBase (not shown here); presumably the first writes the
# predictions to submission.csv and the second returns the accuracy displayed
# as this cell's output — TODO confirm against kaggle_submission.
v1 = SimpleContext(questions)
v1.submission_to_csv('submission.csv')
v1.score()

 --> Prediction 1, question 1 : a
 --> Prediction 1, question 2 : a
 --> Prediction 1, question 3 : c
 --> Prediction 1, question 4 : a
 --> Prediction 1, question 5 : a
 --> Prediction 1, question 6 : b
 --> Prediction 1, question 7 : d
 --> Prediction 1, question 8 : a
 --> Prediction 1, question 9 : b
 --> Prediction 1, question 10 : a
 --> Prediction 1, question 11 : c
 --> Prediction 1, question 12 : a
 --> Prediction 1, question 13 : b
 --> Prediction 1, question 14 : d
 --> Prediction 1, question 15 : a
 --> Prediction 1, question 16 : c
 --> Prediction 1, question 17 : b
 --> Prediction 1, question 18 : a
 --> Prediction 1, question 19 : a
 --> Prediction 1, question 20 : c
 --> Prediction 1, question 21 : d
 --> Prediction 1, question 22 : a
 --> Prediction 1, question 23 : a
 --> Prediction 1, question 24 : d
 --> Prediction 2, question 1 : a
 --> Prediction 2, question 2 : a
 --> Prediction 2, question 3 : c
 --> Prediction 2, question 4 : b
 --> Prediction 2, question 5 : a

0.45

**Double Prompting**


In [2]:
from pydantic import BaseModel
from typing import Literal

# Structured-output schema passed as `response_format` to the second request in
# DoublePrompting.get_1_answer.
# (Documented with comments rather than a docstring: a docstring on a pydantic
# model is included in its generated JSON schema.)
class FullReasoning(BaseModel):
    # Explanation steps given by the model, one string per step.
    steps: list[str]
    # The chosen option; restricted to exactly one of the four letters.
    final_answer: Literal["a", "b", "c", "d"]

# System message for the double-prompting strategy. The triple-quoted literal
# keeps its leading and trailing newline, so both are sent to the model.
SYSTEM_PROMPT = """
You are a reliability expert. Respond with a, b, c, or d.
"""

# Follow-up user message sent after the model's first reply, asking it to
# re-check the answer and justify it in two steps. Also keeps its surrounding
# newlines.
DOUBT_PROMPT = """
I have a doubt. Are you totally sure ? Double-check your answer and explain briefly in 2 steps.
"""

class DoublePrompting(SubmissionBase):
    """Two-turn strategy: get a first answer, then challenge it and keep the revised one."""

    def get_1_answer(self, q):
        """Return the answer letter for question text `q` after a second, doubting turn.

        The first request asks for an answer with a short completion budget.
        The reply is appended to the conversation together with DOUBT_PROMPT,
        and a second request asks for a reply parsed into FullReasoning.
        The `final_answer` field of that parsed reply is returned.
        """
        model_name = "gpt-4o-mini"
        sampling_temperature = 0.61

        conversation = [
            dict(role="system", content=SYSTEM_PROMPT),
            dict(role="user", content=q),
        ]

        # Turn 1: free-text answer, capped at 50 completion tokens.
        initial = client.chat.completions.create(
            model=model_name,
            messages=conversation,
            temperature=sampling_temperature,
            max_completion_tokens=50,
        )
        initial_text = initial.choices[0].message.content

        # Turn 2: replay the first answer, then voice the doubt.
        conversation.append(dict(role="assistant", content=initial_text))
        conversation.append(dict(role="user", content=DOUBT_PROMPT))

        revised = client.beta.chat.completions.parse(
            model=model_name,
            messages=conversation,
            temperature=sampling_temperature,
            response_format=FullReasoning,
        )
        return revised.choices[0].message.parsed.final_answer

In [5]:
# Run the double-prompting strategy on the loaded questions.
# NOTE(review): this uses the same 'submission.csv' file name as the V1 cell,
# so it presumably replaces that file — confirm that is intended.
# submission_to_csv and score come from SubmissionBase (not shown here).
double_prompting = DoublePrompting(questions)
double_prompting.submission_to_csv('submission.csv')
double_prompting.score()

Mean accuracy: 0.2666666666666667


0.2666666666666667