In [1]:
import random
from dataclasses import dataclass
from itertools import permutations
import json
import numpy as np
from dotenv import load_dotenv
import os

In [2]:
# Load key/value pairs from a local .env file into the process environment
# (python-dotenv); the configuration settings are read from os.environ afterwards.
load_dotenv()

True

In [3]:
# Seed both generators so a fresh run reproduces the same dataset: the stdlib
# `random` module picks the passage/question templates, while NumPy's global RNG
# samples the incorrect options and shuffles the choice order.
random.seed(42)
np.random.seed(42)

In [4]:
def _env_flag(name):
    """Read the required environment variable `name` as a boolean flag.

    The flags used to be parsed with eval(), which executes whatever text the
    environment (or the .env file) contains. Only the literal spellings
    True/False/1/0 (case-insensitive, surrounding whitespace ignored) are
    accepted now.

    Raises:
        KeyError: if the variable is not set (same as the previous behaviour).
        ValueError: if the value is not a recognised boolean spelling.
    """
    raw = os.environ[name]
    normalized = raw.strip().lower()
    if normalized in ('true', '1'):
        return True
    if normalized in ('false', '0'):
        return False
    raise ValueError(f"{name} must be one of True/False/1/0, got {raw!r}")


# Upper bound on the number of options per question, correct answer included.
max_n_choices = int(os.environ['MAX_N_CHOICES'])
# Optional prompt variant; 'COT' switches the prompt ending to a chain-of-thought cue.
prompt_type = os.environ.get('PROMPT_TYPE', None)
# Optional chain-of-thought flavour; only read when prompt_type == 'COT'.
cot_style = os.environ.get('COT_STYLE', None)
# When true, the option texts are blanked out (bias test).
empty_mcp = _env_flag('EMPTY_MCP')
# When true, every question is emitted once per rotation of its options.
rotate_mcp = _env_flag('ROTATE_MCP')

In [5]:
def idx_to_ltr(idx):
    """Map a zero-based option index to its letter label (0 -> 'A', 1 -> 'B', ...)."""
    first_letter_code = ord("A")
    return chr(first_letter_code + idx)

@dataclass
class QuestionPart:
    """One piece of a prompt: free text with an optional label in front of it."""
    text: str
    tag: str = None  # when set, the part renders as "<tag>: <text>"

    def __str__(self):
        """Render the part, prefixing the tag only when one was given."""
        return self.text if self.tag is None else f"{self.tag}: {self.text}"

@dataclass
class Question:
    """A multiple-choice question: prompt parts, answer options and the gold index."""
    parts: list  # rendered with str(), one per line, at the top of the prompt
    choices: list  # answer options; lettered A, B, C, ... in the natural prompt
    answer_idx: int  # position of the correct option inside `choices`
    task: str = None
    prompt_type: str = None  # 'COT' selects a chain-of-thought prompt ending
    cot_style: str = None  # 'STANDARD' / 'EXPERT' / 'INDUCTIVE'; only read for 'COT'

    def get_n_choices(self):
        """Return how many answer options the question has."""
        return len(self.choices)

    def get_answer_str(self):
        """Return the text of the correct option."""
        return self.choices[self.answer_idx]

    def _get_prompt(self, include_choices):
        """Assemble the prompt text.

        Every part is written on its own line; when `include_choices` is true the
        lettered options follow. The ending is a chain-of-thought cue when
        prompt_type is 'COT' and cot_style is one of the known styles, otherwise
        the default "answer in the first line" instruction.
        """
        pieces = [f"{str(part)}\n" for part in self.parts]
        if include_choices:
            pieces.extend(
                f"{idx_to_ltr(pos)}. {option}\n"
                for pos, option in enumerate(self.choices)
            )
        body = "".join(pieces)

        cot_endings = {
            'STANDARD': "Answer: Let me think step by step.\n",
            'EXPERT': "Answer: Let me think step by step as a reliability engineer.\n",
            'INDUCTIVE': "Answer: Let's use step by step inductive reasoning, given the domain specific nature of the question.\n",
        }
        if self.prompt_type == 'COT' and self.cot_style in cot_endings:
            return body + cot_endings[self.cot_style]
        return body + "Please output the answer in the first line.\nAnswer:\n"

    def get_natural_prompt(self):
        """Prompt that lists the lettered answer options."""
        return self._get_prompt(include_choices=True)

    def get_brown_prompt(self):
        """Prompt without the answer options."""
        return self._get_prompt(include_choices=False)

    def strong_shuffle(self):
        """Shuffle the options in place until the answer text leaves its original slot.

        Does nothing when all options are identical, since no shuffle could move
        the answer. `answer_idx` is updated to the first position holding the
        answer text.
        """
        if len(set(self.choices)) == 1:
            return

        original_slot = self.answer_idx
        target = self.get_answer_str()
        # The original slot holds the answer on entry, so at least one shuffle
        # always happens; keep going until that slot shows a different text.
        while True:
            random.shuffle(self.choices)
            self.answer_idx = self.choices.index(target)
            if self.choices[original_slot] != target:
                break

    def permute_choices(self, perm):
        """Reorder the options so that new position j holds old option perm[j].

        `answer_idx` is moved to the position in `perm` that refers to the old
        answer index.
        """
        reordered = [self.choices[src] for src in perm]
        self.choices = reordered
        self.answer_idx = perm.index(self.answer_idx)

class Exemplar(Question):
    """A solved question: the prompts end with the correct answer appended."""

    def get_natural_prompt(self):
        """Natural prompt followed by a space and the correct option's letter."""
        # NOTE(review): the base prompt ends with a newline, so the letter lands
        # on its own line with a leading space -- confirm this is the intended layout.
        base_prompt = super().get_natural_prompt()
        return f"{base_prompt} {idx_to_ltr(self.answer_idx)}"

    def get_brown_prompt(self):
        """Choice-free prompt followed by a space and the correct option's text."""
        base_prompt = super().get_brown_prompt()
        answer_text = self.get_answer_str()
        return f"{base_prompt} {answer_text}"

In [6]:
# Instruction plus FMEA/FMECA background text placed at the top of every prompt.
# Kept as a list because the generation loops draw from it with random.choice().
Passage = ["Read the given passage, question and select the most appropriate answer by indicating the associated letter.\nPassage: Failure Mode and Effect Analysis (FMEA) or Failure Mode, Effects, and Criticality Analysis (FMECA) are conducted to identify anticipated faults, symptoms, and potential parameters that indicate the presence or occurrence of faults. \n\n The FMEA and FMECA audits provide information on the parameters to be measured for specific failure modes. These parameters typically indicate a fault condition through either an increase or decrease in a particular variable measured by sensors. \n\n You will receive a specific failure mode and a list of parameters being monitored via sensor variables.\n"]

# Question templates for items that carry a 'failure_mode': the model must pick the
# sensor that IS relevant. The {asset_class} and {failure_mode} placeholders are
# substituted with str.replace(); the list index of the sampled template is
# recorded in the output as "PositiveQuestionsFailures_<index>".
PositiveQuestionsFailures = [
    "For {asset_class}, if a failure event {failure_mode} occurs, which sensor out of the choices is the most relevant sensor regarding the occurrence of the failure event?",
    "When a {asset_class} has {failure_mode}, which sensor out of the choices should be the sensor to be monitored for this failure if I want to build an anomaly detection model?",
    "Which sensor out of the choices can indicate the presence of {failure_mode} in asset {asset_class}?",
    "In {asset_class}, when {failure_mode} occurs, which sensor from the choices is most critical in detecting the occurrence of the failure event?",
    "Which sensor among the choices best correlates with the presence of {failure_mode} in asset {asset_class}?",
    "For {asset_class}, if {failure_mode} happens, which sensor should be prioritized for monitoring this specific failure?",
    "Which sensor out of the choices provides the strongest indication of {failure_mode} in {asset_class}?",
    "When dealing with {failure_mode} in {asset_class}, which sensor among the choices has the highest relevance in detecting this issue?",
 ]

# Negated counterparts of the templates above: the model must pick the sensor that
# is NOT relevant to the failure mode. Same placeholders; the sampled index is
# recorded as "NegationQuestionsFailures_<index>".
NegationQuestionsFailures = [
    "For {asset_class}, if a failure event {failure_mode} occurs, which sensor out of the choices is not relevant regarding the occurrence of the failure event?",
    "When a {asset_class} has {failure_mode}, which sensor out of the choices should not be the sensor to be monitored for this failure if I want to build an anomaly detection model?",
    "Which sensor out of the choices does not indicate the presence of {failure_mode} in asset {asset_class}?",
    "Which sensor from the choices does not contribute significantly to detecting {failure_mode} in {asset_class}?",
    "For {asset_class}, if {failure_mode} occurs, which sensor among the choices is least likely to be relevant in identifying this failure?",
    "When considering {failure_mode} in {asset_class}, which sensor should be disregarded from the choices for monitoring this failure?",
    "Which sensor out of the choices is not effective in indicating the presence of {failure_mode} in {asset_class}?",
    "In {asset_class}, which sensor among the choices is least useful for detecting {failure_mode}?",

]

In [7]:
# Question templates for items without a 'failure_mode' key (sensor-centred items):
# the model must pick the failure mode that IS relevant to an abnormal sensor
# reading. The {asset_class} and {sensor} placeholders are substituted with
# str.replace(); the sampled index is recorded as "PositiveQuestionsSensors_<index>".
PositiveQuestionsSensors = [
    "In the context of {asset_class}, which failure mode is most relevant when {sensor} shows abnormal readings?",
    "What is the most relevant failure mode for {asset_class} if {sensor} exhibits abnormal readings?",
    "Which failure mode should be considered for {asset_class} when abnormal readings is detected by {sensor}?",
    "For {asset_class}, which failure mode is pertinent if {sensor} registers abnormal readings?",
    "When {sensor} in {asset_class} displays abnormal readings, which failure mode is the most applicable?",
    "If {sensor} in {asset_class} shows abnormal readings, which failure mode is most significant?",
    "Which failure mode is most relevant for {asset_class} if there are abnormal readings from {sensor}?",
    "For {asset_class}, what is the key failure mode when {sensor} has abnormal readings?",
    "In {asset_class}, which failure mode is most important if {sensor} shows abnormal readings?",
    "When {sensor} detects abnormal readings in {asset_class}, which failure mode is the most relevant?"
]

# Negated counterparts: the model must pick the failure event that is NOT relevant
# to the abnormal sensor reading. Same placeholders; the sampled index is recorded
# as "NegationQuestionsSensors_<index>".
NegationQuestionsSensors = [
    "In the context of {asset_class}, which failure event is not relevant when the sensor {sensor} shows an abnormal reading?",
    "What is the irrelevant failure event for {asset_class} if the sensor {sensor} exhibits an abnormal reading?",
    "Which failure event should be excluded for {asset_class} when an abnormal reading is detected by the sensor {sensor}?",
    "For {asset_class}, which failure event is not pertinent if the sensor {sensor} registers an abnormal reading?",
    "When the sensor {sensor} in {asset_class} displays an abnormal reading, which failure event is not applicable?",
    "If the sensor {sensor} in {asset_class} shows an abnormal reading, which failure event is insignificant?",
    "Which failure event is irrelevant for {asset_class} if there is an abnormal reading from the sensor {sensor}?",
    "For {asset_class}, what is the non-relevant failure event when the sensor {sensor} has an abnormal reading?",
    "In {asset_class}, which failure event is unimportant if the sensor {sensor} shows an abnormal reading?",
    "When an abnormal reading is detected by the sensor {sensor} in {asset_class}, which failure event is not relevant?"
]


In [8]:
# Source items for the negated ("which one is NOT relevant") questions.
with open('data/single_answer_negatives.json', 'r') as negatives_file:
    data_irrelevant = json.load(negatives_file)

# Source items for the positive ("which one IS relevant") questions.
with open('data/single_answer.json', 'r') as positives_file:
    data_relevant = json.load(positives_file)

# Single answer

## Prepare for relevant multiple choice

In [9]:
# Build one prompt record (or one per rotation when rotate_mcp is set) for every
# positive item. The order of the RNG calls is kept exactly as before so seeded
# runs still sample the same templates, distractors and option order.
res_relevant = []
for i, item in enumerate(data_relevant):
    sampled_passage = random.choice(Passage)
    if 'failure_mode' in item:
        sampled_question = random.choice(PositiveQuestionsFailures)
        sampled_question_idx = "PositiveQuestionsFailures_" + str(PositiveQuestionsFailures.index(sampled_question))
        prepared_question = sampled_question.replace('{failure_mode}', item['failure_mode'])
    else:
        sampled_question = random.choice(PositiveQuestionsSensors)
        sampled_question_idx = "PositiveQuestionsSensors_" + str(PositiveQuestionsSensors.index(sampled_question))
        prepared_question = sampled_question.replace('{sensor}', item['sensor'])
    prepared_question = prepared_question.replace('{asset_class}', item['asset_class'])
    parts = [
        QuestionPart(text=sampled_passage, tag=None),
        QuestionPart(text=prepared_question, tag="Question"),
    ]
    # Rows of (option text, label); the label column is parsed as int, 1 = correct.
    qa_pairs = np.array([(key, item['mc_targets'][key]) for key in item['mc_targets']])
    answer_idx = np.where(qa_pairs[:,1].astype(int)==1)[0][0]
    incorrect_idxs = np.where(qa_pairs[:,1].astype(int)==0)[0]
    if len(incorrect_idxs) == 0:
        # Nothing to contrast the correct answer with -> not a usable MC question.
        continue
    n_choices = min(max_n_choices-1, len(incorrect_idxs))
    sampled_incorrect_idxs = np.random.choice(incorrect_idxs, n_choices, replace=False)
    incorrect_answers = [qa_pairs[idx] for idx in sampled_incorrect_idxs]
    correct_answer = np.array([qa_pairs[answer_idx]])
    all_answers = np.concatenate([correct_answer, incorrect_answers])
    # shuffle, so the answer is in a random position
    np.random.shuffle(all_answers)
    answer_idx = int(np.argmax(all_answers[:, 1].astype(int)))
    if empty_mcp:
        # used for bias test
        all_answers[:, 0] = ''
    q = Question(parts=parts, choices=all_answers[:, 0].tolist(), answer_idx=answer_idx,
                 prompt_type=prompt_type, cot_style=cot_style)
    n_rotations = q.get_n_choices() if rotate_mcp else 1
    # `rotations` is the permutation of the ORIGINAL option order that `q`
    # currently shows; it starts as the identity.
    rotations = list(range(q.get_n_choices()))
    # Permutation that shifts every option one slot to the right.
    one_step = rotations[-1:] + rotations[:-1]
    for rotation_no in range(n_rotations):
        if rotation_no > 0:
            # permute_choices() mutates q, so each pass must apply a single one-step
            # shift. The previous code re-applied the ever-growing cumulative
            # permutation to the already rotated question, which produced shifts of
            # 0, 1, 3, 6, ... and made the recorded letter/rotation disagree with
            # the prompt from the third rotation on.
            q.permute_choices(one_step)
            rotations = rotations[-1:] + rotations[:-1]
        item_dict = {
            'input': q.get_natural_prompt(),
            'output': q.get_answer_str(),
            "relevancy": item["question"],
            "sampled_question_idx": sampled_question_idx,
            'question_type': 'mcp1_positive',
            'question_idx': i,
            # Read the letter from q itself so it always matches the prompt.
            'answer_letter': idx_to_ltr(q.answer_idx),
            'rotation': rotations
        }
        res_relevant.append(item_dict)

## Prepare for irrelevant multiple choice

In [10]:
# Build one prompt record (or one per rotation when rotate_mcp is set) for every
# negated item. The order of the RNG calls is kept exactly as before so seeded
# runs still sample the same templates, distractors and option order.
res_irrelevant = []
for i, item in enumerate(data_irrelevant):
    sampled_passage = random.choice(Passage)
    if 'failure_mode' in item:
        sampled_question = random.choice(NegationQuestionsFailures)
        sampled_question_idx = "NegationQuestionsFailures_" + str(NegationQuestionsFailures.index(sampled_question))
        prepared_question = sampled_question.replace('{failure_mode}', item['failure_mode'])
    else:
        sampled_question = random.choice(NegationQuestionsSensors)
        sampled_question_idx = "NegationQuestionsSensors_" + str(NegationQuestionsSensors.index(sampled_question))
        prepared_question = sampled_question.replace('{sensor}', item['sensor'])
    prepared_question = prepared_question.replace('{asset_class}', item['asset_class'])
    parts = [
        QuestionPart(text=sampled_passage, tag=None),
        QuestionPart(text=prepared_question, tag="Question"),
    ]
    # Rows of (option text, label); the label column is parsed as int, 1 = correct.
    qa_pairs = np.array([(key, item['mc_targets'][key]) for key in item['mc_targets']])
    answer_idx = np.where(qa_pairs[:,1].astype(int)==1)[0][0]
    incorrect_idxs = np.where(qa_pairs[:,1].astype(int)==0)[0]
    if len(incorrect_idxs) == 0:
        # Nothing to contrast the correct answer with -> not a usable MC question.
        continue
    n_choices = min(max_n_choices-1, len(incorrect_idxs))
    sampled_incorrect_idxs = np.random.choice(incorrect_idxs, n_choices, replace=False)
    incorrect_answers = [qa_pairs[idx] for idx in sampled_incorrect_idxs]
    correct_answer = np.array([qa_pairs[answer_idx]])
    all_answers = np.concatenate([correct_answer, incorrect_answers])
    # shuffle, so the answer is in a random position
    np.random.shuffle(all_answers)
    answer_idx = int(np.argmax(all_answers[:, 1].astype(int)))
    if empty_mcp:
        # used for bias test
        all_answers[:, 0] = ''
    q = Question(parts=parts, choices=all_answers[:, 0].tolist(), answer_idx=answer_idx,
                 prompt_type=prompt_type, cot_style=cot_style)
    # Set explicitly for every item: this cell previously never initialised
    # n_rotations when rotate_mcp was off and only worked because the value
    # leaked in from an earlier cell.
    n_rotations = q.get_n_choices() if rotate_mcp else 1
    # `rotations` is the permutation of the ORIGINAL option order that `q`
    # currently shows; it starts as the identity.
    rotations = list(range(q.get_n_choices()))
    # Permutation that shifts every option one slot to the right.
    one_step = rotations[-1:] + rotations[:-1]
    for rotation_no in range(n_rotations):
        if rotation_no > 0:
            # permute_choices() mutates q, so each pass must apply a single one-step
            # shift. The previous code re-applied the ever-growing cumulative
            # permutation to the already rotated question, which produced shifts of
            # 0, 1, 3, 6, ... and made the recorded letter/rotation disagree with
            # the prompt from the third rotation on.
            q.permute_choices(one_step)
            rotations = rotations[-1:] + rotations[:-1]
        item_dict = {
            'input': q.get_natural_prompt(),
            'output': q.get_answer_str(),
            'sampled_question_idx': sampled_question_idx,
            "relevancy": item["question"],
            'question_type': 'mcp1_negative',
            'question_idx': i,
            # Read the letter from q itself so it always matches the prompt.
            'answer_letter': idx_to_ltr(q.answer_idx),
            'rotation': rotations
        }
        res_irrelevant.append(item_dict)

In [11]:
# Sanity check: show the first positive prompt followed by its expected answer text.
first_relevant = res_relevant[0]
print(first_relevant['input'], first_relevant['output'], sep='\n')

Read the given passage, question and select the most appropriate answer by indicating the associated letter.
Passage: Failure Mode and Effect Analysis (FMEA) or Failure Mode, Effects, and Criticality Analysis (FMECA) are conducted to identify anticipated faults, symptoms, and potential parameters that indicate the presence or occurrence of faults. 

 The FMEA and FMECA audits provide information on the parameters to be measured for specific failure modes. These parameters typically indicate a fault condition through either an increase or decrease in a particular variable measured by sensors. 

 You will receive a specific failure mode and a list of parameters being monitored via sensor variables.

Question: For electric motor, if a failure event rotor windings fault occurs, which sensor out of the choices is the most relevant sensor regarding the occurrence of the failure event?
A. partial discharge
B. resistance
C. oil debris
D. current
E. voltage
Please output the answer in the first

In [12]:
# Sanity check: show the first negated prompt followed by its expected answer text.
first_irrelevant = res_irrelevant[0]
print(first_irrelevant['input'], first_irrelevant['output'], sep='\n')

Read the given passage, question and select the most appropriate answer by indicating the associated letter.
Passage: Failure Mode and Effect Analysis (FMEA) or Failure Mode, Effects, and Criticality Analysis (FMECA) are conducted to identify anticipated faults, symptoms, and potential parameters that indicate the presence or occurrence of faults. 

 The FMEA and FMECA audits provide information on the parameters to be measured for specific failure modes. These parameters typically indicate a fault condition through either an increase or decrease in a particular variable measured by sensors. 

 You will receive a specific failure mode and a list of parameters being monitored via sensor variables.

Question: For electric motor, if rotor windings fault occurs, which sensor among the choices is least likely to be relevant in identifying this failure?
A. cooling gas
B. current
C. vibration
D. power
E. voltage
Please output the answer in the first line.
Answer:

voltage


In [13]:
# Derive the output path from the run configuration:
#   data/prepared_mcp1[_<prompt_type>][_<cot_style>][_empty][_rotate].json
result_fname = 'data/prepared_mcp1'
if prompt_type is not None and not empty_mcp:
    result_fname = f'data/prepared_mcp1_{prompt_type.lower()}'
# COT_STYLE is optional; without this guard an unset style crashed on None.lower().
# The prompt builder already falls back to the default ending in that case.
if prompt_type == 'COT' and cot_style is not None:
    result_fname += f'_{cot_style.lower()}'
# NOTE(review): the _empty/_rotate suffixes are only added when PROMPT_TYPE is set,
# so a run with EMPTY_MCP or ROTATE_MCP but no PROMPT_TYPE writes its sampled subset
# to data/prepared_mcp1.json -- confirm that this is intended.
if prompt_type is not None and empty_mcp:
    result_fname += '_empty'
if prompt_type is not None and rotate_mcp:
    result_fname += '_rotate'
result_fname += '.json'

In [14]:
# Only questions with exactly this many options take part in the sampled subsets.
N_OPTIONS = 5
# Number of records (empty_mcp) or questions (rotate_mcp) drawn per group.
N_SAMPLES_PER_GROUP = 50

# Default: write every generated record.
result = res_relevant + res_irrelevant
if empty_mcp:
    # Bias test: draw a fixed-size sample per relevancy group among the
    # N_OPTIONS-choice records. random.sample raises ValueError when a group has
    # fewer than N_SAMPLES_PER_GROUP records.
    full_size_items = [item for item in result if len(item['rotation']) == N_OPTIONS]
    sensor_five_options = [item for item in full_size_items if 'for_sensor' in item['relevancy']]
    mode_five_options = [item for item in full_size_items if 'for_failure_mode' in item['relevancy']]
    sampled_sensor = random.sample(sensor_five_options, N_SAMPLES_PER_GROUP)
    sampled_mode = random.sample(mode_five_options, N_SAMPLES_PER_GROUP)
    result = sampled_sensor + sampled_mode
elif rotate_mcp:
    # Rotation test: pick questions through their unrotated record (rotation ==
    # identity), then keep every rotation of the picked questions.
    identity_rotation = list(range(N_OPTIONS))
    all_five_option_relevant = [item['question_idx'] for item in res_relevant if item['rotation'] == identity_rotation]
    all_five_option_irrelevant = [item['question_idx'] for item in res_irrelevant if item['rotation'] == identity_rotation]
    sampled_idx_relevant = random.sample(all_five_option_relevant, N_SAMPLES_PER_GROUP)
    sampled_idx_irrelevant = random.sample(all_five_option_irrelevant, N_SAMPLES_PER_GROUP)
    # Sets make the membership filter linear instead of quadratic; the record
    # order (and therefore the output file) is unchanged.
    picked_relevant = set(sampled_idx_relevant)
    picked_irrelevant = set(sampled_idx_irrelevant)
    sampled_five_option_relevant = [item for item in res_relevant if item['question_idx'] in picked_relevant]
    sampled_five_option_irrelevant = [item for item in res_irrelevant if item['question_idx'] in picked_irrelevant]
    result = sampled_five_option_relevant + sampled_five_option_irrelevant
with open(result_fname, 'w') as f:
    json.dump(result, f, indent=4)

In [15]:
# Record counts: positive prompts, negated prompts, and records written to disk.
len(res_relevant), len(res_irrelevant), len(result)

(923, 1744, 2667)