In [1]:
import random
from dataclasses import dataclass
from itertools import permutations
import json
import numpy as np

In [2]:
random.seed(42)

In [3]:
max_n_choices = 5

In [4]:
def idx_to_ltr(idx):
    return chr(idx + ord("A"))

@dataclass
class QuestionPart:
    text: str
    tag: str = None

    def __str__(self):
        if self.tag is not None:
            return f"{self.tag}: {self.text}"
        else:
            return self.text

@dataclass
class Question:
    parts: list
    choices: list
    answer_idx: int
    task: str = None

    def get_n_choices(self):
        return len(self.choices)

    def get_answer_str(self):
        if type(self.answer_idx) != int:
            return ','.join([self.choices[idx] for idx in self.answer_idx])
        return self.choices[self.answer_idx]

    def _get_prompt(self, include_choices):
        prompt = ""
        for part in self.parts:
            prompt += f"{str(part)}\n"
        if include_choices:
            for i, choice in enumerate(self.choices):
                prompt += f"{idx_to_ltr(i)}. {choice}\n"
        return prompt + "\nAnswer:"
    
    def _get_new_prompt(self, include_choices):
        prompt = ""
        for part in self.parts:
            prompt += f"{str(part)}\n"
        if include_choices:
            options = []
            for i, choice in enumerate(self.choices):
                prompt += f"{idx_to_ltr(i)}. {choice}\n"
                options.append(idx_to_ltr(i))
            prompt += "Choose one of the following: "+','.join(options) + '\n'
        return prompt + "Answer:"

    def get_natural_prompt(self):
        return self._get_prompt(include_choices=True)
        # return self._get_prompt(include_choices=True)

    def get_brown_prompt(self):
        return self._get_prompt(include_choices=False)

    def strong_shuffle(self):
        if len(set(self.choices)) == 1:
            return

        answer_idx = self.answer_idx
        answer_str = self.get_answer_str()
        while self.choices[answer_idx] == answer_str:
            random.shuffle(self.choices)
            self.answer_idx = self.choices.index(answer_str)

    def permute_choices(self, perm):
        self.choices = [self.choices[i] for i in perm]
        self.answer_idx = perm.index(self.answer_idx)

class Exemplar(Question):

    def get_natural_prompt(self):
        prompt = super().get_natural_prompt()
        answer_ltr = idx_to_ltr(self.answer_idx)
        return f"{prompt} {answer_ltr}"

    def get_brown_prompt(self):
        prompt = super().get_brown_prompt()
        return f"{prompt} {self.get_answer_str()}"

In [5]:
Passage = ["Read the given passgage, question and select the most appropriate answer by indicating the associated letter.\nFailure Mode and Effect Analysis (FMEA) or Failure Mode, Effects, and Criticality Analysis (FMECA) are conducted to identify anticipated faults, symptoms, and potential parameters that indicate the presence or occurrence of faults. \n\n The FMEA and FMECA audits provide information on the parameters to be measured for specific failure modes. These parameters typically indicate a fault condition through either an increase or decrease in a particular variable measured by sensors. \n\n You will receive a specific failure mode and a list of parameters being monitored via sensor variables."]

PositiveQuestionsFailures = [
    "For {asset_class}, if a failure event {failure_mode} occurs, which sensors out of the choices are the most relevant sensor regarding the occurrence of the failure event?",
    "When a {asset_class} has {failure_mode}, which sensors out of the choices should be the sensor to be monitored for this failure if I want to build an anomaly detection model?",
    "Which sensors out of the choices can indicate the presence of {failure_mode} in asset {asset_class}?",
    "In {asset_class}, when {failure_mode} occurs, which sensors from the choices are most critical in detecting the occurrence of the failure event?",
    "Which sensors among the choices best correlate with the presence of {failure_mode} in asset {asset_class}?",
    "For {asset_class}, if {failure_mode} happens, which sensors should be prioritized for monitoring this specific failure?",
    "Which sensors out of the choices provide the strongest indication of {failure_mode} in {asset_class}?",
    "When dealing with {failure_mode} in {asset_class}, which sensors among the choices have the highest relevance in detecting this issue?",
 ]

NegationQuestionsFailures = [
    "For {asset_class}, if a failure event {failure_mode} occurs, which sensors out of the choices are not relevant regarding the occurrence of the failure event?",
    "When a {asset_class} has {failure_mode}, which sensors out of the choices should not be the sensor to be monitored for this failure if I want to build an anomaly detection model?",
    "Which sensors out of the choices do not indicate the presence of {failure_mode} in asset {asset_class}?",
    "Which sensors from the choices do not contribute significantly to detecting {failure_mode} in {asset_class}?",
    "For {asset_class}, if {failure_mode} occurs, which sensors among the choices are least likely to be relevant in identifying this failure?",
    "When considering {failure_mode} in {asset_class}, which sensors should be disregarded from the choices for monitoring this failure?",
    "Which sensors out of the choices are not effective in indicating the presence of {failure_mode} in {asset_class}?",
    "In {asset_class}, which sensors among the choices are least useful for detecting {failure_mode}?",

]

In [6]:
PositiveQuestionsSensors = [
    "In the context of {asset_class}, which failure modes are most relevant when {sensor} shows abnormal readings?",
    "What are the most relevant failure modes for {asset_class} if {sensor} exhibits abnormal readings?",
    "Which failure modes should be considered for {asset_class} when abnormal readings are detected by {sensor}?",
    "For {asset_class}, which failure modes are pertinent if {sensor} registers abnormal readings?",
    "When {sensor} in {asset_class} displays abnormal readings, which failure modes are the most applicable?",
    "If {sensor} in {asset_class} shows abnormal readings, which failure modes are most significant?",
    "Which failure modes are most relevant for {asset_class} if there are abnormal readings from {sensor}?",
    "For {asset_class}, what are the key failure modes when {sensor} has abnormal readings?",
    "In {asset_class}, which failure modes are most important if {sensor} shows abnormal readings?",
    "When {sensor} detects abnormal readings in {asset_class}, which failure modes are the most relevant?"
]

NegationQuestionsSensors = [
    "In the context of {asset_class}, which failure events are not relevant when the sensor {sensor} shows an abnormal reading?",
    "What are the irrelevant failure events for {asset_class} if the sensor {sensor} exhibits an abnormal reading?",
    "Which failure events should be excluded for {asset_class} when an abnormal reading is detected by the sensor {sensor}?",
    "For {asset_class}, which failure events are not pertinent if the sensor {sensor} registers an abnormal reading?",
    "When the sensor {sensor} in {asset_class} displays an abnormal reading, which failure events are not applicable?",
    "If the sensor {sensor} in {asset_class} shows an abnormal reading, which failure events are insignificant?",
    "Which failure events are irrelevant for {asset_class} if there is an abnormal reading from the sensor {sensor}?",
    "For {asset_class}, what are the non-relevant failure events when the sensor {sensor} has an abnormal reading?",
    "In {asset_class}, which failure events are unimportant if the sensor {sensor} shows an abnormal reading?",
    "When an abnormal reading is detected by the sensor {sensor} in {asset_class}, which failure events are not relevant?"
]


In [7]:
with open('data/multi_answer_negatives.json', 'r') as f:
    data_irrelevant = json.loads(f.read())
with open('data/multi_answer.json', 'r') as f:
    data_relevant = json.loads(f.read())

# MCP2 2 answers

## Prepare for relevant multiple choice multiple answers

In [8]:
res_relevant = []
for i, item in enumerate(data_relevant):
    sampled_passage = random.choice(Passage)
    if 'failure_mode' in item:
        sampled_question = random.choice(PositiveQuestionsFailures)
        sampled_question_idx = "PositiveQuestionsFailures_" + str(PositiveQuestionsFailures.index(sampled_question))
        prepared_question = sampled_question.replace('{failure_mode}', item['failure_mode'])
    else:
        sampled_question = random.choice(PositiveQuestionsSensors)
        sampled_question_idx = "PositiveQuestionsSensors_" + str(PositiveQuestionsSensors.index(sampled_question))
        prepared_question = sampled_question.replace('{sensor}', item['sensor'])
    prepared_question = prepared_question.replace('{asset_class}', item['asset_class'])
    parts = [
        QuestionPart(text=sampled_passage, tag="Passage"),
        QuestionPart(text=prepared_question, tag="Question"),
    ]
    qa_pairs = np.array([(key, item['mc_targets'][key]) for key in item['mc_targets']])
    all_answer_idxs = np.where(qa_pairs[:,1].astype(int)==1)[0]
    incorrect_idxs = np.where(qa_pairs[:,1].astype(int)==0)[0]
    if len(incorrect_idxs) <= 2 or len(all_answer_idxs) <= 1:
        # no possible multiple choice to create
        continue
    # get all the positive pairs and sample negative
    for i in range(len(all_answer_idxs)):
        for j in range(i+1, len(all_answer_idxs)):
            correct_answers = [qa_pairs[all_answer_idxs[i]], qa_pairs[all_answer_idxs[j]]]
            n_choices = min(max_n_choices - 2, max(1, len(incorrect_idxs)))
            sampled_incorrect_idxs = np.random.choice(incorrect_idxs, n_choices, replace=False)
            incorrect_answers = [qa_pairs[idx] for idx in sampled_incorrect_idxs]
            all_answers = np.stack(correct_answers + incorrect_answers)
            # shuffle, so the answer is in a random position
            np.random.shuffle(all_answers)
            assert len(all_answers) == max_n_choices
            answer_idxs = np.where(all_answers[:, 1].astype(int)==1)[0]
            q = Question(parts=parts, choices=all_answers[:, 0], answer_idx=answer_idxs)
            item_dict = {
                'input': q.get_natural_prompt(), 
                'output': q.get_answer_str(), 
                'relevancy': item["question"],
                'sampled_question_idx': sampled_question_idx,
                'question_type': 'mcp2_positive_separate_answers',
                'answer_letter': ','.join([idx_to_ltr(answer_idx) for answer_idx in answer_idxs])
            }
            res_relevant.append(item_dict)
print(res_relevant[0])

{'input': 'Passage: Read the given passgage, question and select the most appropriate answer by indicating the associated letter.\nFailure Mode and Effect Analysis (FMEA) or Failure Mode, Effects, and Criticality Analysis (FMECA) are conducted to identify anticipated faults, symptoms, and potential parameters that indicate the presence or occurrence of faults. \n\n The FMEA and FMECA audits provide information on the parameters to be measured for specific failure modes. These parameters typically indicate a fault condition through either an increase or decrease in a particular variable measured by sensors. \n\n You will receive a specific failure mode and a list of parameters being monitored via sensor variables.\nQuestion: For electric motor, if a failure event rotor windings fault occurs, which sensors out of the choices are the most relevant sensor regarding the occurrence of the failure event?\nA. power\nB. current\nC. voltage\nD. oil debris\nE. resistance\n\nAnswer:', 'output': 'p

## Prepare for irrelevant multiple choice multiple answers

In [9]:
res_irrelevant = []
for item in data_irrelevant:
    sampled_passage = random.choice(Passage)
    if 'failure_mode' in item:
        sampled_question = random.choice(NegationQuestionsFailures)
        sampled_question_idx = "NegationQuestionsFailures_" + str(NegationQuestionsFailures.index(sampled_question))
        prepared_question = sampled_question.replace('{failure_mode}', item['failure_mode'])
    else:
        sampled_question = random.choice(NegationQuestionsSensors)
        sampled_question_idx = "NegationQuestionsSensors_" + str(NegationQuestionsSensors.index(sampled_question))
        prepared_question = sampled_question.replace('{sensor}', item['sensor'])
    prepared_question = prepared_question.replace('{asset_class}', item['asset_class'])
    parts = [
        QuestionPart(text=sampled_passage, tag="Passage"),
        QuestionPart(text=prepared_question, tag="Question"),
    ]
    qa_pairs = np.array([(key, item['mc_targets'][key]) for key in item['mc_targets']])
    all_answer_idxs = np.where(qa_pairs[:,1].astype(int)==1)[0]
    incorrect_idxs = np.where(qa_pairs[:,1].astype(int)==0)[0]
    if len(incorrect_idxs) <= 2 or len(all_answer_idxs) <= 1:
        # no possible multiple choice to create. We need at least 2 correct in the multiple choice
        continue
    # get all the positive pairs and sample negative
    for i in range(len(all_answer_idxs)):
        for j in range(i+1, len(all_answer_idxs)):
            correct_answers = [qa_pairs[all_answer_idxs[i]], qa_pairs[all_answer_idxs[j]]]
            n_choices = min(max_n_choices - 2, max(1, len(incorrect_idxs)))
            sampled_incorrect_idxs = np.random.choice(incorrect_idxs, n_choices, replace=False)
            incorrect_answers = [qa_pairs[idx] for idx in sampled_incorrect_idxs]
            all_answers = np.stack(correct_answers + incorrect_answers)
            # shuffle, so the answer is in a random position
            np.random.shuffle(all_answers)
            assert len(all_answers) == max_n_choices
            answer_idxs = np.where(all_answers[:, 1].astype(int)==1)[0]
            q = Question(parts=parts, choices=all_answers[:, 0], answer_idx=answer_idxs)
            item_dict = {
                'input': q.get_natural_prompt(), 
                'output': q.get_answer_str(), 
                'relevancy': item["question"],
                'sampled_question_idx': sampled_question_idx,
                'question_type': 'mcp2_negative_separate_answers',
                'answer_letter': ','.join([idx_to_ltr(answer_idx) for answer_idx in answer_idxs])
            }
            res_irrelevant.append(item_dict)

In [10]:
print(res_relevant[0]['input'])
print(res_relevant[0]['output'])

Passage: Read the given passgage, question and select the most appropriate answer by indicating the associated letter.
Failure Mode and Effect Analysis (FMEA) or Failure Mode, Effects, and Criticality Analysis (FMECA) are conducted to identify anticipated faults, symptoms, and potential parameters that indicate the presence or occurrence of faults. 

 The FMEA and FMECA audits provide information on the parameters to be measured for specific failure modes. These parameters typically indicate a fault condition through either an increase or decrease in a particular variable measured by sensors. 

 You will receive a specific failure mode and a list of parameters being monitored via sensor variables.
Question: For electric motor, if a failure event rotor windings fault occurs, which sensors out of the choices are the most relevant sensor regarding the occurrence of the failure event?
A. power
B. current
C. voltage
D. oil debris
E. resistance

Answer:
power,current


In [11]:
print(res_irrelevant[0]['input'])
print(res_irrelevant[0]['output'])

Passage: Read the given passgage, question and select the most appropriate answer by indicating the associated letter.
Failure Mode and Effect Analysis (FMEA) or Failure Mode, Effects, and Criticality Analysis (FMECA) are conducted to identify anticipated faults, symptoms, and potential parameters that indicate the presence or occurrence of faults. 

 The FMEA and FMECA audits provide information on the parameters to be measured for specific failure modes. These parameters typically indicate a fault condition through either an increase or decrease in a particular variable measured by sensors. 

 You will receive a specific failure mode and a list of parameters being monitored via sensor variables.
Question: Which sensors out of the choices do not indicate the presence of rotor windings fault in asset electric motor?
A. cooling gas
B. voltage
C. vibration
D. resistance
E. power

Answer:
voltage,resistance


In [12]:
with open('data/prepared_mcp2_separate_answers.json', 'w') as f:
    f.write(json.dumps(res_relevant + res_irrelevant))

In [13]:
len(res_relevant), len(res_irrelevant)

(1694, 3935)

# MCP2 one answer

## Prepare for relevant

In [8]:
res_relevant = []
for i, item in enumerate(data_relevant):
    sampled_passage = random.choice(Passage)
    if 'failure_mode' in item:
        sampled_question = random.choice(PositiveQuestionsFailures)
        prepared_question = sampled_question.replace('{failure_mode}', item['failure_mode'])
    else:
        sampled_question = random.choice(PositiveQuestionsSensors)
        prepared_question = sampled_question.replace('{sensor}', item['sensor'])
    prepared_question = prepared_question.replace('{asset_class}', item['asset_class'])
    parts = [
        QuestionPart(text=sampled_passage, tag="Passage"),
        QuestionPart(text=prepared_question, tag="Question"),
    ]
    qa_pairs = np.array([(key, item['mc_targets'][key]) for key in item['mc_targets']])
    all_answer_idxs = np.where(qa_pairs[:,1].astype(int)==1)[0]
    incorrect_idxs = np.where(qa_pairs[:,1].astype(int)==0)[0]
    if len(incorrect_idxs) <= 2 or len(all_answer_idxs) <= 2:
        # no possible multiple choice to create
        continue
    # get all the positive pairs and sample negative
    for i in range(len(all_answer_idxs)):
        for j in range(i+1, len(all_answer_idxs)):
            correct_answer = np.array([[qa_pairs[all_answer_idxs[i]][0] + ", " + qa_pairs[all_answer_idxs[j]][0], '1']])
            n_choices = min((max_n_choices-1)*2, len(incorrect_idxs))
            if n_choices%2==1:
                n_choices -=1
            sampled_incorrect_idxs = np.random.choice(incorrect_idxs, n_choices, replace=False)
            sampled_incorrect_idxs = np.expand_dims(sampled_incorrect_idxs, 0).reshape(-1, 2)
            sampled_incorrect_idxs = sampled_incorrect_idxs[:max_n_choices-1, :]
            # print(sampled_incorrect_idxs)
            sampled_incorrect_idxs_2 = np.random.choice(incorrect_idxs, n_choices, replace=False)
            sampled_incorrect_idxs_2 = np.expand_dims(sampled_incorrect_idxs_2, 0).reshape(-1, 2)
            sampled_incorrect_idxs_2 = sampled_incorrect_idxs_2[:max_n_choices-1, :]
            # print(sampled_incorrect_idxs_2)
            incorrect_answers = np.stack([[qa_pairs[idx[0]][0] + ", " + qa_pairs[idx[1]][0], '0'] for idx in np.concatenate((sampled_incorrect_idxs,sampled_incorrect_idxs_2), axis=0)])
            # print(incorrect_answers)
            if len(incorrect_answers) < max_n_choices-1:
                continue
            else:
                incorrect_answers = incorrect_answers[:max_n_choices-1]
            all_answers = np.concatenate([correct_answer, incorrect_answers])
            assert len(all_answers) == max_n_choices
            # shuffle, so the answer is in a random position
            np.random.shuffle(all_answers)
            answer_idxs = np.where(all_answers[:, 1].astype(int)==1)[0]
            q = Question(parts=parts, choices=all_answers[:, 0], answer_idx=answer_idxs)
            item_dict = {
                'input': q.get_natural_prompt(), 
                'output': q.get_answer_str(), 
                'relevancy': item["question"],
                'question_type': 'mcp2_positive_single_answer',
                'answer_letter': ','.join([idx_to_ltr(answer_idx) for answer_idx in answer_idxs])
            }
            res_relevant.append(item_dict)

## Prepare for irrelevant multiple choice multiple answers

In [9]:
res_irrelevant = []
for item in data_irrelevant:
    #todo: remove
    data_irrelevant[0]['mc_targets']['current']=1
    sampled_passage = random.choice(Passage)
    if 'failure_mode' in item:
        sampled_question = random.choice(NegationQuestionsFailures)
        prepared_question = sampled_question.replace('{failure_mode}', item['failure_mode'])
    else:
        sampled_question = random.choice(NegationQuestionsSensors)
        prepared_question = sampled_question.replace('{sensor}', item['sensor'])
    prepared_question = prepared_question.replace('{asset_class}', item['asset_class'])
    parts = [
        QuestionPart(text=sampled_passage, tag="Passage"),
        QuestionPart(text=prepared_question, tag="Question"),
    ]
    qa_pairs = np.array([(key, item['mc_targets'][key]) for key in item['mc_targets']])
    all_answer_idxs = np.where(qa_pairs[:,1].astype(int)==1)[0]
    incorrect_idxs = np.where(qa_pairs[:,1].astype(int)==0)[0]
    if len(incorrect_idxs) <= 1 or len(all_answer_idxs) <= 1:
        # no possible multiple choice to create
        continue
    # get all the positive pairs and sample negative
    for i in range(len(all_answer_idxs)):
        for j in range(i+1, len(all_answer_idxs)):
            correct_answer = np.array([[qa_pairs[all_answer_idxs[i]][0] + ", " + qa_pairs[all_answer_idxs[j]][0], '1']])
            # sampled_incorrect_idxs = np.stack([np.random.choice(incorrect_idxs, 2, replace=False) for _ in range(max_n_choices-1)])
            # incorrect_answers = np.stack([[qa_pairs[idx[0]][0] + ", " + qa_pairs[idx[1]][0], '0'] for idx in sampled_incorrect_idxs])
            correct_answer = np.array([[qa_pairs[all_answer_idxs[i]][0] + ", " + qa_pairs[all_answer_idxs[j]][0], '1']])
            n_choices = min((max_n_choices-1)*2, len(incorrect_idxs))
            if n_choices%2==1:
                n_choices -= 1
            sampled_incorrect_idxs = np.random.choice(incorrect_idxs, n_choices, replace=False)
            sampled_incorrect_idxs = np.expand_dims(sampled_incorrect_idxs, 0).reshape(-1, 2)
            sampled_incorrect_idxs = sampled_incorrect_idxs[:max_n_choices-1, :]
            sampled_incorrect_idxs_2 = np.random.choice(incorrect_idxs, n_choices, replace=False)
            sampled_incorrect_idxs_2 = np.expand_dims(sampled_incorrect_idxs_2, 0).reshape(-1, 2)
            sampled_incorrect_idxs_2 = sampled_incorrect_idxs_2[:max_n_choices-1, :]
            # print(sampled_incorrect_idxs_2)
            incorrect_answers = np.stack([[qa_pairs[idx[0]][0] + ", " + qa_pairs[idx[1]][0], '0'] for idx in np.concatenate((sampled_incorrect_idxs,sampled_incorrect_idxs_2), axis=0)])
            # print(incorrect_answers)
            if len(incorrect_answers) < max_n_choices-1:
                continue
            else:
                incorrect_answers = incorrect_answers[:max_n_choices-1]
            all_answers = np.concatenate([correct_answer, incorrect_answers])
            assert len(all_answers) == max_n_choices
            # shuffle, so the answer is in a random position
            np.random.shuffle(all_answers)
            answer_idxs = np.where(all_answers[:, 1].astype(int)==1)[0]
            q = Question(parts=parts, choices=all_answers[:, 0], answer_idx=answer_idxs)
            item_dict = {
                'input': q.get_natural_prompt(), 
                'output': q.get_answer_str(), 
                'relevancy': item["question"],
                'question_type': 'mcp2_negative_single_answer',
                'answer_letter': ','.join([idx_to_ltr(answer_idx) for answer_idx in answer_idxs])
            }
            res_irrelevant.append(item_dict)

In [10]:
print(res_relevant[0]['input'])
print(res_relevant[0]['output'])

Passage: Read the given passgage, question and select the most appropriate answer by indicating the associated letter.
Failure Mode and Effect Analysis (FMEA) or Failure Mode, Effects, and Criticality Analysis (FMECA) are conducted to identify anticipated faults, symptoms, and potential parameters that indicate the presence or occurrence of faults. 

 The FMEA and FMECA audits provide information on the parameters to be measured for specific failure modes. These parameters typically indicate a fault condition through either an increase or decrease in a particular variable measured by sensors. 

 You will receive a specific failure mode and a list of parameters being monitored via sensor variables.
Question: For electric motor, if a failure event rotor windings fault occurs, which sensors out of the choices are the most relevant sensor regarding the occurrence of the failure event?
A. coast down time, voltage
B. partial discharge, oil debris
C. current, power
D. coast down time, voltage

In [11]:
print(res_irrelevant[0]['input'])
print(res_irrelevant[0]['output'])

Passage: Read the given passgage, question and select the most appropriate answer by indicating the associated letter.
Failure Mode and Effect Analysis (FMEA) or Failure Mode, Effects, and Criticality Analysis (FMECA) are conducted to identify anticipated faults, symptoms, and potential parameters that indicate the presence or occurrence of faults. 

 The FMEA and FMECA audits provide information on the parameters to be measured for specific failure modes. These parameters typically indicate a fault condition through either an increase or decrease in a particular variable measured by sensors. 

 You will receive a specific failure mode and a list of parameters being monitored via sensor variables.
Question: Which sensors out of the choices do not indicate the presence of rotor windings fault in asset electric motor?
A. cooling gas, speed
B. voltage, resistance
C. power, axial flux
D. torque, power
E. temperature, axial flux

Answer:
voltage, resistance


In [12]:
with open('data/prepared_mcp2_single_answer.json', 'w') as f:
    f.write(json.dumps(res_relevant + res_irrelevant))

In [13]:
len(res_relevant), len(res_irrelevant)

(1501, 2608)