<a href="https://colab.research.google.com/github/Penguinbeanie/Capstone-Project/blob/dev_branch/Capstone_Project_Data_Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Loading Model

In [None]:
import google.generativeai as genai
from IPython.display import display, Markdown
from google.colab import userdata
import json

# retrieving the key stored in Colab
key = userdata.get('GOOGLE_API_KEY')

# configure the key for calling GenAI model
genai.configure(api_key=key)

# load model
model = genai.GenerativeModel("gemini-1.5-flash")

# Sorting Original Questionnaire Questions by Question Type

In [None]:
import json
import requests
from google.colab import files

input_files = [
    "https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/original_questionnaire/original_files/questionnaire1.json",
    "https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/original_questionnaire/original_files/questionnaire2.json",
    "https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/original_questionnaire/original_files/questionnaire3.json",
    "https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/original_questionnaire/original_files/questionnaire4.json",
    "https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/original_questionnaire/original_files/questionnaire5.json"
]

# Dictionary to hold questions grouped by type
question_types = {}

# Iterate through each file
for file_url in input_files:
    response = requests.get(file_url)
    data = response.json()
    for entry in data:
        q_type = entry['type']
        if q_type not in question_types:
            question_types[q_type] = []
        question_types[q_type].append(entry)

# Save each question type to separate JSON files
output_files = []
for q_type, questions in question_types.items():
    filename = f"/content/{q_type}.json"
    with open(filename, 'w') as f:
        json.dump(questions, f, indent=4)
    output_files.append(filename)

print("Files have been created.")


# JSON To String For Context

In [None]:
# Load files

import json
import requests

file_path_date = 'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/original_questionnaire/type_seperated/DATE.json'
file_path_multi = 'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/original_questionnaire/type_seperated/MULTI_SELECT.json'
file_path_number = 'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/original_questionnaire/type_seperated/NUMBER.json'
file_path_single = 'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/original_questionnaire/type_seperated/SINGLE_SELECT.json'
file_path_text = 'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/original_questionnaire/type_seperated/TEXT.json'

# Function to load JSON from URL
def load_json_from_url(url):
    response = requests.get(url)
    response.raise_for_status()  # Raise an exception for bad status codes
    return response.json()

# Load JSON data from URLs
try:
    json_data_date = load_json_from_url(file_path_date)
    json_data_multi = load_json_from_url(file_path_multi)
    json_data_number = load_json_from_url(file_path_number)
    json_data_single = load_json_from_url(file_path_single)
    json_data_text = load_json_from_url(file_path_text)

    # Convert JSON data to strings (if needed)
    json_string_date = json.dumps(json_data_date, indent=4)
    json_string_multi = json.dumps(json_data_multi, indent=4)
    json_string_number = json.dumps(json_data_number, indent=4)
    json_string_single = json.dumps(json_data_single, indent=4)
    json_string_text = json.dumps(json_data_text, indent=4)

except requests.exceptions.RequestException as e:
    print(f"Error loading JSON files: {e}")


# Running the Model

In [None]:
# prompt

multiVar = 10
singleVar = 10
numberVar = 50
textVar = 50
dateVar = 50

prompt_multi = f"""
                Using the context below as a template, create {multiVar} more JSON objects. The output should include only the keys "type",
                "question", and "options". Ensure the "type" is always "MULTI-SELECT". The "options" array should contain multiple unique objects,
                each with the "option" key and a meaningful value representing an available choice. No IDs should be included.
                Generate multiple unique examples of questions that make sense in context for the type "MULTI-SELECT".
                Output only valid JSON, with proper line spacing and indentation, without any additional formatting or code block delimiters.

                Topic: Questionnnaire for evaluation. Diverse.

                Context:
                {json_string_multi}
                """

prompt_single = f"""
                Using the context below as a template, create {singleVar} more JSON objects. The output should include only the keys "type",
                "question", and "options". Ensure the "type" is always "SINGLE-SELECT". The "options" array should contain multiple unique objects,
                each with the "option" key and a meaningful value representing an available choice. No IDs should be included.
                Generate multiple unique examples of questions that make sense in context for the type "SINGLE-SELECT".
                Output only valid JSON, with proper line spacing and indentation, without any additional formatting or code block delimiters.

                Topic: Questionnnaire for evaluation. Communications.

                Context:
                {json_string_single}
                """

prompt_text = f"""
                Using the context below as a template, create {textVar} more JSON objects. The output should include only the keys "type",
                "question", and "options". Ensure the "type" is always "TEXT", the "options" array always contains exactly one object with
                the "option" set to "Text", and no IDs are included. Generate multiple unique examples of questions that make sense in context
                for the type "TEXT". They should be open ended. Output only valid JSON, with proper line spacing and indentation, without any additional formatting or code block delimiters.

                Topic: Questionnnaire for evaluation. Communications. (General topic in the field of communications, not directly the word communications)

                Context:
                {json_string_text}
                """

prompt_number = f"""
                Using the context below as a template, create {numberVar} more JSON objects. The output should include only the keys "type",
                "question", and "options". Ensure the "type" is always "NUMBER", the "options" array always contains exactly one object with
                the "option" set to the category the question best, and no IDs are included. Generate multiple unique examples of questions that make sense in context
                for the type "NUMBER". Output only valid JSON, with proper line spacing and indentation, without any additional formatting or code block delimiters.

                Topic: Questionnnaire for evaluation. Convention/Fair.

                Context:
                {json_string_number}
                """

prompt_date = f"""
                Using the context below as a template, create {dateVar} more JSON objects. The output should include only the keys "type",
                "question", and "options". Ensure the "type" is always "DATE", the "options" array always contains exactly one object with
                the "option" set to "Date", and no IDs are included. Generate multiple unique examples of questions that make sense in context
                for the type "DATE". Make sure the question specifically ask for a date. Add "Provide a date." at the end of each question.
                Output only valid JSON, with proper line spacing and indentation, without any additional formatting or code block delimiters.

                Topic: Questionnnaire for evaluation. Healthcare.

                Context:
                {json_string_date}
                """


# responses
responses = {
    #"prompt_multi": "Questionnaire_Multi_Artificial.json",
    #"prompt_single": "Questionnaire_Single_Artificial.json",
    #"prompt_date": "Questionnaire_Date_Artificial.json",
    #"prompt_number": "Questionnaire_Number_Artificial.json",
    "prompt_text": "Questionnaire_Text_Artificial.json"
}

for response_name, response_file in responses.items():
    prompt = globals()[response_name]  # Get the prompt string
    response = model.generate_content(prompt)  # Get model's response

    try:
        response_data = json.loads(response.text)  # Parse the response text as JSON
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON for {response_name}: {e}")
        continue

    # Write the parsed JSON to a file
    with open(response_file, "w") as json_file:
        json.dump(response_data, json_file, indent=4)

    print(f"JSON file '{response_file}' has been created")

#JSON to Dataframe

###MULTI

In [None]:
import json
import pandas as pd
import requests

#Load the JSON data
questions = [
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/artificial_questionnaire/MULTI/Questionnaire_Multi_Artificial_Diverse.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/artificial_questionnaire/MULTI/Questionnaire_Multi_Artificial_HeavyIndustry.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/artificial_questionnaire/MULTI/Questionnaire_Multi_Artificial_Sales.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/artificial_questionnaire/MULTI/Questionnaire_Multi_Artificial_SoftwareDev.json'
]

dfs = []

for question in questions:
    response = requests.get(question)
    data = response.json()

    #Create a DataFrame from the JSON data
    df_temp = pd.DataFrame(data)
    df_temp['options'] = df_temp['options'].apply(lambda x: [item['option'] for item in x])
    dfs.append(df_temp)

df_MULTI = pd.concat(dfs, ignore_index=True)

df_for_csv = df_MULTI.copy()
df_for_csv['options'] = df_for_csv['options'].apply(lambda x: '; '.join(x))
df_for_csv.to_csv('MULTI_combined.csv', index=False)

df_MULTI

###SINGLE

In [None]:
import json
import pandas as pd
import requests

#Load the JSON data

questions = [
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/artificial_questionnaire/SINGLE/Questionnaire_Single_Artificial_ArtIndustry.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/artificial_questionnaire/SINGLE/Questionnaire_Single_Artificial_Communications.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/artificial_questionnaire/SINGLE/Questionnaire_Single_Artificial_Diverse.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/main/artificial_questionnaire/SINGLE/Questionnaire_Single_Artificial_SoftwareDev.json'
]

dfs = []

for question in questions:
    response = requests.get(question)
    data = response.json()

    #Create a DataFrame from the JSON data

    df_temp = pd.DataFrame(data)
    df_temp['options'] = df_temp['options'].apply(lambda x: [item['option'] for item in x])
    dfs.append(df_temp)

df_SINGLE = pd.concat(dfs, ignore_index=True)

df_for_csv = df_SINGLE.copy()
df_for_csv['options'] = df_for_csv['options'].apply(lambda x: '; '.join(x))
df_for_csv.to_csv('SINGLE_combined.csv', index=False)

df_SINGLE


### NUMBER

In [None]:
import json
import pandas as pd
import requests

#Load the JSON data

questions = [
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/NUMBER/Questionnaire_Number_Artificial_BusinessAndJob.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/NUMBER/Questionnaire_Number_Artificial_HeavyIndustries.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/NUMBER/Questionnaire_Number_Artificial_Diverse.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/NUMBER/Questionnaire_Number_Artificial_Customer.json'
]

dfs = []

for question in questions:
    response = requests.get(question)
    data = response.json()

    #Create a DataFrame from the JSON data

    df_temp = pd.DataFrame(data)
    df_temp['options'] = df_temp['options'].apply(lambda x: [item['option'] for item in x])
    dfs.append(df_temp)

df_NUMBER = pd.concat(dfs, ignore_index=True)

df_for_csv = df_NUMBER.copy()
df_for_csv['options'] = df_for_csv['options'].apply(lambda x: '; '.join(x))
df_for_csv.to_csv('NUMBER_combined.csv', index=False)

df_NUMBER

### DATE

In [None]:
import json
import pandas as pd
import requests

#Load the JSON data

questions = [
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/DATE/Questionnaire_Date_Artificial_Customer.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/DATE/Questionnaire_Date_Artificial_HeavyIndustries.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/DATE/Questionnaire_Date_Artificial_Technology.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/DATE/Questionnaire_Date_Artificial_Healthcare.json'
]

dfs = []

for question in questions:
    response = requests.get(question)
    data = response.json()

    #Create a DataFrame from the JSON data

    df_temp = pd.DataFrame(data)
    df_temp['options'] = df_temp['options'].apply(lambda x: [item['option'] for item in x])
    dfs.append(df_temp)

df_DATE = pd.concat(dfs, ignore_index=True)

df_for_csv = df_DATE.copy()
df_for_csv['options'] = df_for_csv['options'].apply(lambda x: '; '.join(x))
df_for_csv.to_csv('DATE_combined.csv', index=False)

df_DATE

### TEXT

In [None]:
import json
import pandas as pd
import requests

#Load the JSON data

questions = [
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/TEXT/Questionnaire_Text_Artificial_Communications.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/TEXT/Questionnaire_Text_Artificial_HeavyIndustries.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/TEXT/Questionnaire_Text_Artificial_CustomerSupport.json',
    'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_questionnaire/TEXT/Questionnaire_Text_Artificial_Convention.json'
]

dfs = []

for question in questions:
    response = requests.get(question)
    data = response.json()

    #Create a DataFrame from the JSON data

    df_temp = pd.DataFrame(data)
    df_temp['options'] = df_temp['options'].apply(lambda x: [item['option'] for item in x])
    dfs.append(df_temp)

df_TEXT = pd.concat(dfs, ignore_index=True)

df_for_csv = df_TEXT.copy()
df_for_csv['options'] = df_for_csv['options'].apply(lambda x: '; '.join(x))
df_for_csv.to_csv('TEXT_combined.csv', index=False)

df_TEXT

# Answer Generation

### MULTI

In [None]:
import time
from google.api_core import exceptions

df_MULTI_with_answers = df_MULTI.iloc[100:190].copy()

# Generate a single compact prompt for all variations
def generate_minimal_prompt(row):
    return f"""Question: {row['question']}
            Options: {', '.join(row['options'])}

            Provide 5 different responses to this multiple choice question. Each response should select from the given options. Chose a different
            cobination of options (or single option) for each response. Each answer (V, C, Q, E, A) is to be given by a seperate person.
            Each label (V_Sel, C_Sel, Q_Sel, E_Sel, A_Sel) should contain the options corresponding to the response.
            Format exactly as follows:
            V: [verbose response, not exceeding 3 sentences]
            V_Sel: [options corresponding to the response V]
            C: [concise response]
            C_Sel: [options corresponding to the response C]
            Q: [colloquial response, no 'honestly']
            Q_Sel: [options corresponding to the response Q]
            E: [explanatory response, not exceeding 3 sentences]
            E_Sel: [options corresponding to the response E]
            A: [mildly annoyed response, no 'ugh']
            A_Sel: [options corresponding to the response A]"""

# Define the prefixes for each response type
prefixes = {
    'verbose': 'V:',
    'verbose_Gemini_label': 'V_Sel: ',
    'concise': 'C:',
    'concise_Gemini_label': 'C_Sel: ',
    'colloquial': 'Q:',
    'colloquial_Gemini_label': 'Q_Sel: ',
    'explanatory': 'E:',
    'explanatory_Gemini_label': 'E_Sel: ',
    'annoyed': 'A:',
    'annoyed_Gemini_label': 'A_Sel: '
}

# Parse the response text into separate variations
def parse_responses(text):
    answers = {}
    lines = text.split('\n')

    # Extract each response type
    for key, prefix in prefixes.items():
        try:
            # Find the line starting with this prefix
            response_line = next((line for line in lines if line.strip().startswith(prefix)), '')
            # Remove the prefix and trim
            response = response_line.replace(prefix, '', 1).strip()
            answers[f"answer_{key}"] = response if response else f"Error: No {key} response found"
        except Exception as e:
            answers[f"answer_{key}"] = f"Error parsing {key} response: {str(e)}"

    return answers

# Process a single row with retries
def process_row(row):
    prompt = generate_minimal_prompt(row)
    for attempt in range(3):
        try:
            response = model.generate_content(prompt)
            answers = parse_responses(response.text)
            time.sleep(3)
            return answers
        except exceptions.TooManyRequests:
            if attempt < 2:
                time.sleep(2 ** attempt)
            else:
                return {f"answer_{key}": "Error: Rate limit exceeded."
                       for key in prefixes}
        except Exception as e:
            print(f"Generated text was:\n{response.text}\n")  # Print the response text
            print(f"Error: {str(e)}")
            return {f"answer_{key}": f"Error: {str(e)}"
                  for key in prefixes}

# Process the dataframe
answer_columns = df_MULTI_with_answers.apply(process_row, axis=1)

# Add columns to dataframe
for key in prefixes:
    df_MULTI_with_answers[f"answer_{key}"] = answer_columns.apply(lambda x: x[f"answer_{key}"])

# Save to CSV
df_for_csv_MULTI_answer = df_MULTI_with_answers.copy()
df_for_csv_MULTI_answer['options'] = df_for_csv_MULTI_answer['options'].apply(lambda x: '; '.join(x))
df_for_csv_MULTI_answer.to_csv('MULTI_answer_combined.csv', index=False, sep='>')

df_MULTI_with_answers

In [None]:
df_0_100 = pd.read_csv('MULTI_answer_combined_0-100.csv', sep = '>')
df_101_189 = pd.read_csv('MULTI_answer_combined_101-189.csv', sep = '>')

df_MULTI_with_answers_total = pd.concat([df_0_100, df_101_189], ignore_index=True)
df_MULTI_with_answers_total.to_csv('MULTI_answer_combined_total.csv', index=False, sep='>')


In [None]:
import pandas as pd

# Step 1: Melt the dataframe to combine all answer columns into one column
df_long = df_MULTI_with_answers_total.melt(
    id_vars=["type", "question", "options"],
    var_name="answer_type",
    value_name="answer_value"
)

# Step 2: Create a new column for the answer type (e.g., verbose, concise, etc.)
df_long["answer_category"] = df_long["answer_type"].str.extract(r"answer_(\w+)")

# Step 3: Separate rows into answers and their labels
answers = df_long[~df_long["answer_type"].str.endswith("_Gemini_label")].copy()
labels = df_long[df_long["answer_type"].str.endswith("_Gemini_label")].copy()

# Step 4: Add a matching key column to facilitate merging
answers["label_type"] = answers["answer_type"] + "_Gemini_label"

# Step 5: Merge answers with their corresponding labels
df_reshaped = pd.merge(
    answers,
    labels,
    left_on=["type", "question", "options", "label_type"],
    right_on=["type", "question", "options", "answer_type"],
    suffixes=("", "_label")
)

# Step 6: Rename columns and keep the desired ones
df_reshaped = df_reshaped.rename(
    columns={"answer_value": "answer", "answer_value_label": "answer_label_GEMINI"}
)[["type", "question", "options", "answer", "answer_label_GEMINI", "answer_category"]]

# Step 7: Sort the data if needed (optional)
df_reshaped_MULTI = df_reshaped.sort_values(by=["type", "question"]).reset_index(drop=True)

# Display the final dataframe
df_reshaped_MULTI.to_csv('MULTI_answer_combined_total_reshaped.csv', index=False, sep='>')
df_reshaped_MULTI


### SINGLE

In [None]:
import time
from google.api_core import exceptions

df_SINGLE_with_answers = df_SINGLE.iloc[101:150].copy()

def generate_minimal_prompt(row):
    return f"""Question: {row['question']}
            Options: {', '.join(row['options'])}
            Provide 5 different responses to this single choice question. Each response should select from the given options. Chose a different
            option for each response (if enough options exist for all 5 responses, otherwise repeat options after having already used all of them).
            Each answer (V, C, Q, E, A) is to be given by a seperate person.
            Each label (V_Sel, C_Sel, Q_Sel, E_Sel, A_Sel) should contain the option corresponding to the response. Format exactly as follows:
            V: [verbose response, not exceeding 3 sentences]
            V_Sel: [option corresponding to the response V]
            C: [concise response]
            C_Sel: [option corresponding to the response C]
            Q: [colloquial response, no 'honestly']
            Q_Sel: [option corresponding to the response Q]
            E: [explanatory response, not exceeding 3 sentences]
            E_Sel: [option corresponding to the response E]
            A: [mildly annoyed response, no 'ugh']
            A_Sel: [option corresponding to the response A]"""

# Define the prefixes for each response type
prefixes = {
    'verbose': 'V:',
    'verbose_Gemini_label': 'V_Sel: ',
    'concise': 'C:',
    'concise_Gemini_label': 'C_Sel: ',
    'colloquial': 'Q:',
    'colloquial_Gemini_label': 'Q_Sel: ',
    'explanatory': 'E:',
    'explanatory_Gemini_label': 'E_Sel: ',
    'annoyed': 'A:',
    'annoyed_Gemini_label': 'A_Sel: '
}

def parse_responses(text):
    answers = {}
    lines = text.split('\n')
    for key, prefix in prefixes.items():
        try:
            for line in lines:
                if line.startswith(prefix):
                    line = line.removeprefix(prefix)
                    answers[f"answer_{key}"] = line if line else f"Error: No {key} response found"
        except Exception as e:
                answers[f"answer_{key}"] = f"Error parsing {key} response: {str(e)}"

    return answers

def process_row(row):
    prompt = generate_minimal_prompt(row)
    for attempt in range(3):
        try:
            response = model.generate_content(prompt)
            answers = parse_responses(response.text)
            time.sleep(3)
            return answers
        except exceptions.TooManyRequests:
            if attempt < 2:
                time.sleep(2 ** attempt)
            else:
                return {f"answer_{key}": "Error: Rate limit exceeded."
                       for key in prefixes}
        except Exception as e:
            print(f"Generated text was:\n{response.text}\n")  # Print the response text
            print(f"Error: {str(e)}")
            return {f"answer_{key}": f"Error: {str(e)}"
                  for key in prefixes}

answer_columns = df_SINGLE_with_answers.apply(process_row, axis=1)

# Add columns to dataframe
for key in prefixes:
    df_SINGLE_with_answers[f"answer_{key}"] = answer_columns.apply(lambda x: x[f"answer_{key}"])

df_for_csv_SINGLE_answer = df_SINGLE_with_answers.copy()
df_for_csv_SINGLE_answer['options'] = df_for_csv_SINGLE_answer['options'].apply(lambda x: '; '.join(x))
df_for_csv_SINGLE_answer.to_csv('SINGLE_answer_combined.csv', index=False, sep='>')

df_SINGLE_with_answers

In [None]:
df_0_100 = pd.read_csv('SINGLE_answer_combined_0-100.csv', sep = '>')
df_101_189 = pd.read_csv('SINGLE_answer_combined_101-149.csv', sep = '>')

df_SINGLE_with_answers_total = pd.concat([df_0_100, df_101_189], ignore_index=True)
df_SINGLE_with_answers_total.to_csv('SINGLE_answer_combined_total.csv', index=False, sep='>')

In [None]:
import pandas as pd

# Step 1: Melt the dataframe to combine all answer columns into one column
df_long = df_SINGLE_with_answers_total.melt(
    id_vars=["type", "question", "options"],
    var_name="answer_type",
    value_name="answer_value"
)

# Step 2: Create a new column for the answer type (e.g., verbose, concise, etc.)
df_long["answer_category"] = df_long["answer_type"].str.extract(r"answer_(\w+)")

# Step 3: Separate rows into answers and their labels
answers = df_long[~df_long["answer_type"].str.endswith("_Gemini_label")].copy()
labels = df_long[df_long["answer_type"].str.endswith("_Gemini_label")].copy()

# Step 4: Add a matching key column to facilitate merging
answers["label_type"] = answers["answer_type"] + "_Gemini_label"

# Step 5: Merge answers with their corresponding labels
df_reshaped = pd.merge(
    answers,
    labels,
    left_on=["type", "question", "options", "label_type"],
    right_on=["type", "question", "options", "answer_type"],
    suffixes=("", "_label")
)

# Step 6: Rename columns and keep the desired ones
df_reshaped = df_reshaped.rename(
    columns={"answer_value": "answer", "answer_value_label": "answer_label_GEMINI"}
)[["type", "question", "options", "answer", "answer_label_GEMINI", "answer_category"]]

# Step 7: Sort the data if needed (optional)
df_reshaped_SINGLE = df_reshaped.sort_values(by=["type", "question"]).reset_index(drop=True)

# Display the final dataframe
df_reshaped_SINGLE.to_csv('SINGLE_answer_combined_total_reshaped.csv', index=False, sep='>')
df_reshaped_SINGLE

### NUMBER

In [None]:
import time
from google.api_core import exceptions

df_NUMBER_with_answers = df_NUMBER.copy()

def generate_minimal_prompt(row):
    return f"""Question: {row['question']}
            Options: {row['options']}
            Provide 5 different responses to this single choice question, which asks for a number. The question provides a single option defining the type of number to be chosen.
            Each response should adhere to the given type but provide a unique answer. Some answers should be written as numbers, some such that the numbers are spelled out.
            Each answer (V, C, Q, E, A) is to be given by a separate person.
            Each label (V_Sel, C_Sel, Q_Sel, E_Sel, A_Sel) should contain just the number (with correct units) corresponding to the response. Format exactly as follows:
            V: [verbose response, not exceeding 3 sentences]
            V_Sel: [number corresponding to the response V]
            C: [concise response]
            C_Sel: [number corresponding to the response C]
            Q: [colloquial response, no 'honestly']
            Q_Sel: [number corresponding to the response Q]
            E: [explanatory response, not exceeding 3 sentences]
            E_Sel: [number corresponding to the response E]
            A: [mildly annoyed response, no 'ugh']
            A_Sel: [number corresponding to the response A]"""

# Define the prefixes for each response type
prefixes = {
    'verbose': 'V:',
    'verbose_Gemini_label': 'V_Sel: ',
    'concise': 'C:',
    'concise_Gemini_label': 'C_Sel: ',
    'colloquial': 'Q:',
    'colloquial_Gemini_label': 'Q_Sel: ',
    'explanatory': 'E:',
    'explanatory_Gemini_label': 'E_Sel: ',
    'annoyed': 'A:',
    'annoyed_Gemini_label': 'A_Sel: '
}

def parse_responses(text):
    answers = {}
    lines = text.split('\n')
    for key, prefix in prefixes.items():
        try:
            for line in lines:
                if line.startswith(prefix):
                    line = line.removeprefix(prefix)
                    answers[f"answer_{key}"] = line if line else f"Error: No {key} response found"
        except Exception as e:
                answers[f"answer_{key}"] = f"Error parsing {key} response: {str(e)}"

    return answers

def process_row(row):
    prompt = generate_minimal_prompt(row)
    for attempt in range(3):
        try:
            response = model.generate_content(prompt)
            answers = parse_responses(response.text)
            time.sleep(3)
            return answers
        except exceptions.TooManyRequests:
            if attempt < 2:
                time.sleep(2 ** attempt)
            else:
                return {f"answer_{key}": "Error: Rate limit exceeded."
                       for key in prefixes}
        except Exception as e:
            print(f"Generated text was:\n{response.text}\n")  # Print the response text
            print(f"Error: {str(e)}")
            return {f"answer_{key}": f"Error: {str(e)}"
                  for key in prefixes}

answer_columns = df_NUMBER_with_answers.apply(process_row, axis=1)

for key in prefixes:
    df_NUMBER_with_answers[f"answer_{key}"] = answer_columns.apply(lambda x: x[f"answer_{key}"])

df_for_csv_NUMBER_answer = df_NUMBER_with_answers.copy()
df_for_csv_NUMBER_answer['options'] = df_for_csv_NUMBER_answer['options'].apply(lambda x: '; '.join(x))
df_for_csv_NUMBER_answer.to_csv('NUMBER_answer_combined.csv', index=False, sep='>')

df_NUMBER_with_answers

In [None]:
import pandas as pd

# Convert options to strings before melting
df_NUMBER_with_answers['options'] = df_NUMBER_with_answers['options'].astype(str)

# Step 1: Melt the dataframe to combine all answer columns into one column
df_long = df_NUMBER_with_answers.melt(
    id_vars=["type", "question", "options"],
    var_name="answer_type",
    value_name="answer_value"
)


# Step 2: Create a new column for the answer type (e.g., verbose, concise, etc.)
df_long["answer_category"] = df_long["answer_type"].str.extract(r"answer_(\w+)")

# Step 3: Separate rows into answers and their labels
answers = df_long[~df_long["answer_type"].str.endswith("_Gemini_label")].copy()
labels = df_long[df_long["answer_type"].str.endswith("_Gemini_label")].copy()

# Step 4: Add a matching key column to facilitate merging
answers["label_type"] = answers["answer_type"] + "_Gemini_label"

# Step 5: Merge answers with their corresponding labels
df_reshaped = pd.merge(
    answers,
    labels,
    left_on=["type", "question", "options", "label_type"],
    right_on=["type", "question", "options", "answer_type"],
    suffixes=("", "_label")
)

# Step 6: Rename columns and keep the desired ones
df_reshaped = df_reshaped.rename(
    columns={"answer_value": "answer", "answer_value_label": "answer_label_GEMINI"}
)[["type", "question", "options", "answer", "answer_label_GEMINI", "answer_category"]]

# Step 7: Sort the data if needed (optional)
df_reshaped_NUMBER = df_reshaped.sort_values(by=["type", "question"]).reset_index(drop=True)

# Display the final dataframe
df_reshaped_NUMBER.to_csv('NUMBER_answer_combined_total_reshaped.csv', index=False, sep='>')
df_reshaped_NUMBER

### DATE

In [None]:
import time
from google.api_core import exceptions

df_DATE_with_answers = df_DATE.copy()

def generate_minimal_prompt(row):
    return f"""Question: {row['question']}
            Options: {row['options']}
            Provide 5 different responses to this single choice question, which asks for a date. The question provides one option defining the type to be chosen.
            Each response should adhere to the given type but provide a unique answer. Some answers should be written as numbers, some such that the numbers are spelled out. Use different date formats.
            Assume question takers are from Europe.
            Each answer (V, C, Q, E, A) is to be given by a separate person.
            Each label (V_Sel, C_Sel, Q_Sel, E_Sel, A_Sel) should contain just the date corresponding to the response. Use this date format: dd-mm-yyyy and nothing else for the label.
            Format exactly as follows:
            V: [verbose response, not exceeding 3 sentences]
            V_Sel: [date corresponding to the response V]
            C: [concise response]
            C_Sel: [date corresponding to the response C]
            Q: [colloquial response, no 'honestly']
            Q_Sel: [date corresponding to the response Q]
            E: [explanatory response, not exceeding 3 sentences]
            E_Sel: [date corresponding to the response E]
            A: [mildly annoyed response, no 'ugh']
            A_Sel: [date corresponding to the response A]"""

# Define the prefixes for each response type
prefixes = {
    'verbose': 'V:',
    'verbose_Gemini_label': 'V_Sel: ',
    'concise': 'C:',
    'concise_Gemini_label': 'C_Sel: ',
    'colloquial': 'Q:',
    'colloquial_Gemini_label': 'Q_Sel: ',
    'explanatory': 'E:',
    'explanatory_Gemini_label': 'E_Sel: ',
    'annoyed': 'A:',
    'annoyed_Gemini_label': 'A_Sel: '
}

def parse_responses(text):
    answers = {}
    lines = text.split('\n')
    for key, prefix in prefixes.items():
        try:
            for line in lines:
                if line.startswith(prefix):
                    line = line.removeprefix(prefix)
                    answers[f"answer_{key}"] = line if line else f"Error: No {key} response found"
        except Exception as e:
                answers[f"answer_{key}"] = f"Error parsing {key} response: {str(e)}"

    return answers

def process_row(row):
    prompt = generate_minimal_prompt(row)
    for attempt in range(3):
        try:
            response = model.generate_content(prompt)
            answers = parse_responses(response.text)
            time.sleep(3)
            return answers
        except exceptions.TooManyRequests:
            if attempt < 2:
                time.sleep(2 ** attempt)
            else:
                return {f"answer_{key}": "Error: Rate limit exceeded."
                       for key in prefixes}
        except Exception as e:
            print(f"Generated text was:\n{response.text}\n")  # Print the response text
            print(f"Error: {str(e)}")
            return {f"answer_{key}": f"Error: {str(e)}"
                  for key in prefixes}

answer_columns = df_DATE_with_answers.apply(process_row, axis=1)

for key in prefixes:
    df_DATE_with_answers[f"answer_{key}"] = answer_columns.apply(lambda x: x[f"answer_{key}"])

df_for_csv_DATE_answer = df_DATE_with_answers.copy()
df_for_csv_DATE_answer['options'] = df_for_csv_DATE_answer['options'].apply(lambda x: '; '.join(x))
df_for_csv_DATE_answer.to_csv('DATE_answer_combined.csv', index=False, sep='>')

df_DATE_with_answers

In [None]:
import pandas as pd

# Convert options to strings before melting
df_DATE_with_answers['options'] = df_DATE_with_answers['options'].astype(str)

# Step 1: Melt the dataframe to combine all answer columns into one column
df_long = df_DATE_with_answers.melt(
    id_vars=["type", "question", "options"],
    var_name="answer_type",
    value_name="answer_value"
)


# Step 2: Create a new column for the answer type (e.g., verbose, concise, etc.)
df_long["answer_category"] = df_long["answer_type"].str.extract(r"answer_(\w+)")

# Step 3: Separate rows into answers and their labels
answers = df_long[~df_long["answer_type"].str.endswith("_Gemini_label")].copy()
labels = df_long[df_long["answer_type"].str.endswith("_Gemini_label")].copy()

# Step 4: Add a matching key column to facilitate merging
answers["label_type"] = answers["answer_type"] + "_Gemini_label"

# Step 5: Merge answers with their corresponding labels
df_reshaped = pd.merge(
    answers,
    labels,
    left_on=["type", "question", "options", "label_type"],
    right_on=["type", "question", "options", "answer_type"],
    suffixes=("", "_label")
)

# Step 6: Rename columns and keep the desired ones
df_reshaped = df_reshaped.rename(
    columns={"answer_value": "answer", "answer_value_label": "answer_label_GEMINI"}
)[["type", "question", "options", "answer", "answer_label_GEMINI", "answer_category"]]

# Step 7: Sort the data if needed (optional)
df_reshaped_DATE = df_reshaped.sort_values(by=["type", "question"]).reset_index(drop=True)

# Display the final dataframe
df_reshaped_DATE.to_csv('DATE_answer_combined_total_reshaped.csv', index=False, sep='>')
df_reshaped_DATE

### TEXT

In [None]:
import time
from google.api_core import exceptions

df_TEXT_with_answers = df_TEXT.copy()

def generate_minimal_prompt(row):
    return f"""Question: {row['question']}
            Options: {row['options']}
            Provide 5 different responses to this single choice question, which asks for a text. The question provides a single option defining the type of answer to be chosen.
            Each response should adhere to the given type but provide a unique answer. The questions are open ended, therfore your answer should replicate a human's answer to such a question.
            Each answer (V, C, Q, E, A) is to be given by a separate person.
            Each label (V_Sel, C_Sel, Q_Sel, E_Sel, A_Sel) should contain the shortest possible topic summery.
            Format exactly as follows:
            V: [verbose response, not exceeding 3 sentences]
            V_Sel: [summery corresponding to the response V]
            C: [concise response]
            C_Sel: [summery corresponding to the response C]
            Q: [colloquial response, no 'honestly']
            Q_Sel: [summery corresponding to the response Q]
            E: [explanatory response, not exceeding 3 sentences]
            E_Sel: [summery corresponding to the response E]
            A: [mildly annoyed response, no 'ugh']
            A_Sel: [summery corresponding to the response A]"""

# Define the prefixes for each response type
prefixes = {
    'verbose': 'V:',
    'verbose_Gemini_label': 'V_Sel: ',
    'concise': 'C:',
    'concise_Gemini_label': 'C_Sel: ',
    'colloquial': 'Q:',
    'colloquial_Gemini_label': 'Q_Sel: ',
    'explanatory': 'E:',
    'explanatory_Gemini_label': 'E_Sel: ',
    'annoyed': 'A:',
    'annoyed_Gemini_label': 'A_Sel: '
}

def parse_responses(text):
    answers = {}
    lines = text.split('\n')
    for key, prefix in prefixes.items():
        try:
            for line in lines:
                if line.startswith(prefix):
                    line = line.removeprefix(prefix)
                    answers[f"answer_{key}"] = line if line else f"Error: No {key} response found"
        except Exception as e:
                answers[f"answer_{key}"] = f"Error parsing {key} response: {str(e)}"

    return answers

def process_row(row):
    prompt = generate_minimal_prompt(row)
    for attempt in range(3):
        try:
            response = model.generate_content(prompt)
            answers = parse_responses(response.text)
            time.sleep(3)
            return answers
        except exceptions.TooManyRequests:
            if attempt < 2:
                time.sleep(2 ** attempt)
            else:
                return {f"answer_{key}": "Error: Rate limit exceeded."
                       for key in prefixes}
        except Exception as e:
            print(f"Generated text was:\n{response.text}\n")  # Print the response text
            print(f"Error: {str(e)}")
            return {f"answer_{key}": f"Error: {str(e)}"
                  for key in prefixes}

answer_columns = df_TEXT_with_answers.apply(process_row, axis=1)

for key in prefixes:
    df_TEXT_with_answers[f"answer_{key}"] = answer_columns.apply(lambda x: x[f"answer_{key}"])

df_for_csv_TEXT_answer = df_TEXT_with_answers.copy()
df_for_csv_TEXT_answer['options'] = df_for_csv_TEXT_answer['options'].apply(lambda x: '; '.join(x))
df_for_csv_TEXT_answer.to_csv('TEXT_answer_combined.csv', index=False, sep='>')

df_TEXT_with_answers

In [None]:
import pandas as pd

# Convert options to strings before melting
df_TEXT_with_answers['options'] = df_TEXT_with_answers['options'].astype(str)

# Step 1: Melt the dataframe to combine all answer columns into one column
df_long = df_TEXT_with_answers.melt(
    id_vars=["type", "question", "options"],
    var_name="answer_type",
    value_name="answer_value"
)


# Step 2: Create a new column for the answer type (e.g., verbose, concise, etc.)
df_long["answer_category"] = df_long["answer_type"].str.extract(r"answer_(\w+)")

# Step 3: Separate rows into answers and their labels
answers = df_long[~df_long["answer_type"].str.endswith("_Gemini_label")].copy()
labels = df_long[df_long["answer_type"].str.endswith("_Gemini_label")].copy()

# Step 4: Add a matching key column to facilitate merging
answers["label_type"] = answers["answer_type"] + "_Gemini_label"

# Step 5: Merge answers with their corresponding labels
df_reshaped = pd.merge(
    answers,
    labels,
    left_on=["type", "question", "options", "label_type"],
    right_on=["type", "question", "options", "answer_type"],
    suffixes=("", "_label")
)

# Step 6: Rename columns and keep the desired ones
df_reshaped = df_reshaped.rename(
    columns={"answer_value": "answer", "answer_value_label": "answer_label_GEMINI"}
)[["type", "question", "options", "answer", "answer_label_GEMINI", "answer_category"]]

# Step 7: Sort the data if needed (optional)
df_reshaped_TEXT = df_reshaped.sort_values(by=["type", "question"]).reset_index(drop=True)

# Display the final dataframe
df_reshaped_TEXT.to_csv('TEXT_answer_combined_total_reshaped.csv', index=False, sep='>')
df_reshaped_TEXT

# Merge all CSV Files

In [None]:
import glob

# list all csv files only
csv_files = ['TEXT_answer_combined_total_reshaped.csv', 'DATE_answer_combined_total_reshaped.csv', 'NUMBER_answer_combined_total_reshaped.csv', 'SINGLE_answer_combined_total_reshaped.csv', 'MULTI_answer_combined_total_reshaped.csv']

import pandas as pd

# Create an empty list to store each DataFrame
dfs = []

# Read each CSV file and append to the list
for file in csv_files:
    df = pd.read_csv(file, sep='>')
    dfs.append(df)

# Concatenate all DataFrames
merged_df = pd.concat(dfs, ignore_index=True)

# Save the merged DataFrame to a new CSV
merged_df.to_csv('all_answers_combined_reshaped.csv', index=False, sep='>')


In [None]:
df_all = pd.read_csv('all_answers_combined_reshaped.csv', sep='>')
df_all

In [None]:
df_all['options'] = df_all['options'].str.replace(r"\['|'\]", "", regex=True)

In [None]:
df_all.to_csv('all_answers_combined_reshaped.csv', index=False, sep='>')

# Clean Up CSV

In [None]:
# Remove all rows containing quotation marks

import csv
import requests

def clean_csv_from_github(url, output_file):
    """
    Reads a CSV file from a GitHub raw URL, removes rows containing quotation marks,
    and saves the cleaned data to a new CSV file.

    Parameters:
        url (str): The GitHub raw URL of the CSV file.
        output_file (str): The file path to save the cleaned CSV.
    """
    try:
        # Fetch the CSV content from the GitHub raw URL
        response = requests.get(url)
        response.raise_for_status()  # Ensure the request was successful

        # Decode the content and split into lines
        csv_content = response.text
        csv_lines = csv_content.splitlines()

        # Read the CSV into memory
        reader = csv.reader(csv_lines, delimiter='>')
        cleaned_rows = []

        for row in reader:
            # Remove rows containing quotation marks
            if not any('"' in field for field in row):
                cleaned_rows.append(row)

        # Write the cleaned rows to a new CSV file
        with open(output_file, 'w', encoding='utf-8', newline='') as outfile:
            writer = csv.writer(outfile, delimiter='>')
            writer.writerows(cleaned_rows)

        print(f"Cleaned CSV saved to {output_file}")

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the CSV file: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")

url = 'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_answers/all_answers_combined_reshaped.csv'
output_file = 'cleaned_all_answers_combined_reshaped.csv'
clean_csv_from_github(url, output_file)

Cleaned CSV saved to cleaned_all_answers_combined_reshaped.csv


# Adding Additional Labels

In [1]:
import pandas as pd
from transformers import pipeline

# Initialize the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


def load_csv_method1(url):
    df = pd.read_csv(url, sep='>', names=['type', 'question', 'options', 'answer', 'answer_label_GEMINI', 'answer_category'], converters={'options': lambda x: x.split(';')})
    return df

url = 'https://raw.githubusercontent.com/Penguinbeanie/Capstone-Project/refs/heads/dev_branch/artificial_answers/cleaned_all_answers_combined_reshaped.csv'
df_full = load_csv_method1(url)
df_MULTI_full = df_full[df_full['type'] == 'MULTI-SELECT']
df_MULTI = df_MULTI_full.copy()

print(df_MULTI)


# Function to classify each row
def classify_row(row):
    result = classifier(row["answer"], candidate_labels=row["options"], multi_label=True)
    result_list = list(zip(result["labels"], result["scores"]))
    filtered_list = [label for label, score in result_list if score > 0.5]
    print(filtered_list)

    return filtered_list


# Apply the classification to each row

df_MULTI["answer_label_BART_large_mnli"] = df_MULTI.apply(lambda row: classify_row(row), axis=1)

# Display the DataFrame
df_MULTI

# drop all rows where type == MULTI-SELECT
df_full = df_full.drop(df_full[df_full['type'] == 'MULTI-SELECT'].index)

# append df_MULTI to df_full
df_full = pd.concat([df_full, df_MULTI])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0


              type                                       question  \
3529  MULTI-SELECT         What algorithms are you familiar with?   
3530  MULTI-SELECT         What algorithms are you familiar with?   
3531  MULTI-SELECT         What algorithms are you familiar with?   
3532  MULTI-SELECT         What algorithms are you familiar with?   
3533  MULTI-SELECT         What algorithms are you familiar with?   
...            ...                                            ...   
4446  MULTI-SELECT  Which operating systems do you use regularly?   
4447  MULTI-SELECT  Which operating systems do you use regularly?   
4448  MULTI-SELECT  Which operating systems do you use regularly?   
4449  MULTI-SELECT  Which operating systems do you use regularly?   
4450  MULTI-SELECT  Which operating systems do you use regularly?   

                                                options  \
3529  [Sorting algorithms,  Searching algorithms,  G...   
3530  [Sorting algorithms,  Searching algorithms,  G.

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


[' Regulations', ' Economic conditions']
[' Need', ' Industry', 'Company size', ' Budget', ' Authority']
[' Need', ' Budget']
[' Need', ' Budget', ' Industry']
[' Need', ' Budget', ' Authority']
[' Budget', ' Need']
[' Conversion rate', ' Customer lifetime value (CLTV)', 'Revenue']
['Revenue', ' Conversion rate']
['Revenue', ' Conversion rate']
[' Customer acquisition cost (CAC)', ' Average deal size', 'Revenue']
[' Average deal size', 'Revenue']
[' High customer acquisition costs', 'Lack of leads']
[' High customer acquisition costs', ' Competition']
['Lack of leads', ' Low conversion rates', ' High customer acquisition costs']
[' Low conversion rates', ' Competition']
[' Lack of resources', ' Competition', 'Lack of leads', ' Low conversion rates', ' High customer acquisition costs']
['Objections from prospects', ' Price sensitivity', ' Long sales cycles', ' Competition']
[' Long sales cycles', ' Price sensitivity']
[' Long sales cycles', ' Price sensitivity', ' Competition']
['Object

KeyboardInterrupt: 

In [5]:
df_full

Unnamed: 0,type,question,options,answer,answer_label_GEMINI,answer_category,predicted_options
0,type,question,[options],answer,answer_label_GEMINI,answer_category,
1,TEXT,Describe any areas where cost savings could be...,[Text],Significant cost savings could be realized by...,"Supplier negotiation, energy efficiency, proce...",verbose,
2,TEXT,Describe any areas where cost savings could be...,[Text],"Reduce supplier costs, improve energy efficie...",Cost reduction strategies,concise,
3,TEXT,Describe any areas where cost savings could be...,[Text],"Dude, we could totally save a ton of cash by...","Supplier negotiation, energy saving, waste red...",colloquial,
4,TEXT,Describe any areas where cost savings could be...,[Text],Cost savings can be achieved through a multi-...,"Contract review, energy efficiency, workflow o...",explanatory,
...,...,...,...,...,...,...,...
4446,MULTI-SELECT,Which operating systems do you use regularly?,"[Windows, macOS, Linux, Android, iOS]","I primarily use Windows for work, as it's the ...","Windows, macOS, Linux",verbose,"[Windows, macOS]"
4447,MULTI-SELECT,Which operating systems do you use regularly?,"[Windows, macOS, Linux, Android, iOS]","Windows, Android.","Windows, Android",concise,[Windows]
4448,MULTI-SELECT,Which operating systems do you use regularly?,"[Windows, macOS, Linux, Android, iOS]","I'm all about Windows and Android, that's my jam.","Windows, Android",colloquial,"[Windows, Android]"
4449,MULTI-SELECT,Which operating systems do you use regularly?,"[Windows, macOS, Linux, Android, iOS]",My daily driver is Windows 10 for its compatib...,"Windows, iOS",explanatory,"[Windows, iOS]"
