In [18]:
import os
import pandas as pd
import re
from openai import OpenAI
import json
import time

In [19]:
OPENAI_API_KEY = "<key>"
NVIDIA_API_KEY = "<key>"
DATA_PATH = "distilled_df.pkl"
RESULT_PATH = 'results_ds_7b_small_v2'
model = "deepseek-ai/deepseek-r1-distill-qwen-7b"

In [13]:
distilled_qa_df = pd.read_pickle(DATA_PATH)

In [5]:
print(distilled_qa_df.columns)

Index(['question_id', 'question', 'explanation', 'options', 'expected_answer',
       'type', 'domain', 'difficulty_level', 'tags', 'q_type'],
      dtype='object')


In [7]:
grouped_responses = {}

## **DeepSeek-R1**

In [14]:
client = OpenAI(
    base_url = "https://integrate.api.nvidia.com/v1",
    api_key = NVIDIA_API_KEY
)

In [None]:
curr_question = "Imagine a venture capitalist who opts for a startup offering a stable return of 0.06 instead of investing in a technology company projected to yield 0.44 with a probability of 47.94% or 0.00 with a probability of 52.06%. Assume that the person doesn't want to have inconsistency in his behavior, facing new investment options, which should the venture capitalist select:"

curr_options = "[Option 1: A biotech startup with a potential return of 0.06 with a probability of 27.64% and 0 with a probability of 72.36%, Option 2: A renewable energy startup with a projected return of 43 with a probability of 52.06% and 0 with a probability of 47.94%]"

prompt = (
        "<think>\n"
        f"Question: {curr_question}\n"
        f"Options: {curr_options}\n\n"
        "Choose the correct option and explain your reasoning. "
        "Please reason step by step, and put your final answer within \\boxed{}."
    )

response = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": prompt}],
    temperature=0.6,
)

print(response)

In [16]:
def process_question(row):
    """Process a single question and return its result dictionary."""
    prompt = (
        f"Question: {row['question']}\n"
        f"Options: {row['options']}\n\n"
        "Choose the correct option and explain your reasoning. "
        "Please reason step by step, and put your final answer within \\boxed{}."
        "<think>\n"
    )

    time.sleep(0.2)

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.6,
        )
    except Exception as e:
        print(f"Error calling the language model for question {row['question_id']}: {e}")
        return None

    print(response, row['question_id'])

    try:
        if hasattr(response, 'choices'):
            raw_answer = response.choices[0].message.content
        else:
            print("No 'choices' attribute in the response.")
            return None
    except Exception as e:
        print(f"Error extracting raw answer for question {row['question_id']}: {e}")
        return None

    if '</think>' in raw_answer:
        parts = raw_answer.split('</think>')
        chain_of_thought = parts[0].strip()
        final_answer = parts[1].strip()
    else:
        chain_of_thought = ""
        final_answer = raw_answer

    boxed_match = re.search(r"\\boxed\{(.*?)\}", final_answer)
    correct_response = boxed_match.group(1).strip() if boxed_match else None

    return {
        'question_id': row['question_id'],
        'question': row['question'],
        'options': row['options'],
        'expected_answer': row['expected_answer'],
        'q_type': row['q_type'],
        'chain_of_thought': chain_of_thought,
        'final_answer': final_answer,
        'correct_response': correct_response
    }

In [17]:
TARGET = 10

for q_type, group in distilled_qa_df.groupby('q_type'):
    # keep only rows with an expected answer
    valid = group[group['expected_answer'].notna()]

    # randomly pick up to TARGET of them
    if len(valid) >= TARGET:
        sampled = valid.sample(n=TARGET, random_state=42)
    else:
        sampled = valid.copy()  # fewer than TARGET available

    print(f'Processing {q_type}: sampling {len(sampled)} of {len(valid)} valid rows')

    responses = []
    for _, row in sampled.iterrows():
        result = process_question(row)
        if result is not None:
            responses.append(result)

    print(f'Collected {len(responses)} valid responses for q_type: {q_type}')

    # write out immediately
    df_out  = pd.DataFrame(responses)
    csv_path = os.path.join(RESULT_PATH, f"{q_type}_responses.csv")
    df_out.to_csv(csv_path, index=False)
    print(f"Saved CSV for {q_type} to {csv_path}")

Processing add_sub: sampling 10 of 240 valid rows
ChatCompletion(id='chat-b1fd5cd3af014044b4c8ed3ae7899222', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="First, I need to subtract $36 from $63 to determine how much money is left after spending on clothes.\n\nI'll start by aligning the numbers by their place values:\n```\n  63\n- 36\n```\n\nNext, I'll subtract the units place: 3 minus 6. Since 3 is smaller than 6, I'll borrow 1 from the tens place, making it 13 minus 6, which equals 7.\n\nThen, I'll subtract the tens place: after borrowing, the tens place becomes 5 minus 3, which equals 2.\n\nCombining the results, the final answer is $27.\n</think>\n\nSure, let's solve the subtraction problem step by step.\n\n**Problem:**\nCalculate \\( 63 - 36 \\).\n\n**Solution:**\n\n1. **Align the numbers by their place values:**\n   \\[\n   \\begin{array}{r}\n     63 \\\\\n   - 36 \\\\\n   \\hline\n   \\end{array}\n   \\]\n\n2. **Subtract the 

KeyboardInterrupt: 

In [None]:
print(json.dumps(grouped_responses, indent=4))

## **Save the results**

In [12]:
os.makedirs(RESULT_PATH, exist_ok=True)
for q_type, responses in grouped_responses.items():
    df = pd.DataFrame(responses)
    csv_filename = f"responses_{q_type}.csv"
    csv_file_path = os.path.join(RESULT_PATH, csv_filename)
    df.to_csv(csv_file_path, index=False)
    print(f"Saved responses for q_type '{q_type}' to {csv_file_path}")

Saved responses for q_type 'add_sub' to results_ds_7b_nvidia\responses_add_sub.csv
Saved responses for q_type 'ambiguity_aversion' to results_ds_7b_nvidia\responses_ambiguity_aversion.csv
Saved responses for q_type 'auctions_risk' to results_ds_7b_nvidia\responses_auctions_risk.csv
Saved responses for q_type 'backward_induction' to results_ds_7b_nvidia\responses_backward_induction.csv
Saved responses for q_type 'bayes_nash' to results_ds_7b_nvidia\responses_bayes_nash.csv
Saved responses for q_type 'bayes_rule' to results_ds_7b_nvidia\responses_bayes_rule.csv
Saved responses for q_type 'best_response' to results_ds_7b_nvidia\responses_best_response.csv
Saved responses for q_type 'borda_count' to results_ds_7b_nvidia\responses_borda_count.csv
Saved responses for q_type 'budget_balance' to results_ds_7b_nvidia\responses_budget_balance.csv
Saved responses for q_type 'categorical_syllogism' to results_ds_7b_nvidia\responses_categorical_syllogism.csv
Saved responses for q_type 'certainty_ef

## **Data Reprocessing**

In [20]:
ORIGINAL_DATA_PATH = "elements"
TYPE_TO_SELECT     = "ambiguity_aversion"
base_dir           = os.path.join(ORIGINAL_DATA_PATH, TYPE_TO_SELECT)

In [21]:
questions      = pd.read_pickle(os.path.join(base_dir, "questions.pkl"))
options        = pd.read_pickle(os.path.join(base_dir, "options.pkl"))
answers        = pd.read_pickle(os.path.join(base_dir, "answers.pkl"))
questions_meta = pd.read_pickle(os.path.join(base_dir, "questions_metadata.pkl"))

In [22]:
qid = "0_0"
print("OPTIONS:")
print(options[options.question_id == qid].sort_values("option_id"))
print("\nANSWERS:")
print(answers[answers.question_id == qid])

OPTIONS:
  question_id  option_id                                        option_text
0         0_0          0  Get a dollar if you draw a black marble and 1 ...
1         0_0          1  Get a dollar if you choose a red marble and 1 ...

ANSWERS:
  question_id  option_id  correct
0         0_0          0    False
1         0_0          1     True


In [23]:
# — ensure metadata is one‑row‑per‑question —
questions_meta = (
    questions_meta
    .drop_duplicates(subset="question_id")
    .set_index("question_id", verify_integrity=True)
    .reset_index()
)

# — build opts with a uniform `correct` boolean column —
if "correct" in answers.columns:
    # answers already has a True/False flag
    opts = options.merge(
        answers[["question_id", "option_id", "correct"]],
        on=["question_id", "option_id"],
        how="left",
    )

elif "correct_answer" in answers.columns and "option_id" in answers.columns:
    # answers gives you a 0/1 flag per option
    opts = options.merge(
        answers[["question_id", "option_id", "correct_answer"]],
        on=["question_id", "option_id"],
        how="left",
    )
    # turn 0/1 → bool
    opts["correct"] = opts["correct_answer"].fillna(0).astype(int).astype(bool)
    opts = opts.drop(columns=["correct_answer"])

else:
    raise KeyError("answers.pkl schema not recognized")

# — cast to pandas’ nullable BooleanDtype & fill any remaining NAs —
opts["correct"] = (
    opts["correct"]
    .astype(pd.BooleanDtype())  # use the new BooleanDtype
    .fillna(False)              # no downcasting warning
)

# — now merge in questions & metadata safely —
full = (
    opts
    .merge(questions,      on="question_id", how="left")
    .merge(questions_meta, on="question_id", how="left")
    .sort_values(["question_id", "option_id"])
)

In [24]:
# — one‑row‑per‑question with formatted options —
def to_wide(group: pd.DataFrame) -> pd.Series:
    group = group.sort_values("option_id")
    formatted_opts = [
        f"Option {i+1}: {txt}"
        for i, txt in enumerate(group["option_text"])
    ]
    # pick out the single correct formatted string
    correct_row = group[group["correct"]]
    correct_opt = (
        formatted_opts[
            int(correct_row["option_id"].iloc[0] - group["option_id"].min())
        ]
        if not correct_row.empty
        else None
    )

    data = pd.Series({
        "question"   : group["question_text"].iat[0],
        "options"         : formatted_opts,
        "expected_answer"  : correct_opt,
        "explanation"     : group["explanation"].iat[0],
        "domain"          : group["domain"].iat[0],
        "difficulty_level": group["difficulty_level"].iat[0],
        "type"            : group["type"].iat[0],
        "q_type"          : TYPE_TO_SELECT
    })

    # only add tags if that column exists
    if "tags" in group.columns:
        data["tags"] = group["tags"].iat[0]

    return data

df_wide = (
    full
    .groupby("question_id")
    .apply(to_wide, include_groups=False)      # same effect
    .reset_index()
)

In [79]:
REVISED_DIR = "revised_elements"

# construct a filename
out_path = os.path.join(REVISED_DIR, f"{TYPE_TO_SELECT}.pkl")

# write the DataFrame out
df_wide.to_pickle(out_path)

print(f"Saved processed DataFrame to {out_path}")

Saved processed DataFrame to revised_elements\enforceability.pkl
