In [None]:
import pandas as pd
import numpy as np
import os
import pickle
from google.colab import drive
import re
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Mount Google Drive at /content/drive; the data paths used in this notebook live under it.
drive.mount('/content/drive')

Mounted at /content/drive


## Import Dataset

### STEER

In [None]:
# Location of the pickled question/answer DataFrame on the mounted Drive.
DATA_PATH = "/content/drive/MyDrive/CIS6200_Project/all_qa_df.pkl"

In [None]:
# Load the pickled QA DataFrame and display it.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files from a trusted source.
all_qa_df = pd.read_pickle(DATA_PATH)
all_qa_df

KeyboardInterrupt: 

In [None]:
# List the distinct question types present in the dataset.
print(all_qa_df['q_type'].unique())

In [None]:
# Number of questions to sample for each q_type (insertion order is the
# order in which the sampled blocks are later concatenated).
target_dist = dict(
    enforceability=300,
    backward_induction=300,
    trigger=300,
    feasibility=200,
    auctions_risk=200,
    endowment_effect=150,
    certainty_effect=150,
    time_inconsistency=50,
    budget_balance=50,
    condorcet_criterion=50,
    bayes_nash=50,
)

In [None]:
def match_option_text(row):
    """Return the option line that corresponds to the row's expected answer.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with an ``"options"``
            entry holding newline-separated option lines and an
            ``"expected_answer"`` entry holding the answer text.

    Returns:
        The matching option line with surrounding whitespace stripped, or
        ``None`` when the answer or options are not strings, the answer is
        empty, or no option matches.

    Matching is done in two passes so that a short answer is not attributed to
    an earlier, longer option that merely contains it:
      1. exact match against the whole option line or against the text after
         the first ``":"`` (i.e. with a label such as ``"Option 2:"`` removed);
      2. fallback: first option line that contains the answer as a substring.
    """
    expected = row["expected_answer"]
    options = row["options"]
    if not isinstance(expected, str) or not isinstance(options, str):
        return None

    expected = expected.strip()
    if not expected:
        # An empty string is "in" every option; treat it as no answer at all.
        return None

    candidates = [line.strip() for line in options.split("\n") if line.strip()]

    # Pass 1: exact match on the option text (with or without its label).
    for opt in candidates:
        _label, sep, text = opt.partition(":")
        if opt == expected or (sep and text.strip() == expected):
            return opt

    # Pass 2: substring fallback for answers that are abbreviated or reformatted.
    for opt in candidates:
        if expected in opt:
            return opt

    return None

# Drop rows with critical nulls. The explicit .copy() gives an independent
# frame, so adding a column below does not write into a view of all_qa_df
# (avoids pandas' SettingWithCopyWarning / chained-assignment ambiguity).
df_clean = all_qa_df.dropna(subset=["expected_answer", "question", "options"]).copy()

# Match expected_answer to one of the options
df_clean["matched_option"] = df_clean.apply(match_option_text, axis=1)

# Filter only those with successful matches
df_clean = df_clean.dropna(subset=["matched_option"])

# Stratified sampling: take target_count rows per q_type, or every available
# row (with a warning) when a q_type has fewer rows than requested.
sampled_list = []

for q_type, target_count in target_dist.items():
    subset = df_clean[df_clean["q_type"] == q_type]

    if len(subset) < target_count:
        print(f"Warning: {q_type} only has {len(subset)} samples, less than requested {target_count}")
        sample = subset
    else:
        # Fixed seed keeps the sample reproducible across runs.
        sample = subset.sample(n=target_count, random_state=42)

    sampled_list.append(sample)

# Combine and reset index (blocks stay in target_dist order)
final_subset_df = pd.concat(sampled_list).reset_index(drop=True)


In [None]:
# Persist every row that survived cleaning and option matching (not only the stratified sample).
df_clean.to_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/STEER_clean.csv', index=False)

In [None]:
# Inspect the stratified sample.
final_subset_df

In [None]:
# Keep only the columns needed downstream. Selecting them by name yields the
# same frame as first dropping the unwanted columns, and keeps this cell
# re-runnable (a second drop of already-removed columns would raise KeyError).
final_subset_df = final_subset_df[
    ['q_type', 'question', 'options', 'expected_answer', 'matched_option', 'difficulty_level']
]

In [None]:
# Inspect the sample after column trimming.
final_subset_df

In [None]:
# Save the sampled STEER DataFrame to a CSV file (index column omitted)
final_subset_df.to_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_STEER.csv', index=False)


### EconLogicQA

In [None]:
# Directory on the mounted Drive that holds the EconLogicQA CSV files.
EconLogicQA_dir = "/content/drive/MyDrive/CIS6200_Project/EconLogicQA"

# Read the train split into a pandas DataFrame
EconLogicQA_df = pd.read_csv(f"{EconLogicQA_dir}/train.csv")

# Preview
EconLogicQA_df

In [None]:
# Sample 200 rows randomly from EconLogicQA_df (fixed seed so the sample is reproducible)
final_EconLogicQA_df = EconLogicQA_df.sample(n=200, random_state=42)
final_EconLogicQA_df

In [None]:
# Save the EconLogicQA sample to CSV (index column omitted).
final_EconLogicQA_df.to_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_EconLogicQA.csv', index=False)

### EconNLI

In [None]:
# Directory on the mounted Drive that holds the EconNLI CSV files.
EconNLI_dir = "/content/drive/MyDrive/CIS6200_Project/EconNLI"

# Only the test set has been examined by human experts, so that is the split loaded here.
EconNLI_df = pd.read_csv(f"{EconNLI_dir}/EconNLI_test.csv")

EconNLI_df

In [None]:
# Sample 100 rows for EconNLI (fixed seed so the sample is reproducible)

final_EconNLI_df = EconNLI_df.sample(n=100, random_state=42)
final_EconNLI_df

In [None]:
# Save the EconNLI sample to CSV (index column omitted).
final_EconNLI_df.to_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_EconNLI.csv', index=False)

## Curation

In [None]:
# Reload the three sampled subsets from the CSV files written to the Training_Dataset folder.
final_EconLogicQA_df = pd.read_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_EconLogicQA.csv')
final_EconNLI_df = pd.read_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_EconNLI.csv')
final_STEER_df = pd.read_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_STEER.csv')

In [None]:
# Inspect the reloaded STEER sample.
final_STEER_df

Unnamed: 0,q_type,question,options,expected_answer,matched_option,difficulty_level
0,enforceability,"Two firms, A and B, are involved in a reciproc...",Option 1: Both firms A and B fulfill their ser...,Firm A fulfills the servicing obligations whil...,Option 3: Firm A fulfills the servicing obliga...,7.0
1,enforceability,In a duopoly where two firms have a choice to ...,Option 1: Both firms undercut each other's pri...,Both firms undercut each other's prices in eve...,Option 1: Both firms undercut each other's pri...,7.0
2,enforceability,"Consider a duopoly where two firms, A and B, s...",Option 1: Firms A and B compete on price in ea...,Firm A competes and Firm B colludes indefinitely.,Option 4: Firm A competes and Firm B colludes ...,7.0
3,enforceability,Two firms are engaged in a long-term exclusive...,Option 1: Both firms strictly adhere to the ag...,Both firms breach the agreement.,Option 2: Both firms breach the agreement.,7.0
4,enforceability,"Two firms, A and B, are engaged in a long-term...",Option 1: Both firms collaborate each period\n...,Both firms collaborate each period,Option 1: Both firms collaborate each period,7.0
...,...,...,...,...,...,...
1795,bayes_nash,"Suppose a game has 3 states: State 1, State 2,...","Option 1: ('Action R', 'Action K')\nOption 2: ...","('Action S', 'Action L')","Option 4: ('Action S', 'Action L')",12.0
1796,bayes_nash,Consider the following game with 2 states: Sta...,"Option 1: ('Action R', 'Action K')\nOption 2: ...","('Action S', 'Action K')","Option 3: ('Action S', 'Action K')",11.0
1797,bayes_nash,"Consider a game with 2 states: State 1, State ...","Option 1: ('Action R', 'Action K')\nOption 2: ...","('Action R', 'Action K')","Option 1: ('Action R', 'Action K')",11.0
1798,bayes_nash,Suppose the following game has 2 states: State...,"Option 1: ('Action R', 'Action K')\nOption 2: ...","('Action S', 'Action K')","Option 3: ('Action S', 'Action K')",11.0


In [None]:
# Inspect the reloaded EconLogicQA sample.
final_EconLogicQA_df

Unnamed: 0,Question,A,B,C,D,Answer
0,"Emma Raducanu, an 18-year-old tennis player, h...",Emma Raducanu attracts interest from global br...,Emma Raducanu wins the US Open.,Emma Raducanu's earnings from sponsorship deal...,Emma Raducanu negotiates and signs lucrative s...,"B, A, D, C"
1,The Monash University team in Melbourne has de...,The team patents the new battery technology.,The team develops the lithium-sulfur battery.,The team conducts further testing of the new b...,The team commercializes the new battery techno...,"B, A, C, D"
2,"Sarah Palin, a prominent political figure, has...",Sarah Palin testifies about the impact of the ...,The New York Times publishes an editorial inco...,Sarah Palin files a lawsuit against the New Yo...,The former editorial page editor of the New Yo...,"B, D, C, A"
3,Arrange the following events related to MyPill...,"Mike Lindell, CEO of MyPillow, is banned from ...",Twitter enacts a new policy allowing for the p...,Bed Bath & Beyond stops selling MyPillow produ...,Twitter permanently bans Donald Trump's account.,"B, D, A, C"
4,Consider the recent surge in the housing marke...,Buyers scramble to find properties due to low ...,The housing market cools off as mortgage rates...,Homeowners refinance their mortgages to take a...,The winning bid for a property is an all-cash ...,"C, A, D, B"
...,...,...,...,...,...,...
195,"Katherine Tai, as the US Trade Representative-...",Revoking the tariffs imposed on imported goods,Increasing market competition by opening up ma...,Improving the economic welfare of the society,Reducing the prices of both imported and domes...,"A, B, D, C"
196,Arrange the following events in the logical se...,The Federal Reserve signals the likelihood of ...,"The Dow falls for the fifth consecutive week, ...","Inflation concerns dominate headlines, exacerb...","Shares of top tech companies, which had been l...","C, A, D, B"
197,Arrange the following events in the logical se...,The percentage of the adult population not wor...,Air travel slows down due to reduced demand.,Employers with public-facing workers experienc...,"Americans shift their behavior, dealing a blow...","A, C, D, B"
198,China has recently passed a new data privacy l...,Tech companies adjust their data collection an...,The Chinese government passes a new data priva...,Tech stocks experience a drop due to the new law.,Companies that fail to comply with the new law...,"B, D, A, C"


In [None]:
# For R1-distilled reasoning models, the authors recommend leaving the system prompt blank.
# Tags that delimit the model's reasoning; interpolated into the prompt templates below.
reasoning_start = "<think>"
reasoning_end   = "</think>"
# Tags for the final answer (not referenced by the prompt builders visible in this notebook).
solution_start = "<answer>"
solution_end = "</answer>"

# Intentionally empty (see the note at the top of this cell).
SYSTEM_PROMPT = ''

In [None]:
### General Prompt Format
# {
#   "prompt": [{"role": "user", "content": PROMPT}],
#   "answer": "FINAL_ANSWER"
# }


In [None]:
def build_steer_prompt(row):
    """Render the user prompt for one STEER multiple-choice question.

    Args:
        row: Mapping-like object with ``'question'`` and ``'options'`` entries.

    Returns:
        The prompt text: task instructions (reasoning wrapped in the
        module-level ``reasoning_start`` / ``reasoning_end`` tags, final answer
        in ``\\boxed{...}``), followed by the question and its options.
    """
    prompt_lines = [
        "You are given a reasoning problem in Economics. Please reason step by step to solve it.",
        "",
        f"Enclose your reasoning in {reasoning_start} and {reasoning_end}. Then, give your final answer using the format \\boxed{{Option X: full choice text}}.",
        "",
        "You must include both the option number and the full text of your chosen answer.",
        "",
        "Question:",
        f"{row['question']}",
        "",
        "Options:",
        f"{row['options']}",
    ]
    return "\n".join(prompt_lines)

# Build the STEER frame with a chat-style prompt (single user message) and
# the gold answer taken from the matched option line.
steer_grpo_df = final_STEER_df.assign(
    prompt=lambda frame: frame.apply(
        lambda row: [{"role": "user", "content": build_steer_prompt(row)}],
        axis=1,
    ),
    answer=lambda frame: frame["matched_option"],
)

# Final GRPO-style dataset: only the two columns used for training
steer_grpo_ready_df = steer_grpo_df.loc[:, ["prompt", "answer"]]

In [None]:
# Inspect the message list built for the first STEER row.
steer_grpo_ready_df['prompt'][0]

[{'role': 'user',
  'content': 'You are given a reasoning problem in Economics. Please reason step by step to solve it.\n\nEnclose your reasoning in <think> and </think>. Then, give your final answer using the format \\boxed{Option X: full choice text}.\n\nYou must include both the option number and the full text of your chosen answer.\n\nQuestion:\nTwo firms, A and B, are involved in a reciprocal servicing agreement over an indefinite horizon. Each period, if both firms fulfill their servicing obligations, each receives a benefit of $16858.48. However, if one firm renegs on the agreement while the other firm fulfills their obligation, the reneging firm receives a benefit of $36498.39, while the dutiful firm incurs a cost of $79329.61. If both firms decide to renege on their obligations, they each receive $31218.06. Given the discount factor is 0.41, which of the following outcomes is enforceable in a Nash equilibrium?\n\nOptions:\nOption 1: Both firms A and B fulfill their servicing o

In [None]:
# Print the rendered prompt text of the first STEER row.
print(steer_grpo_ready_df['prompt'][0][0]['content'])

You are given a reasoning problem in Economics. Please reason step by step to solve it.

Enclose your reasoning in <think> and </think>. Then, give your final answer using the format \boxed{Option X: full choice text}.

You must include both the option number and the full text of your chosen answer.

Question:
Two firms, A and B, are involved in a reciprocal servicing agreement over an indefinite horizon. Each period, if both firms fulfill their servicing obligations, each receives a benefit of $16858.48. However, if one firm renegs on the agreement while the other firm fulfills their obligation, the reneging firm receives a benefit of $36498.39, while the dutiful firm incurs a cost of $79329.61. If both firms decide to renege on their obligations, they each receive $31218.06. Given the discount factor is 0.41, which of the following outcomes is enforceable in a Nash equilibrium?

Options:
Option 1: Both firms A and B fulfill their servicing obligations every period
Option 2: Both firm

In [None]:
def build_nli_prompt(row):
    """Render the user prompt for one EconNLI premise/hypothesis pair.

    Args:
        row: Mapping-like object with ``'cause'`` (used as the premise) and
            ``'effect'`` (used as the hypothesis) entries.

    Returns:
        The prompt text asking whether the premise can causally lead to the
        hypothesis, with reasoning wrapped in the module-level
        ``reasoning_start`` / ``reasoning_end`` tags and a boxed Yes/No answer.
    """
    prompt_lines = [
        "You are given two economic events: a premise and a hypothesis.",
        "",
        "Please determine whether the premise can causally lead to the hypothesis.",
        "",
        f"Explain your reasoning step by step inside {reasoning_start} and {reasoning_end}, then write your final answer using the format \\boxed{{Yes}} or \\boxed{{No}}.",
        "",
        f"Premise: {row['cause']}",
        f"Hypothesis: {row['effect']}",
    ]
    return "\n".join(prompt_lines)


# Construct EconNLI: chat-style prompt per row, label 1/0 mapped to "Yes"/"No"
nli_dataset = final_EconNLI_df.assign(
    prompt=lambda frame: frame.apply(
        lambda row: [{"role": "user", "content": build_nli_prompt(row)}],
        axis=1,
    ),
    answer=lambda frame: frame["label"].map({1: "Yes", 0: "No"}),
)

# Final clean dataset (the answer field holds the bare label, without \boxed{})
final_nli = nli_dataset.loc[:, ["prompt", "answer"]]
final_nli

Unnamed: 0,prompt,answer
0,"[{'role': 'user', 'content': 'You are given tw...",No
1,"[{'role': 'user', 'content': 'You are given tw...",No
2,"[{'role': 'user', 'content': 'You are given tw...",Yes
3,"[{'role': 'user', 'content': 'You are given tw...",No
4,"[{'role': 'user', 'content': 'You are given tw...",Yes
...,...,...
95,"[{'role': 'user', 'content': 'You are given tw...",Yes
96,"[{'role': 'user', 'content': 'You are given tw...",No
97,"[{'role': 'user', 'content': 'You are given tw...",Yes
98,"[{'role': 'user', 'content': 'You are given tw...",Yes


In [None]:
# Inspect the message list built for the first EconNLI row.
final_nli['prompt'][0]

[{'role': 'user',
  'content': 'You are given two economic events: a premise and a hypothesis.\n\nPlease determine whether the premise can causally lead to the hypothesis.\n\nExplain your reasoning step by step inside <think> and </think>, then write your final answer using the format \\boxed{Yes} or \\boxed{No}.\n\nPremise: demand curve shifts to the left \nHypothesis:  The Laspeyres index tends to overstate inflation'}]

In [None]:
# Print the rendered prompt text of the first EconNLI row.
print(final_nli['prompt'][0][0]['content'])

You are given two economic events: a premise and a hypothesis.

Please determine whether the premise can causally lead to the hypothesis.

Explain your reasoning step by step inside <think> and </think>, then write your final answer using the format \boxed{Yes} or \boxed{No}.

Premise: demand curve shifts to the left 
Hypothesis:  The Laspeyres index tends to overstate inflation


In [None]:
def build_logicqa_prompt(row):
    """Render the user prompt for one EconLogicQA event-ordering question.

    Args:
        row: Mapping-like object with ``'Question'`` and the four event texts
            under ``'A'``, ``'B'``, ``'C'`` and ``'D'``.

    Returns:
        The prompt text asking for the most coherent order of the four events,
        with reasoning wrapped in the module-level ``reasoning_start`` /
        ``reasoning_end`` tags and the answer given as a boxed letter sequence.
    """
    prompt_lines = [
        "You are given a question followed by four events labeled A, B, C, and D.",
        "",
        "Determine the most logically coherent chronological order of these events.",
        "",
        f"Write your reasoning between {reasoning_start} and {reasoning_end}, and write your final answer, a comma-separated sequence of letters, using the format \\boxed{{A, B, C, D}}.",
        "",
        "Question:",
        f"{row['Question']}",
        "",
        f"A: {row['A']}",
        f"B: {row['B']}",
        f"C: {row['C']}",
        f"D: {row['D']}",
    ]
    return "\n".join(prompt_lines)

# Construct LogicQA: chat-style prompt per row, answer is the whitespace-trimmed letter sequence
logicqa_dataset = final_EconLogicQA_df.assign(
    prompt=lambda frame: frame.apply(
        lambda row: [{"role": "user", "content": build_logicqa_prompt(row)}],
        axis=1,
    ),
    answer=lambda frame: frame["Answer"].apply(lambda text: text.strip()),
)

final_logicqa = logicqa_dataset.loc[:, ["prompt", "answer"]]
final_logicqa

Unnamed: 0,prompt,answer
0,"[{'role': 'user', 'content': 'You are given a ...","B, A, D, C"
1,"[{'role': 'user', 'content': 'You are given a ...","B, A, C, D"
2,"[{'role': 'user', 'content': 'You are given a ...","B, D, C, A"
3,"[{'role': 'user', 'content': 'You are given a ...","B, D, A, C"
4,"[{'role': 'user', 'content': 'You are given a ...","C, A, D, B"
...,...,...
195,"[{'role': 'user', 'content': 'You are given a ...","A, B, D, C"
196,"[{'role': 'user', 'content': 'You are given a ...","C, A, D, B"
197,"[{'role': 'user', 'content': 'You are given a ...","A, C, D, B"
198,"[{'role': 'user', 'content': 'You are given a ...","B, D, A, C"


In [None]:
# Inspect the message list built for the first EconLogicQA row.
final_logicqa['prompt'][0]

[{'role': 'user',
  'content': "You are given a question followed by four events labeled A, B, C, and D.\n\nDetermine the most logically coherent chronological order of these events.\n\nWrite your reasoning between <think> and </think>, and write your final answer, a comma-separated sequence of letters, using the format \\boxed{A, B, C, D}.\n\nQuestion:\nEmma Raducanu, an 18-year-old tennis player, has recently won the US Open. This victory has opened up numerous opportunities for her in terms of sponsorship deals and endorsements. Arrange the following events in the logical sequence they would likely occur in the aftermath of her victory.\n\nA: Emma Raducanu attracts interest from global brands for endorsement deals.\nB: Emma Raducanu wins the US Open.\nC: Emma Raducanu's earnings from sponsorship deals surpass her tournament winnings.\nD: Emma Raducanu negotiates and signs lucrative sponsorship deals."}]

In [None]:
# Print the rendered prompt text of the first EconLogicQA row.
print(final_logicqa['prompt'][0][0]['content'])

You are given a question followed by four events labeled A, B, C, and D.

Determine the most logically coherent chronological order of these events.

Write your reasoning between <think> and </think>, and write your final answer, a comma-separated sequence of letters, using the format \boxed{A, B, C, D}.

Question:
Emma Raducanu, an 18-year-old tennis player, has recently won the US Open. This victory has opened up numerous opportunities for her in terms of sponsorship deals and endorsements. Arrange the following events in the logical sequence they would likely occur in the aftermath of her victory.

A: Emma Raducanu attracts interest from global brands for endorsement deals.
B: Emma Raducanu wins the US Open.
C: Emma Raducanu's earnings from sponsorship deals surpass her tournament winnings.
D: Emma Raducanu negotiates and signs lucrative sponsorship deals.


In [None]:

# Concatenate the three dataframes.
# Row order in the result is: STEER, then EconNLI, then EconLogicQA — any later
# positional slicing of this file must follow that order.
combined_df = pd.concat([steer_grpo_ready_df, final_nli, final_logicqa], ignore_index=True)

# Save the combined dataframe.
# NOTE: the list-valued `prompt` column is written as its string representation,
# so it has to be parsed back into a list after reading the CSV.
combined_df.to_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_dataset.csv', index=False)

combined_df

Unnamed: 0,prompt,answer
0,"[{'role': 'user', 'content': 'You are given a ...",Option 3: Firm A fulfills the servicing obliga...
1,"[{'role': 'user', 'content': 'You are given a ...",Option 1: Both firms undercut each other's pri...
2,"[{'role': 'user', 'content': 'You are given a ...",Option 4: Firm A competes and Firm B colludes ...
3,"[{'role': 'user', 'content': 'You are given a ...",Option 2: Both firms breach the agreement.
4,"[{'role': 'user', 'content': 'You are given a ...",Option 1: Both firms collaborate each period
...,...,...
2095,"[{'role': 'user', 'content': 'You are given a ...","A, B, D, C"
2096,"[{'role': 'user', 'content': 'You are given a ...","C, A, D, B"
2097,"[{'role': 'user', 'content': 'You are given a ...","A, C, D, B"
2098,"[{'role': 'user', 'content': 'You are given a ...","B, D, A, C"


## Pure Nash & PTE

In [None]:
# Load the Pure Nash and PTE training files from the Nash_Data folder.
nash_data_df = pd.read_csv('/content/drive/MyDrive/CIS6200_Project/Nash_Data/nash_train_v2.csv')
pte_data_df = pd.read_csv('/content/drive/MyDrive/CIS6200_Project/Nash_Data/pte_train.csv')

In [None]:
# Randomly select 100 rows from each frame (fixed seed so the sample is reproducible).
# Note: this rebinds the names, so the full frames are no longer available afterwards.
nash_data_df = nash_data_df.sample(n=100, random_state=42)
pte_data_df = pte_data_df.sample(n=100, random_state=42)

In [None]:
# Load existing training dataset
existing_df = pd.read_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_dataset.csv')

# Concatenate: existing rows first, then the Pure Nash sample, then the PTE sample
combined_df = pd.concat([existing_df, nash_data_df, pte_data_df], ignore_index=True)

combined_df

Unnamed: 0,prompt,answer
0,"[{'role': 'user', 'content': 'You are given a ...",Option 3: Firm A fulfills the servicing obliga...
1,"[{'role': 'user', 'content': ""You are given a ...",Option 1: Both firms undercut each other's pri...
2,"[{'role': 'user', 'content': ""You are given a ...",Option 4: Firm A competes and Firm B colludes ...
3,"[{'role': 'user', 'content': 'You are given a ...",Option 2: Both firms breach the agreement.
4,"[{'role': 'user', 'content': 'You are given a ...",Option 1: Both firms collaborate each period
...,...,...
2295,"[{'role': 'user', 'content': ""You are given a ...","[[2, 2]]"
2296,"[{'role': 'user', 'content': ""You are given a ...",[]
2297,"[{'role': 'user', 'content': ""You are given a ...","[[1, 2]]"
2298,"[{'role': 'user', 'content': ""You are given a ...","[[1, 2]]"


In [None]:
# Store as v4 (existing dataset plus the Pure Nash and PTE samples)
combined_df.to_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_dataset_v4.csv', index=False)

## Reduce Number of questions

In [None]:
import pandas as pd

# 1. Load the full concatenated dataset
df = pd.read_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_dataset_v4.csv')

# 2. Original counts, in the order the blocks were concatenated.
#    The slicing below is purely positional, so this order must mirror how the
#    file was built: the STEER q_types, then EconNLI (100), then EconLogicQA
#    (200) — train_dataset.csv was concatenated as [STEER, EconNLI,
#    EconLogicQA] — and finally the Pure Nash and PTE samples.
original_counts = [
    300,  # Enforceability
    300,  # Backward Induction
    300,  # Trigger
    200,  # Feasibility
    200,  # Auction Risk
    150,  # Endowment Effect
    150,  # Certainty Effect
     50,  # Time Inconsistency
     50,  # Budget Balance
     50,  # Condorcet Criterion
     50,  # Bayes Nash
    100,  # EconNLI
    200,  # EconLogicQA
    100,  # Pure Nash
    100,  # PTE
]

# 3. Desired counts in exactly the same order
desired_counts = [
    250,  # Enforceability
    250,  # Backward Induction
    250,  # Trigger
    150,  # Feasibility
    150,  # Auction Risk
     75,  # Endowment Effect
     75,  # Certainty Effect
     25,  # Time Inconsistency
     50,  # Budget Balance
     25,  # Condorcet Criterion
     50,  # Bayes Nash
    100,  # EconNLI
    150,  # EconLogicQA
    100,  # Pure Nash
    100,  # PTE
]

# Guard the positional assumptions: a mismatch here would silently slice the
# wrong rows instead of failing.
if len(original_counts) != len(desired_counts):
    raise ValueError(
        f"original_counts has {len(original_counts)} blocks but "
        f"desired_counts has {len(desired_counts)}"
    )
if sum(original_counts) != len(df):
    raise ValueError(
        f"Block sizes sum to {sum(original_counts)} but the dataset has {len(df)} rows"
    )

# 4. Slice out the head of each block
parts = []
start = 0
for orig, want in zip(original_counts, desired_counts):
    if want > orig:
        raise ValueError(f"Cannot take {want} from a block of size {orig}")
    parts.append(df.iloc[start : start + want])
    start += orig  # always advance by the full original block size

# 5. Concatenate and reset index
shrinked_df = pd.concat(parts, ignore_index=True)

# 6. Save the new file
shrinked_df.to_csv(
    '/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_dataset_v5.csv',
    index=False
)

print(f"Done: {len(shrinked_df)} rows written to train_dataset_v5.csv")

Done: 1800 rows written to train_dataset_v5.csv


## Huggingface Test

In [None]:
!pip install datasets

from datasets import Dataset, DatasetDict

Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.1-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (1

In [None]:
import os

from huggingface_hub import login

# Never store an access token in the notebook. Read it from the HF_TOKEN
# environment variable; when it is unset, token=None makes login() prompt for
# the token interactively.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
# Load the reduced training set (v5); `prompt` is read back as a plain string at this point
combined_df = pd.read_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_dataset_v5.csv')

combined_df

Unnamed: 0,prompt,answer
0,"[{'role': 'user', 'content': 'You are given a ...",Option 3: Firm A fulfills the servicing obliga...
1,"[{'role': 'user', 'content': ""You are given a ...",Option 1: Both firms undercut each other's pri...
2,"[{'role': 'user', 'content': ""You are given a ...",Option 4: Firm A competes and Firm B colludes ...
3,"[{'role': 'user', 'content': 'You are given a ...",Option 2: Both firms breach the agreement.
4,"[{'role': 'user', 'content': 'You are given a ...",Option 1: Both firms collaborate each period
...,...,...
1795,"[{'role': 'user', 'content': ""You are given a ...","[[2, 2]]"
1796,"[{'role': 'user', 'content': ""You are given a ...",[]
1797,"[{'role': 'user', 'content': ""You are given a ...","[[1, 2]]"
1798,"[{'role': 'user', 'content': ""You are given a ...","[[1, 2]]"


In [None]:
import ast

# Fix the prompt format from str to list: the CSV stores each message list as
# its Python literal text, which ast.literal_eval parses back into a list of dicts.
combined_df["prompt"] = combined_df["prompt"].apply(ast.literal_eval)


In [None]:
# Confirm the parse worked: index into the first message of the first row.
combined_df['prompt'][0][0]['content']

'You are given a reasoning problem in Economics. Please reason step by step to solve it.\n\nEnclose your reasoning in <think> and </think>. Then, give your final answer using the format \\boxed{Option X: full choice text}.\n\nYou must include both the option number and the full text of your chosen answer.\n\nQuestion:\nTwo firms, A and B, are involved in a reciprocal servicing agreement over an indefinite horizon. Each period, if both firms fulfill their servicing obligations, each receives a benefit of $16858.48. However, if one firm renegs on the agreement while the other firm fulfills their obligation, the reneging firm receives a benefit of $36498.39, while the dutiful firm incurs a cost of $79329.61. If both firms decide to renege on their obligations, they each receive $31218.06. Given the discount factor is 0.41, which of the following outcomes is enforceable in a Nash equilibrium?\n\nOptions:\nOption 1: Both firms A and B fulfill their servicing obligations every period\nOption

In [None]:
# Convert the pandas DataFrame to a Hugging Face Dataset
dataset = Dataset.from_pandas(combined_df)

# Create a DatasetDict (optional, but recommended for Hugging Face); all rows go into a single "train" split
dataset_dict = DatasetDict({"train": dataset})

In [None]:
# Upload the dataset to the Hugging Face Hub repository MasterZhou/econ_reasoning.
dataset_dict.push_to_hub("MasterZhou/econ_reasoning")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/311 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/MasterZhou/econ_reasoning/commit/f03fb2d8a7e310248fb5341e18a78c300af4195f', commit_message='Upload dataset', commit_description='', oid='f03fb2d8a7e310248fb5341e18a78c300af4195f', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MasterZhou/econ_reasoning', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MasterZhou/econ_reasoning'), pr_revision=None, pr_num=None)

## Reformat Questions for Qwen3 (without the think-token instruction)

In [None]:
# Reload the v5 training set from CSV.
combined_df = pd.read_csv('/content/drive/MyDrive/CIS6200_Project/Training_Dataset/train_dataset_v5.csv')
import ast

# Fix the prompt format from str to list (each cell becomes a list of message dicts)
combined_df["prompt"] = combined_df["prompt"].apply(ast.literal_eval)
combined_df

Unnamed: 0,prompt,answer
0,"[{'role': 'user', 'content': 'You are given a ...",Option 3: Firm A fulfills the servicing obliga...
1,"[{'role': 'user', 'content': 'You are given a ...",Option 1: Both firms undercut each other's pri...
2,"[{'role': 'user', 'content': 'You are given a ...",Option 4: Firm A competes and Firm B colludes ...
3,"[{'role': 'user', 'content': 'You are given a ...",Option 2: Both firms breach the agreement.
4,"[{'role': 'user', 'content': 'You are given a ...",Option 1: Both firms collaborate each period
...,...,...
1795,"[{'role': 'user', 'content': 'You are given a ...","[[2, 2]]"
1796,"[{'role': 'user', 'content': 'You are given a ...",[]
1797,"[{'role': 'user', 'content': 'You are given a ...","[[1, 2]]"
1798,"[{'role': 'user', 'content': 'You are given a ...","[[1, 2]]"


In [None]:
# Print the prompt text of the row at index 1600.
print(combined_df['prompt'][1600][0]['content'])

You are given a 2-player normal-form game.

The game is represented using two separate matrices:
- The first matrix gives Player 0's payoffs.
- The second matrix gives Player 1's payoffs.

Both matrices have the same dimensions. Each corresponding cell (i, j) defines the outcome when Player 0 plays strategy i and Player 1 plays strategy j.
- Player 0's payoff for (i, j) is found in the first matrix.
- Player 1's payoff for (i, j) is found in the second matrix.

Your task is to find all pure strategy Nash equilibria (PSNE) in this game.
A pure strategy Nash equilibrium is a strategy pair [i, j] such that:
- Player 0 cannot improve their payoff by switching to a different row, if Player 1 sticks with column j.
- Player 1 cannot improve their payoff by switching to a different column, if Player 0 sticks with row i.

Output the list of all [i, j] pairs that form pure Nash equilibria.
Format your final answer strictly as \boxed{[[i, j], [k, l], ...]} with no additional text.
If there is no 

In [None]:
# Filter out rows with that sentence
filter_phrase = "Enclose your reasoning in <think> and </think>."


def _prompt_has_phrase(prompt, phrase):
    """Return True when `phrase` occurs in the prompt text.

    `prompt` is either the raw string read from the CSV or, after
    ast.literal_eval, a list of message dicts whose 'content' holds the text.
    """
    if isinstance(prompt, str):
        return phrase in prompt
    return any(phrase in str(message.get('content', '')) for message in prompt)


# `.str.contains` cannot be used here: the prompt cells are lists, for which it
# yields NaN, so with na=False no row ever matches and nothing is filtered out.
has_think_instruction = combined_df['prompt'].apply(
    lambda prompt: _prompt_has_phrase(prompt, filter_phrase)
)
df_filtered = combined_df[~has_think_instruction]
df_filtered  # rows whose prompt does NOT contain the think-token instruction

Unnamed: 0,prompt,answer
0,"[{'role': 'user', 'content': 'You are given a ...",Option 3: Firm A fulfills the servicing obliga...
1,"[{'role': 'user', 'content': 'You are given a ...",Option 1: Both firms undercut each other's pri...
2,"[{'role': 'user', 'content': 'You are given a ...",Option 4: Firm A competes and Firm B colludes ...
3,"[{'role': 'user', 'content': 'You are given a ...",Option 2: Both firms breach the agreement.
4,"[{'role': 'user', 'content': 'You are given a ...",Option 1: Both firms collaborate each period
...,...,...
1795,"[{'role': 'user', 'content': 'You are given a ...","[[2, 2]]"
1796,"[{'role': 'user', 'content': 'You are given a ...",[]
1797,"[{'role': 'user', 'content': 'You are given a ...","[[1, 2]]"
1798,"[{'role': 'user', 'content': 'You are given a ...","[[1, 2]]"


## Reasoning Dataset

In [None]:
!pip install datasets

from datasets import Dataset, DatasetDict

Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.1-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (

In [None]:
from huggingface_hub import login

# Authenticate against the Hugging Face Hub so datasets can be pushed later on.
# The token is read from the HF_TOKEN environment variable instead of being
# pasted into the notebook, so the credential is never saved with the file.
# When the variable is unset, `token=None` lets `login` prompt for the token
# interactively (per the huggingface_hub documentation).
login(token=os.environ.get("HF_TOKEN"))

### QwQ

In [None]:
# Read the QwQ results CSV from Drive and preview the resulting frame.
qwq_results_path = '/content/drive/MyDrive/CIS6200_Project/qwq32_filtered_results.csv'
qwq_data = pd.read_csv(qwq_results_path)
qwq_data

Unnamed: 0,original_question,gold_answer,model_output,extracted_reasoning,extracted_answer,model_correct
0,You are given a reasoning problem in Economics...,option 2: both firms breach the agreement.,<think>\nTo determine which outcome can be sus...,To determine which outcome can be sustained as...,Option 2: Both firms breach the agreement.,True
1,You are given a reasoning problem in Economics...,option 2: both firms agree to collude and main...,<think>\nTo determine which strategy is enforc...,To determine which strategy is enforceable as ...,Option 2: Both firms agree to collude and main...,True
2,You are given a reasoning problem in Economics...,option 2: both firms set high prices each period.,<think>\nTo determine which strategy can be su...,To determine which strategy can be sustained i...,Option 2: Both firms set high prices each period.,True
3,You are given a reasoning problem in Economics...,option 2: both companies breach the marketing ...,<think>\nTo determine which outcome can be enf...,To determine which outcome can be enforced as ...,Option 2: Both companies breach the marketing ...,True
4,You are given a reasoning problem in Economics...,option 1: both firms faithfully adhere to the ...,<think>\nTo determine which outcome is enforce...,To determine which outcome is enforceable as a...,Option 1: Both firms faithfully adhere to the ...,True
...,...,...,...,...,...,...
861,You are given a reasoning problem in Economics...,[],<think>\nTo find the Perfectly Transparent Equ...,To find the Perfectly Transparent Equilibrium ...,[],True
862,You are given a reasoning problem in Economics...,[],<think>\nTo find the Perfectly Transparent Equ...,To find the Perfectly Transparent Equilibrium ...,[],True
863,You are given a reasoning problem in Economics...,[],<think>\nTo find the Perfectly Transparent Equ...,To find the Perfectly Transparent Equilibrium ...,[],True
864,You are given a reasoning problem in Economics...,[],<think>\nTo find the Perfectly Transparent Equ...,To find the Perfectly Transparent Equilibrium ...,[],True


In [None]:
# Select every row whose `model_correct` flag is anything other than True.
is_not_correct = qwq_data['model_correct'].ne(True)
incorrect_rows = qwq_data.loc[is_not_correct]

# An empty preview means the loaded file holds no incorrect generations.
incorrect_rows


Unnamed: 0,original_question,gold_answer,model_output,extracted_reasoning,extracted_answer,model_correct


In [None]:
# Keep only the question and the raw model output, rename them to the
# prompt/response column names, then wrap the frame as a Hugging Face Dataset.
sft_column_names = {"original_question": "prompt", "model_output": "response"}
qwq_sft_frame = qwq_data[list(sft_column_names)].rename(columns=sft_column_names)
qwq_hf_dataset = Dataset.from_pandas(qwq_sft_frame)

In [None]:
# Sanity-check the first example: the prompt, a separator, then the response.
first_example = qwq_hf_dataset[0]
print(first_example['prompt'], '\n\nResponse:')
print(first_example['response'])

You are given a reasoning problem in Economics. Please reason step by step to solve it.

Enclose your reasoning in <think> and </think>. Then, give your final answer using the format \boxed{Option X: full choice text}.

You must include both the option number and the full text of your chosen answer.

Question:
Two firms are engaged in a long-term exclusive distribution agreement. Each year, if both adhere to the agreement, each firm gets a payoff of $49634.75. However, if one firm decides to distribute through an alternate channel (breaching the agreement) while the other adheres, the breaching firm gets $59193.64 while the non-breaching firm gets a loss of $56105.08. Conversely, if both firms decide to find alternate distribution channels and breach the agreement, each firm gets $71559.75. Assuming the discount factor of future payoffs is 0.49, which of these outcomes can be sustained as an enforceable Nash equilibrium in an infinitely repeated version of this game?

Options:
Option 1

In [None]:
# Hub repository that receives the QwQ prompt/response data.
push_to_hub_name = "MasterZhou/econ_reasoning_qwq"

# Wrap the dataset in a DatasetDict so the split name ("train") is explicit,
# and push that wrapper rather than the bare Dataset, so the split declared
# here is the one that ends up on the Hub and the dict is not left unused.
qwq_hf_dataset_dict = DatasetDict({"train": qwq_hf_dataset})
qwq_hf_dataset_dict.push_to_hub(push_to_hub_name)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/MasterZhou/econ_reasoning_qwq/commit/a005c86cf308f3bacd887c294ac700991f22c2f9', commit_message='Upload dataset', commit_description='', oid='a005c86cf308f3bacd887c294ac700991f22c2f9', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MasterZhou/econ_reasoning_qwq', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MasterZhou/econ_reasoning_qwq'), pr_revision=None, pr_num=None)

In [None]:
# Save a CSV copy of the prompt/response dataset to the Training_Dataset folder on Drive.
# Kept as the cell's last expression so the value returned by `to_csv` is displayed.
qwq_sft_csv_path = "/content/drive/MyDrive/CIS6200_Project/Training_Dataset/qwq_sft_data.csv"
qwq_hf_dataset.to_csv(qwq_sft_csv_path)

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

2046215

### R1

In [None]:
# Read the R1 results CSV from Drive and preview the resulting frame.
r1_results_path = '/content/drive/MyDrive/CIS6200_Project/R1_results.csv'
r1_data = pd.read_csv(r1_results_path)
r1_data

Unnamed: 0,prompt,answer,extracted_content,extracted_answer,model_correct,extracted_reasoning_content
0,"[{'role': 'user', 'content': 'You are given a ...",Option 3: Firm A fulfills the servicing obliga...,The correct answer is \boxed{Option 2: Both fi...,Option 2: Both firms A and B renege on their s...,False,
1,"[{'role': 'user', 'content': ""You are given a ...",Option 1: Both firms undercut each other's pri...,"In the given duopoly game, the payoffs and dis...",Option 2: Both firms collude to keep prices hi...,False,
2,"[{'role': 'user', 'content': ""You are given a ...",Option 4: Firm A competes and Firm B colludes ...,The problem involves a duopoly where firms can...,Option 2: Firms A and B collude to maintain hi...,False,
3,"[{'role': 'user', 'content': 'You are given a ...",Option 2: Both firms breach the agreement.,The problem involves determining which outcome...,Option 2: Both firms breach the agreement.,True,
4,"[{'role': 'user', 'content': 'You are given a ...",Option 1: Both firms collaborate each period,The problem involves two firms in a repeated g...,Option 2: Both firms compete against each othe...,False,
...,...,...,...,...,...,...
1795,"[{'role': 'user', 'content': ""You are given a ...","[[2, 2]]",The steps to compute the Perfectly Transparent...,[],False,"Okay, let's try to figure out the Perfectly Tr..."
1796,"[{'role': 'user', 'content': ""You are given a ...",[],The steps to compute the Perfectly Transparent...,[],True,"Okay, let's try to figure out the Perfectly Tr..."
1797,"[{'role': 'user', 'content': ""You are given a ...","[[1, 2]]",To determine the Perfectly Transparent Equilib...,[],False,"Okay, so I need to find the Perfectly Transpar..."
1798,"[{'role': 'user', 'content': ""You are given a ...","[[1, 2]]",To compute the Perfectly Transparent Equilibri...,"[1, 2]",False,"Okay, let's try to figure out the Perfectly Tr..."


In [None]:
# Keep only the R1 rows whose `model_correct` flag is True.
is_correct = r1_data['model_correct'].eq(True)
r1_correct_rows = r1_data.loc[is_correct]

# Display the correct entries
r1_correct_rows

Unnamed: 0,prompt,answer,extracted_content,extracted_answer,model_correct,extracted_reasoning_content
3,"[{'role': 'user', 'content': 'You are given a ...",Option 2: Both firms breach the agreement.,The problem involves determining which outcome...,Option 2: Both firms breach the agreement.,True,
10,"[{'role': 'user', 'content': 'You are given a ...",Option 2: Both firms agree to collude and main...,To determine which strategy is enforceable as ...,Option 2: Both firms agree to collude and main...,True,
12,"[{'role': 'user', 'content': 'You are given a ...",Option 2: Both firms set high prices each period.,The problem involves two firms in an infinitel...,Option 2: Both firms set high prices each period.,True,
13,"[{'role': 'user', 'content': 'You are given a ...",Option 1: Both firms engage in a price war eve...,The problem involves two firms in an oligopoly...,Option 1: Both firms engage in a price war eve...,True,
19,"[{'role': 'user', 'content': 'You are given a ...",Option 2: Both companies breach the marketing ...,The problem involves determining the Nash equi...,Option 2: Both companies breach the marketing ...,True,
...,...,...,...,...,...,...
1789,"[{'role': 'user', 'content': ""You are given a ...",[],To compute the Perfectly Transparent Equilibri...,[],True,"Okay, so I need to compute the Perfectly Trans..."
1790,"[{'role': 'user', 'content': ""You are given a ...",[],To compute the Perfectly Transparent Equilibri...,[],True,"Okay, so I need to find the Perfectly Transpar..."
1793,"[{'role': 'user', 'content': ""You are given a ...",[],To compute the Perfectly Transparent Equilibri...,[],True,"Okay, let's try to figure out the Perfectly Tr..."
1796,"[{'role': 'user', 'content': ""You are given a ...",[],The steps to compute the Perfectly Transparent...,[],True,"Okay, let's try to figure out the Perfectly Tr..."
