In [5]:
import numpy as np
import pandas as pd
from catsim.simulation import Simulator
from catsim.initialization import RandomInitializer
from catsim.selection import MaxInfoSelector
from catsim.estimation import NumericalSearchEstimator
from catsim.stopping import MaxItemStopper
from catsim.initialization import FixedPointInitializer


In [8]:
# Read the question set from disk, then preview its first row as the cell output.
questions_csv = 'questions.csv'
df = pd.read_csv(questions_csv)
df.head(1)

Unnamed: 0,qid,difficulty,problem,answer_option_list,answer_value,tag
0,3,4,Compute the average angular acceleration and t...,"[[{'aoVal': 'A', 'content': 'a = 0.1 rad/s\\te...",C,Number Theory Modules


In [10]:
# 1D IRT item bank: one row of (a, b, c) parameters per question in `df`.
num_items = len(df)

# Seeded generator so the sampled guessing parameters (and therefore the
# exported item bank) are identical on every Restart & Run All.
# Change ITEM_BANK_SEED to draw a different bank.
ITEM_BANK_SEED = 42
item_rng = np.random.default_rng(ITEM_BANK_SEED)

# 1D IRT parameters
# Alternative kept for reference: item_rng.uniform(0.8, 1.5, num_items)
discrimination = np.ones(num_items)                     # a: fixed at 1 for every item
difficulty = df['difficulty'].values                    # b: taken unchanged from the dataset
guessing = item_rng.uniform(0.1, 0.25, num_items)       # c: uniform draw from [0.1, 0.25)

# Combine into item bank: shape (num_items, 3)
item_bank = np.column_stack((discrimination, difficulty, guessing))
print(item_bank)
print(item_bank.shape)
print(item_bank.dtype)

# Create DataFrame and export.
# NOTE: the MIRT cell further down writes to the same 'item_bank.csv', so the
# file on disk reflects whichever of the two cells ran last.
item_bank_df = pd.DataFrame(item_bank, columns=['a', 'b', 'c'])
item_bank_df.to_csv('item_bank.csv', index=False)


[[1.         4.         0.16016302]
 [1.         4.         0.11088165]
 [1.         0.         0.20979867]
 ...
 [1.         1.         0.23691091]
 [1.         1.         0.20818561]
 [1.         1.         0.11956783]]
(4999, 3)
float64


In [None]:
## NEED THIS FOR MIRT 3PL
# Example for d=3: three discrimination columns, one difficulty, one guessing.
# NOTE(review): confirm that the downstream simulator accepts a 5-column bank;
# the 1D cell above builds a 3-column (a, b, c) bank for the same file name.
num_items = len(df)
raw_difficulty = df["difficulty"].values

# Seeded generator so the sampled parameters are reproducible across runs.
ITEM_BANK_SEED = 42
item_rng = np.random.default_rng(ITEM_BANK_SEED)

# Normalize difficulty: z-score scaled by 1.5, then clipped to [-3, 3].
# When every item has the same raw difficulty the standard deviation is 0 and
# the z-score would be 0/0 = NaN for every row; centre those items at 0 instead.
difficulty_spread = np.std(raw_difficulty)
if difficulty_spread > 0:
    difficulty = (raw_difficulty - np.mean(raw_difficulty)) / difficulty_spread * 1.5
else:
    difficulty = np.zeros(num_items)
difficulty = np.clip(difficulty, -3, 3)  # optional safety clipping

# Discrimination parameters for each dimension, uniform on [0.7, 1.7)
a1 = item_rng.uniform(0.7, 1.7, num_items)
a2 = item_rng.uniform(0.7, 1.7, num_items)
a3 = item_rng.uniform(0.7, 1.7, num_items)

# Guessing parameters, uniform on [0.1, 0.25)
c = item_rng.uniform(0.1, 0.25, num_items)

# Combine into item bank: shape (num_items, 5)
item_bank = np.column_stack((a1, a2, a3, difficulty, c))

# Save to CSV (overwrites the file written by the 1D cell, if that ran earlier)
item_bank_df = pd.DataFrame(item_bank, columns=["a1", "a2", "a3", "b", "c"])
item_bank_df.to_csv("item_bank.csv", index=False)

In [27]:
# Reload the exported item bank and keep only the raw NumPy matrix
# (one row per item, columns in the order they were written to the CSV).
item_bank_frame = pd.read_csv('item_bank.csv')
item_bank = item_bank_frame.to_numpy()
print(item_bank)


[[0.51438129 1.3434944  0.94718111 4.         0.        ]
 [0.94501337 0.9986944  0.63249093 4.         0.        ]
 [1.30444554 1.05481908 1.18618423 0.         0.        ]
 ...
 [1.19785468 0.71659217 0.90025445 1.         0.        ]
 [0.63193667 1.00272298 1.13928471 1.         0.        ]
 [0.59471026 0.82045303 1.15326771 1.         0.        ]]


In [18]:
# How many examinees to simulate (any positive count works here).
num_examinees = 1

# The four components handed to the simulator, in the order simulate() takes them.
initializer = FixedPointInitializer(-3)    # fixed starting point of -3
selector = MaxInfoSelector()
estimator = NumericalSearchEstimator()
stopper = MaxItemStopper(20)               # stop after 20 items per examinee

# Build the simulator over the current item bank and run the adaptive test.
sim = Simulator(item_bank, num_examinees)
sim.simulate(
    initializer,
    selector,
    estimator,
    stopper,
)





In [8]:
# Pull the results off the simulator object.
# NOTE: `theta` is the full `sim.estimations` container (indexed per examinee),
# not a single number.
theta = sim.estimations
administered_items = sim.administered_items   # administered item indices, per examinee

# Print the estimates and the first examinee's item sequence.
print(f"Estimated abilities: {theta}")
print(f"Administered items (first examinee): {administered_items[0]}")


Estimated abilities: [[-3, np.float64(-1.5), np.float64(1.3821228520880865e-07), np.float64(0.6931470658915463), np.float64(1.3926969588199478), np.float64(0.8262260747498168), np.float64(0.49999994677496673), np.float64(0.731274023201805), np.float64(1.0426006330594542), np.float64(0.7948754339968767), np.float64(1.033227000325175), np.float64(0.8344001954708293), np.float64(1.0272416213129696), np.float64(0.8612499833887143), np.float64(0.721510635184379), np.float64(0.6008263490635081), np.float64(0.49459410053967573), np.float64(0.5890631909540723), np.float64(0.7295743622608765), np.float64(0.6339802031949896), np.float64(0.7576863398188852)]]
Administered items (first examinee): [np.int64(649), np.int64(659), np.int64(682), np.int64(2786), np.int64(4982), np.int64(4582), np.int64(677), np.int64(4583), np.int64(4983), np.int64(4585), np.int64(4985), np.int64(4586), np.int64(4986), np.int64(2812), np.int64(2814), np.int64(2815), np.int64(698), np.int64(2785), np.int64(4581), np.int

In [None]:
# Recommend the next k items for the current ability estimate.
#
# Assume you have:
# - item_bank: your item parameter matrix
# - df: your DataFrame of questions
# - recommended_indices: list of indices already administered/recommended
# - theta: latest estimated theta (a scalar, or a history of estimates)
# - selector: your MaxInfoSelector instance
#
# NOTE(review): no cell visible in this notebook assigns `recommended_indices`;
# it must be defined before this cell runs or the cell raises NameError.

k = 5  # Number of next recommendations you want

# An earlier cell binds `theta` to `sim.estimations`, a nested list with one
# list of estimates per examinee, while `est_theta` needs a single number.
# Reduce whatever `theta` currently holds to one float: the last estimate of
# the first row. A plain scalar passes through unchanged.
current_theta = float(np.atleast_2d(np.asarray(theta, dtype=float))[0, -1])

admin_items = set(recommended_indices)  # Items already administered
next_recommended_indices = []

for _ in range(k):
    next_item = selector.select(
        items=item_bank,
        administered_items=list(admin_items),
        est_theta=current_theta,
    )
    if next_item is None:
        break  # No more items to recommend
    next_recommended_indices.append(next_item)
    admin_items.add(next_item)  # never recommend the same item twice

# Report how many items were actually found; the loop may stop before k.
print(f"Next {len(next_recommended_indices)} recommended item indices:", next_recommended_indices)

# To get the actual questions (positional lookup into df):
next_recommended_questions_df = df.iloc[next_recommended_indices]
next_recommended_questions_df.head()


In [9]:
from catsim.estimation import NumericalSearchEstimator

# Replay the recommended items as if each one were answered correctly and
# re-estimate theta after every response.
#
# NOTE(review): no cell visible in this notebook assigns `recommended_indices`;
# it must be defined before this cell runs or the loop raises NameError.

# Initial theta (e.g., for a newbie)
# theta = -3.0

# The results cell binds `theta` to `sim.estimations`, a nested list with one
# list of estimates per examinee, while the estimator's `est_theta` needs a
# single number. Reduce whatever `theta` currently holds to one float: the
# last estimate of the first row. A plain scalar passes through unchanged.
theta = float(np.atleast_2d(np.asarray(theta, dtype=float))[0, -1])

# Store thetas after each response for reference
theta_history = [theta]

# NOTE: this rebinds `administered_items` (previously the simulator's
# per-examinee lists) to positions within the subset of items replayed here.
administered_items = []
responses = []

estimator = NumericalSearchEstimator()

for i in range(len(recommended_indices)):
    administered_items.append(i)  # index within the subset
    responses.append(1)           # correct response

    # Parameters of every item administered so far, in administration order
    items_so_far = item_bank[recommended_indices[:i+1]]

    # Estimate theta using responses so far
    theta = estimator.estimate(
        items=items_so_far,
        administered_items=list(administered_items),
        response_vector=responses,
        est_theta=theta  # use previous theta as starting point
    )
    theta_history.append(theta)
    print(f"Theta after item {i+1} (all correct so far): {theta}")

print("Theta history:", theta_history)


NameError: name 'recommended_indices' is not defined

In [None]:
# Recommend the next k items for the current ability estimate.
# This cell repeats the recommendation logic that appears earlier in the
# notebook; consider consolidating the two into one function.
#
# Assume you have:
# - item_bank: your item parameter matrix
# - df: your DataFrame of questions
# - recommended_indices: list of indices already administered/recommended
# - theta: latest estimated theta (a scalar, or a history of estimates)
# - selector: your MaxInfoSelector instance
#
# NOTE(review): no cell visible in this notebook assigns `recommended_indices`;
# it must be defined before this cell runs or the cell raises NameError.

k = 5  # Number of next recommendations you want

# `theta` may still be the nested `sim.estimations` list bound by the results
# cell, while `est_theta` needs a single number. Take the last estimate of the
# first row; a plain scalar passes through unchanged.
latest_theta = float(np.atleast_2d(np.asarray(theta, dtype=float))[0, -1])

admin_items = set(recommended_indices)  # Items already administered
next_recommended_indices = []

for _ in range(k):
    next_item = selector.select(
        items=item_bank,
        administered_items=list(admin_items),
        est_theta=latest_theta,
    )
    if next_item is None:
        break  # No more items to recommend
    next_recommended_indices.append(next_item)
    admin_items.add(next_item)  # never recommend the same item twice

# Report how many items were actually found; the loop may stop before k.
print(f"Next {len(next_recommended_indices)} recommended item indices:", next_recommended_indices)

# To get the actual questions (positional lookup into df):
next_recommended_questions_df = df.iloc[next_recommended_indices]
next_recommended_questions_df.head()


Next 5 recommended item indices: [np.int64(649), np.int64(659), np.int64(747), np.int64(746), np.int64(768)]


  warn(


Unnamed: 0,qid,difficulty,problem,answer_option_list,answer_value,tag
649,2069,0,What is the missing sign in the box? $8\squar...,"[[{'aoVal': 'A', 'content': '$$+$$ '}], [{'aoV...",C,Combinations
659,2136,0,Which of the following expressions is easier t...,"[[{'aoVal': 'A', 'content': '$$15+7-5$$ '}], [...",B,Counting Modules
747,2523,0,There are $$4$$ different colours of bags avai...,"[[{'aoVal': 'A', 'content': '$$3$$ '}], [{'aoV...",B,Enumeration
746,2518,0,"Sort the following names: Tom, Bob, Alice, Liz...","[[{'aoVal': 'A', 'content': 'Bob, Tom, Alice, ...",D,Combinations
768,2609,0,$6$ can be made in $2$ different ways: $1\time...,"[[{'aoVal': 'A', 'content': '$$2$$ '}], [{'aoV...",C,Calculation Modules
