In [5]:
import numpy as np
import json

# Step 1: Initialize Parameters
num_students = 2000
num_exercises = 50
num_concepts = 5
guess_probability = 0.25
learning_rate = 0.1  # Example learning rate for skill update

# Step 2: Generate Latent Knowledge States
student_skills = np.random.randn(num_students, num_concepts)  # Normally distributed skills

# Step 3: Generate Exercise Parameters
exercise_concepts = np.random.randint(0, num_concepts, num_exercises) # Randomly assign concepts to exercises
exercise_difficulties = np.random.randn(num_exercises)  # Normally distributed difficulties

# Step 4: Simulate Student Responses and Update Skills in Real-Time
def probability_correct(alpha, beta, c=guess_probability):
    return c + (1 - c) / (1 + np.exp(-alpha + beta))

responses = np.zeros((num_students, num_exercises))

data_list = []

for student in range(num_students):
    for exercise in range(num_exercises):
        concept = exercise_concepts[exercise]
        difficulty = exercise_difficulties[exercise]
        skill = student_skills[student, concept]
        prob_correct = probability_correct(skill, difficulty)
        correct = np.random.binomial(1, prob_correct)
        responses[student, exercise] = correct

        # Create a dictionary entry for each response
        entry = {
            'student_id': int(student),
            'concept': int(concept),
            'exercise_id': int(exercise),
            'difficulty': float(difficulty),
            'skill': float(skill),
            'response': int(correct)  # Ensure it's an int for JSON serialization
        }
        data_list.append(entry)

        # Update skill based on the response
        if correct:
            student_skills[student, concept] += learning_rate
        else:
            student_skills[student, concept] -= learning_rate

# Save the data list as a JSON file
with open('custom_dataset/data_list.json', 'w') as json_file:
    json.dump(data_list, json_file)

print("Data saved successfully as JSON.")


Data saved successfully as JSON.


In [4]:
data_list

[{'student_id': 0,
  'concept': 3.0,
  'exercise_id': 0,
  'difficulty': 0.8196367556597587,
  'skill': -1.0942014362698367,
  'response': 0},
 {'student_id': 0,
  'concept': 0.0,
  'exercise_id': 1,
  'difficulty': 0.8913040281589579,
  'skill': -0.17282499745742386,
  'response': 0},
 {'student_id': 0,
  'concept': 0.0,
  'exercise_id': 2,
  'difficulty': -0.5531372419066192,
  'skill': -0.27282499745742383,
  'response': 1},
 {'student_id': 0,
  'concept': 1.0,
  'exercise_id': 3,
  'difficulty': 0.26159009433817343,
  'skill': -0.02301228244654323,
  'response': 1},
 {'student_id': 0,
  'concept': 1.0,
  'exercise_id': 4,
  'difficulty': 1.3116985069874925,
  'skill': 0.07698771755345678,
  'response': 1},
 {'student_id': 0,
  'concept': 1.0,
  'exercise_id': 5,
  'difficulty': -1.3698887412990906,
  'skill': 0.17698771755345677,
  'response': 0},
 {'student_id': 0,
  'concept': 0.0,
  'exercise_id': 6,
  'difficulty': -0.679139310129589,
  'skill': -0.17282499745742383,
  'respons

In [2]:
loaded_data_dict = np.load('custom_dataset/data_dict.npy', allow_pickle=True).item()

loaded_student_skills = loaded_data_dict['student_skills']
loaded_exercise_concepts = loaded_data_dict['exercise_concepts']
loaded_exercise_difficulties = loaded_data_dict['exercise_difficulties']
loaded_responses = loaded_data_dict['responses']

print("Data loaded successfully.")

Data loaded successfully.


In [3]:
loaded_student_skills

array([[ 2.59921859, -0.66654211,  0.21643347, -0.0415731 ,  0.33964991],
       [ 0.26313948,  1.42022756, -0.41446611, -1.42026068,  1.76742657]])