In [47]:
import os
import json
import ast
from typing import Any, Dict

def extract_curriculum_attributes(file_path: str) -> Dict[str, Any]:
    """
    Extracts curriculum attributes from a Python file that defines a curriculum class.
    
    Args:
        file_path (str): Path to the Python file to extract attributes from.

    Returns:
        dict: A dictionary representing the curriculum's configuration and attributes.
    """
    with open(file_path, "r") as file:
        content = file.read()

    # Parse the content of the file to find class definitions and attribute definitions
    tree = ast.parse(content)

    # A dictionary to store curriculum information
    curriculum_info = {}

    for node in ast.walk(tree):
        # Look for the curriculum class definition
        if isinstance(node, ast.ClassDef) and 'Curriculum' in node.name:
            curriculum_name = node.name
            print(curriculum_name)
            curriculum_info["Curriculum"] = curriculum_name
            curriculum_info["Attributes"] = {}
            # Look for RangeAttributeDefinition or similar classes to extract attributes
            for stmt in node.body:
                # print("value", stmt.value)
                if isinstance(stmt, ast.FunctionDef) and stmt.name == '__init__':
                    # Look for the _define_attributes call within the constructor
                    for inner_stmt in stmt.body:
                        if isinstance(inner_stmt, ast.Expr) and isinstance(inner_stmt.value, ast.Call):
                            call_node = inner_stmt.value
                            # Check if the function call is _define_attributes
                            if hasattr(call_node.func, 'attr') and call_node.func.attr == '_define_attributes':
                                # Extract the arguments passed to _define_attributes
                                for keyword in call_node.args:
                                    if isinstance(keyword, ast.Call):
                                        if hasattr(keyword.func, 'id') and keyword.func.id in ['RangeAttributeDefinition', 'ScalarAttributeDefinition']:
                                            attr_name = None
                                            levels = []
                                            for kw in keyword.keywords:
                                                if kw.arg == "name":
                                                    attr_name = kw.value.s
                                                if kw.arg == "levels":
                                                    levels = extract_levels(kw.value) 
                                                    
                                            if attr_name:
                                                curriculum_info["Attributes"][attr_name] = {
                                                    "levels": levels
                                                }

    return curriculum_info

def extract_levels(node) :
    levels = []
    """Extract levels from a list, handling negative numbers and nested lists."""
    if isinstance(node, ast.List):
        # for el in node.elts:
        #     if isinstance(el, ast.Num):
        #         levels.append(el.n)
        #     elif isinstance(el, ast.UnaryOp):
        #         levels.append(el.operand.n)  # Handle negation
        for el in node.elts:
            if isinstance(el, ast.Num):  # Normal numbers
                levels.append(el.n)
            elif isinstance(el, ast.UnaryOp) and isinstance(el.op, ast.USub):  # Negative numbers
                levels.append(-el.operand.n)
            elif isinstance(el, ast.List):  # Nested lists (for operations_weights)
                levels.append(extract_levels(el))
    elif isinstance(node, ast.Call):  
        # Handle list(range(...))
        if isinstance(node.func, ast.Name) and node.func.id == "list":
            range_call = node.args[0]  # Extract range(...) inside list(...)
            if isinstance(range_call, ast.Call) and isinstance(range_call.func, ast.Name) and range_call.func.id == "range":
                range_args = [arg.n for arg in range_call.args if isinstance(arg, ast.Num)]
                if len(range_args) == 2:  # range(start, stop)
                    levels = list(range(range_args[0], range_args[1]))
                elif len(range_args) == 1:  # range(stop) (default start=0)
                    levels = list(range(range_args[0]))
    else:
        print(f"Unexpected type in levels: {type(node)}")

    return levels


def extract_curriculums_from_folder(root_folder: str) -> Dict[str, Any]:
    """
    Walks through the directory and extracts curriculum info from all Python files.

    Args:
        root_folder (str): Path to the reasoning-gym folder containing category folders.

    Returns:
        dict: A dictionary of curriculum names and their extracted configurations.
    """
    curriculums = {}

    # Walk through the folder and find all .py files
    for subdir, _, files in os.walk(root_folder):
        for file in files:
            if file.endswith(".py"):
                file_path = os.path.join(subdir, file)
                curriculum_info = extract_curriculum_attributes(file_path)                
                if curriculum_info:
                    # Categorize by folder (assumed to be the category name)
                    category_name = os.path.basename(subdir)
                    task_name = file.replace(".py", "")
                    
                    if category_name not in curriculums:
                        curriculums[category_name] = {}

                    curriculums[category_name][task_name] = {
                        "Attributes": curriculum_info["Attributes"]
                    }

    return curriculums

def save_to_json(data: Dict[str, Any], output_file: str) -> None:
    """
    Save the extracted curriculum data to a JSON file.
    
    Args:
        data (dict): The curriculum data.
        output_file (str): The file path where the data will be saved.
    """
    with open(output_file, "w") as json_file:
        json.dump(data, json_file, indent=4, separators=(",", ": "))



In [48]:
# Root folder where the reasoning-gym data is located
root_folder = "/users/camillechallier/code/reasoning-gym/reasoning_gym"

# Extract all curriculums from the folder
curriculums_data = extract_curriculums_from_folder(root_folder)

# Save the extracted data to a JSON file
save_to_json(curriculums_data, "curriculums_data.json")
print("Curriculum data has been saved to curriculums_data.json.")

ComplexArithmeticCurriculum
IntermediateIntegrationCurriculum
PolynomialEquationsCurriculum
PolynomialMultiplicationCurriculum
SimpleEquationsCurriculum
SimpleIntegrationCurriculum
ABCurriculum
BaseConversionCurriculum
BinaryAlternationCurriculum
BinaryMatrixCurriculum
CaesarCipherCurriculum
CountPrimesCurriculum
CryptarithmCurriculum
GameOfLifeCurriculum
GameOfLifeHaltingCurriculum
GraphColorCurriculum
GroupAnagramsCurriculum
IsomorphicStringsCurriculum
JugsCurriculum
LetterCountingCurriculum
LetterJumbleCurriculum
ManipulateMatrixCurriculum
NumberFilteringCurriculum
NumberSortingCurriculum
PalindromeCurriculum
PalindromePartitioningCurriculum
PoolMatrixCurriculum
RansomNoteCurriculum
RotateMatrixCurriculum
RottenOrangesCurriculum
SentenceReorderingCurriculum
SpellBackwardCurriculum
SpiralMatrixCurriculum
StringInsertionCurriculum
StringManipulationCurriculum
StringSplittingCurriculum
StringSynthesisCurriculum
WordLadderCurriculum
WordSequenceReversalCurriculum
WordSortingCurriculum
R

  attr_name = kw.value.s
  if isinstance(el, ast.Num):  # Normal numbers
  levels.append(-el.operand.n)
  levels.append(el.n)


BasicArithmeticCurriculum
BitwiseArithmeticCurriculum
CalendarArithmeticCurriculum
ChainSumCurriculum
CountBitsCurriculum
DecimalArithmeticCurriculum
DecimalChainSumCurriculum
DiceCurriculum
FractionSimplificationCurriculum
GCDCurriculum
LCMCurriculum
LegCountingCurriculum
NumberFormatCurriculum
PowerFunctionCurriculum
PrimeFactorizationCurriculum
ProductsCurriculum
TimeIntervalsCurriculum
CurriculumContext
DefaultCurriculumContext
BaseCurriculum
CurriculumAttributeConfig
CurriculumExperimentConfig
CurriculumExperiment
BFCurriculum
ColorCubeRotationCurriculum
ModuloGridCurriculum
NeedleHaystackCurriculum
NumberSequenceCurriculum


  range_args = [arg.n for arg in range_call.args if isinstance(arg, ast.Num)]
  range_args = [arg.n for arg in range_call.args if isinstance(arg, ast.Num)]


RectangleCountCurriculum
RubiksCubeCurriculum
BoxnetCurriculum
EmojiMysteryCurriculum
FutoshikiCurriculum
MahjongPuzzleCurriculum
MazeCurriculum
MiniSudokuCurriculum
NQueensCurriculum
RushHourCurriculum
SokobanCurriculum
SudokuCurriculum
HanoiCurriculum
TsumegoCurriculum
AdvancedGeometryCurriculum
SimpleGeometryCurriculum
CourseScheduleCurriculum
FamilyRelationshipsCurriculum
LargestIslandCurriculum
QuantumLockCurriculum
ShortestPathCurriculum
AliceInWonderlandCurriculum
CircuitLogicCurriculum
PropositionalLogicCurriculum
SelfReferenceCurriculum
ZebraCurriculum
Curriculum data has been saved to curriculums_data.json.


In [36]:
curriculums_data['algebra']['complex_arithmetic']

{'Attributes': {'min_real': {'levels': [-10, -100, -10000, -100000000]},
  'max_real': {'levels': [10, 100, 10000, 100000000]},
  'min_imag': {'levels': [-10, -100, -10000, -100000000]},
  'max_imag': {'levels': [10, 100, 10000, 100000000]},
  'operations_weights': {'levels': [[0.4, 0.4, 0.1, 0.1],
    [0.25, 0.25, 0.25, 0.25],
    [0.2, 0.2, 0.3, 0.3],
    [0.1, 0.1, 0.4, 0.4]]}}}