# Implementing our own CFG for code generation

In [20]:
from anytree import Node, RenderTree
import random
from io import StringIO
from contextlib import redirect_stdout
import time

In [21]:
# Context-free grammar used by generate_code() to produce small Python programs.
#
# Each key is a non-terminal; its value is a list of alternative productions,
# one of which is picked with random.choice(). A production is a string of
# symbols separated by single spaces. Any symbol that is not a key of this
# dict is emitted literally, with three exceptions:
#   * "SPACE" is kept as a placeholder and replaced by " " in generate_program();
#   * "EXPRESSION_IDENTIFIER" and "DISPLAY_IDENTIFIER" are resolved by
#     generate_code() from the variables the program has defined so far.
cfg_rules = {
    # Variables and digits
    "VARIABLE": ["a", "b", "c", "d", "e",
#                  "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"
                ],
    "DIGIT": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
    
    # Operators
    "ARITHMETIC_OPERATOR": ["+", "-", "*", "/"],
    "RELATIONAL_OPERATOR": ["<", ">", "<=", ">=", "!=", "=="],
    "LOGICAL_OPERATOR_INFIX": ["and", "or"],
    "LOGICAL_OPERATOR_PREFIX": ["not"],
    "LOGICAL_OPERATOR": ["LOGICAL_OPERATOR_INFIX", "LOGICAL_OPERATOR_PREFIX"],
    "OPERATOR": ["ARITHMETIC_OPERATOR"], 
 
    # Formatting
    "NEW_LINE": ["\n"],
    "TAB_INDENT": ["\t"],
    "BRACKET_OPEN": ['('],
    "BRACKET_CLOSE": [')'],
    "EQUALS": ["="],
    "COLON": [":"],
    "COMMA": [","],

    
    # Keywords
    "IF": ["if"],
    "ELIF": ["elif"],
    "ELSE": ["else"],
    "FOR": ["for"],
    "IN": ["in"],
    "RANGE": ["range"],
    "WHILE": ["while"],
    "PRINT": ["print"],

    # Terms and expressions
    # EXPRESSION_IDENTIFIER has no entry here: generate_code() substitutes an
    # already-initialized variable (or a digit when none exists yet).
    "TERM": ["EXPRESSION_IDENTIFIER", "DIGIT"],
    "EXPRESSION": ["TERM SPACE OPERATOR SPACE TERM"],
    "ENCLOSED_EXPRESSION": ["BRACKET_OPEN EXPRESSION BRACKET_CLOSE"],
    "DISPLAY_EXPRESSION": ["EXPRESSION_IDENTIFIER SPACE OPERATOR SPACE EXPRESSION_IDENTIFIER" ,
                            "EXPRESSION_IDENTIFIER SPACE OPERATOR SPACE DIGIT"],
    
    # Initializations and assignments
    # The left-recursive alternative is limited by init_count/max_init in
    # generate_code().
    "IDENTIFIER_INITIALIZATION": ["IDENTIFIER_INITIALIZATION INITIALIZATION", 
                                  "INITIALIZATION"],

    "INITIALIZATION": ["VARIABLE SPACE EQUALS SPACE DIGIT NEW_LINE"],
    
    # The "" alternative expands to nothing, i.e. the assignment is optional.
    "SIMPLE_ASSIGNMENTS": ["VARIABLE SPACE EQUALS SPACE EXPRESSION NEW_LINE" , ""],
    "ADVANCED_ASSIGNMENTS": ["VARIABLE SPACE EQUALS SPACE SIMPLE_ARITHMETIC_EVALUATION NEW_LINE", 
                             "VARIABLE SPACE EQUALS SPACE EXPRESSION NEW_LINE" , 
                             ""],
    
    "SIMPLE_ARITHMETIC_EVALUATION": ["SIMPLE_ARITHMETIC_EVALUATION ARITHMETIC_OPERATOR ENCLOSED_EXPRESSION", 
                                     "ENCLOSED_EXPRESSION",
                                    ], 

    # Conditions
    "SIMPLE_IF_STATEMENT": ["IF SPACE CONDITION SPACE COLON NEW_LINE"],
    "ADVANCED_IF_STATEMENT": ["IF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
    "SIMPLE_ELIF_STATEMENT": ["ELIF SPACE CONDITION SPACE COLON NEW_LINE"],
    "ADVANCED_ELIF_STATEMENT": ["ELIF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
    "ELSE_STATEMENT": ["ELSE SPACE COLON NEW_LINE"],
    
    "CHAIN_CONDITION": ["CHAIN_CONDITION SPACE LOGICAL_OPERATOR_INFIX SPACE ENCLOSED_CONDITION", 
                        "LOGICAL_OPERATOR_PREFIX SPACE ENCLOSED_CONDITION", 
                        "ENCLOSED_CONDITION"],
    "ENCLOSED_CONDITION": ["BRACKET_OPEN CONDITION BRACKET_CLOSE"],
    "CONDITION": ["OPTIONAL_NOT CONDITION_EXPRESSION", "CONDITION_EXPRESSION"],
    "CONDITION_EXPRESSION": ["EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE EXPRESSION_IDENTIFIER", 
                             "EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE DIGIT"],
    "OPTIONAL_NOT": ["LOGICAL_OPERATOR_PREFIX SPACE", "SPACE"], 

    # Loops
    "FOR_HEADER": ["FOR SPACE EXPRESSION_IDENTIFIER SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL COMMA SPACE STEP BRACKET_CLOSE SPACE COLON", 
                   "FOR SPACE EXPRESSION_IDENTIFIER SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL BRACKET_CLOSE SPACE COLON"],
    "INITIAL": ["DIGIT"],
    # generate_code() evaluates the expanded FINAL string with eval() and emits
    # the resulting number.
    # NOTE(review): the STEP and INITIAL inside FINAL are expanded independently
    # of the INITIAL/STEP that appear in FOR_HEADER, so they may differ from them.
    "FINAL": ["STEP * EXECUTION_COUNT + INITIAL - 1"],
    "STEP": ["1", "2", "3"],
    "EXECUTION_COUNT": [ "2", "3"],
    "FOR_LOOP": ["FOR_HEADER NEW_LINE TAB_INDENT DISPLAY"],
    "ADVANCED_FOR_LOOP": ["FOR_LOOP",
                          "FOR_HEADER NEW_LINE TAB_INDENT ADVANCED_DISPLAY"],
    
    
    # Displaying 
    # DISPLAY_IDENTIFIER has no entry here: generate_code() resolves it to a
    # variable name.
    "DISPLAY" : ["PRINT BRACKET_OPEN DISPLAY_IDENTIFIER BRACKET_CLOSE"],
    "ADVANCED_DISPLAY" : ["DISPLAY",
                          "PRINT BRACKET_OPEN DISPLAY_EXPRESSION BRACKET_CLOSE"],

    # Start symbols: generate_program(level) expands "LEVEL" + level.
    "LEVEL1.1": ["IDENTIFIER_INITIALIZATION SIMPLE_ASSIGNMENTS ADVANCED_DISPLAY"],
    "LEVEL1.2": ["IDENTIFIER_INITIALIZATION ADVANCED_ASSIGNMENTS ADVANCED_DISPLAY"],
    "LEVEL2.1": ["IDENTIFIER_INITIALIZATION SIMPLE_IF_STATEMENT TAB_INDENT DISPLAY", 
                "IDENTIFIER_INITIALIZATION SIMPLE_IF_STATEMENT TAB_INDENT DISPLAY NEW_LINE SIMPLE_ELIF_STATEMENT TAB_INDENT DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT DISPLAY", 
                "IDENTIFIER_INITIALIZATION SIMPLE_IF_STATEMENT TAB_INDENT DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT DISPLAY"],
    "LEVEL2.2": ["IDENTIFIER_INITIALIZATION ADVANCED_ASSIGNMENTS ADVANCED_IF_STATEMENT TAB_INDENT ADVANCED_DISPLAY", 
                "IDENTIFIER_INITIALIZATION ADVANCED_ASSIGNMENTS ADVANCED_IF_STATEMENT TAB_INDENT ADVANCED_DISPLAY NEW_LINE ADVANCED_ELIF_STATEMENT TAB_INDENT ADVANCED_DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT ADVANCED_DISPLAY", 
                "IDENTIFIER_INITIALIZATION ADVANCED_ASSIGNMENTS ADVANCED_IF_STATEMENT TAB_INDENT ADVANCED_DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT ADVANCED_DISPLAY"],
    "LEVEL3.1": ["IDENTIFIER_INITIALIZATION FOR_LOOP"],
    "LEVEL3.2": ["IDENTIFIER_INITIALIZATION ADVANCED_ASSIGNMENTS ADVANCED_FOR_LOOP"],

 
}


def generate_code(symbol, assigned_identifiers, last_variable,  parent=None):
    """Recursively expand ``symbol`` with ``cfg_rules`` and return the generated text.

    A tree node named after ``symbol`` is attached to ``parent`` for every call,
    so the derivation can be rendered afterwards.

    Args:
        symbol: Grammar symbol to expand. Symbols that are neither a key of
            ``cfg_rules`` nor one of the two identifier placeholders are
            returned unchanged (this includes the ``SPACE`` placeholder).
        assigned_identifiers: Set of variable names initialized so far. It is
            extended in place whenever an ``INITIALIZATION`` is expanded.
        last_variable: Set receiving the target variable of a non-empty
            ``SIMPLE_ASSIGNMENTS`` / ``ADVANCED_ASSIGNMENTS`` expansion.
        parent: Tree node the new node is attached to (``None`` for a root).

    Returns:
        The text generated for ``symbol``.

    Raises:
        IndexError: If ``DISPLAY_IDENTIFIER`` is expanded while both
            ``last_variable`` and ``assigned_identifiers`` are empty.

    Uses the module-level counters ``init_count`` and ``max_init`` to bound the
    number of recursive ``IDENTIFIER_INITIALIZATION`` expansions.
    """
    global init_count
    global max_init
    node = Node(symbol, parent=parent)

    if symbol in cfg_rules:
        if symbol == "IDENTIFIER_INITIALIZATION" and init_count >= max_init:
            # Budget exhausted: force the non-recursive production. Expanding it
            # through the normal child path (instead of renaming ``symbol``)
            # keeps the INITIALIZATION level visible in the derivation tree and
            # produces the same text.
            rule = "INITIALIZATION"
        else:
            if symbol == "IDENTIFIER_INITIALIZATION":
                init_count += 1
            rule = random.choice(cfg_rules[symbol])

        generated_symbols = [
            generate_code(part, assigned_identifiers, last_variable, node)
            for part in rule.split(" ")
        ]
        text = ''.join(generated_symbols)

        if symbol == "FINAL":
            # The expansion is an arithmetic expression built only from grammar
            # literals (digits and operators); emit its numeric value instead.
            return str(eval(text))

        if symbol == "INITIALIZATION":
            # First generated symbol is the variable on the left-hand side.
            assigned_identifiers.add(generated_symbols[0])
        elif symbol in ("SIMPLE_ASSIGNMENTS", "ADVANCED_ASSIGNMENTS"):
            # The empty production yields "" here; only record real assignments.
            if generated_symbols[0]:
                last_variable.add(generated_symbols[0])

        return text

    if symbol == "EXPRESSION_IDENTIFIER":
        # Use an initialized variable when there is one, otherwise fall back to
        # a literal digit.
        if assigned_identifiers:
            return random.choice(tuple(assigned_identifiers))
        return random.choice(cfg_rules["DIGIT"])

    if symbol == "DISPLAY_IDENTIFIER":
        # Prefer the variable targeted by the assignment; otherwise display a
        # random initialized variable.
        if last_variable:
            return next(iter(last_variable))
        return random.choice(tuple(assigned_identifiers))

    # Terminal / placeholder symbol: emitted as-is.
    return symbol

def print_tree(root):
    """Print the tree under ``root``, one node name per line with its branch prefix."""
    for prefix, _fill, item in RenderTree(root):
        print(prefix, item.name, sep="")

def generate_program(level):
    """Generate one random program for the given difficulty ``level``.

    Resets the module-level ``init_count`` / ``max_init`` counters, expands the
    start symbol ``"LEVEL" + level`` and returns ``(root, program)`` where
    ``root`` is the derivation tree root and ``program`` is the generated source
    with every ``SPACE`` placeholder replaced by a blank.
    """
    global init_count
    global max_init

    # Upper bound on recursive initializations per level; other levels use 5.
    init_caps = {"1.1": 1, "1.2": 3, "3.1": 2, "3.2": 4}
    init_count = 0
    max_init = init_caps.get(level, 5)

    identifiers = set()
    assignment_targets = set()
    tree_root = Node("ROOT")
    source = generate_code("LEVEL" + level, identifiers, assignment_targets, tree_root)

    return tree_root, source.replace("SPACE", " ")

In [34]:
# Demo: generate one program for a level, run it, and show code, output and tree.
level = "3.2"
# generate_program() resets both counters itself; they are defined here so the
# globals exist before the first call.
init_count = 0
max_init = 1

root, program = generate_program(level)
code = program + "\n# output"

# Run the generated program in a throwaway namespace so its variables do not
# leak into (or overwrite) the notebook's own globals.
SIO = StringIO()
with redirect_stdout(SIO):
    exec(code, {})
output= SIO.getvalue().strip() 

# Prefix every captured output line with "# " (an empty output becomes "# ").
output = '\n'.join([f'# {line}' if line else f'# ' for line in output.split('\n')])

print(f"""{code}\n{output}""" )
print()
print_tree(root)

c = 8
d = 2
d = 8
e = 9
for d in range(4, 6) :
	print(c)
# output
# 8
# 8

ROOT
└── LEVEL3.2
    ├── IDENTIFIER_INITIALIZATION
    │   ├── IDENTIFIER_INITIALIZATION
    │   │   ├── IDENTIFIER_INITIALIZATION
    │   │   │   ├── IDENTIFIER_INITIALIZATION
    │   │   │   │   └── INITIALIZATION
    │   │   │   │       ├── VARIABLE
    │   │   │   │       │   └── c
    │   │   │   │       ├── SPACE
    │   │   │   │       ├── EQUALS
    │   │   │   │       │   └── =
    │   │   │   │       ├── SPACE
    │   │   │   │       ├── DIGIT
    │   │   │   │       │   └── 8
    │   │   │   │       └── NEW_LINE
    │   │   │   │           └── 

    │   │   │   └── INITIALIZATION
    │   │   │       ├── VARIABLE
    │   │   │       │   └── d
    │   │   │       ├── SPACE
    │   │   │       ├── EQUALS
    │   │   │       │   └── =
    │   │   │       ├── SPACE
    │   │   │       ├── DIGIT
    │   │   │       │   └── 2
    │   │   │       └── NEW_LINE
    │   │   │           └── 

    │   │   └── INI

---

## Generation

In [23]:
def generate_and_write_programs(num_programs, level , filename='data.txt'):
    """Generate programs for ``level``, run them, and write code plus output to a file.

    Each successfully executed program is written as its source, a
    ``# output`` marker line and its captured stdout (every line prefixed with
    ``# ``), followed by a blank line. Programs whose generation or execution
    raises (for example a division by zero in the generated code) are skipped,
    so the file may contain fewer than ``num_programs`` entries.

    Args:
        num_programs: Number of generation attempts.
        level: Level string passed to ``generate_program``.
        filename: Path of the output file; it is overwritten.

    Prints the elapsed time and, when any attempt was skipped, how many.
    """
    start_time = time.time()  # Start time
    skipped = 0

    with open(filename, 'w') as file:
        for _ in range(num_programs):
            captured = StringIO()
            try:
                _, program = generate_program(level)
                code = program + "\n# output"
                # Fresh namespace per program: nothing leaks between programs
                # or into this function's locals.
                with redirect_stdout(captured):
                    exec(code, {})
            except Exception:
                # Best effort: a failing program is dropped, but counted.
                skipped += 1
                continue

            # Writing happens outside the try so I/O errors are not swallowed.
            output = captured.getvalue().strip()
            output = '\n'.join([f'# {line}' if line else '# ' for line in output.split('\n')])
            file.write(f"{code}\n{output}" + '\n\n')

    end_time = time.time()
    print(f"Time taken: {end_time - start_time} seconds")
    if skipped:
        print(f"Skipped {skipped} of {num_programs} programs that raised an exception")


In [18]:
# For each enabled level, make `iterations` generation attempts and write the
# programs that ran successfully to 'data<level>.txt'.
iterations = 10000 # 10000
for level in (
#     "1.1","1.2","2.1","2.2",
    "3.1", "3.2",
):
    init_count = 0
    max_init = 1

    generate_and_write_programs(iterations, level, f"data{level}.txt")

Time taken: 5.318938255310059 seconds
Time taken: 7.403544902801514 seconds


In [524]:
! zip data.zip data*

  adding: data1.1.txt (deflated 83%)
  adding: data1.2.txt (deflated 80%)
  adding: data2.1.txt (deflated 86%)
  adding: data2.2.txt (deflated 81%)
  adding: data3.1.txt (deflated 89%)
  adding: data3.2.txt (deflated 86%)


---

## Archive

### First one

In [None]:
# # SEPARATING EXPR IDENT AND ASSIGN IDENT
# cfg_rules = {
    
#     #Building Block
#     "<letter>": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"],
#     "<digit>": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
#     "<value>": ["<digit>"],
    
#     "<rel_op>": ["<", ">", "<=", ">=", "!=", "=="],
#     "<arth_op>": ["+", "-", "*", "/"],
#     "<logi_op_infix>": ["and", "or"],
#     "<logi_op_prefix>": ["not"],
#     "<logi_op>": ["<logi_op_infix>", 
#                   "<logi_op_prefix>"],
    
#     "<nl>": ["\n"],
#     "<tab_in>": ["\t"],
#     "<spc>": [" "],
    
#     "<assign_ident>": ["<letter>"],
#     "<term>": ["<expr_ident>", "<digit>"],
#     "<operator>": ["<arth_op>"], # only arithmetic operators

#     "<expr>": ["<term><spc><operator><spc><term>"],
#     "<enclosed_expr>": ["<bra_op><expr><bra_cl>"],
#     "<ident_init>": ["<ident_init><init>", "<init>"],
#     "<init>": ["<assign_ident><spc><=><spc><value><nl>"],
#     "<display>": ["print(<value>)<nl>", 
#                   "print(<term>)<nl>", 
#                   "print(<expr>)<nl>"],
    
#     # Arithmetic Expressions
#     "<assignments>": ["<assign_ident><spc><=><spc><sim_arth_eval><nl>"],
#     "<sim_arth_eval>": ["<sim_arth_eval><arth_op><enclosed_expr>", 
#                         "<enclosed_expr>"], # without functions


#     # If Statement Blocks
#     "<if_stmt>": ["<if><spc><chain_cond><:><nl>"],
#     "<elif_stmt>": ["<elif><spc><chain_cond><:><nl>"],
#     "<else_stmt>": ["<else><:><nl>"],
#     "<chain_cond>": ["<chain_cond><logi_op_infix><encl_cond>", 
#                      "<logi_op_prefix><encl_cond>", 
#                      "<encl_cond>"],

#     "<encl_cond>": ["<bra_op><condition><bra_cl>"],
#     "<condition>": ["<opt_not><cond_expr>", "<cond_expr>"],
#     "<cond_expr>": ["<expr_ident><spc><rel_op><spc><expr_ident>", "<expr_ident><spc><rel_op><spc><value>"],
#     "<opt_not>": ["<logi_op_prefix><spc>" ], 
    
#     # Loops    
#     "<for_hdr>": ["for<spc><expr_ident><spc>in range<bra_op><initial>, <final>, <step><bra_cl>:", 
#                   "for<spc><expr_ident><spc>in range<bra_op><initial>, <final><bra_cl>:"],

#     "<initial>": ["<value>"],
#     "<final>": ["<step> * <exe_count> + <initial> - 1"],
#     "<step>": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
#     "<exe_count>": ["2", "3", "4"],
#     "<while_hdr>": ["<while><bra_op><condition><bra_cl><:>"],
#     "<for_loop>": ["<for_hdr><nl><tab_in><display>"],
#     "<while_loop>": ["<while_hdr><nl><tab_in><display><tab_in><adj_cond>"],
    

#     # Complete Python Programs
#     "<prog_arth_expr_eval>": ["<ident_init><assignments><display>"],
#     "<prog_cond_expr_eval>": ["<ident_init><if_stmt><tab_in><display>", 
#                               "<ident_init><if_stmt><tab_in><display><elif_stmt><tab_in><display><else_stmt><tab_in><display>",
#                               "<ident_init><if_stmt><tab_in><display><else_stmt><tab_in><display>"],
#     "<prog_loop_expr_eval>": ["<ident_init><for_loop>", "<ident_init><while_loop>"],
    
#     "<prog>": ["<prog_arth_expr_eval>", 
# #                "<prog_cond_expr_eval>", 
# #                "<prog_loop_expr_eval>"
#               ],
    
#     # NOT PROVIDED IN THE PAPER BUT NECESSARY !
#     "<if>": ["if"],
#     "<elif>": ["elif"],
#     "<else>": ["else"],
#     "<while>": ["while"],
#     "<adj_cond>": ["<expr_ident> += <value>", "<expr_ident> -= <value>"],
#     "<bra_op>": ['('],
#     "<bra_cl>": [')'],
#     "<=>": ["="],
#     "<:>": [":"],
#     "<,>": [","],
# }




# import random


# def generate_code(symbol, assign_identifiers):
#     if symbol not in cfg_rules:
#         if symbol == "<expr_ident>":
#             if not assign_identifiers:
#                 return random.choice(cfg_rules["<digit>"])
#             else:
#                 identifier = random.choice(tuple(assign_identifiers))
#                 return identifier
#         else:
#             return symbol
    
# #     elif symbol == "<letter>": # TO DELETE
# #         identifier = random.choice(cfg_rules[symbol])
# #         return identifier
#     else:
#         rule = random.choice(cfg_rules[symbol])
#         symbols = []
#         i = 0
#         while i < len(rule):
#             if rule[i] == '<':
#                 end_idx = rule.find('>', i)
#                 if end_idx != -1:
#                     symbols.append(rule[i:end_idx + 1])
#                     i = end_idx + 1
#                 else: # TO DEBUG !
#                     symbols.append(rule[i:])
#                     break
#             else:
#                 symbols.append(rule[i])
#                 i += 1
        
#         # Generate code for each symbol in the rule
#         generated_symbols = [generate_code(s, assign_identifiers) for s in symbols]
        
#         # If the rule is an assignment, add the identifier to the set after generation
#         if symbol == "<init>":
#             assign_identifiers.add(generated_symbols[0])
        
#         return ''.join(generated_symbols)

# def generate_program():
    
#     ass = set()
#     program = generate_code("<prog>", ass)
# #     while any(symbol in program for symbol in cfg_rules.keys()):
# #         program = generate_code(program, ass)

#     return program

### Generate code with trees (first algo)

In [None]:
# from anytree import Node, RenderTree
# import random

# def generate_code(symbol, assign_identifiers, parent=None):
#     node = Node(symbol, parent=parent)

#     if symbol not in cfg_rules:
#         if symbol == "EXPRESSION_IDENTIFIER":
#             if not assign_identifiers:
#                 return random.choice(cfg_rules["DIGIT"])
#             else:
#                 identifier = random.choice(tuple(assign_identifiers))
#                 return identifier
#         else:
#             return symbol
    
#     else:
#         rule = random.choice(cfg_rules[symbol])
#         symbols = []
#         i = 0
#         while i < len(rule):
#             if rule[i] == '<':
#                 end_idx = rule.find('>', i)
#                 if end_idx != -1:
#                     symbols.append(rule[i:end_idx + 1])
#                     i = end_idx + 1
#                 else:
#                     symbols.append(rule[i:])
#                     break
#             else:
#                 symbols.append(rule[i])
#                 i += 1
        
#         # Generate code for each symbol in the rule
#         generated_symbols = [generate_code(s, assign_identifiers, node) for s in symbols]
        
#         # If the rule is an assignment, add the identifier to the set after generation
#         if symbol == "INITIALIZATION":
#             assign_identifiers.add(generated_symbols[0])
        
#         return ''.join(generated_symbols)

# def print_tree(root):
#     for pre, _, node in RenderTree(root):
#         print("%s%s" % (pre, node.name))


### Rules with NLTK

In [None]:
# import nltk

# def convert_to_nltk_cfg(cfg_rules):
#     nltk_rules = []

#     for lhs, rhs in cfg_rules.items():
#         for production in rhs:
#             # Split the production into symbols
#             symbols = production.split(' ')
#             # Convert the symbols into NLTK's format
#             nltk_rhs = ' '.join(f'"{symbol}"' if symbol not in cfg_rules else symbol for symbol in symbols)
#             nltk_rules.append(f'{lhs} -> {nltk_rhs}')

#     nltk_cfg = nltk.CFG.fromstring('\n'.join(nltk_rules))
#     return nltk_cfg
# #     return nltk_rules

# # Convert to NLTK CFG
# nltk_cfg = convert_to_nltk_cfg(cfg_rules)
# # for rule in nltk_cfg:
# #     print(rule)
# print(nltk_cfg)


### Rules before re arranging

In [None]:
# cfg_rules = {
    
#     "VARIABLE": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"],
#     "DIGIT": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
    
#     "ARITHMETIC_OPERATOR": ["+", "-", "*", "/"],
#     "RELATIONAL_OPERATOR": ["<", ">", "<=", ">=", "!=", "=="],
#     "LOGICAL_OPERATOR_INFIX": ["and", "or"],
#     "LOGICAL_OPERATOR_PREFIX": ["not"],
#     "LOGICAL_OPERATOR": ["LOGICAL_OPERATOR_INFIX", 
#                          "LOGICAL_OPERATOR_PREFIX"],
    
#     "NEW_LINE": ["\n"],
#     "TAB_INDENT": ["\t"],
    
#     "TERM": ["EXPRESSION_IDENTIFIER", "DIGIT"],
#     "OPERATOR": ["ARITHMETIC_OPERATOR"], 

#     "EXPRESSION": ["TERM SPACE OPERATOR SPACE TERM"],
#     "ENCLOSED_EXPRESSION": ["BRACKET_OPEN EXPRESSION BRACKET_CLOSE"],
    
#     "IDENTIFIER_INITIALIZATION": ["IDENTIFIER_INITIALIZATION INITIALIZATION", "INITIALIZATION"],
#     "INITIALIZATION": ["VARIABLE SPACE EQUALS SPACE DIGIT NEW_LINE"],
    
#     "ASSIGNMENTS": ["VARIABLE SPACE EQUALS SPACE SIMPLE_ARITHMETIC_EVALUATION NEW_LINE"],
#     "SIMPLE_ARITHMETIC_EVALUATION": ["SIMPLE_ARITHMETIC_EVALUATION ARITHMETIC_OPERATOR ENCLOSED_EXPRESSION", 
#                                      "ENCLOSED_EXPRESSION"], 


#     "IF_STATEMENT": ["IF SPACE CHAIN_CONDITION COLON NEW_LINE"],
#     "ELIF_STATEMENT": ["ELIF SPACE CHAIN_CONDITION COLON NEW_LINE"],
#     "ELSE_STATEMENT": ["ELSE COLON NEW_LINE"],
#     "CHAIN_CONDITION": ["CHAIN_CONDITION LOGICAL_OPERATOR_INFIX ENCLOSED_CONDITION", 
#                         "LOGICAL_OPERATOR_PREFIX ENCLOSED_CONDITION", 
#                         "ENCLOSED_CONDITION"],

#     "ENCLOSED_CONDITION": ["BRACKET_OPEN CONDITION BRACKET_CLOSE"],
#     "CONDITION": ["OPTIONAL_NOT CONDITION_EXPRESSION", "CONDITION_EXPRESSION"],
#     "CONDITION_EXPRESSION": ["EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE EXPRESSION_IDENTIFIER", "EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE DIGIT"],
#     "OPTIONAL_NOT": ["LOGICAL_OPERATOR_PREFIX SPACE", "SPACE"], 
        
#     "FOR_HEADER": ["FOR SPACE EXPRESSION_IDENTIFIER SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL COMMA SPACE STEP BRACKET_CLOSE SPACE COLON", 
#                    "FOR SPACE EXPRESSION_IDENTIFIER SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL BRACKET_CLOSE SPACE COLON"],

#     "INITIAL": ["DIGIT"],
#     "FINAL": ["STEP * EXECUTION_COUNT + INITIAL - 1"],
#     "STEP": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
#     "EXECUTION_COUNT": ["2", "3", "4"],
#     "WHILE_HEADER": ["WHILE SPACE BRACKET_OPEN CONDITION BRACKET_CLOSE SPACE COLON"],
#     "FOR_LOOP": ["FOR_HEADER NEW_LINE TAB_INDENT DISPLAY"],
#     "WHILE_LOOP": ["WHILE_HEADER NEW_LINE TAB_INDENT DISPLAY TAB_INDENT ADJUST_CONDITION"],
    

#     "PROGRAM_ARITHMETIC_EXPRESSION_EVALUATION": ["IDENTIFIER_INITIALIZATION ASSIGNMENTS DISPLAY"],
#     "PROGRAM_CONDITION_EXPRESSION_EVALUATION": ["IDENTIFIER_INITIALIZATION IF_STATEMENT TAB_INDENT DISPLAY", 
#                                                  "IDENTIFIER_INITIALIZATION IF_STATEMENT TAB_INDENT DISPLAY ELIF_STATEMENT TAB_INDENT DISPLAY ELSE_STATEMENT TAB_INDENT DISPLAY",
#                                                  "IDENTIFIER_INITIALIZATION IF_STATEMENT TAB_INDENT DISPLAY ELSE_STATEMENT TAB_INDENT DISPLAY"],
#     "PROGRAM_LOOP_EXPRESSION_EVALUATION": ["IDENTIFIER_INITIALIZATION FOR_LOOP", "IDENTIFIER_INITIALIZATION WHILE_LOOP"],
    
#     "PROGRAM": [
#                 "PROGRAM_ARITHMETIC_EXPRESSION_EVALUATION",
#                  "PROGRAM_CONDITION_EXPRESSION_EVALUATION",
#                  "PROGRAM_LOOP_EXPRESSION_EVALUATION"
#                ], 
    
#     "IF": ["if"],
#     "ELIF": ["elif"],
#     "ELSE": ["else"],
#     "FOR": ["for"],
#     "IN": ["in"],
#     "RANGE": ["range"],
#     "WHILE": ["while"],
#     "ADJUST_CONDITION": ["EXPRESSION_IDENTIFIER += DIGIT", "EXPRESSION_IDENTIFIER -= DIGIT"],
#     "BRACKET_OPEN": ['('],
#     "BRACKET_CLOSE": [')'],
#     "EQUALS": ["="],
#     "COLON": [":"],
#     "COMMA": [","],
# }

### Generating file before try except

In [306]:
# from contextlib import redirect_stdout

# def generate_and_write_programs(num_programs, filename='data.txt'):
#     start_time = time.time()  # Start time

#     with open(filename, 'w') as file:
#         for _ in range(num_programs):
#             root, program = generate_program()
#             code = program + "\n# output"

#             SIO = StringIO()
#             with redirect_stdout(SIO):
#                 exec(code)
#             output= SIO.getvalue()

#             output = '\n'.join([f'# {line}' if line.strip() else line for line in output.split('\n')])
#             result = f"""{code}\n{output}"""
#             file.write(result + '\n\n')

#     end_time = time.time()  # End time
#     print(f"Time taken: {end_time - start_time} seconds")

### Rules before deleting while

In [None]:
# cfg_rules = {
#     # Variables and digits
#     "VARIABLE": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"],
#     "DIGIT": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
    
#     # Operators
#     "ARITHMETIC_OPERATOR": ["+", "-", "*", "/"],
#     "RELATIONAL_OPERATOR": ["<", ">", "<=", ">=", "!=", "=="],
#     "LOGICAL_OPERATOR_INFIX": ["and", "or"],
#     "LOGICAL_OPERATOR_PREFIX": ["not"],
#     "LOGICAL_OPERATOR": ["LOGICAL_OPERATOR_INFIX", "LOGICAL_OPERATOR_PREFIX"],
#     "OPERATOR": ["ARITHMETIC_OPERATOR"], 
 
#     # Formatting
#     "NEW_LINE": ["\n"],
#     "TAB_INDENT": ["\t"],
#     "BRACKET_OPEN": ['('],
#     "BRACKET_CLOSE": [')'],
#     "EQUALS": ["="],
#     "COLON": [":"],
#     "COMMA": [","],

#     # Terms and expressions
#     "TERM": ["EXPRESSION_IDENTIFIER", "DIGIT"],
#     "EXPRESSION": ["TERM SPACE OPERATOR SPACE TERM"],
#     "ENCLOSED_EXPRESSION": ["BRACKET_OPEN EXPRESSION BRACKET_CLOSE"],
    
#     # Initializations and assignments
#     "IDENTIFIER_INITIALIZATION": ["IDENTIFIER_INITIALIZATION INITIALIZATION", 
#                                   "INITIALIZATION"],
#     "INITIALIZATION": ["VARIABLE SPACE EQUALS SPACE DIGIT NEW_LINE"],
#     "ASSIGNMENTS": ["VARIABLE SPACE EQUALS SPACE SIMPLE_ARITHMETIC_EVALUATION NEW_LINE", "VARIABLE SPACE EQUALS SPACE EXPRESSION NEW_LINE"],
#     "SIMPLE_ARITHMETIC_EVALUATION": ["SIMPLE_ARITHMETIC_EVALUATION ARITHMETIC_OPERATOR ENCLOSED_EXPRESSION", 
#                                      "ENCLOSED_EXPRESSION",
#                                     ], 

#     # Conditions
#     "IF_STATEMENT": ["IF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
#     "ELIF_STATEMENT": ["ELIF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
#     "ELSE_STATEMENT": ["ELSE SPACE COLON NEW_LINE"],
#     "CHAIN_CONDITION": ["CHAIN_CONDITION SPACE LOGICAL_OPERATOR_INFIX SPACE ENCLOSED_CONDITION", 
#                         "LOGICAL_OPERATOR_PREFIX SPACE ENCLOSED_CONDITION", 
#                         "ENCLOSED_CONDITION"],
#     "ENCLOSED_CONDITION": ["BRACKET_OPEN CONDITION BRACKET_CLOSE"],
#     "CONDITION": ["OPTIONAL_NOT CONDITION_EXPRESSION", "CONDITION_EXPRESSION"],
#     "CONDITION_EXPRESSION": ["EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE EXPRESSION_IDENTIFIER", 
#                              "EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE DIGIT"],
#     "OPTIONAL_NOT": ["LOGICAL_OPERATOR_PREFIX SPACE", "SPACE"], 

#     # Loops
#     "FOR_HEADER": ["FOR SPACE EXPRESSION_IDENTIFIER SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL COMMA SPACE STEP BRACKET_CLOSE SPACE COLON", 
#                    "FOR SPACE EXPRESSION_IDENTIFIER SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL BRACKET_CLOSE SPACE COLON"],
#     "INITIAL": ["DIGIT"],
#     "FINAL": ["STEP * EXECUTION_COUNT + INITIAL - 1"],
#     "STEP": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
#     "EXECUTION_COUNT": ["2", "3", "4"],
#     "WHILE_HEADER": ["WHILE SPACE BRACKET_OPEN CONDITION BRACKET_CLOSE SPACE COLON"],
#     "FOR_LOOP": ["FOR_HEADER NEW_LINE TAB_INDENT DISPLAY"],
#     "WHILE_LOOP": ["WHILE_HEADER NEW_LINE TAB_INDENT DISPLAY NEW_LINE TAB_INDENT ADJUST_CONDITION"],
#     "ADJUST_CONDITION": ["EXPRESSION_IDENTIFIER SPACE += SPACE DIGIT", "EXPRESSION_IDENTIFIER SPACE -= SPACE DIGIT"],

#     # Programs
#     "PROGRAM_ARITHMETIC_EXPRESSION_EVALUATION": ["IDENTIFIER_INITIALIZATION ASSIGNMENTS DISPLAY"],
#     "PROGRAM_CONDITION_EXPRESSION_EVALUATION": ["IDENTIFIER_INITIALIZATION IF_STATEMENT TAB_INDENT DISPLAY", 
#                                                 "IDENTIFIER_INITIALIZATION IF_STATEMENT TAB_INDENT DISPLAY NEW_LINE ELIF_STATEMENT TAB_INDENT DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT DISPLAY", 
#                                                 "IDENTIFIER_INITIALIZATION IF_STATEMENT TAB_INDENT DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT DISPLAY"],
#     "PROGRAM_LOOP_EXPRESSION_EVALUATION": ["IDENTIFIER_INITIALIZATION FOR_LOOP", 
#                                            "IDENTIFIER_INITIALIZATION WHILE_LOOP"
#                                           ],
#     "PROGRAM": ["PROGRAM_ARITHMETIC_EXPRESSION_EVALUATION", 
#                 "PROGRAM_CONDITION_EXPRESSION_EVALUATION", 
#                 "PROGRAM_LOOP_EXPRESSION_EVALUATION"
#                ], 

#     # Keywords
#     "IF": ["if"],
#     "ELIF": ["elif"],
#     "ELSE": ["else"],
#     "FOR": ["for"],
#     "IN": ["in"],
#     "RANGE": ["range"],
#     "WHILE": ["while"],
# }


### Rules before levels

In [None]:
# cfg_rules = {
#     # Variables and digits
#     "VARIABLE": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"],
#     "DIGIT": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
    
#     # Operators
#     "ARITHMETIC_OPERATOR": ["+", "-", "*", "/"],
#     "RELATIONAL_OPERATOR": ["<", ">", "<=", ">=", "!=", "=="],
#     "LOGICAL_OPERATOR_INFIX": ["and", "or"],
#     "LOGICAL_OPERATOR_PREFIX": ["not"],
#     "LOGICAL_OPERATOR": ["LOGICAL_OPERATOR_INFIX", "LOGICAL_OPERATOR_PREFIX"],
#     "OPERATOR": ["ARITHMETIC_OPERATOR"], 
 
#     # Formatting
#     "NEW_LINE": ["\n"],
#     "TAB_INDENT": ["\t"],
#     "BRACKET_OPEN": ['('],
#     "BRACKET_CLOSE": [')'],
#     "EQUALS": ["="],
#     "COLON": [":"],
#     "COMMA": [","],

#     # Terms and expressions
#     "TERM": ["EXPRESSION_IDENTIFIER", "DIGIT"],
#     "EXPRESSION": ["TERM SPACE OPERATOR SPACE TERM"],
#     "ENCLOSED_EXPRESSION": ["BRACKET_OPEN EXPRESSION BRACKET_CLOSE"],
    
#     # Initializations and assignments
#     "IDENTIFIER_INITIALIZATION": ["IDENTIFIER_INITIALIZATION INITIALIZATION", 
#                                   "INITIALIZATION"],
    
#     "INITIALIZATION": ["VARIABLE SPACE EQUALS SPACE DIGIT NEW_LINE"],
#     "ASSIGNMENTS": [
#                     "VARIABLE SPACE EQUALS SPACE SIMPLE_ARITHMETIC_EVALUATION NEW_LINE", 
#                     "VARIABLE SPACE EQUALS SPACE EXPRESSION NEW_LINE"],
#     "SIMPLE_ARITHMETIC_EVALUATION": ["SIMPLE_ARITHMETIC_EVALUATION ARITHMETIC_OPERATOR ENCLOSED_EXPRESSION", 
#                                      "ENCLOSED_EXPRESSION",
#                                     ], 

#     # Conditions
#     "IF_STATEMENT": ["IF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
#     "ELIF_STATEMENT": ["ELIF SPACE CHAIN_CONDITION SPACE COLON NEW_LINE"],
#     "ELSE_STATEMENT": ["ELSE SPACE COLON NEW_LINE"],
#     "CHAIN_CONDITION": ["CHAIN_CONDITION SPACE LOGICAL_OPERATOR_INFIX SPACE ENCLOSED_CONDITION", 
#                         "LOGICAL_OPERATOR_PREFIX SPACE ENCLOSED_CONDITION", 
#                         "ENCLOSED_CONDITION"],
#     "ENCLOSED_CONDITION": ["BRACKET_OPEN CONDITION BRACKET_CLOSE"],
#     "CONDITION": ["OPTIONAL_NOT CONDITION_EXPRESSION", "CONDITION_EXPRESSION"],
#     "CONDITION_EXPRESSION": ["EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE EXPRESSION_IDENTIFIER", 
#                              "EXPRESSION_IDENTIFIER SPACE RELATIONAL_OPERATOR SPACE DIGIT"],
#     "OPTIONAL_NOT": ["LOGICAL_OPERATOR_PREFIX SPACE", "SPACE"], 

#     # Loops
#     "FOR_HEADER": ["FOR SPACE EXPRESSION_IDENTIFIER SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL COMMA SPACE STEP BRACKET_CLOSE SPACE COLON", 
#                    "FOR SPACE EXPRESSION_IDENTIFIER SPACE IN SPACE RANGE BRACKET_OPEN INITIAL COMMA SPACE FINAL BRACKET_CLOSE SPACE COLON"],
#     "INITIAL": ["DIGIT"],
#     "FINAL": ["STEP * EXECUTION_COUNT + INITIAL - 1"],
#     "STEP": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
#     "EXECUTION_COUNT": ["2", "3", "4"],
#     "FOR_LOOP": ["FOR_HEADER NEW_LINE TAB_INDENT DISPLAY"],

#     # Programs
#     "PROGRAM_ARITHMETIC_EXPRESSION_EVALUATION": ["IDENTIFIER_INITIALIZATION ASSIGNMENTS DISPLAY"],
#     "PROGRAM_CONDITION_EXPRESSION_EVALUATION": ["IDENTIFIER_INITIALIZATION IF_STATEMENT TAB_INDENT DISPLAY", 
#                                                 "IDENTIFIER_INITIALIZATION IF_STATEMENT TAB_INDENT DISPLAY NEW_LINE ELIF_STATEMENT TAB_INDENT DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT DISPLAY", 
#                                                 "IDENTIFIER_INITIALIZATION IF_STATEMENT TAB_INDENT DISPLAY NEW_LINE ELSE_STATEMENT TAB_INDENT DISPLAY"],
#     "PROGRAM_LOOP_EXPRESSION_EVALUATION": ["IDENTIFIER_INITIALIZATION FOR_LOOP"],
    
#     "PROGRAM": ["PROGRAM_ARITHMETIC_EXPRESSION_EVALUATION", 
#                 "PROGRAM_CONDITION_EXPRESSION_EVALUATION", 
#                 "PROGRAM_LOOP_EXPRESSION_EVALUATION"
#                ], 

#     # Keywords
#     "IF": ["if"],
#     "ELIF": ["elif"],
#     "ELSE": ["else"],
#     "FOR": ["for"],
#     "IN": ["in"],
#     "RANGE": ["range"],
#     "WHILE": ["while"],
# }


# def generate_code(symbol, assigned_identifiers, last_variable, identifier_initializations,  parent=None):
#     node = Node(symbol, parent=parent)
#     if symbol in cfg_rules:
    
#         rule = random.choice(cfg_rules[symbol])
#         symbols = rule.split(" ")

#         # Generate code for each symbol in the rule
#         generated_symbols = [generate_code(s, assigned_identifiers, last_variable, identifier_initializations, node) for s in symbols]
        
#         # If the rule is an assignment, add the identifier to the set after generation
#         if symbol == "INITIALIZATION":
#             assigned_identifiers.add(generated_symbols[0])
#         if symbol == "ASSIGNMENTS":
#             last_variable.add(generated_symbols[0])
        
#         return ''.join(generated_symbols)

#     elif symbol == "EXPRESSION_IDENTIFIER":
#         identifier = random.choice(tuple(assigned_identifiers)) if assigned_identifiers else random.choice(cfg_rules["DIGIT"])
#         return identifier

#     elif symbol == "DISPLAY":
#         try :
#             return f"print({tuple(last_variable)[0]})"
#         except : 
#             return f"print({random.choice(tuple(assigned_identifiers))})"
#     else:
#         return symbol

# def print_tree(root):
#     for pre, _, node in RenderTree(root):
#         print(f"{pre}{node.name}")

# def generate_program():
#     # Initialize an empty set of assign_identifiers
#     ass = set()
#     last_variable = set()
#     identifier_initializations = 0
#     # Generate the initial program and return the root of the tree
#     root = Node("ROOT")
#     program = generate_code("PROGRAM", ass, last_variable, identifier_initializations , root)

#     return root, program.replace("SPACE"," ")