In [76]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device", device)
from unsloth.chat_templates import get_chat_template
from datasets import load_dataset
import unsloth


from unsloth import FastLanguageModel, is_bfloat16_supported
import torch
from transformers import Trainer, TrainingArguments
from trl import SFTTrainer
import re
from join_string import join_strings
from mcpu5 import simulate_dual_core

device cuda


In [87]:
def code_extractor(inputs):
    pattern = r'"(.*?)"'
    # Using re.DOTALL to match across multiple lines
    matches = re.findall(pattern, inputs, re.DOTALL)
    
    # Strip whitespace from each match
    code_blocks = [match.strip() for match in matches]
    print(code_blocks)
    return code_blocks

In [88]:
def message2code(message):
    inputs = tokenizer.apply_chat_template(
        message,
        tokenize = True,
        add_generation_prompt = True, # Must add for generation
        return_dict = True,
        return_tensors = "pt",
    ).to("cuda")
    outputs = model.generate(input_ids = inputs["input_ids"],
                        attention_mask = inputs["attention_mask"],
                        max_new_tokens = 256,
                        use_cache = True)
    outputs = tokenizer.batch_decode(outputs)[0]
    return code_extractor(outputs)[0].split("\n")

In [89]:
def make_random_code():
    message_init = [
    {"from": "human", "value": f"""I have a cpu simulator with registers R1 up to R10, and that takes assembly instructions STORE, LOAD, ADD, MUL as input. \n
    Here is an example of a list of instructions in this language:
    \n DIV R5, R6\n SUB R7, R8 \n LOAD R9, 30\nADD R9, R10\n
    Can you write a random list of instructions and present it inside quotation marks please?
    Please make a list between 1 and 40 instructions.
        """},
    ]
    return message2code(message_init)

In [90]:
class History:
    def __init__(self, max_size = 100):
        self.max_size = max_size
        self.memory_program = []
        self.memory_signature = []
    def store(self,sample:dict[list]):
        for j in range(len(sample["program"])):
            self.memory_program.append(sample["program"][j])
            self.memory_signature.append(sample["signature"][j])
        self.eviction()
    def eviction(self):
        if len(self.memory_program)>self.maxsize:
            self.memory_program = self.memory_program[-self.maxsize:]
            self.memory_signature = self.memory_signature[-self.maxsize:]
    def select_closest_code(self,signature: dict)->dict:
        min_distance = 0
        idx = 0
        for j,signature_buffer in enumerate(self.memory_signature):
            dist = 0
            for k in signature_buffer.keys():
                dist += (signature_buffer[k] - signature[k])**2
            if min_distance:
                if dist<min_distance:
                    min_distance= dist
                    idx = j
            else:
                min_distance = dist
                idx = j
        return {"code": self.code_list[idx] ,"signature": self.signature_list[idx]}

In [91]:
class GoalGenerator:
    def __init__(self):
        pass

In [92]:
class OptimizationPolicy:
    """
    Takes the code corresponding to the closest signature to the desired goal signature
    """
    def __init__(self):    
        pass
    def __call__(goal:dict[list],H:History):
        closest_code = H.select_closest_code(goal) #most promising sample from the history
        output = self.light_code_mutation(closest_code) #expansion strategie: small random mutation
    def light_code_mutation(self,program:list[str]):
        messages = [
        {"from": "human", "value": f"""I have a cpu simulator with registers R1 up to R10, and that takes assembly instructions STORE, LOAD, ADD, MUL as input. \n
        Here is an example of a list of instructions in this language:
        \n DIV R5, R6\n SUB R7, R8 \n LOAD R9, 30\nADD R9, R10\n
        
        A mutation of a list of instructions consists in inserting, deleting or replacing a few instruction in program. For instance, here is a mutation of the list above. I added a the instruction LOAD in the fist line and I have replaced the last instruction by an instruction STORE.
        
        \nLOAD R4, 30\n DIV R5, R6\n SUB R7, R8 \n LOAD R9, 30\nSTORE R1, 20\n
        
        Can you perform a single light mutation based on the following list of instructions bellow?     
        Your answer has to be in the following format:
        
            Mutated list of intructions inside quotation marks
            Reflexion on how you mutate it
        List of instructions:
        {join_strings(program)}
            """},
        ]
        return message2code(messages)

In [93]:
class IMGEP:
    def __init__(self,N:int, N_init:int,H:History, G:GoalGenerator, Pi:OptimizationPolicy):
        """
        N: int. The experimental budget
        """
        self.N = N
        self.H = H
        self.G = G
        self.N_init = N_init
        self.Pi = Pi
    def __call__(self):
        for i in range(self.N):
            if i<self.N_init:
                #Initial random iterations
                core1_code = make_random_code()
            else:
                break
                #Sample target goal
                goal_code = self.G(self.H)
                core1_code = self.Pi(goal_code,self.H)
            core1_exec_time, core2_exec_time = simulate_dual_core(
            core1_code = core1_code,
            core2_code =["MUL R3, R4",
                        "STORE R1, 20",
                        "MOV R5, R6",
                        "LOAD R1, 10",
                        "ADD R1, R2",
                        "MUL R3, R4",])
            self.H.store({"program":[core1_code],
                         "signature": [{"core1_exec_time":core1_exec_time,
                                        "core2_exec_time": core2_exec_time}]})

In [94]:
max_seq_length = 512# Can increase for longer reasoning traces

model, tokenizer = FastLanguageModel.from_pretrained(
    #model_name = "mistralai/Mistral-7B-v0.1",
    model_name = "meta-llama/meta-Llama-3.1-8B-Instruct",
    max_seq_length = max_seq_length,
    #load_in_4bit = True, # False for LoRA 16bit
    #fast_inference = True, # Enable vLLM fast inference
    #max_lora_rank = lora_rank,
    #gpu_memory_utilization = 0.1, # Reduce if out of memory
)


tokenizer = get_chat_template(
    tokenizer,
    chat_template = "chatml", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
    map_eos_token = True, # Maps <|im_end|> to </s> instead
)
model = FastLanguageModel.get_peft_model(
    model,
)

==((====))==  Unsloth 2025.3.9: Fast Llama patching. Transformers: 4.49.0. vLLM: 0.7.3.
   \\   /|    NVIDIA RTX 2000 Ada Generation Laptop GPU. Num GPUs = 1. Max memory: 7.754 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [43]:
make_random_code()

['LOAD R3, 20',
 'MUL R4, R5',
 'ADD R6, R7',
 'LOAD R8, 40',
 'SUB R9, R10',
 'STORE R1, 15',
 'LOAD R2, 25',
 'MUL R4, R6',
 'ADD R7, R9',
 'LOAD R5, 10']

In [38]:
program1 = ["MOV R1, R2",
    "LOAD R1, 10",
    "ADD R1, R2",
    "MUL R3, R4",
    "STORE R1, 20",
    "MOV R5, R6",
    "LOAD R1, 10",
    "ADD R1, R2",
    "MUL R3, R4",
    "STORE R1, 20",
    "DIV R5, R6",
    "SUB R7, R8",
    "LOAD R9, 30",
    "ADD R9, R10"]

messages = [
    {"from": "human", "value": f"""I have a cpu simulator with registers R1 up to R10, and that takes assembly instructions STORE, LOAD, ADD, MUL as input. \n
Here is an example of a list of instructions in this language:
\n DIV R5, R6\n SUB R7, R8 \n LOAD R9, 30\nADD R9, R10\n

A mutation of a list of instructions consists in inserting, deleting or replacing a few instruction in program. For instance, here is a mutation of the list above. I added a the instruction LOAD in the fist line and I have replaced the last instruction by an instruction STORE.

\nLOAD R4, 30\n DIV R5, R6\n SUB R7, R8 \n LOAD R9, 30\nSTORE R1, 20\n

Can you perform a single light mutation based on the following list of instructions bellow?     
Your answer has to be in the following format:

    Mutated list of intructions inside quotation marks
    Reflexion on how you mutate it
List of instructions:
{join_strings(program1)}
    """},
]

In [61]:
H = History(max_size = 100)

In [59]:
Pi = OptimizationPolicy()

In [62]:
G = GoalGenerator()

In [85]:
imgep = IMGEP(N=3, N_init = 3,H=H, G=G, Pi=Pi)

In [86]:
imgep()

IndexError: list index out of range