In [1]:
import os

# Disable tokenizers parallelism through the environment variable the
# Hugging Face `tokenizers` library reads.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Base model on the Hugging Face hub; the notebook loads its tokenizer from here.
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# Hub repository holding the Cairo fine-tuned weights used for inference.
hub_name = "StarkWizard/Mistral-7b-instruct-cairo-instruct"



Load model from hub for inference

- If you only need inference, run this cell.
- The model is loaded from the Hugging Face Hub in 4-bit precision.


In [2]:
import torch
from transformers import AutoTokenizer, TextStreamer, GenerationConfig, BitsAndBytesConfig
from attention_sinks import AutoModelForCausalLM

# The tokenizer is taken from the base model (`model_name`), while the weights
# below come from the fine-tuned repository (`hub_name`).
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization and float16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

# NOTE(review): trust_remote_code=True lets the hub repository run its own
# Python code at load time -- confirm this repository actually needs it.
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=hub_name,
    quantization_config=bnb_config,
    device_map={"": 0},
    trust_remote_code=True,
    attention_sink_size=4,
    attention_sink_window_size=252,  # <- Low for the sake of faster generation
)
# Switch to inference mode; as the last expression of the cell this also
# displays the model architecture.
model.eval()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

[Attention Sinks] Injected Position Shifting into 32 attention classes.
[Attention Sinks] Injected Attention Sink KV Cache into 1 model class.


MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )

---
Sampling Inference
---

In [11]:
from transformers import TextStreamer, GenerationConfig

# Candidate prompts: keep exactly ONE of them uncommented. The cell used to
# leave all of them commented out, so `prompt` only existed if another cell
# had defined it first, and a fresh "Restart & Run All" raised NameError here.
#prompt = "Create an array and append some animal names"
#prompt = "give an exemple of constructor"
#prompt="create an array 'messages' that contains a u128, a u32, a u256"
#prompt = "create a structure for mailAccount"
#prompt = "create an array of felt and append 1 to the array"
#prompt = "create a felt and affect it a value of 1"

prompt = "write a  contract that returns the fibonacci of the caller address"
#prompt = "write an empty contract template"
#prompt = "what are spans used for"
#prompt = "How do I know if an array is empty"
#prompt = "what makes Cairo special"
#prompt = "Create an array and append some domestic animal names"

# NOTE(review): the system block is closed with "<SYS>>" rather than the usual
# "<</SYS>>"; left untouched because the fine-tuned model may have been trained
# on exactly this template -- confirm against the training data.
text =f"""[INST]
<<SYS>>
provide only one solution and no other possible solution, stick to the main topic, do not introduce any new topics or new question not provided by the student.
Make sure the explanations never be longer than 100 words.Don’t justify your answers. <SYS>>

Question: I'm working in Cairo 1 :{prompt} 
[/INST]"""

# Tokenize the full prompt and move it to the device the model lives on.
input_ids = tokenizer.encode(text, return_tensors="pt").to(model.device)

with torch.no_grad():
    # Stream tokens to stdout as they are generated.
    streamer = TextStreamer(tokenizer)
    generated_tokens = model.generate(
        input_ids,
        generation_config=GenerationConfig(
            # use_cache=True is required, the rest can be changed up.
            use_cache=True,
            min_new_tokens=1,
            max_new_tokens=1050,
            # NOTE(review): penalty_alpha is a contrastive-search parameter and
            # is presumably ignored while do_sample=True -- confirm.
            penalty_alpha=0.6,
            top_k=280,
            do_sample=True,
            top_p=0.96,
            no_repeat_ngram_size=10,
            repetition_penalty=1.2,
            # Near-zero temperature makes sampling almost deterministic.
            temperature=0.001,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        ),
        streamer=streamer,
    )
    # Decode the final generated text (prompt + completion) without special tokens.
    output_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

<s> [INST]
<<SYS>>
provide only one solution and no other possible solution, stick to the main topic, do not introduce any new topics or new question not provided by the student.
Make sure the explanations never be longer than 100 words.Don’t justify your answers. <SYS>>

Question: I'm working in Cairo 1 :write a  contract that returns the fibonacci of the caller address 
[/INST]
```
#[starknet::contract]
mod Fib {
   #[view]
   fn get_fib(self: @ContractState) -> (u256, u256) {
       let mut x = 0;
       let mut y = 1;
       if self.caller == '0x...sn_address_here' {
           return (y, x);
       } else {
           x = x + y;
           y = y + x;
           return (y, x);
        }
   }
}
```

This code defines a view function `get_fib`, which checks whether the calling address is equal to a specific address ('0x...sn_address_here'). If it is, it simply returns the values of `y` and `x`. Otherwise, it computes the Fibonacci numbers recursively using `x` and `y`, and then retur

---
Beam Inference

In [8]:

import transformers
import torch

model.eval()

# Text-generation pipeline configured for 2-beam search.
# NOTE(review): temperature is presumably unused without do_sample=True;
# kept as-is to avoid changing generation settings -- confirm.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    num_beams=2,
    early_stopping=True,
    output_attentions=False,
    output_hidden_states=False,
    temperature=0.01,
)

# Candidate prompts: keep exactly ONE of them uncommented. They used to be
# assigned one after another, so only the last assignment ever took effect.
#prompt = 'In Cairo 1, write a full sample contract with a constructor that stores the caller into owner'
#prompt ="In cairo 1, increment i 6 times using  loop, provide an explanation of the code"
#prompt = "Create an array and append some domestic animal names and write a test to check the value of the item at index 2"
#prompt = " what is an array span ?"
#prompt = "give an exemple of a simplestorage contract with unit tests"
#prompt = "give an exemple of a  contract stores in owner the value of the callers address, add one function that returns the fibonacci value of owner, explain the code"
prompt = "write an empty contract template"

# NOTE(review): the system block is closed with "</SYS>>" rather than the usual
# "<</SYS>>"; left untouched because the fine-tuned model may have been trained
# on exactly this template -- confirm against the training data.
text =f"""[INST]
<<SYS>>
Write explanations and Cairo 1 code  to solve the following coding problem that obeys the constraints and passes the example test cases. Explain the code. Only use Cairo syntax and no other language. Please wrap your code answer using ```
</SYS>>Question: I'm working in Cairo 1 :{prompt} 
[/INST]"""

with torch.no_grad():
    sequences = pipeline(
        text,
        num_return_sequences=1,
        bos_token_id=model.config.bos_token_id,
        eos_token_id=model.config.eos_token_id,
        # The EOS token id is also used for padding.
        pad_token_id=model.config.eos_token_id,
    )

# Each returned item carries the prompt plus completion under 'generated_text'.
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Result: [INST]
<<SYS>>
Write explanations and Cairo 1 code  to solve the following coding problem that obeys the constraints and passes the example test cases. Explain the code. Only use Cairo syntax and no other language. Please wrap your code answer using ```
</SYS>>Question: I'm working in Cairo 1 :write an empty contract template 
[/INST]
```
#[starknet::contract]
mod Contract {

    #[storage]
    struct Storage {
        counter: u128,
    }

    #[external(v0)]
    fn set(ref self: ContractState, x: u128) {
        self.counter.write(x);
    }

    #[external(v0)]
    fn get(ref self: ContractState) -> u128 {
        self.counter.read()
    }
}
```


```
#[starknet::contract]
mod Contract {

    #[storage]
    struct Storage {
        counter: u128,
    }

    #[external(v0)]
    fn set(ref self: ContractState, x: u128) {
        self.counter.write(x);

