In [2]:
# The model that you want to train from the Hugging Face hub
model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"

# The instruction dataset to use
dataset_name = "pechaut/cairo-instruct"

# Fine-tuned model name
new_model = "Mistral-7b-instruct-cairo-GPTQ"

# Output directory where the model predicti
# ons and checkpoints will be stored
output_dir = "./results"

import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

hub_name = "pechaut/Mistral-7b-instruct-cairo-instruct"

max_steps = 1000 # to tweak to get the best out of the model 

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline,GPTQConfig
 
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True, tokenizer=tokenizer)

model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_name,
                                             trust_remote_code=True,
                                             quantization_config=quantization_config_loading,
                                             device_map={"": 0}
                                             )

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. disable_exllama, use_cuda_fp16, max_input_length) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


In [4]:
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments
from trl import SFTTrainer
import os

model.config.use_cache=False
model.config.pretraining_tp=1
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)


Training

Loading Dataset

In [5]:
from datasets import load_dataset

# Load the dataset
dataset_train = load_dataset(dataset_name, split="train", download_mode='force_redownload',ignore_verifications=True)
dataset_test = load_dataset(dataset_name, split="eval", download_mode='force_redownload',ignore_verifications=True)



Downloading readme:   0%|          | 0.00/527 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/2.42k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/34.4k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating eval split:   0%|          | 0/5 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/230 [00:00<?, ? examples/s]

Downloading readme:   0%|          | 0.00/527 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/2.42k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/34.4k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating eval split:   0%|          | 0/5 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/230 [00:00<?, ? examples/s]

In [6]:



peft_config = LoraConfig(
    r=64, lora_alpha=16, lora_dropout=0.1, bias="none", task_type="CAUSAL_LM", target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head"
    ],inference_mode = False
)

model = get_peft_model(model, peft_config)
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    gradient_checkpointing = True,
    optim="paged_adamw_32bit",
    evaluation_strategy="steps",
    learning_rate=2e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    save_strategy="epoch",
    logging_dir="./logs", 
    logging_steps=5,
    num_train_epochs=1,
    max_steps=max_steps,
    fp16=False,
    push_to_hub=True
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset_train,
    eval_dataset=dataset_test,
    peft_config=peft_config,
    dataset_text_field="text",
    args=training_arguments,
    tokenizer=tokenizer,
    packing=False,
    max_seq_length=512
)


Map:   0%|          | 0/230 [00:00<?, ? examples/s]

Map:   0%|          | 0/5 [00:00<?, ? examples/s]



In [7]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
print_trainable_parameters(model)

trainable params: 170082304 || all params: 432492544 || trainable%: 39.32606616219493


In [8]:

trainer.train()
trainer.push_to_hub()


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
5,4.1068,4.85728
10,5.5606,4.836658
15,3.4196,4.793443
20,4.2192,4.718629
25,3.3524,4.634434
30,3.0857,4.489045
35,3.6776,4.340126
40,4.19,4.14984
45,3.5773,3.909785
50,2.9436,3.673285


'https://huggingface.co/pechaut/Mistral-7b-instruct-cairo-test/tree/main/'

In [29]:

import pandas as pd
df = pd.DataFrame(dataset_train)

# Vérifier la présence de NaN dans la colonne 'text'
nan_count = df['text'].isna().sum()

if nan_count > 0:
    print(f"Il y a {nan_count} valeurs NaN dans la colonne 'text'.")
else:
    print("Il n'y a pas de valeurs NaN dans la colonne 'text'.")

Il n'y a pas de valeurs NaN dans la colonne 'text'.


In [7]:
new_model

'Mistral-7b-instruct-cairo-test'

In [3]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline,GPTQConfig
 
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True, tokenizer=tokenizer)

model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_name,
                                             trust_remote_code=True,
                                             quantization_config=quantization_config_loading,
                                             device_map={"": 0}
                                             )

model = PeftModel.from_pretrained(model, new_model + "/checkpoint-460")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. disable_exllama, use_cuda_fp16, max_input_length) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


In [9]:
#prompt = "Create an array and append some animal names"
#prompt = "give an exemple of constructor"
#prompt="create an array 'messages' that contains a u128, a u32, a u256"
#prompt = "create a structure for mailAccount"
#prompt = "create an array of felt and append 1 to the array"
#prompt = "create a felt and affect it a value of 1"
#prompt="create a function for fibonacci"
#prompt = "what are spans used for"
#prompt = "How do I know if an array is empty"
prompt = "what makes Cairo special"
text =f"[INST]I'm working in Cairo. You are a cairo expert and know no other computer language, answer in less than 200 words: {prompt} [/INST]"

input_ids = tokenizer(text, return_tensors="pt", truncation=True).input_ids.cuda()

outputs = model.generate(input_ids=input_ids,       
    max_new_tokens=200,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=0.01,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.15
    )


print(f"Prompt:\n{prompt}\n")
print(f"Answer:\n{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(text):]}")


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Prompt:
create a structure for mailAccount

Answer:

    #[derive(Copy, Drop)]
    struct MailAccount {
        username: felt252,
        password: felt252,
        email: felt252,
    }
Explanation:
- This code defines a structure called MailAccount that contains three fields: username, password, and email. Each field is of a specific data type: username and password are both of type felt252 (which represents a long</s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></


In [8]:
import torch

#prompt = "Create an array and append some animal names"
#prompt = "give an exemple of constructor"
#prompt="create an array 'messages' that contains a u128, a u32, a u256"
prompt = "create a structure for mailAccount"
#prompt = "create an array of felt and append 1 to the array"
#prompt = "create a felt and affect it a value of 1"
#prompt="create a function for fibonacci"
#prompt = "what are spans used for"
#prompt = "How do I know if an array is empty"
text =f"[INST]I'm working in Cairo. You are a cairo expert and know no other computer language, answer in less than 200 words: {prompt} [/INST]"


model_input = tokenizer(text, return_tensors="pt").to("cuda")



model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST]I'm working in Cairo. You are a cairo expert and know no other computer language, answer in less than 200 words: create a structure for mailAccount [/INST]
    #[derive(Copy, Drop)]
    struct MailAccount {
        username: felt252,
        password: felt252,
        email: felt252,
    }
Explanation:
- This code defines a structure called MailAccount that contains three fields: username, password, and email. Each field is of a specific data type: username and password are both of type felt252 (which represents a long</s>
