In [32]:
pip install transformers datasets torch accelerate peft


Note: you may need to restart the kernel to use updated packages.


Load the Model and Install Dependencies

In [13]:
# load_model.py
from transformers import AutoModelForCausalLM

def load_model(model_name):
    # Load model
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return model

if __name__ == "__main__":
    model_name = "AINovice2005/LeEmpereur-unhealed"
    model = load_model(model_name)

# load_tokenizer.py
from transformers import AutoTokenizer

def load_tokenizer(model_name):
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return tokenizer

if __name__ == "__main__":
    model_name = "AINovice2005/LeEmpereur"
    tokenizer = load_tokenizer(model_name)


config.json:   0%|          | 0.00/655 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/6.87k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.39G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of MistralForCausalLM were not initialized from the model checkpoint at AINovice2005/LeEmpereur-unhealed and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Write arguments for the Dataset Mapping and Trainer.

In [18]:
from transformers import Trainer, AutoModelForCausalLM, AutoTokenizer, TrainingArguments,AdamW
import torch
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, TaskType

# Define training arguments
def get_training_args(output_dir="./results2", epochs=2, batch_size=2, grad_accum=16, save_steps=3000, log_steps=100):
    return TrainingArguments(
        output_dir=output_dir,
        overwrite_output_dir=True,
        evaluation_strategy="no",  # Change to "epoch" or "steps" if you have an eval dataset
        learning_rate=1e-2,
        weight_decay=0.01,
        per_device_train_batch_size=batch_size,
        gradient_accumulation_steps=grad_accum,
        fp16=True,  # Mixed precision training for large models
        gradient_checkpointing=True,  # Enable gradient checkpointing
        num_train_epochs=epochs,
        logging_steps=log_steps,
        save_steps=save_steps,
        save_total_limit=3,  # Keep only the last 3 checkpoints
        max_steps=3000,
        report_to="tensorboard",  # Log to TensorBoard
    )

# Load and preprocess dataset
def load_and_preprocess_dataset(tokenizer, dataset_name="mlabonne/FineTome-100k", split="train[:25%]", max_length=512):
    dataset = load_dataset(dataset_name, split=split)

    def preprocess_function(examples):
        def extract_text(conversation):
            if isinstance(conversation, list):
                texts = []
                for turn in conversation:
                    if isinstance(turn, dict) and 'content' in turn:
                        texts.append(turn['content'])
                    elif isinstance(turn, str):
                        texts.append(turn)
                return ' '.join(texts)
            elif isinstance(conversation, str):
                return conversation
            else:
                raise ValueError(f"Unexpected type for conversation: {type(conversation)}")
        texts = [extract_text(conv) for conv in examples['conversations']]
        tokenized_inputs = tokenizer(texts, padding="max_length", truncation=True, max_length=max_length)
        tokenized_inputs["labels"] = tokenized_inputs["input_ids"].copy()
        return tokenized_inputs

    train_dataset = dataset.map(preprocess_function, batched=True, num_proc=4)
    return train_dataset

# Load model and tokenizer
model_name = "AINovice2005/LeEmpereur"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Get training arguments
training_args = get_training_args()

# LoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM, 
    r=8, 
    lora_alpha=32, 
    target_modules=["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.o_proj", "mlp.gate_proj", "mlp.down_proj", "mlp.up_proj", "lm_head"],
    lora_dropout=0.1, 
    bias="none"
)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)

# Load dataset (only 1% of the total dataset)
train_dataset = load_and_preprocess_dataset(tokenizer, split="train[:25%]")

optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=0.01)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    optimizers=(optimizer, None)
)

# Start training
trainer.train()

# Save the model
trainer.save_model("./trained_model")
tokenizer.save_pretrained("./trained_model")

print("Training complete and model saved.")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss
100,24.5534
200,0.0126
300,0.0101
400,0.0086
500,0.0072
600,0.006
700,0.0048
800,0.004
900,0.0033
1000,0.0029


Training complete and model saved.


Save The Model

In [9]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Replace with your base model, e.g., GPT-2 or any other model
model_name = "AINovice2005/LeEmpereur"

# Load and save the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Save the model locally
model.save_pretrained("path_to_save_model")
tokenizer.save_pretrained("path_to_save_model")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

('path_to_save_model/tokenizer_config.json',
 'path_to_save_model/special_tokens_map.json',
 'path_to_save_model/tokenizer.json')

Load the Model For Testing

In [10]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the tokenizer and model from the local directory
model_path = "/teamspace/studios/this_studio/path_to_save_model"  # e.g., 'my_local_model/'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Move the model to the correct device (GPU or CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [15]:
# Prepare input text
input_text = "Hi,how are you doing?"
inputs = tokenizer(input_text, return_tensors="pt").to(device)


In [16]:
# Perform inference
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=50, do_sample=True)

# Decode the output text
output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"Model Output: {output_text}")


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Model Output: Hi,how are you doing?

A circle with radius 14 and the sum of its digits are to make a rolls (You's a non positive face has square centimidor, and a positive number of number its sum and two digit partates in


Upload the Model on the Hugging Face Hub

In [1]:
pip install huggingface_hub

Note: you may need to restart the kernel to use updated packages.


In [2]:
from huggingface_hub import notebook_login

# This will prompt you to log in through your browser in a notebook-friendly way
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [8]:
from huggingface_hub import HfApi, HfFolder

# Define your directory and model ID
local_directory = "/teamspace/studios/this_studio/path_to_save_model"
model_id = "AINovice2005/LeEmpereur-final"

# Initialize the API
api = HfApi()

# Upload the directory to the Hub
api.upload_folder(
    folder_path=local_directory,
    repo_id=model_id,
    repo_type="model"
)

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.91G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/AINovice2005/LeEmpereur-final/commit/caf619bfe2cd8355b852c53f0931db167214fa24', commit_message='Upload folder using huggingface_hub', commit_description='', oid='caf619bfe2cd8355b852c53f0931db167214fa24', pr_url=None, repo_url=RepoUrl('https://huggingface.co/AINovice2005/LeEmpereur-final', endpoint='https://huggingface.co', repo_type='model', repo_id='AINovice2005/LeEmpereur-final'), pr_revision=None, pr_num=None)