In [7]:
# =========================================
# 1. Preparación del entorno
# =========================================
!pip install -q transformers accelerate bitsandbytes peft datasets


In [8]:
# =========================================
# 2. Imports
# =========================================
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
#Parameter efficient fine tunning
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
model_name = "mistralai/Mistral-7B-Instruct-v0.2"


In [16]:
# =========================================
# 3. Modelo base (Mistral 7B Instruct)
# =========================================
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Hugging Face login
hf_token = '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'
if hf_token:
    login(token=hf_token)
else:
    print("Hugging Face token not found in Colab secrets. Please add it to proceed.")

# Quantization config (instead of load_in_4bit=True)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype="float16",   # try "bfloat16" if GPU supports it
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load base model with 4-bit quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

# Preparar modelo para fine-tuning LoRA
model = prepare_model_for_kbit_training(model)

# Configuración de LoRA
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Inyectar LoRA en el modelo
model = get_peft_model(model, lora_config)


ImportError: Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`

In [None]:
# =========================================
# 4. Dataset Joshua-like (ejemplo mini)
# =========================================

#get from github the dataset
!wget https://raw.githubusercontent.com/AKRIS21/JGraham/main/jg_lines.json

from datasets import load_dataset

# Load dataset (assuming JSON)
dataset = load_dataset("json", data_files="jg_lines.json")

# Convert chat into plain text format (instruction/response style)
def format_example(example):
    user_msg = ""
    assistant_msg = ""

    for msg in example["messages"]:
        if msg["role"] == "user":
            user_msg = msg["content"]
        elif msg["role"] == "assistant":
            assistant_msg = msg["content"]

    return {"text": f"User: {user_msg}\nJoshua: {assistant_msg}"}

dataset = dataset.map(format_example)
train_data = dataset["train"]

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#=========================================
# 5. Entrenamiento LoRA
# =========================================
training_args = TrainingArguments(
    output_dir="./jgraham-lora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    num_train_epochs=3,
    logging_steps=10,
    save_strategy="epoch",
    fp16=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    tokenizer=tokenizer
)

trainer.train()

# Guardar LoRA en Drive
model.save_pretrained("/content/drive/MyDrive/jgraham-lora")

In [3]:
# =========================================
# 6. Uso del modelo Joshua
# =========================================
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model + LoRA
base_model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True, device_map="auto")
model = PeftModel.from_pretrained(base_model, "/content/drive/MyDrive/jgraham-lora")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# List of prompts
prompts = [
    "Usuario: No quiero hacer más, estoy cansado.\nJoshua Graham:",
    "Usuario: ¿Crees que Dios perdona a todos?\nJoshua Graham:",
    "Usuario: He visto a Caesar, ¿qué piensas de él?\nJoshua Graham:"
]

# Generate answers for each
for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=120, do_sample=True, temperature=0.7)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("="*50)
    print(response)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2.
401 Client Error. (Request ID: Root=1-68acae46-6f5ab98c6aefc2c433a3a571;6670f245-6151-4fa0-9ab0-a4ccb1e71812)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.2 is restricted. You must have access to it and be authenticated to access it. Please log in.