In [1]:
import torch
print("torch.cuda:", torch.cuda.is_available(), "device_count:", torch.cuda.device_count())


torch.cuda: False device_count: 0


In [4]:
!pip install transformers datasets peft bitsandbytes accelerate
!pip install google-colab


Collecting jedi>=0.16 (from ipython==7.34.0->google-colab)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m39.4 MB/s[0m  [33m0:00:00[0m
[?25hInstalling collected packages: jedi
Successfully installed jedi-0.19.2


In [12]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
from huggingface_hub import login
login(token="HF-TOKEN") # replace HF-TOKEN with your actual token


In [3]:
from datasets import load_dataset

# Load Alpaca dataset (tiny sample for demo)
dataset = load_dataset("tatsu-lab/alpaca")
dataset = dataset["train"]

print("Sample:", dataset[0])
print("Dataset size:", len(dataset))


Sample: {'instruction': 'Give three tips for staying healthy.', 'input': '', 'output': '1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.', 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nGive three tips for staying healthy.\n\n### Response:\n1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.'}
Dataset size: 52002


In [4]:
from transformers import AutoTokenizer

model_name = "mistralai/Mistral-7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# Ensure pad_token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


In [6]:
from datasets import DatasetDict

def format_example(example):
    instruction = example.get("instruction", "")
    input_text = example.get("input", "")
    response = example.get("output", "")

    if input_text:
        prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n{response}"
    else:
        prompt = f"### Instruction:\n{instruction}\n\n### Response:\n{response}"

    return {"text": prompt}

processed_dataset = dataset.map(format_example)


In [7]:
if isinstance(processed_dataset, DatasetDict):
    print("Dataset is already split.")
else:
    split_point = len(processed_dataset) // 2
    processed_dataset = DatasetDict({
        "train": processed_dataset.select(range(split_point)),
        "test": processed_dataset.select(range(split_point, len(processed_dataset)))
    })

print(processed_dataset.keys())  # ['train', 'test']


dict_keys(['train', 'test'])


In [8]:
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

tokenized_dataset = processed_dataset.map(tokenize_function, batched=True, remove_columns=["text"])

print(tokenized_dataset["train"][0])


Map:   0%|          | 0/26001 [00:00<?, ? examples/s]

Map:   0%|          | 0/26001 [00:00<?, ? examples/s]

{'instruction': 'Give three tips for staying healthy.', 'input': '', 'output': '1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.', 'input_ids': [1, 27332, 3133, 3112, 28747, 13, 28777, 495, 1712, 10636, 354, 13465, 7783, 28723, 13, 13, 27332, 12107, 28747, 13, 28740, 28723, 28749, 270, 264, 19971, 9751, 304, 1038, 1864, 298, 3024, 8969, 302, 21566, 304, 19045, 28723, 28705, 13, 28750, 28723, 1529, 25451, 12274, 298, 1840, 574, 2187, 5038, 304, 2967, 28723, 28705, 13, 28770, 28723, 2483, 2066, 4289, 304, 9087, 264, 10652, 4289, 9442, 28723, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2

In [None]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
import torch

# Clear GPU memory
torch.cuda.empty_cache()

# 4-bit quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    llm_int8_enable_fp32_cpu_offload=True
)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    torch_dtype=torch.float32,  # safer for CPU
    trust_remote_code=True
)

# 🔹 Do NOT enable gradient checkpointing
# model.gradient_checkpointing_enable()  # Commented for stability

# LoRA configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/mistral_lora_out",
    per_device_train_batch_size=1,          # 🔑 small batch
    gradient_accumulation_steps=8,          # simulate bigger batch
    warmup_steps=20,
    max_steps=50,                           # 🔑 tiny demo for Colab Free
    learning_rate=2e-4,
    fp16=True,
    logging_steps=5,
    save_strategy="steps",
    save_steps=25,
    report_to="wandb"                       # optional
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    data_collator=data_collator
)


In [None]:
trainer.train()

# Save trained model
trainer.save_model("/content/drive/MyDrive/mistral_lora_out/final")


In [None]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    llm_int8_enable_fp32_cpu_offload=True
)

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
    trust_remote_code=True
)

from peft import PeftModel

ft_model = PeftModel.from_pretrained(
    base_model,
    "/content/drive/MyDrive/mistral_lora_out/final"
)
ft_model.eval()


In [None]:
def chat_with_model(prompt, max_new_tokens=128):
    torch.cuda.empty_cache()
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = ft_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
            use_cache=False
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Test
input_prompt = "your token numbers"
print(chat_with_model(input_prompt))
