In [None]:
# 1️⃣ Install
!pip install --upgrade pip
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install trl accelerate bitsandbytes datasets

In [None]:


# 2️⃣ Imports
import torch
from unsloth import FastLanguageModel, is_bfloat16_supported
from transformers import AutoTokenizer, TrainingArguments
from trl import SFTTrainer
from datasets import load_dataset

# 3️⃣ Config & Load
# Base-model settings; later cells read these names again.
MODEL_NAME = "google/gemma-2-2b"
max_seq_length = 512
load_in_4bit = True      # QLoRA-style 4-bit weights
dtype = torch.float16    # T4 favors fp16

# The Unsloth loader hands back the model together with its tokenizer and
# handles quantization under the hood.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)

# 4️⃣ Patch in LoRA adapters
# Rank-8 adapters (alpha 16, dropout 0.05) on the query and value projections
# only; `model` is rebound to the adapter-wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    random_state=42,
    use_gradient_checkpointing="unsloth",
)


In [None]:
import os

# Set the WANDB_DISABLED switch before the trainers further down are created.
os.environ.update(WANDB_DISABLED="true")


In [None]:
# Load Alpaca-cleaned
ds_chat = load_dataset("yahma/alpaca-cleaned")["train"]

# Shuffle once with a fixed seed and carve out two disjoint slices. The eval
# slice has to start where the train slice ends: rows 100-299 of the same
# seed-42 shuffle would all be inside the 1000 training rows, so the eval
# loss would be measured on data the model was trained on.
N_TRAIN_CHAT = 1000
N_EVAL_CHAT = 200
ds_chat_shuffled = ds_chat.shuffle(seed=42)
train_chat = ds_chat_shuffled.select(range(N_TRAIN_CHAT))
eval_chat = ds_chat_shuffled.select(range(N_TRAIN_CHAT, N_TRAIN_CHAT + N_EVAL_CHAT))

# Format into a single "text" field
# End-of-sequence token that format_chat appends to every example.
EOS = tokenizer.eos_token
def format_chat(ex):
    """Render one instruction record as a single training string.

    Args:
        ex: Mapping with "instruction", "input" and "output" entries.

    Returns:
        ``{"text": ...}`` containing, in order: the instruction section, the
        input section (omitted when "input" is empty), the response section
        and the module-level ``EOS`` token.
    """
    instruction, context, answer = ex["instruction"], ex["input"], ex["output"]

    pieces = ["### Instruction:\n", instruction]
    if context:
        pieces.extend(["\n### Input:\n", context])
    pieces.extend(["\n### Response:\n", answer, EOS])

    return {"text": "".join(pieces)}

# Run format_chat over both splits one row at a time (batched=False) and drop
# the original dataset columns, leaving only the "text" field it returns.
train_chat, eval_chat = (
    split.map(format_chat, batched=False, remove_columns=ds_chat.column_names)
    for split in (train_chat, eval_chat)
)


# Trainer for chat
# One epoch in fp16, batch size 2 per device without gradient accumulation;
# logs every 50 steps and saves checkpoints to ./lora_chat every 200 steps.
chat_args = TrainingArguments(
    output_dir="./lora_chat",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    fp16=True,
    logging_steps=50,
    save_steps=200,
)

# NOTE(review): no evaluation schedule is set in chat_args, so eval_chat may
# never actually be scored during training — confirm whether that is intended.
trainer_chat = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=chat_args,
    train_dataset=train_chat,
    eval_dataset=eval_chat,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
)

trainer_chat.train()


In [None]:
# Load MBPP
ds_code = load_dataset("commit0/mbpp")

# Fixed-seed shuffle of each split, then keep the first 300 training rows and
# the first 90 validation rows.
train_code = ds_code["train"].shuffle(seed=42).select(range(300))
eval_code = ds_code["validation"].shuffle(seed=42).select(range(90))

def format_code(ex):
    """Join a record's prompt and reference solution into one training string.

    Args:
        ex: Mapping with "prompt" and "canonical_solution" entries.

    Returns:
        ``{"text": ...}`` where the text is the prompt, a newline, the
        solution, and the end-of-sequence token of the module-level
        ``tokenizer``.
    """
    parts = (ex["prompt"], "\n", ex["canonical_solution"], tokenizer.eos_token)
    return {"text": "".join(parts)}

# Apply format_code row by row to each split, dropping the columns of the
# split it came from so only "text" remains.
train_code, eval_code = (
    split.map(format_code, batched=False, remove_columns=ds_code[split_name].column_names)
    for split_name, split in (("train", train_code), ("validation", eval_code))
)

# Trainer for code
# Same schedule as the chat run: one fp16 epoch, batch size 2 per device,
# logging every 50 steps, checkpoints to ./lora_code every 200 steps.
code_args = TrainingArguments(
    output_dir="./lora_code",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    fp16=True,
    logging_steps=50,
    save_steps=200,
)

# NOTE(review): `model` is the same object the chat trainer just updated, so
# this run continues from the chat-tuned adapters rather than from fresh ones
# — confirm that sequential training is what is wanted.
trainer_code = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=code_args,
    train_dataset=train_code,
    eval_dataset=eval_code,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
)

trainer_code.train()


## Part B

In [None]:
#part B

# 1️⃣ Load & preprocess a French text corpus (Wikipedia)
from datasets import load_dataset

# Take the first 10 k articles of French Wikipedia.
# The Parquet-backed "wikimedia/wikipedia" repository is used here because the
# legacy script-based "wikipedia" loader relies on remote dataset code, which
# recent `datasets` releases refuse to run (and the install cell is unpinned).
# Note this is the 2023-11-01 dump rather than the 2022-03-01 one.
ds_fr = load_dataset("wikimedia/wikipedia", "20231101.fr", split="train[:10000]")

# Quick filter: drop pages whose text is empty or whitespace-only.
ds_fr = ds_fr.filter(lambda page: len(page["text"].strip()) > 0)

# tokenize to IDs
def tokenize_fr(ex):
    """Tokenize the "text" entry of a batch, truncated to ``max_seq_length``.

    Args:
        ex: Mapping whose "text" entry holds the raw strings to encode.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those strings.
    """
    texts = ex["text"]
    return tokenizer(texts, max_length=max_seq_length, truncation=True)

# Drop every original column, not just "title"/"text": any other string
# fields the dump carries (presumably e.g. id/url — verify) would otherwise
# sit next to the token IDs. After this only the tokenizer's outputs remain.
ds_fr = ds_fr.map(tokenize_fr, batched=True, remove_columns=ds_fr.column_names)


In [None]:

# This gives you the original Gemma-2 backbone again, without any adapters.
# The precision and quantization settings come from the notebook's `dtype` /
# `load_in_4bit` config values instead of repeated literals, so this load
# cannot drift from the first one if the config is edited.
# Note: `tokenizer` is rebound to the tokenizer returned by this load.
model_base, tokenizer = FastLanguageModel.from_pretrained(
    model_name     = MODEL_NAME,
    max_seq_length = max_seq_length,
    dtype          = dtype,
    load_in_4bit   = load_in_4bit,
)

# ─── Now apply the CPT LoRA adapters to this fresh model ────────────────────
# Rank-16 adapters on the attention and MLP projections, plus the output head
# and the token embeddings.
model_cpt = FastLanguageModel.get_peft_model(
    model_base,
    r              = 16,
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "lm_head", "embed_tokens",
    ],
    lora_alpha   = 16,
    lora_dropout = 0.05,
)



## Part C

In [None]:
# ─── Part C: Chat Templates & Multi‐Task Finetuning ──────────────────────────
# After trainer.train() the model is still in training mode, which keeps the
# LoRA dropout (0.05) active while generating. Switch to Unsloth's inference
# mode first so the generations below run with dropout off.
FastLanguageModel.for_inference(model)

# Device the model's parameters live on; prompts are moved there before generate().
device = next(model.parameters()).device

# C1. Zero‐Shot Classification
def classify(text, max_new_tokens=16):
    """Ask the model for the sentiment of `text` and return only its answer.

    The prompt uses the same "### Instruction / ### Input / ### Response"
    layout as format_chat. Only the tokens generated after the prompt are
    decoded, so the returned string is the model's completion rather than the
    prompt echoed back with the answer appended.

    Args:
        text: The text to classify.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded completion with special tokens removed and surrounding
        whitespace stripped.
    """
    prompt = f"### Instruction:\nClassify sentiment\n### Input:\n{text}\n### Response:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    out = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # generate() returns prompt + continuation; skip the prompt tokens.
    prompt_len = inputs["input_ids"].shape[-1]
    completion = out[0][prompt_len:]
    return tokenizer.decode(completion, skip_special_tokens=True).strip()

print(classify("I love this product!"))

# C2. Stateful Conversational Chat
# ─── Simple Stateful Chat Loop (no extra imports) ───────────────────────────

# Running transcript shared with chat_step: (speaker, text) tuples in the
# order the turns happened.
history = []
# Device of the model's parameters; chat_step moves its prompts there.
device = next(model.parameters()).device

# Speaker tags that open a turn in the chat prompt. Seeing one inside a
# completion means the model has started writing the next turn by itself.
_TURN_MARKERS = ("### User:", "### Assistant:")


def _truncate_at_next_turn(text):
    """Return `text` cut at the first turn marker, with outer whitespace stripped."""
    cut = len(text)
    for marker in _TURN_MARKERS:
        pos = text.find(marker)
        if pos != -1:
            cut = min(cut, pos)
    return text[:cut].strip()


def chat_step(user_input: str, max_new_tokens=64):
    """Run one chat turn against the module-level model and update `history`.

    The prompt is the whole transcript rendered as "### User: ..." /
    "### Assistant: ..." lines, followed by an open "### Assistant:" tag for
    the model to complete.

    Args:
        user_input: The user's message for this turn.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The assistant's reply for this turn. Anything the model generated past
        the start of a further "### User:" / "### Assistant:" turn is dropped,
        so invented follow-up turns never end up in the transcript.

    Side effects:
        Appends the user turn and the assistant turn to the module-level
        `history`, but only after generation succeeded; if generation raises,
        `history` is left as it was.

    NOTE(review): this "### User:/### Assistant:" layout differs from the
    "### Instruction:/### Response:" layout produced by format_chat — confirm
    that the mismatch is intended.
    """
    # 1) transcript for this call: stored turns plus the pending user turn
    turns = history + [("User", user_input)]

    # 2) build the full prompt
    lines = [
        f"### User: {text}\n" if speaker == "User" else f"### Assistant: {text}\n"
        for speaker, text in turns
    ]
    prompt = "".join(lines) + "### Assistant:"  # the model completes this

    # 3) tokenize + generate (sampling at temperature 0.7)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    out = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
    )

    # 4) keep only the newly generated tokens, then only the first turn in them
    gen = out[0][inputs["input_ids"].shape[-1]:]
    response = _truncate_at_next_turn(tokenizer.decode(gen, skip_special_tokens=True))

    # 5) commit both turns now that the reply exists
    history.append(("User", user_input))
    history.append(("Assistant", response))
    return response

# ─── Example Usage ───────────────────────────────────────────────────────────
# Three consecutive turns; each call sees the transcript built up so far.
for user_turn in (
    "Hi there! How are you today?",
    "Can you tell me a joke?",
    "Thanks, that was fun. What's the weather like in Paris?",
):
    print(chat_step(user_turn))


In [None]:
# — C3 (small): Extend GPT-2’s Context Window to 2048 —
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL = "gpt2"  # 124M, ~500MB on disk
tok = AutoTokenizer.from_pretrained(MODEL)
model_small = AutoModelForCausalLM.from_pretrained(MODEL, device_map="auto")

# 1) Desired new context length
NEW_CTX = 2048

# 2) Grab & repeat the positional embeddings
# GPT-2 stores them in model.transformer.wpe
wpe = model_small.transformer.wpe
old_pos = wpe.weight.data                                     # [old_ctx, hidden]
old_ctx = old_pos.size(0)
# Ceil-divide and trim so the table has exactly NEW_CTX rows even when NEW_CTX
# is not a multiple of the original length (floor division would silently
# produce a shorter table).
repeat = -(-NEW_CTX // old_ctx)
wpe.weight.data = old_pos.repeat(repeat, 1)[:NEW_CTX].contiguous()
wpe.num_embeddings = NEW_CTX   # keep the module's metadata in line with its weight

# 3) Grow the causal-mask buffers
# Where an attention layer keeps a precomputed lower-triangular mask in its
# `bias` buffer, that buffer is still old_ctx x old_ctx and would not cover
# sequences longer than the original context. The guards skip layers that do
# not carry such a 4-D buffer.
for layer in model_small.transformer.h:
    mask = getattr(layer.attn, "bias", None)
    if torch.is_tensor(mask) and mask.dim() == 4 and mask.size(-1) < NEW_CTX:
        layer.attn.bias = torch.tril(
            torch.ones((NEW_CTX, NEW_CTX), dtype=mask.dtype, device=mask.device)
        ).view(1, 1, NEW_CTX, NEW_CTX)

# 4) Update config (and the tokenizer's own length limit)
model_small.config.n_positions = NEW_CTX
model_small.config.n_ctx       = NEW_CTX
tok.model_max_length           = NEW_CTX

# 5) Quick test
# The prompt must be longer than the ORIGINAL context, otherwise the new
# position rows are never touched and the check proves nothing.
prompt = "Hello " * (old_ctx + 200)
ids    = tok(prompt, return_tensors="pt").input_ids.to(model_small.device)
assert ids.shape[-1] > old_ctx, "test prompt does not exceed the original context"
out    = model_small.generate(ids, max_new_tokens=20)
print("✅ Extended to", model_small.config.n_positions, "tokens.")
# Show only the continuation; echoing the >1000-token prompt would flood the output.
print(tok.decode(out[0][ids.shape[-1]:], skip_special_tokens=True))

