# Nizami-1.7B CoT Fine-Tune (Unsloth + LoRA)  
> Bu notebook, GitHub Actions tarafından otomatik olarak tetiklenir.

In [None]:
# ========== 1. Environment & Secrets ==========
import os
import subprocess
import torch

# All credentials are read from environment variables; a variable that is not
# set comes back as None.
# Original author's note: Kaggle secrets are expected to arrive automatically
# when the notebook is pushed.
# NOTE(review): confirm the secrets really are exposed as environment variables.
HF_TOKEN, WANDB_KEY, GITHUB_TOKEN = (
    os.getenv(env_name)
    for env_name in ("HF_TOKEN", "WANDB_API_KEY", "GITHUB_TOKEN")
)

# Only the Hugging Face and W&B credentials are mandatory at this point;
# GITHUB_TOKEN (used for cloning the private repo) is not part of this check.
assert all((HF_TOKEN, WANDB_KEY)), "HF_TOKEN veya WANDB_API_KEY eksik!"

In [None]:
# ========== 2. Repo’yu çek ==========
REPO_URL = f"https://{GITHUB_TOKEN}@github.com/Rustamshry34/LLMOps.git"
!git clone $REPO_URL repo
%cd repo
!pip install -r docker/requirements.txt

In [None]:
# ========== 4. Train (same hyper-parameters as pipeline.txt) ==========
from datasets import load_dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer, SFTConfig
import torch
from huggingface_hub import login

# Authenticate against the Hugging Face Hub with the token read in cell 1.
login(HF_TOKEN)

# Loader settings for the base checkpoint: half precision, no 4-bit
# quantisation, everything placed on device 0.
base_model_kwargs = dict(
    model_name="unsloth/Qwen3-0.6B",
    max_seq_length=5000,
    dtype=torch.float16,
    load_in_4bit=False,
    device_map={"": 0},
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_kwargs)

# Wrap the base model with LoRA adapters (rank 16, alpha 32, no dropout, no
# bias terms, gradient checkpointing disabled) on the projection modules
# listed below.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=32,
    target_modules=attention_projections + mlp_projections,
    lora_dropout=0.0,
    bias="none",
    use_gradient_checkpointing=False,
)

# ----- data -----
# Fetch the "train" split of both corpora, reasoning set first.
reasoning_ds, non_reasoning_ds = (
    load_dataset(repo_id, split="train")
    for repo_id in (
        "moremilk/CoT_Temporal_Reasoning_Dataset",
        "mlabonne/FineTome-100k",
    )
)

def generate_conversation(examples):
    """Convert a batch of rows into two-turn chat conversations.

    Args:
        examples: Mapping with parallel sequences under the keys
            "question", "metadata" and "answer". Each metadata entry must
            support ``.get("reasoning", "")``.

    Returns:
        ``{"conversations": [...]}`` with one ``[user, assistant]`` message
        pair per row. The assistant message wraps the reasoning in
        ``<think>`` tags and the answer in ``<answer>`` tags.
    """

    def _assistant_reply(metadata, answer):
        # A missing "reasoning" key yields an empty <think></think> section.
        return f"<think>{metadata.get('reasoning', '')}</think>\n\n<answer>{answer}</answer>"

    return {
        "conversations": [
            [
                {"role": "user", "content": question},
                {"role": "assistant", "content": _assistant_reply(metadata, answer)},
            ]
            for question, metadata, answer in zip(
                examples["question"], examples["metadata"], examples["answer"]
            )
        ]
    }

# Replace the raw reasoning columns with a single "conversations" column, then
# render every conversation to one chat-templated string.
reasoning_ds = reasoning_ds.map(generate_conversation, batched=True, remove_columns=reasoning_ds.column_names)
reasoning_text = [tokenizer.apply_chat_template(conv, tokenize=False) for conv in reasoning_ds["conversations"]]

# Normalise the non-reasoning corpus to the role/content schema and render it
# with the same chat template (batched call).
from unsloth.chat_templates import standardize_sharegpt
non_ds = standardize_sharegpt(non_reasoning_ds)
non_text = tokenizer.apply_chat_template(non_ds["conversations"], tokenize=False)

import pandas as pd, datasets
# The batched apply_chat_template call returns a plain Python list, which has
# no .sample() method; wrap it in a Series before sampling. n=0 keeps the mix
# at 0% non-reasoning examples.
non_text_sample = pd.Series(non_text).sample(n=0, random_state=2407).tolist()
data = pd.Series(reasoning_text + non_text_sample)
data.name = "text"  # column name that SFTConfig(dataset_text_field="text") reads
combined_ds = datasets.Dataset.from_pandas(pd.DataFrame(data)).shuffle(seed=3407)

# ----- training -----
from trl import SFTTrainer, SFTConfig
import wandb

# Authenticate with Weights & Biases before the trainer starts reporting.
wandb.login(key=WANDB_KEY)

# Hyper-parameters, grouped by concern.
# NOTE(review): the run name says "1.7B" while the checkpoint loaded earlier in
# this cell is unsloth/Qwen3-0.6B -- confirm which one is intended.
training_hparams = dict(
    # bookkeeping / reporting
    output_dir="./outputs",
    logging_steps=10,
    report_to="wandb",
    run_name="nizami-1.7B-cot-kaggle",
    # batching (4 samples x 4 accumulation steps per optimiser update, per device)
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    dataloader_num_workers=16,
    # optimisation schedule
    num_train_epochs=2,
    learning_rate=3e-5,
    warmup_steps=61,
    lr_scheduler_type="cosine",
    weight_decay=0.01,
    fp16=True,
    # data
    max_seq_length=5000,
    dataset_text_field="text",
)
args = SFTConfig(**training_hparams)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=args,
    train_dataset=combined_ds,
)
trainer.train()

# Persist the adapter weights first, then the tokenizer, into the same folder.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./outputs")

# Close the W&B run once everything has been written.
wandb.finish()

In [None]:
# ========== 5. Upload to the HF Hub ==========
# Runs the upload script via the shell. The path is relative, so it presumably
# relies on the working directory still being the cloned repo (`%cd repo` in
# the clone cell) -- verify if cells are run out of order.
!python scripts/upload_model.py

In [None]:
# ========== 6. Report the Kaggle result to GitHub (status = complete) ==========
# NOTE(review): this cell only prints a completion message; no call that
# actually notifies GitHub or checks the upload result is visible here.
print("✅ Kaggle eğitimi tamamlandı, model HF Hub’da.")