In [None]:
import json

# Load the training records into `file` (the name later cells iterate over).
#
# The path ends in ".jsonl", but json.load() only understands a single JSON
# document. Try that first (e.g. the file is one big array); if it fails,
# treat the file as real JSON Lines with one JSON value per non-empty line.
# The `with` block closes the handle, and UTF-8 is the encoding JSON expects.
DATASET_PATH = "dataset_example.jsonl"

with open(DATASET_PATH, "r", encoding="utf-8") as dataset_fh:
    dataset_text = dataset_fh.read()

try:
    file = json.loads(dataset_text)
except json.JSONDecodeError:
    file = [json.loads(line) for line in dataset_text.splitlines() if line.strip()]

# Quick sanity check: show the fourth record (needs at least four records).
print(file[3])

In [None]:
!pip install unsloth trl peft accelerate bitsandbytes

In [None]:
import torch
from datasets import Dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel


In [None]:
# Report whether a CUDA device is visible and, if so, the name of device 0.
cuda_ready = torch.cuda.is_available()
gpu_label = torch.cuda.get_device_name(0) if cuda_ready else 'None'

print(f"CUDA available: {cuda_ready}")
print(f"GPU: {gpu_label}")

In [None]:
from unsloth import FastLanguageModel
import torch

# Base checkpoint and loading options.
max_seq_length = 2048  # reused later as the trainer's max_seq_length
dtype = None  # forwarded unchanged to from_pretrained
model_name = "unsloth/Qwen3-1.7B-unsloth-bnb-4bit"

# Collect the loader arguments in one place, then load model and tokenizer.
load_options = {
    "model_name": model_name,
    "max_seq_length": max_seq_length,
    "dtype": dtype,
    "load_in_4bit": True,
}
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

In [None]:
def format_qwen3_prompt(example):
    """Render one dataset record as a single chat-formatted string.

    Args:
        example: Mapping with an ``'input'`` entry (used as the user turn)
            and an ``'output'`` entry (used as the assistant turn).

    Returns:
        The result of the module-level ``tokenizer.apply_chat_template`` for
        the two-turn conversation, requested untokenized and without a
        trailing generation prompt.
    """
    user_turn = {"role": "user", "content": example['input']}
    assistant_turn = {"role": "assistant", "content": example['output']}

    return tokenizer.apply_chat_template(
        [user_turn, assistant_turn],
        tokenize=False,
        add_generation_prompt=False,
    )

# Convert every loaded record to chat-formatted text and wrap the result in a
# Dataset with a single "text" column.
formatted_data = list(map(format_qwen3_prompt, file))
dataset = Dataset.from_dict({"text": formatted_data})

print(f"Dataset formatted for Qwen3! Total examples: {len(dataset)}")
print("\nExample formatted prompt:")

# Preview the first example, cut to 500 characters when it is longer.
first_example = formatted_data[0]
if len(first_example) > 500:
    print(first_example[:500] + "...")
else:
    print(first_example)

In [None]:
# Names of the modules that are handed to get_peft_model as LoRA targets.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
    "embed_tokens", "lm_head",
]

# Adapter hyper-parameters, gathered so they can be read (and tuned) in one place.
lora_settings = dict(
    r=32,
    target_modules=lora_target_modules,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# NOTE: this rebinds `model`; re-running the cell without reloading the base
# model would pass the already-wrapped model back into get_peft_model.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

print("LoRA adapters configured for Qwen3!")

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
# Decide the mixed-precision mode once: bf16 when the GPU reports support for
# it, fp16 otherwise (exactly one of the two flags ends up True).
bf16_ok = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    # Batch shape: 1 sample per device, gradients accumulated over 8 steps.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    # Schedule.
    num_train_epochs=8,
    warmup_steps=20,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    # Optimisation.
    optim="adamw_8bit",
    weight_decay=0.01,
    max_grad_norm=1.0,
    fp16=not bf16_ok,
    bf16=bf16_ok,
    # Reproducibility, logging and checkpoints.
    seed=3407,
    logging_steps=5,
    report_to="none",
    output_dir="qwen3_clone_outputs",
    save_strategy="epoch",
    save_total_limit=2,
    # Data loading.
    dataloader_pin_memory=False,
    remove_unused_columns=False,
)

# Supervised fine-tuning over the "text" column of the formatted dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=training_args,
)

print("Training configured for Qwen3!")

In [None]:
# Run the fine-tuning loop. The value returned by trainer.train() is kept in
# `trainer_stats` so it can be inspected after the run.
print("Starting Qwen3 fine-tuning...")
trainer_stats = trainer.train()
print("Training completed!")

In [None]:
# Hand the trained model to Unsloth's for_inference hook before any
# generate() call is made in this cell.
FastLanguageModel.for_inference(model)

def test_qwen3_clone(question):
    """Test the Qwen3 personal clone with a question"""
    messages = [
        {"role": "user", "content": question}
    ]

   
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

   
    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=300,
        use_cache=True,
        temperature=0.6,
        do_sample=True,
        top_p=0.8,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    
    if "assistant" in response:
        response_parts = response.split("assistant")
        if len(response_parts) > 1:
            response = response_parts[-1].strip()

    return response


# Prompts used to smoke-test the fine-tuned model.
test_questions = ["Тестовый вопрос"]

banner = "=" * 60
print("\n" + banner)
print("TESTING QWEN3 PERSONAL CLONE")
print(banner)

for number, prompt in enumerate(test_questions, start=1):
    print(f"\n[{number}] Вопрос: {prompt}")
    try:
        reply = test_qwen3_clone(prompt)
        print(f"    Ответ: {reply}")
    except Exception as err:
        # Report the failure and move on so one bad prompt does not stop the
        # remaining questions from being tried.
        print(f"    Ошибка: {err}")
    print("-" * 40)


In [None]:
# Export the model together with its tokenizer via save_pretrained_gguf,
# targeting "gguf_model" with the "f16" quantization method. The download
# cell looks for *.gguf files inside that same "gguf_model" directory.
model.save_pretrained_gguf("gguf_model", tokenizer, quantization_method="f16")

In [None]:
from google.colab import files
import os

# Offer the exported GGUF file for download through the Colab `files` helper.
GGUF_DIR = "gguf_model"

# os.listdir() returns names in arbitrary order; sort so the file picked with
# [0] is the same on every run when several .gguf files are present.
gguf_files = sorted(f for f in os.listdir(GGUF_DIR) if f.endswith(".gguf"))
if gguf_files:
    gguf_file = os.path.join(GGUF_DIR, gguf_files[0])
    print(f"Downloading: {gguf_file}")
    files.download(gguf_file)
else:
    # Say so explicitly instead of finishing silently with nothing downloaded.
    print(f"No .gguf files found in '{GGUF_DIR}'; nothing to download.")