# QWEN3 Turkish Training - Quick Start\n\nBu notebook QWEN3 modelini Türkçe veri seti ile eğitir.

In [None]:
# Detect the runtime environment and, on Google Colab, mount Google Drive.
import sys

# True only when the `google.colab` module is already loaded in this kernel.
IS_COLAB = 'google.colab' in sys.modules

if not IS_COLAB:
    print('💻 Running locally')
else:
    # Imported lazily: the package only exists inside Colab runtimes.
    from google.colab import drive

    drive.mount('/content/drive')
    print('✅ Google Drive mounted')

In [None]:
# Gerekli paketleri yükle\nif IS_COLAB:\n    !pip install -q torch transformers datasets accelerate bitsandbytes\n    !pip install -q peft trl wandb sentencepiece protobuf\n    print('✅ Packages installed')

In [None]:
# Download the training script from GitHub and run it (Colab only).
import os
import urllib.request

SCRIPT_URL = 'https://raw.githubusercontent.com/HuseyinAts/teknofest-2025-egitim-eylemci/main/notebooks/qwen3_training_production_v4_fixed.py'
SCRIPT_PATH = '/content/training_script.py'

if IS_COLAB:
    # urlopen raises on HTTP/network errors, so a failed download stops the
    # cell here instead of reporting success and executing an empty file.
    with urllib.request.urlopen(SCRIPT_URL, timeout=60) as response:
        script_source = response.read().decode('utf-8')
    if not script_source.strip():
        raise RuntimeError(f'Downloaded training script is empty: {SCRIPT_URL}')

    # Keep a copy on disk at the same location the script was saved before.
    with open(SCRIPT_PATH, 'w', encoding='utf-8') as script_file:
        script_file.write(script_source)
    print('✅ Training script downloaded')

    # SECURITY: this executes code fetched from a mutable branch (`main`) of a
    # remote repository inside the notebook namespace. Review the script, or
    # reference a specific commit in SCRIPT_URL, before running.
    # Compiling with the file name makes tracebacks point at SCRIPT_PATH.
    exec(compile(script_source, SCRIPT_PATH, 'exec'))

In [None]:
# Alternative: start training manually
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import load_dataset
import torch

# Load the model and tokenizer
model_name = 'Qwen/Qwen2.5-0.5B'  # Small model for testing

tokenizer = AutoTokenizer.from_pretrained(model_name)

# The weights are loaded in float32 on purpose. The training cell enables
# fp16 mixed precision (`fp16=True`) on GPU, and the gradient scaler used by
# mixed precision cannot unscale gradients of float16 parameters — training a
# model loaded in float16 fails with "Attempting to unscale FP16 gradients".
# Mixed precision still runs the forward/backward pass in half precision.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map='auto'
)

print(f'✅ Model loaded: {model_name}')

In [None]:
# Load the dataset
dataset = load_dataset('Huseyin/turkish-200k-dataset', split='train[:1000]')  # First 1000 examples

# Fixed seed so the train/eval partition is identical on every run; without it
# evaluation numbers from different runs are computed on different examples.
SPLIT_SEED = 42


def tokenize_function(examples):
    """Tokenize a batch of examples for training.

    Args:
        examples: A batch from `dataset.map(..., batched=True)`; its 'text'
            entry is passed to the tokenizer (assumes the dataset exposes a
            'text' column — verify against the dataset card).

    Returns:
        The tokenizer output for the batch, with every sequence truncated or
        padded to exactly 512 tokens.
    """
    return tokenizer(
        examples['text'],
        padding='max_length',
        truncation=True,
        max_length=512
    )


# Tokenize the data
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Train/test split: 10% of the examples are held out for evaluation
split_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_dataset = split_dataset['train']
eval_dataset = split_dataset['test']

print(f'✅ Dataset ready: {len(train_dataset)} train, {len(eval_dataset)} eval')

In [None]:
# Training configuration
import inspect

from transformers import DataCollatorForLanguageModeling

# Package versions are not pinned, so pick keyword names that the installed
# transformers release actually accepts:
#   * TrainingArguments: `evaluation_strategy` was renamed to `eval_strategy`.
#   * Trainer: `tokenizer` was replaced by `processing_class`.
_args_params = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_key = 'eval_strategy' if 'eval_strategy' in _args_params else 'evaluation_strategy'

_trainer_params = inspect.signature(Trainer.__init__).parameters
_tokenizer_key = 'processing_class' if 'processing_class' in _trainer_params else 'tokenizer'

training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/qwen_checkpoints',
    num_train_epochs=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=100,
    weight_decay=0.01,
    logging_dir='/content/logs',
    logging_steps=10,
    # NOTE(review): with the small quick-start subset an epoch may finish in
    # fewer than 500 steps, in which case no intermediate checkpoint is written
    # and `load_best_model_at_end` has nothing to restore — confirm intent.
    save_steps=500,
    eval_steps=100,
    save_strategy='steps',
    load_best_model_at_end=True,
    fp16=torch.cuda.is_available(),
    **{_eval_strategy_key: 'steps'},
)

# The tokenized dataset only carries input_ids/attention_mask. A causal LM
# returns a loss only when `labels` are supplied, so without this collator
# `trainer.train()` fails because the model output contains no loss.
# With mlm=False the collator copies input_ids into labels and masks padding
# positions so they do not contribute to the loss.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Build the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
    **{_tokenizer_key: tokenizer},
)

print('✅ Trainer ready')

In [None]:
# Run the training loop
print('🚀 Starting training...')
trainer.train()

# Persist the trained model and its tokenizer side by side in one Drive folder
final_model_dir = '/content/drive/MyDrive/qwen_final_model'
trainer.save_model(final_model_dir)
tokenizer.save_pretrained(final_model_dir)

print('✅ Training complete! Model saved to Drive.')

In [None]:
# Try the model out
def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_p=0.95):
    """Sample a continuation of `prompt` from the global `model`.

    Args:
        prompt: Text to continue.
        max_new_tokens: Upper bound on the number of generated tokens. Unlike
            `max_length`, this does not count the prompt tokens, so long
            prompts keep the same response budget.
        temperature: Sampling temperature passed to `model.generate`.
        top_p: Nucleus-sampling threshold passed to `model.generate`.

    Returns:
        The decoded first output sequence with special tokens removed
        (for a causal LM this text starts with the prompt itself).
    """
    # Make sure train-only behavior such as dropout is disabled while sampling.
    model.eval()
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
        top_p=top_p,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Sample prompts
test_prompts = [
    "Türkiye'nin başkenti",
    'Yapay zeka nedir?',
    'Matematik çalışmak için'
]

for prompt in test_prompts:
    print(f'\nPrompt: {prompt}')
    print(f'Response: {generate_text(prompt)}')