In [None]:
# Import required libraries
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from datasets import Dataset, DatasetDict
import torch
import os
import subprocess
import threading
import time

from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

import torch
print(torch.cuda.is_available())


# Install bitsandbytes
!pip install bitsandbytes

# Get Hugging Face token from Kaggle secrets
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HF_TOKEN")

# Log in to Hugging Face
login(token=hf_token)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model and paths
model_name = "meta-llama/Llama-3.2-1B"
model_path = "/kaggle/working/base_model"

# Load model and tokenizer: prefer the copy saved under `model_path`; if that
# fails for any reason, download `model_name` and save it there for next time.
try:
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
    print("Loaded model from local directory...")
except Exception as load_error:
    # `except Exception` instead of a bare `except:` so that KeyboardInterrupt
    # and SystemExit still stop the notebook, and the reason for the fallback
    # is reported instead of being silently discarded.
    print(f"Could not load model from {model_path} ({type(load_error).__name__}: {load_error})")
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    model.save_pretrained(model_path)
    tokenizer.save_pretrained(model_path)
    print("Downloaded and saved model...")

# Reuse the end-of-sequence token as the padding token so padded batches can
# be built with this tokenizer.
tokenizer.pad_token = tokenizer.eos_token

# Load and process dataset: concatenate every transcript file into one string,
# cut it into fixed-size character chunks, and split the chunks 80/20.
data_dir = "/kaggle/input/hunting-beast-youtube-transcripts"
file_texts = []
# os.listdir returns entries in arbitrary order; sorting keeps the
# concatenated text (and therefore the train/test split) reproducible.
for file in sorted(os.listdir(data_dir)):
    file_path = os.path.join(data_dir, file)
    # Skip sub-directories and other non-regular entries, which open() cannot read.
    if not os.path.isfile(file_path):
        continue
    with open(file_path, "r", encoding="utf-8") as f:
        file_texts.append(f.read())

# Each file's text is followed by a single space, assembled in one pass
# instead of repeated string concatenation.
all_text = "".join(text + " " for text in file_texts)

# Chunks are measured in characters, not tokens.
chunk_size = 2048
text_chunks = [{"text": all_text[i:i + chunk_size]} for i in range(0, len(all_text), chunk_size)]
full_dataset = Dataset.from_list(text_chunks)

# The first 80% of the chunks form the training split, the remainder the test split.
split_ratio = 0.8
train_size = int(split_ratio * len(full_dataset))
train_dataset = full_dataset.select(range(train_size))
test_dataset = full_dataset.select(range(train_size, len(full_dataset)))
dataset = DatasetDict({"train": train_dataset, "test": test_dataset})

# GPU monitoring thread
def monitor_gpu(interval=5, stop_event=None):
    """Periodically print ``nvidia-smi`` output and PyTorch's allocated CUDA memory.

    Meant to be used as the target of a background thread. Each iteration
    runs ``nvidia-smi``, prints its output, and, when CUDA is available,
    prints ``torch.cuda.memory_allocated()`` in GB.

    Args:
        interval: Seconds to wait between two reports. Defaults to 5.
        stop_event: Optional ``threading.Event``-like object (``is_set()`` and
            ``wait(timeout)``). When it is set, the loop exits. With the
            default ``None`` the loop only ends when ``nvidia-smi`` is missing.

    Returns:
        None.
    """
    while stop_event is None or not stop_event.is_set():
        try:
            output = subprocess.check_output(['nvidia-smi'], stderr=subprocess.STDOUT)
            # errors="replace" so unexpected bytes cannot abort the report.
            print(output.decode('utf-8', errors='replace'))
            if torch.cuda.is_available():
                print(f"PyTorch CUDA memory allocated: {torch.cuda.memory_allocated() / (1024**3):.2f} GB")
        except subprocess.CalledProcessError as e:
            # nvidia-smi ran but exited non-zero; show what it printed and keep polling.
            print(f"Error executing nvidia-smi: {e.output.decode('utf-8', errors='replace')}")
        except FileNotFoundError:
            print("nvidia-smi not found. Make sure NVIDIA drivers are installed.")
            # Stop instead of repeating the same message every `interval` seconds.
            return
        except Exception as e:
            print(f"An unexpected error occurred: {e}")

        if stop_event is None:
            time.sleep(interval)
        else:
            # Event.wait returns as soon as the event is set, so a stop request
            # does not have to sit out the full interval.
            stop_event.wait(interval)

# Run the GPU monitor in the background. Marking the thread as a daemon means
# it does not keep the interpreter alive once the main program finishes.
gpu_monitor_thread = threading.Thread(target=monitor_gpu)
gpu_monitor_thread.daemon = True
gpu_monitor_thread.start()

# Fine-tuning function
def fine_tune_LLM(model):
    """Fine-tune ``model`` with LoRA adapters and return the evaluation loss.

    Uses the module-level ``tokenizer`` and ``dataset`` (a ``DatasetDict``
    with "train" and "test" splits that each have a "text" column). The
    tokenizer's pad token is set to its EOS token as a side effect, and
    Trainer output is written under "fine_tuned_model" and "logs".

    Args:
        model: Causal language model to wrap with LoRA adapters and train.

    Returns:
        The "eval_loss" value reported by ``trainer.evaluate()`` on the
        "test" split after training.
    """
    def tokenize_function(examples):
        # Return plain token-id lists. `Dataset.map` stores its results in
        # CPU-side Arrow tables, so building torch tensors and moving them to
        # the GPU here is wasted work; the Trainer moves each batch to the
        # right device itself. Padding is left to the data collator below,
        # which pads each batch only to its longest sequence instead of
        # padding every example to 2048 tokens.
        return tokenizer(examples["text"], truncation=True, max_length=2048)

    tokenized_data = dataset.map(tokenize_function, batched=True, remove_columns=["text"])

    model.train()
    # NOTE(review): this helper targets 8-bit/4-bit quantized models; confirm
    # it is still wanted when `model` was loaded without quantization.
    model = prepare_model_for_kbit_training(model)

    # LoRA adapters on the attention and MLP projection layers.
    peft_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj'],
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM"
    )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

    # The collator needs a pad token to build batches; reuse EOS for padding.
    tokenizer.pad_token = tokenizer.eos_token
    # mlm=False selects the causal-LM objective (labels are the input ids).
    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

    training_args = TrainingArguments(
        output_dir="fine_tuned_model",
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={'use_reentrant': False},
        overwrite_output_dir=True,
        learning_rate=1e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=1,
        weight_decay=0.01,
        logging_dir="logs",
        logging_strategy="epoch",
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        fp16=True,
        optim="paged_adamw_8bit",
    )

    trainer = Trainer(
        model=model,
        train_dataset=tokenized_data["train"],
        eval_dataset=tokenized_data["test"],
        args=training_args,
        data_collator=data_collator
    )

    print("\nBeginning to train model...")
    trainer.train()

    # Final pass over the "test" split; only the loss is returned to the caller.
    eval_results = trainer.evaluate()
    return eval_results["eval_loss"]

# Run fine-tuning
# Train the LoRA adapters on the loaded model and report the loss that
# fine_tune_LLM returns from its final evaluation.
eval_loss = fine_tune_LLM(model=model)
print(f"Final Evaluation Loss: {eval_loss}")