<a href="https://colab.research.google.com/github/Rithan377/llama86mil/blob/main/llama85mil1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 🛠️ Install dependencies
!pip install -q datasets transformers peft huggingface_hub

# ✅ Imports
import os

import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

# ✅ Setup
# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "meta-llama/Prompt-Guard-86M"
# Take the Hugging Face access token from the HF_TOKEN environment variable rather
# than hard-coding it. An unset or empty variable becomes None, so `from_pretrained`
# is free to use whatever credentials the library finds on its own; an explicit
# empty string would instead be forwarded as the token itself.
# NOTE(review): confirm the fallback behaviour against the installed huggingface_hub.
hf_token = os.getenv("HF_TOKEN") or None

# ✅ Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForSequenceClassification.from_pretrained(model_name, token=hf_token)

# ✅ Apply LoRA
# Prompt-Guard-86M is built on an mDeBERTa-v3 backbone (DeBERTa-v2 architecture in
# transformers), not on a Llama decoder, so Llama-style module names such as
# "q_proj" or "embed_tokens" do not exist in it. The targets below are the
# DeBERTa-v2 counterparts of the originally intended layers:
#   embed_tokens        -> word_embeddings
#   q_proj/k_proj/v_proj -> query_proj/key_proj/value_proj
#   o_proj              -> attention.output.dense
#   mlp.up_proj         -> intermediate.dense
#   mlp.down_proj       -> output.dense
# PEFT matches target names by module-name suffix, so "output.dense" also covers
# "attention.output.dense"; the latter is listed only to keep the mapping explicit.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    task_type=TaskType.SEQ_CLS,
    target_modules=[
        "word_embeddings",
        "query_proj",
        "key_proj",
        "value_proj",
        "attention.output.dense",
        "intermediate.dense",
        "output.dense",
    ],
    bias="none",
)
# Wrap the base model with the LoRA adapters, then move it to the selected device.
model = get_peft_model(model, lora_config)
model = model.to(device)

# ✅ Load dataset
# The identifier is kept in its own variable so it is easy to spot and swap.
dataset_id = "buio/heart-disease"
ds = load_dataset(dataset_id)

# ✅ Convert structured data to text
def row_to_text(batch):
    """Render each row of a column-oriented batch as one descriptive sentence.

    Args:
        batch: Mapping from column name to a list of values (the shape that
            ``Dataset.map(..., batched=True)`` passes in). Must contain the
            feature columns listed in ``feature_labels`` below.

    Returns:
        ``{"text": [...]}`` with one string per row, formatted as
        ``"Age: <age>, Sex: <sex>, ..., Thalassemia: <thal>"``.

    The ``target`` column is deliberately NOT written into the text: it is the
    label the classifier has to predict, so embedding it in the input would leak
    the answer during training and would not match the prompt used at inference.
    """
    # (column name, human-readable label) pairs, in output order.
    feature_labels = (
        ("age", "Age"),
        ("sex", "Sex"),
        ("cp", "Chest Pain Type"),
        ("trestbps", "Resting BP"),
        ("chol", "Cholesterol"),
        ("fbs", "Fasting Blood Sugar"),
        ("thalach", "Max Heart Rate"),
        ("exang", "Exercise Induced Angina"),
        ("oldpeak", "Depression"),
        ("slope", "Slope"),
        ("ca", "Major Vessels"),
        ("thal", "Thalassemia"),
    )
    labels = [label for _, label in feature_labels]
    columns = [batch[column] for column, _ in feature_labels]

    # zip(*columns) walks the batch row by row without manual indexing.
    texts = [
        ", ".join(f"{label}: {value}" for label, value in zip(labels, row))
        for row in zip(*columns)
    ]
    return {"text": texts}

ds_text = ds.map(row_to_text, batched=True)

# ✅ Tokenization
def tokenize_function(examples):
    """Tokenize the ``text`` column of a batch.

    Every sequence is truncated or padded to exactly 512 tokens, so all
    examples end up with the same length.
    """
    tokenizer_options = {"truncation": True, "padding": "max_length", "max_length": 512}
    return tokenizer(examples["text"], **tokenizer_options)

# Tokenize, then expose the "target" column under the name "label".
tokenized_ds = ds_text.map(tokenize_function, batched=True).rename_column("target", "label")
# Only these columns are returned, as torch tensors, when the dataset is indexed.
tokenized_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# ✅ Train/Validation split
# Hold out 10% of the "train" split for evaluation; the remainder is used for training.
split_dataset = tokenized_ds["train"].train_test_split(test_size=0.1)
train_dataset, eval_dataset = (split_dataset[part] for part in ("train", "test"))

# ✅ Training config
# The `evaluation_strategy` argument was renamed to `eval_strategy` in newer
# transformers releases and the old name was subsequently removed. Because the
# install above is unpinned, pick whichever name the installed TrainingArguments
# dataclass actually defines (preferring the new one) instead of hard-coding either.
eval_strategy_arg = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)
training_args = TrainingArguments(
    output_dir="./lora_finetuned_model",
    save_strategy="steps",
    eval_steps=100,
    save_steps=500,
    logging_dir="./logs",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch of 2 * 4 = 8 examples per device
    num_train_epochs=10,
    learning_rate=2e-5,
    fp16=torch.cuda.is_available(),  # mixed precision only when a CUDA device exists
    report_to="none",
    **{eval_strategy_arg: "steps"},
)

# ✅ Trainer setup
# Gather the Trainer inputs in one place before constructing it.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)
trainer = Trainer(**trainer_kwargs)

# ✅ Start training
trainer.train()

# ✅ Inference helper
def query_model(input_text: str):
    """Return the index of the highest-scoring class for ``input_text``.

    The text is tokenized (truncated/padded to 512 tokens), moved to the
    module-level ``device``, and passed through the module-level ``model`` in
    eval mode with gradient tracking disabled.
    """
    encoded = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    encoded = encoded.to(device)

    model.eval()
    with torch.no_grad():
        scores = model(**encoded).logits
    # The class index is the position of the largest logit along the last axis.
    return scores.argmax(dim=-1).item()

# ✅ Example inference
# One patient record, written as a feature sentence without a "Target" field.
input_text = (
    "Age: 63, Sex: 1, Chest Pain Type: 3, Resting BP: 145, Cholesterol: 233, "
    "Fasting Blood Sugar: 1, Max Heart Rate: 150, Exercise Induced Angina: 0, "
    "Depression: 2.3, Slope: 3, Major Vessels: 0, Thalassemia: 2"
)
predicted_class = query_model(input_text)
print(f"Predicted class: {predicted_class}")
