In [1]:
!pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-c

In [2]:
import torch
import random
import numpy as np
import wandb
from torch.utils.data import Dataset
import os
import json
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer,AutoModel,BitsAndBytesConfig,Trainer, TrainingArguments
from peft import prepare_model_for_kbit_training, get_peft_model, LoraConfig, TaskType
from sklearn.metrics import accuracy_score, precision_recall_fscore_support,roc_auc_score
from transformers.modeling_outputs import SequenceClassifierOutput
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())
print(torch.cuda.device_count())

2.6.0+cu124
12.4
True
1


In [3]:
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33me274028[0m ([33me274028-metu-middle-east-technical-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:

import zipfile
import os

with zipfile.ZipFile("dataset.zip", 'r') as zip_ref:
    zip_ref.extractall("dataset")


In [5]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

In [6]:
def parse_reviews_to_dataframe(dataset_dir):
    records = []

    for year_dir in os.listdir(dataset_dir):
        year_path = os.path.join(dataset_dir, year_dir)
        if not os.path.isdir(year_path):
            continue

        review_dir = os.path.join(year_path, f"{year_dir}_review")
        if not os.path.exists(review_dir):
            continue

        for fname in os.listdir(review_dir):
            if not (fname.endswith(".json") and "ICLR" in fname):
                continue

            file_path = os.path.join(review_dir, fname)
            with open(file_path, "r") as f:
                try:
                    data = json.load(f)
                except Exception as e:
                    print(f"Failed to parse {file_path}: {e}")
                    continue

            paper_id = data.get("id", fname.replace(".json", ""))
            meta_review = data.get("metaReview", "")
            reviews = data.get("reviews", [])

            review_texts = []
            rating_scores = []

            for review in reviews:
                review_text = review.get("review", "")
                rating_raw = review.get("rating", "")
                try:
                    rating_score = int(rating_raw.split(":")[0].strip())
                    rating_scores.append(rating_score)
                except Exception as e:
                    print(f"Invalid rating. Error is e: {e}")

                review_texts.append(review_text)

            #full_text = " ".join(review_texts + [meta_review]).strip()
            avg_rating = sum(rating_scores) / len(rating_scores)
            label = 1 if avg_rating >= 6 else 0

            records.append({
                "paper_id": paper_id,
                "text": meta_review,
                "avg_rating": avg_rating,
                "label": label
            })

    return pd.DataFrame(records)

# Example usage:
df = parse_reviews_to_dataframe("dataset/dataset")
print(df.head())

         paper_id                                               text  \
0  ICLR_2019_1111  The paper proposes GAN regularized by Determin...   
1  ICLR_2019_1564  This work proposes a method for both instance ...   
2  ICLR_2019_1377  The reviewers highlighted aspects of the work ...   
3  ICLR_2019_1179  This paper studies training of the generative ...   
4   ICLR_2019_892  The authors propose a GAN-based anomaly detect...   

   avg_rating  label  
0         5.0      0  
1         3.0      0  
2         5.0      0  
3         5.0      0  
4         4.0      0  


In [7]:
train_df, temp_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df["label"], random_state=42)

In [8]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, padding_side="right")
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

In [9]:
class ReviewDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length=1024):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data.iloc[idx]
        encoding = self.tokenizer(
            item["text"],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt"
        )
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(item["label"], dtype=torch.long)
        }


In [10]:
train_dataset = ReviewDataset(train_df, tokenizer)
val_dataset = ReviewDataset(val_df, tokenizer)
test_dataset = ReviewDataset(test_df, tokenizer)

In [11]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModel.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

In [16]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)

    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average="binary", zero_division=0
    )
    acc = accuracy_score(labels, predictions)

    metrics = {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }

    if logits.shape[1] == 2:
        try:
            probs = torch.nn.functional.softmax(torch.tensor(logits), dim=-1)[:, 1].numpy()
            auc = roc_auc_score(labels, probs)
            metrics["auc"] = auc
        except Exception as e:
            print(f"Warning: AUC calculation failed: {e}")

    return metrics

def sweep_train():
    wandb.init()
    config = wandb.config

    # === Define target modules ===
    target_modules = []
    for i in range(12, 22):
        target_modules.append(f"layers.{i}.self_attn.q_proj")
        target_modules.append(f"layers.{i}.self_attn.v_proj")

    # === LoRA Config with last layers only ===
    peft_config = LoraConfig(
        task_type="SEQ_CLS",
        r=config.r,
        lora_alpha=16,
        lora_dropout=config.lora_dropout,
        bias="none",
        inference_mode=False,
        target_modules=target_modules
    )

    # === Load Quantized Base Model ===
    base_model = AutoModel.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto"
    )

    # === Apply LoRA adapters ===
    base_model = get_peft_model(base_model, peft_config)
    """
    # === Print LoRA trainable params ===
    print("Trainable LoRA parameters:")
    for name, param in base_model.named_parameters():
        if param.requires_grad:
            print(" -", name)
    """

    # === Wrap with classification head ===
    class QuantizedClassifier(torch.nn.Module):
        def __init__(self, base_model, hidden_size=2048, num_labels=2):
            super().__init__()
            self.base = base_model
            self.classifier = torch.nn.Linear(hidden_size, num_labels)

        def forward(self, input_ids, attention_mask=None, labels=None):
            outputs = self.base(input_ids=input_ids, attention_mask=attention_mask)
            last_hidden = outputs.last_hidden_state
            pooled = (last_hidden * attention_mask.unsqueeze(-1)).sum(1) / attention_mask.sum(1, keepdim=True)
            logits = self.classifier(pooled)
            loss = None
            if labels is not None:
                loss_fn = torch.nn.CrossEntropyLoss()
                loss = loss_fn(logits, labels)
            return SequenceClassifierOutput(
                loss=loss,
                logits=logits,
                hidden_states=outputs.hidden_states if hasattr(outputs, "hidden_states") else None,
                attentions=outputs.attentions if hasattr(outputs, "attentions") else None,
            )

    model = QuantizedClassifier(base_model, hidden_size=2048, num_labels=2)


    # Training setup
    training_args = TrainingArguments(
        output_dir="./results",
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        num_train_epochs=10,
        logging_dir="./logs",
        logging_steps=100,
        save_steps=500,
        save_total_limit=1,
        save_strategy="epoch",
        fp16=True,
        report_to="wandb",
        run_name="term-project"
    )


    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    trainer.train()

    eval_results = trainer.evaluate(eval_dataset=test_dataset)
    print("Test Set Evaluation:", eval_results)
    wandb.log(eval_results)


In [17]:
sweep_config = {
    "method": "grid",
    "metric": {
        "name": "eval/accuracy",
        "goal": "maximize"
    },
    "parameters": {
        "learning_rate": {"values": [2e-5, 3e-5]},
        "r": {"values": [4, 8]},
        "lora_dropout": {"values": [0.05, 0.1]}
    }
}

sweep_id = wandb.sweep(sweep_config, project="tinyllama-lora")
wandb.agent(sweep_id, function=sweep_train, count=6)

Create sweep with ID: tvfwq4ot
Sweep URL: https://wandb.ai/e274028-metu-middle-east-technical-university/tinyllama-lora/sweeps/tvfwq4ot


[34m[1mwandb[0m: Agent Starting Run: z50c66l3 with config:
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	lora_dropout: 0.05
[34m[1mwandb[0m: 	r: 4


  trainer = Trainer(


Step,Training Loss
100,0.7803
200,0.613
300,0.565
400,0.5016
500,0.456
600,0.5424
700,0.5429
800,0.4515
900,0.5456
1000,0.5327


Test Set Evaluation: {'eval_loss': 0.4891979992389679, 'eval_accuracy': 0.859073359073359, 'eval_precision': 0.8243243243243243, 'eval_recall': 0.7218934911242604, 'eval_f1': 0.7697160883280757, 'eval_auc': 0.9103440090876722, 'eval_runtime': 12.2254, 'eval_samples_per_second': 42.371, 'eval_steps_per_second': 10.634, 'epoch': 10.0}


0,1
epoch,▁
eval/accuracy,▁
eval/auc,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁

0,1
epoch,10.0
eval/accuracy,0.85907
eval/auc,0.91034
eval/f1,0.76972
eval/loss,0.4892
eval/precision,0.82432
eval/recall,0.72189
eval/runtime,12.2254
eval/samples_per_second,42.371
eval/steps_per_second,10.634


[34m[1mwandb[0m: Agent Starting Run: a79wyrup with config:
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	lora_dropout: 0.05
[34m[1mwandb[0m: 	r: 8


  trainer = Trainer(


Step,Training Loss
100,0.6936
200,0.6008
300,0.5744
400,0.5206
500,0.4311
600,0.5341
700,0.5392
800,0.4458
900,0.5336
1000,0.5205


Test Set Evaluation: {'eval_loss': 0.4770839512348175, 'eval_accuracy': 0.861003861003861, 'eval_precision': 0.8299319727891157, 'eval_recall': 0.7218934911242604, 'eval_f1': 0.7721518987341772, 'eval_auc': 0.9103100998626676, 'eval_runtime': 12.222, 'eval_samples_per_second': 42.383, 'eval_steps_per_second': 10.637, 'epoch': 10.0}


0,1
epoch,▁
eval/accuracy,▁
eval/auc,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁

0,1
epoch,10.0
eval/accuracy,0.861
eval/auc,0.91031
eval/f1,0.77215
eval/loss,0.47708
eval/precision,0.82993
eval/recall,0.72189
eval/runtime,12.222
eval/samples_per_second,42.383
eval/steps_per_second,10.637


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6c96ypdm with config:
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	lora_dropout: 0.1
[34m[1mwandb[0m: 	r: 4


  trainer = Trainer(


Step,Training Loss
100,0.6452
200,0.5992
300,0.5627
400,0.4659
500,0.4264
600,0.537
700,0.5384
800,0.4494
900,0.5448
1000,0.5413


Test Set Evaluation: {'eval_loss': 0.5193253755569458, 'eval_accuracy': 0.862934362934363, 'eval_precision': 0.831081081081081, 'eval_recall': 0.727810650887574, 'eval_f1': 0.7760252365930599, 'eval_auc': 0.912115766094166, 'eval_runtime': 12.2339, 'eval_samples_per_second': 42.341, 'eval_steps_per_second': 10.626, 'epoch': 10.0}


0,1
epoch,▁
eval/accuracy,▁
eval/auc,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁

0,1
epoch,10.0
eval/accuracy,0.86293
eval/auc,0.91212
eval/f1,0.77603
eval/loss,0.51933
eval/precision,0.83108
eval/recall,0.72781
eval/runtime,12.2339
eval/samples_per_second,42.341
eval/steps_per_second,10.626


[34m[1mwandb[0m: Agent Starting Run: gbwxbplg with config:
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	lora_dropout: 0.1
[34m[1mwandb[0m: 	r: 8


  trainer = Trainer(


Step,Training Loss
100,0.6936
200,0.6007
300,0.5748
400,0.5216
500,0.4309
600,0.5335
700,0.5391
800,0.4458
900,0.5338
1000,0.5196


Test Set Evaluation: {'eval_loss': 0.47495904564857483, 'eval_accuracy': 0.8687258687258688, 'eval_precision': 0.8435374149659864, 'eval_recall': 0.7337278106508875, 'eval_f1': 0.7848101265822784, 'eval_auc': 0.9106831013377189, 'eval_runtime': 12.147, 'eval_samples_per_second': 42.644, 'eval_steps_per_second': 10.702, 'epoch': 10.0}


0,1
epoch,▁
eval/accuracy,▁
eval/auc,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁

0,1
epoch,10.0
eval/accuracy,0.86873
eval/auc,0.91068
eval/f1,0.78481
eval/loss,0.47496
eval/precision,0.84354
eval/recall,0.73373
eval/runtime,12.147
eval/samples_per_second,42.644
eval/steps_per_second,10.702


[34m[1mwandb[0m: Agent Starting Run: 4oserjzn with config:
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	lora_dropout: 0.05
[34m[1mwandb[0m: 	r: 4


  trainer = Trainer(


Step,Training Loss
100,0.6453
200,0.5991
300,0.5602
400,0.4638
500,0.4282
600,0.5354
700,0.5392
800,0.45
900,0.5435
1000,0.5421


Test Set Evaluation: {'eval_loss': 0.5205214619636536, 'eval_accuracy': 0.8687258687258688, 'eval_precision': 0.8389261744966443, 'eval_recall': 0.7396449704142012, 'eval_f1': 0.7861635220125787, 'eval_auc': 0.9119546972753937, 'eval_runtime': 12.2488, 'eval_samples_per_second': 42.29, 'eval_steps_per_second': 10.613, 'epoch': 10.0}


0,1
epoch,▁
eval/accuracy,▁
eval/auc,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁

0,1
epoch,10.0
eval/accuracy,0.86873
eval/auc,0.91195
eval/f1,0.78616
eval/loss,0.52052
eval/precision,0.83893
eval/recall,0.73964
eval/runtime,12.2488
eval/samples_per_second,42.29
eval/steps_per_second,10.613


[34m[1mwandb[0m: Agent Starting Run: 2pv10olk with config:
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	lora_dropout: 0.05
[34m[1mwandb[0m: 	r: 8


  trainer = Trainer(


Step,Training Loss
100,0.6935
200,0.6008
300,0.5744
400,0.5206
500,0.4311
600,0.5341
700,0.5393
800,0.4459
900,0.5336
1000,0.5206


Test Set Evaluation: {'eval_loss': 0.47707635164260864, 'eval_accuracy': 0.861003861003861, 'eval_precision': 0.8299319727891157, 'eval_recall': 0.7218934911242604, 'eval_f1': 0.7721518987341772, 'eval_auc': 0.9102931452501652, 'eval_runtime': 12.2333, 'eval_samples_per_second': 42.343, 'eval_steps_per_second': 10.627, 'epoch': 10.0}


0,1
epoch,▁
eval/accuracy,▁
eval/auc,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁

0,1
epoch,10.0
eval/accuracy,0.861
eval/auc,0.91029
eval/f1,0.77215
eval/loss,0.47708
eval/precision,0.82993
eval/recall,0.72189
eval/runtime,12.2333
eval/samples_per_second,42.343
eval/steps_per_second,10.627
