In [1]:
pip install "ray[air]" transformers datasets accelerate evaluate torch


Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Collecting ray[air]
  Downloading ray-2.48.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (19 kB)
Collecting colorful (from ray[air])
  Downloading colorful-0.5.7-py2.py3-none-any.whl.metadata (16 kB)
Collecting py-spy>=0.2.0 (from ray[air])
  Downloading py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (510 bytes)
Collecting watchfiles (from ray[air])
  Downloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting opentelemetry-sdk>=1.30.0 (from ray[air])
  Downloading opentelemetry_sdk-1.36.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opencensus (from ray[air])
  Downloading opencensus-0.11.4-py2.py3-none-any.whl.metadata (12 kB)
Collecting opentelemetry-exporter-prometheus (from ray[air])
  Downloading opentelemetry_exporter_prometheus-0.57b0-py3-none-any.whl.metadata (1.8 kB)
Collecting opentelemetry-proto (from 

In [2]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("lakshmi25npathi/imdb-dataset-of-50k-movie-reviews")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/imdb-dataset-of-50k-movie-reviews


In [7]:
# Filename: ray_lightweight_llm_finetune.py

import ray
from ray.train.torch import TorchTrainer
from ray.train.huggingface.transformers import prepare_trainer, RayTrainReportCallback
from ray.train import ScalingConfig, RunConfig
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments
import evaluate
import torch

# Step 1: Define Tokenization
def tokenize_data(example, tokenizer, max_length=128):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=max_length)

# Step 2: Prepare Dataset
def prepare_dataset(tokenizer):
    dataset = load_dataset("imdb", split="train[:2000]")  # small subset for demo
    tokenized_dataset = dataset.map(lambda x: tokenize_data(x, tokenizer), batched=True)
    tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.2)
    return tokenized_dataset

# Step 3: Define Trainer Initialization Function
def train_loop_per_worker(config):
    from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

    train_dataset = config["train_dataset"]
    eval_dataset = config["eval_dataset"]
    tokenizer_name = config["tokenizer_name"]

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    model = AutoModelForSequenceClassification.from_pretrained(tokenizer_name, num_labels=2)

    training_args = TrainingArguments(
        output_dir="./results",
        eval_strategy="epoch",
        save_strategy="epoch",
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=2,
        logging_dir="./logs"
    )

    metric = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        predictions = logits.argmax(axis=-1)
        return metric.compute(predictions=predictions, references=labels)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
    )

    trainer.train()


# Step 4: Launch Ray and Fine-Tune
if __name__ == "__main__":
  ray.shutdown()
  ray.init(ignore_reinit_error=True)

  model_name = "distilbert-base-uncased"  # Small, lightweight model
  tokenizer = AutoTokenizer.from_pretrained(model_name)

  tokenized = prepare_dataset(tokenizer)

  # Use TorchTrainer instead of the undefined TransformersTrainer
  trainer = TorchTrainer(
        train_loop_per_worker=train_loop_per_worker,
        train_loop_config={
            "train_dataset": tokenized["train"],
            "eval_dataset": tokenized["test"],
            "tokenizer_name": model_name,
        },
        scaling_config=ScalingConfig(num_workers=2, use_gpu=False),
        run_config=RunConfig(name="lightweight-llm-finetune")
    )

  result = trainer.fit()
  print("Training completed:", result)
  ray.shutdown()

2025-08-10 12:06:12,283	INFO worker.py:1918 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

2025-08-10 12:06:18,333	INFO data_parallel_trainer.py:339 -- GPUs are detected in your Ray cluster, but GPU training is not enabled for this trainer. To enable GPU training, make sure to set `use_gpu` to True in your scaling config.



View detailed results here: /root/ray_results/lightweight-llm-finetune
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2025-08-10_12-06-02_859952_169/artifacts/2025-08-10_12-06-18/lightweight-llm-finetune/driver_artifacts`


[36m(TrainTrainable pid=6775)[0m 2025-08-10 12:06:24.934225: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(TrainTrainable pid=6775)[0m E0000 00:00:1754827584.955168    6775 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(TrainTrainable pid=6775)[0m E0000 00:00:1754827584.961439    6775 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(TrainTrainable pid=6775)[0m W0000 00:00:1754827584.978784    6775 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
[36m(TrainTrainable pid=6775)[0m W0000 00:00:1754827584.978815    6775 computation_placer.cc:177] computation placer alr


Training started with configuration:
+---------------------------------------------------------+
| Training config                                         |
+---------------------------------------------------------+
| train_loop_config/eval_dataset                          ... num_rows: 400
}) |
| train_loop_config/tokenizer_name   ...bert-base-uncased |
| train_loop_config/train_dataset                         ...num_rows: 1600
}) |
+---------------------------------------------------------+


[36m(TorchTrainer pid=6775)[0m GPUs are detected in your Ray cluster, but GPU training is not enabled for this trainer. To enable GPU training, make sure to set `use_gpu` to True in your scaling config.
[36m(RayTrainWorker pid=6877)[0m Setting up process group for: env:// [rank=0, world_size=2]
[36m(TorchTrainer pid=6775)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=6775)[0m - (node_id=010542429d295ed0a1d3fb98cbaa7dd56a526c81965387695b1b143b, ip=172.28.0.12, pid=6877) world_rank=0, local_rank=0, node_rank=0
[36m(TorchTrainer pid=6775)[0m - (node_id=010542429d295ed0a1d3fb98cbaa7dd56a526c81965387695b1b143b, ip=172.28.0.12, pid=6878) world_rank=1, local_rank=1, node_rank=0
[36m(RayTrainWorker pid=6877)[0m 2025-08-10 12:06:41.000929: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(RayTrainWorker pid=6878)[0m E0000 00:00:

[36m(RayTrainWorker pid=6877)[0m {'eval_loss': 0.00024920873693190515, 'eval_accuracy': 1.0, 'eval_runtime': 43.3832, 'eval_samples_per_second': 9.22, 'eval_steps_per_second': 0.576, 'epoch': 1.0}


 50%|█████     | 101/200 [12:43<34:50, 21.12s/it]
 51%|█████     | 102/200 [12:50<27:30, 16.84s/it]
 52%|█████▏    | 103/200 [12:57<22:37, 13.99s/it]
 52%|█████▏    | 104/200 [13:06<19:45, 12.34s/it]
 52%|█████▎    | 105/200 [13:13<17:15, 10.90s/it]
 53%|█████▎    | 106/200 [13:20<14:53,  9.51s/it]
 54%|█████▎    | 107/200 [13:28<14:01,  9.05s/it]
 54%|█████▍    | 108/200 [13:34<12:31,  8.16s/it]
 55%|█████▍    | 109/200 [13:42<12:16,  8.09s/it]
 55%|█████▌    | 110/200 [13:48<11:17,  7.52s/it]
 56%|█████▌    | 111/200 [13:56<11:18,  7.62s/it]
 56%|█████▌    | 112/200 [14:02<10:32,  7.18s/it]
 56%|█████▋    | 113/200 [14:09<10:23,  7.17s/it]
 57%|█████▋    | 114/200 [14:16<10:09,  7.09s/it]
 57%|█████▊    | 115/200 [14:23<09:59,  7.06s/it]
 58%|█████▊    | 116/200 [14:30<09:52,  7.05s/it]
 58%|█████▊    | 117/200 [14:36<09:28,  6.85s/it]
 59%|█████▉    | 118/200 [14:44<09:47,  7.16s/it]
 60%|█████▉    | 119/200 [14:51<09:17,  6.88s/it]
 60%|██████    | 120/200 [14:59<09:40,  7.26s/it]


[36m(RayTrainWorker pid=6877)[0m {'eval_loss': 0.00014874870248604566, 'eval_accuracy': 1.0, 'eval_runtime': 42.8055, 'eval_samples_per_second': 9.345, 'eval_steps_per_second': 0.584, 'epoch': 2.0}


[36m(RayTrainWorker pid=6877)[0m                                                  100%|██████████| 200/200 [25:18<00:00,  6.67s/it]100%|██████████| 200/200 [25:18<00:00,  7.59s/it]


[36m(RayTrainWorker pid=6877)[0m {'train_runtime': 1518.306, 'train_samples_per_second': 2.108, 'train_steps_per_second': 0.132, 'train_loss': 0.01456852912902832, 'epoch': 2.0}


2025-08-10 12:32:12,209	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/lightweight-llm-finetune' in 0.0150s.



Training completed after 0 iterations at 2025-08-10 12:32:12. Total running time: 25min 53s

Training completed: Result(
  metrics={},
  path='/root/ray_results/lightweight-llm-finetune/TorchTrainer_6bfc5_00000_0_2025-08-10_12-06-18',
  filesystem='local',
  checkpoint=None
)
