# 1. Check GPU Availability
Make sure Google Colab is using a GPU.

In [1]:
import torch
torch.cuda.is_available()


True

If this returns True, you're good to go! If not, go to Runtime > Change runtime type > GPU.

# Install Required Libraries
Run this command to install transformers, torch, and accelerate.

In [2]:
!pip install -U torch transformers datasets accelerate peft bitsandbytes



In [4]:
!pip install --upgrade torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cu118/torchvision-0.21.0%2Bcu118-cp311-cp311-linux_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cu118/torchaudio-2.6.0%2Bcu118-cp311-cp311-linux_x86_64.whl.metadata (6.6 kB)
Downloading https://download.pytorch.org/whl/cu118/torchvision-0.21.0%2Bcu118-cp311-cp311-linux_x86_64.whl (6.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m45.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading https://download.pytorch.org/whl/cu118/torchaudio-2.6.0%2Bcu118-cp311-cp311-linux_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchvision, torchaudio
  Attempting uninstall: torchvision
    Found existing installation: torchvision 0.20.1+cu124
    Uninstalling to

In [6]:
!pip uninstall torch
!pip install vllm

Found existing installation: torch 2.6.0
Uninstalling torch-2.6.0:
  Would remove:
    /usr/local/bin/torchfrtrace
    /usr/local/bin/torchrun
    /usr/local/lib/python3.11/dist-packages/functorch/*
    /usr/local/lib/python3.11/dist-packages/torch-2.6.0.dist-info/*
    /usr/local/lib/python3.11/dist-packages/torch/*
    /usr/local/lib/python3.11/dist-packages/torchgen/*
Proceed (Y/n)? Y
  Successfully uninstalled torch-2.6.0
Collecting vllm
  Downloading vllm-0.7.3-cp38-abi3-manylinux1_x86_64.whl.metadata (25 kB)
Collecting blake3 (from vllm)
  Downloading blake3-1.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting fastapi!=0.113.*,!=0.114.0,>=0.107.0 (from fastapi[standard]!=0.113.*,!=0.114.0,>=0.107.0; python_version >= "3.9"->vllm)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting prometheus-fastapi-instrumentator>=7.0.0 (from vllm)
  Downloading prometheus_fastapi_instrumentator-7.0.2-py3-none-any.whl.metadata (13 

# 3. Load DeepSeek LLM from Hugging Face

Load the model with LoRA (Low-Rank Adaptation) for efficient fine-tuning.

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
import torch

model_name = "deepseek-ai/deepseek-llm-7b-base"

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16  # Use float16 for faster computation
)

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)


# Apply LoRA for memory-efficient fine-tuning
lora_config = LoraConfig(
    r=8,  # Low-rank adaptation size
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # Apply LoRA to attention layers
    lora_dropout=0.05,
    bias="none"
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

print("✅ DeepSeek LLM Loaded with LoRA and 4-bit Precision!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/792 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/584 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.6k [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

trainable params: 3,932,160 || all params: 6,914,297,856 || trainable%: 0.0569
✅ DeepSeek LLM Loaded with LoRA and 4-bit Precision!


# 4. Load and Preprocess the IMDB Dataset

Download the IMDB dataset and prepare it for causal language modeling.



In [3]:
from datasets import load_dataset

# Load IMDB dataset
dataset = load_dataset("imdb")

# Display dataset structure and a few samples
print("Dataset Structure:")
print(dataset)

print("Sample Data:")
print(dataset["train"][0])
print(dataset["train"][1])

README.md:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Dataset Structure:
DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})
Sample Data:
{'text': 'I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered "controversial" I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. I

### Tokenize Dataset

In [4]:
def tokenize_function(examples):
    inputs = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    inputs["labels"] = inputs["input_ids"].copy()  # Use input_ids as labels for causal LM
    return inputs

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Verify tokenized sample
print("Tokenized Sample with Labels:")
print(tokenized_datasets["train"][0])

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

Tokenized Sample with Labels:
{'text': 'I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered "controversial" I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.<br /><br />What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nu

# 5. Set Training Parameterss

In [6]:
import os
os.environ["WANDB_DISABLED"] = "true"

from transformers import TrainingArguments

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=1,
    weight_decay=0.01,
    logging_dir="./logs",
    fp16=True,
)

print("✅ WandB Disabled!")

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


✅ WandB Disabled!


# Get sample Data

To speed up the training

In [7]:
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(500))
small_test_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(100))

# 5. Initialize Trainer and Train

Set up the Trainer and start fine-tuning.

In [8]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset.remove_columns(["text"]),  # Remove raw text column
    eval_dataset=small_test_dataset.remove_columns(["text"]),
)

print("🚀 Trainer Initialized!")


No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


🚀 Trainer Initialized!


In [9]:
torch.cuda.empty_cache()
print("✅ Cleared CUDA Cache")


✅ Cleared CUDA Cache


# 6. Fine-Tune DeepSeek LLM

In [10]:
print("🚀 Starting Fine-Tuning...")
trainer.train()

🚀 Starting Fine-Tuning...


Epoch,Training Loss,Validation Loss
0,No log,No log


TrainOutput(global_step=62, training_loss=1.9978304216938634, metrics={'train_runtime': 564.8461, 'train_samples_per_second': 0.885, 'train_steps_per_second': 0.11, 'total_flos': 9896307481116672.0, 'train_loss': 1.9978304216938634, 'epoch': 0.992})

#7. Run Predictions with the Fine-Tuned Model
Now that we have fine-tuned DeepSeek LLM, let's generate predictions for new text inputs.



In [11]:
def generate_prediction(review_text):
    inputs = tokenizer(review_text, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_length=100, pad_token_id=tokenizer.eos_token_id)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example reviews
reviews = [
    "The movie was absolutely fantastic! I loved the cinematography and the acting was superb.",
    "This was the worst movie I've ever seen. The plot made no sense and the dialogue was terrible.",
    "It was an okay movie. Some parts were really good, but overall it was just average."
]

# Run predictions
for review in reviews:
    print(f"Review: {review}")
    print(f"Predicted Sentiment: {generate_prediction(review)}")
    print("-" * 80)


Review: The movie was absolutely fantastic! I loved the cinematography and the acting was superb.
Predicted Sentiment: The movie was absolutely fantastic! I loved the cinematography and the acting was superb.
--------------------------------------------------------------------------------
Review: This was the worst movie I've ever seen. The plot made no sense and the dialogue was terrible.
Predicted Sentiment: This was the worst movie I've ever seen. The plot made no sense and the dialogue was terrible. The acting was even worse. I'm not sure if this was a comedy or a drama. It was a terrible movie. I would not recommend this movie to anyone. I would not recommend this movie to anyone. I would not recommend this movie to anyone. I would not recommend this movie to anyone. I would not recommend this movie to anyone. I would not recommend this movie
--------------------------------------------------------------------------------
Review: It was an okay movie. Some parts were really good, 