In [2]:
# 1. Install PyTorch (CPU Version)
%pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu

# 2. Install Hugging Face Libraries
%pip install transformers==4.35.2 peft==0.7.1 accelerate==0.25.0 datasets==2.16.1 scikit-learn pandas psutil

# 3. Verify Installation
import torch
print(f"‚úÖ PyTorch Installed: {torch.__version__}")

Looking in indexes: https://download.pytorch.org/whl/cpu
Collecting torch==2.1.2
  Using cached https://download.pytorch.org/whl/cpu/torch-2.1.2%2Bcpu-cp310-cp310-linux_x86_64.whl (184.9 MB)
Collecting torchvision==0.16.2
  Using cached https://download.pytorch.org/whl/cpu/torchvision-0.16.2%2Bcpu-cp310-cp310-linux_x86_64.whl (1.5 MB)
Collecting torchaudio==2.1.2
  Using cached https://download.pytorch.org/whl/cpu/torchaudio-2.1.2%2Bcpu-cp310-cp310-linux_x86_64.whl (1.6 MB)
Collecting sympy (from torch==2.1.2)
  Downloading https://download.pytorch.org/whl/sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy->torch==2.1.2)
  Downloading https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m536.2/536.2 kB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading https://download.pytorch.org/whl

In [1]:
import os
import torch
import psutil
import pandas as pd
import numpy as np
import shutil
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from transformers import (
    BertTokenizer, 
    BertForSequenceClassification, 
    Trainer, 
    TrainingArguments,
    DataCollatorWithPadding
)
from datasets import Dataset
from peft import LoraConfig, get_peft_model, TaskType, PeftModel

# ==========================================
# 1. SETUP & DATA PREPARATION
# ==========================================
print("üöÄ [1/5] Preparing Data...")

# Load subset of 20newsgroups (3 categories for clarity)
categories = ['sci.space', 'rec.autos', 'comp.graphics']
newsgroups = fetch_20newsgroups(subset='all', categories=categories)
data = pd.DataFrame({'text': newsgroups.data, 'label': newsgroups.target})

# Split: Train (70%), Val (15%), Test (15%)
train_df, temp_df = train_test_split(data, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Convert to Hugging Face Datasets
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)
test_dataset = Dataset.from_pandas(test_df)

# Tokenization
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def tokenize_function(examples):
    return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=128)

print("   Tokenizing...")
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

# CRITICAL: Create a small sample (2.5%) for fast CPU training
train_sample = train_dataset.shuffle(seed=42).select(range(int(0.025 * len(train_dataset))))
print(f"‚úÖ Data Ready! Training on {len(train_sample)} samples (CPU Optimized).")

# ==========================================
# 2. TRAIN TRADITIONAL MODEL (Baseline)
# ==========================================
print("\nüöÄ [2/5] Training TRADITIONAL Model (Full Fine-Tuning)...")

model_trad = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

training_args_trad = TrainingArguments(
    output_dir='./results_traditional',
    num_train_epochs=1,              # 1 Epoch is enough for demo
    per_device_train_batch_size=4,   # Small batch for CPU
    evaluation_strategy="no",        # Skip eval during training to save time
    logging_steps=10,
    learning_rate=2e-5,
    use_cpu=True,                    # Force CPU
    report_to="none"
)

trainer_trad = Trainer(
    model=model_trad,
    args=training_args_trad,
    train_dataset=train_sample,
    eval_dataset=val_dataset,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
)

trainer_trad.train()

# Save Traditional Model
save_path_trad = "./traditional_model"
if os.path.exists(save_path_trad): shutil.rmtree(save_path_trad) # Cleanup old
model_trad.save_pretrained(save_path_trad)
print("‚úÖ Traditional Model Saved.")

# Clean up memory
del model_trad, trainer_trad

# ==========================================
# 3. TRAIN LoRA MODEL (CPU Optimized)
# ==========================================
print("\nüöÄ [3/5] Training LoRA Model (Efficient)...")

# Load fresh base model
model_lora = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

# Define LoRA Config
lora_config = LoraConfig(
    r=8,                 
    lora_alpha=16,
    target_modules=["query", "value"], # Target attention layers
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_CLS,
)

# Apply LoRA
model_lora = get_peft_model(model_lora, lora_config)
model_lora.print_trainable_parameters()

training_args_lora = TrainingArguments(
    output_dir='./results_lora',
    num_train_epochs=1,
    per_device_train_batch_size=4,
    evaluation_strategy="no",
    logging_steps=10,
    learning_rate=2e-4,              # Higher LR for LoRA
    use_cpu=True,
    report_to="none"
)

trainer_lora = Trainer(
    model=model_lora,
    args=training_args_lora,
    train_dataset=train_sample,
    eval_dataset=val_dataset,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
)

trainer_lora.train()

# Save LoRA Model
save_path_lora = "./lora_model"
if os.path.exists(save_path_lora): shutil.rmtree(save_path_lora)
model_lora.save_pretrained(save_path_lora)
print("‚úÖ LoRA Model Saved.")

# ==========================================
# 4. EVALUATION & COMPARISON
# ==========================================
print("\nüöÄ [4/5] Evaluating Size & Performance...")

def get_dir_size(path):
    total = 0
    for dirpath, _, filenames in os.walk(path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            total += os.path.getsize(fp)
    return total / (1024 * 1024)

# 1. Size Comparison
size_trad = get_dir_size(save_path_trad)
size_lora = get_dir_size(save_path_lora)
base_size = 420.0 # Approx size of BERT base

print("\n" + "="*40)
print("üíæ STORAGE COMPARISON")
print("="*40)
print(f"Traditional Model: {size_trad:.2f} MB")
print(f"LoRA Adapter:      {size_lora:.2f} MB")
print(f"Space Saved:       {size_trad - size_lora:.2f} MB")
print("-" * 40)

# 2. Performance Evaluation
print("\nRunning Prediction on Test Set...")
preds = trainer_lora.predict(test_dataset)
pred_labels = np.argmax(preds.predictions, axis=-1)
accuracy = accuracy_score(preds.label_ids, pred_labels)

print("\n" + "="*40)
print("üèÜ MODEL PERFORMANCE")
print("="*40)
print(f"Test Accuracy: {accuracy:.4f}")
print("-" * 40)
print(classification_report(preds.label_ids, pred_labels, target_names=categories))

  from .autonotebook import tqdm as notebook_tqdm
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2065/2065 [00:10<00:00, 188.64 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 442/442 [00:02<00:00, 218.72 examples/s]
Map:   0%|          | 0/443 [00:00<?, ? examples/s]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


üöÄ [1/5] Preparing Data...
‚úÖ Data Ready! Training on 51 samples (CPU Optimized).

üöÄ [2/5] Training TRADITIONAL Model (Full Fine-Tuning)...


Step,Training Loss
10,1.1431


‚úÖ Traditional Model Saved.

üöÄ [3/5] Training LoRA Model (Efficient)...
trainable params: 297,219 || all params: 109,781,766 || trainable%: 0.27073621679578375


Step,Training Loss
10,1.0903


‚úÖ LoRA Model Saved.

üöÄ [4/5] Evaluating Size & Performance...

üíæ STORAGE COMPARISON
Traditional Model: 417.67 MB
LoRA Adapter:      1.15 MB
Space Saved:       416.53 MB
----------------------------------------

Running Prediction on Test Set...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
