In [None]:
''' 
================================================================================
XLM-RoBERTa Aspect-Based Sentiment Classification
================================================================================
PROJECT : NLP-Driven ABSA for Gastronomy Tourism Insights in Malaysia
PIPELINE: Pipelined-ABSA (Decoupled) — Step 3: Sentiment Classification
INPUT   : Dataset/aspect_categorization.pkl   (output of Notebook 5)
OUTPUT  : models/xlm_roberta_absa_best.pt     (best checkpoint)
          results/training_metrics.json        (loss/acc curves)

ACADEMIC JUSTIFICATION
----------------------
- XLM-RoBERTa (Conneau et al., 2020): Pre-trained on 100 languages including
  Malay and Chinese. Superior zero/few-shot cross-lingual transfer vs.
  monolingual BERT, critical for Manglish code-switching.
- Aspect-Conditioned Input (Sun et al., 2019): We prepend the aspect category
  to the segment text as "[aspect] [SEP] [segment]". This forces the model to
  learn aspect-specific sentiment representations rather than general polarity.
- Class-Weighted Loss (Japkowicz & Stephen, 2002): Our dataset is severely
  imbalanced (89% positive). We use the inverse-frequency weights computed in
  Notebook 4 to prevent the model from trivially predicting "positive".
- Weak Supervision (Ratner et al., 2016): Star ratings are noisy proxies for
  sentiment. The consistency filtering in Notebook 4 already removed the worst
  offenders (4.1% noise). Residual noise is tolerable for fine-tuning.
================================================================================
'''

In [1]:
# ==============================================================================
# STAGE 0: Environment & Dependency Verification
# ==============================================================================

import sys
import importlib

REQUIRED = {
    "torch": "PyTorch",
    "transformers": "HuggingFace Transformers",
    "pandas": "Pandas",
    "numpy": "NumPy",
    "sklearn": "Scikit-Learn",
}


def check_environment():
    """Verify all required packages are installed and print versions.

    Why:
        Explicit environment checks prevent cryptic import errors mid-training,
        which is especially costly when running on GPU with long epoch times.
    """
    print("=" * 70)
    print("ENVIRONMENT CHECK")
    print("=" * 70)
    all_ok = True
    for module_name, display_name in REQUIRED.items():
        try:
            mod = importlib.import_module(module_name)
            version = getattr(mod, "__version__", "unknown")
            print(f"  ✓  {display_name:<30} v{version}")
        except ImportError:
            print(f"  ✗  {display_name:<30} NOT INSTALLED")
            all_ok = False

    # Special check: torch CUDA availability
    import torch

    cuda_avail = torch.cuda.is_available()
    device_name = torch.cuda.get_device_name(0) if cuda_avail else "CPU only"
    print(f"\n  GPU Available: {cuda_avail}  →  {device_name}")
    print(f"  Python:        {sys.version}")
    print("=" * 70)

    if not all_ok:
        raise RuntimeError(
            "Some packages are missing. Install them before continuing."
        )
    return torch.device("cuda" if cuda_avail else "cpu")


DEVICE = check_environment()

ENVIRONMENT CHECK
  ✗  PyTorch                        NOT INSTALLED


None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


  ✓  HuggingFace Transformers       v4.57.1
  ✓  Pandas                         v2.2.3
  ✓  NumPy                          v2.2.0
  ✓  Scikit-Learn                   v1.5.1


ModuleNotFoundError: No module named 'torch'

In [2]:
import torch; print(torch.cuda.is_available()) 

ModuleNotFoundError: No module named 'torch'