In [1]:
# Mount Google Drive into the Colab VM so the dataset and checkpoint paths
# under /content/drive/MyDrive resolve. main() further down issues the same
# mount call again.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [27]:
import torch
import os
import tqdm
from typing import Optional
from torch.utils.data import DataLoader
from transformers import ViltProcessor

# Core
from evaluation_service import EvaluationService
from curriculum_service import CurriculumManager
from checkpoint_service import CheckpointManager

# Model
from vilt_adapter import ViLTAdapter

# Data
from clevr_dataset_py import CLEVRCurriculumViltDataset,vilt_collate_fn,build_answer_vocab
# Drive folder under which checkpoints are stored; CurriculumTrainer joins it
# with the run name to form the directory handed to CheckpointManager.
CHECKPOINT_ROOT = "/content/drive/MyDrive/Colab Notebooks/FYP/checkpoints"

In [28]:
class CurriculumTrainer:
    """Drives tier-by-tier curriculum training of a ViLT adapter on CLEVR.

    For each tier the trainer builds train/val loaders restricted to that
    tier, trains for up to ``max_epochs_per_tier`` epochs, validates after
    every epoch, saves a checkpoint, and asks the curriculum manager whether
    the tier's advance criterion has been met.
    """

    def __init__(
        self,
        questions_dir: str,
        images_dir: str,
        answer2id: dict,
        run_name: str = "curriculum_run_v1",
        output_dir: str = "./outputs",
        batch_size: int = 32,
        max_tiers: int = 5,
        use_sspl: bool = False,
        max_epochs_per_tier: int = 2,
    ):
        """Create the services, model adapter and processor used by training.

        Args:
            questions_dir: Directory passed to the dataset as ``questions_dir``.
            images_dir: Directory passed to the dataset as ``images_dir``.
            answer2id: Answer vocabulary handed to the train/val datasets.
            run_name: Sub-folder name used for outputs and checkpoints.
            output_dir: Parent folder of the per-run output directory.
            batch_size: Batch size for every DataLoader built by this trainer.
            max_tiers: Forwarded to ``CurriculumManager``.
            use_sspl: Stored on the instance; not read anywhere in this class.
            max_epochs_per_tier: Upper bound on epochs spent on one tier
                before the outer loop moves on (default 2).

        Raises:
            ValueError: If ``max_epochs_per_tier`` is smaller than 1.
        """
        if max_epochs_per_tier < 1:
            raise ValueError("max_epochs_per_tier must be >= 1")

        # Stored for callers; this class never reads or creates the directory.
        self.output_dir = os.path.join(output_dir, run_name)
        self.questions_dir = questions_dir
        self.images_dir = images_dir
        self.batch_size = batch_size
        self.answer2id = answer2id
        self.max_epochs_per_tier = max_epochs_per_tier

        # Components
        self.evaluation_service = EvaluationService()
        self.curriculum = CurriculumManager(max_tiers=max_tiers)
        self.checkpoint_manager = CheckpointManager(
            checkpoint_dir=os.path.join(CHECKPOINT_ROOT, run_name)
        )

        # Model adapter; train() relies on its .model and .optimizer attributes.
        self.model_adapter = ViLTAdapter()

        # Processor (needed by the dataset)
        self.processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-mlm")

        self.use_sspl = use_sspl

    def data_loader_for_tier(
        self,
        tier: Optional[int],
        split: str = "train",
        shuffle: bool = True,
    ) -> DataLoader:
        """Build a DataLoader over one curriculum tier of the given split.

        Args:
            tier: Tier number to restrict the dataset to. ``None`` passes
                ``tiers=None`` to the dataset (presumably "no tier filter" --
                confirm against ``CLEVRCurriculumViltDataset``).
            split: Dataset split name. The answer vocabulary is supplied only
                for ``"train"`` and ``"val"``.
            shuffle: Whether to shuffle; honoured only for the train split.

        Returns:
            A single-process DataLoader using ``vilt_collate_fn``.
        """
        tiers = None if tier is None else [tier]

        dataset = CLEVRCurriculumViltDataset(
            questions_dir=self.questions_dir,
            images_dir=self.images_dir,
            processor=self.processor,
            split=split,
            tiers=tiers,
            # Other splits get no answer vocabulary.
            answer2id=self.answer2id if split in ("train", "val") else None,
            max_length=32,  # Default max length
        )

        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            # Evaluation splits are never shuffled, whatever the caller asks.
            shuffle=(shuffle and split == "train"),
            num_workers=0,  # As requested for Drive/Colab compatibility
            # Pinned host memory only helps host-to-GPU copies; skip it (and
            # the warning PyTorch emits) when no CUDA device is available.
            pin_memory=torch.cuda.is_available(),
            collate_fn=vilt_collate_fn,
        )

    def train(self):
        """Run the curriculum from the latest checkpointed tier onwards.

        Each tier is trained for at most ``self.max_epochs_per_tier`` epochs.
        After every epoch the model is validated on the same tier, metrics are
        recorded, a checkpoint is written, and the tier ends early if the
        curriculum manager reports that it should advance.
        """
        # 1. Load state (returns the tier to resume from and stored metrics).
        start_tier, _stored_metrics = self.checkpoint_manager.load_latest(
            self.model_adapter.model,
            self.model_adapter.optimizer,
            self.curriculum
        )

        print(f"Starting/Resuming at Tier {start_tier}")

        # 2. Loop tiers.
        # NOTE(review): when a tier exhausts its epoch budget without meeting
        # the advance criterion, this loop still proceeds to the next tier
        # (without calling advance_tier) -- confirm that is intended.
        for tier in range(start_tier, self.curriculum.max_tiers + 1):
            if self.curriculum.is_completed:
                print("Curriculum already completed!")
                break

            self.curriculum.current_tier = tier
            print(f"\n{'='*20}\n Entering Tier {tier} \n{'='*20}")

            train_loader = self.data_loader_for_tier(
                tier=tier,
                split='train',
                shuffle=True
            )

            # Validation covers the current tier only, not earlier tiers.
            val_loader = self.data_loader_for_tier(
                tier=tier,
                split='val',
                shuffle=False
            )

            for epoch in range(self.max_epochs_per_tier):
                print(f"\nTier {tier} - Epoch {epoch+1}")

                # --- TRAIN ---
                epoch_losses = []
                progress = tqdm.tqdm(train_loader, desc="Training")

                for batch in progress:
                    metrics = self.model_adapter.train_step(batch)
                    # Keep plain floats so no tensor outlives its step.
                    epoch_losses.append(float(metrics['loss']))
                    progress.set_postfix({'loss': metrics['loss']})

                avg_train_loss = sum(epoch_losses) / len(epoch_losses) if epoch_losses else 0
                print(f"Train: Loss={avg_train_loss:.4f}")

                # --- VALIDATE ---
                print("Validating...")
                val_losses = []
                val_accs = []

                for batch in tqdm.tqdm(val_loader, desc="Validation"):
                    out = self.model_adapter.validation_step(batch)
                    acc = self.evaluation_service.compute_accuracy(out['logits'], out['labels'])

                    val_losses.append(out['loss'])
                    val_accs.append(acc)

                # Unweighted mean over batches; an empty loader yields 0.
                avg_val_loss = sum(val_losses) / len(val_losses) if val_losses else 0
                avg_val_acc = sum(val_accs) / len(val_accs) if val_accs else 0

                print(f"Validation: Loss={avg_val_loss:.4f}, Accuracy={avg_val_acc:.4f}")

                # Record metrics
                self.evaluation_service.record_metrics(avg_val_loss, avg_val_acc)

                # --- CURRICULUM CHECK ---
                should_advance = self.curriculum.should_advance(self.evaluation_service)

                # --- CHECKPOINT ---
                # Saved before advance_tier(), so the stored curriculum state
                # still describes the tier that was just trained.
                metrics_state = self.evaluation_service.get_latest_metrics()
                self.checkpoint_manager.save(
                    model_state=self.model_adapter.get_state_dict(),
                    optimizer_state=self.model_adapter.get_optimizer_state_dict(),
                    curriculum_state=self.curriculum.get_config_state(),
                    metrics=metrics_state,
                    tier=tier,
                    is_best=should_advance
                )

                if should_advance:
                    self.curriculum.advance_tier()
                    break


In [25]:
import os
import argparse
import json
from google.colab import drive


# Drive locations of the CLEVR question JSON files and rendered images.
# main() reads CLEVR_train_questions_L{1..5}.json from QUESTIONS_DIR.
# NOTE(review): the recorded run failed with FileNotFoundError for
# IMAGES_DIR/train/CLEVR_train_066108.png -- verify that the image set on Drive
# is complete and laid out as IMAGES_DIR/<split>/<name>.png.
QUESTIONS_DIR = "/content/drive/MyDrive/Colab Notebooks/FYP/dataset/clevr_kaggle/CLEVR_v1.0/questions"
IMAGES_DIR    = "/content/drive/MyDrive/Colab Notebooks/FYP/dataset/clevr_kaggle/CLEVR_v1.0/images"

def main():
    """Mount Drive, build the answer vocabulary and run curriculum training.

    Command-line flags (unknown flags, such as those injected by the Colab
    kernel, are ignored):
        --batch_size  Batch size passed to the trainer (default 32).
        --use_sspl    Boolean switch forwarded to the trainer.
        --max_tiers   Number of curriculum tiers (default 5). It selects which
                      CLEVR_train_questions_L{i}.json files feed the vocabulary
                      and is forwarded to the trainer.

    Raises:
        ValueError: If ``--max_tiers`` is smaller than 1.
    """
    # Mount Drive
    drive.mount("/content/drive")

    parser = argparse.ArgumentParser(description="Competence-Aware Curriculum VQA Training")
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--use_sspl", action="store_true")
    parser.add_argument("--max_tiers", type=int, default=5)

    # IMPORTANT for Colab: parse_known_args tolerates the kernel's own argv.
    args, _ = parser.parse_known_args()
    if args.max_tiers < 1:
        raise ValueError("--max_tiers must be >= 1")

    # 1. Build answer vocabulary from the TRAIN questions of every tier in use
    print("Building answer vocabulary...")
    tier_paths = [
        os.path.join(QUESTIONS_DIR, f"CLEVR_train_questions_L{level}.json")
        for level in range(1, args.max_tiers + 1)
    ]
    answer2id = build_answer_vocab(tier_paths)

    print(f"Answer vocab size: {len(answer2id)}")

    # 2. Initialize trainer
    trainer = CurriculumTrainer(
        questions_dir=QUESTIONS_DIR,
        images_dir=IMAGES_DIR,
        answer2id=answer2id,
        batch_size=args.batch_size,
        max_tiers=args.max_tiers,
        use_sspl=args.use_sspl
    )

    print("üöÄ Starting Curriculum Training...")
    trainer.train()


In [29]:

# Entry point: in a notebook kernel __name__ is "__main__", so running this
# cell starts the whole training pipeline.
if __name__ == "__main__":
    main()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Building answer vocabulary...
Answer vocab size: 28


Loading weights:   0%|          | 0/206 [00:00<?, ?it/s]

ViltForQuestionAnswering LOAD REPORT from: dandelin/vilt-b32-mlm-itm
Key                                          | Status     | 
---------------------------------------------+------------+-
mlm_score.transform.LayerNorm.bias           | UNEXPECTED | 
mlm_score.transform.dense.weight             | UNEXPECTED | 
mlm_score.transform.LayerNorm.weight         | UNEXPECTED | 
mlm_score.bias                               | UNEXPECTED | 
itm_score.fc.weight                          | UNEXPECTED | 
mlm_score.transform.dense.bias               | UNEXPECTED | 
itm_score.fc.bias                            | UNEXPECTED | 
mlm_score.decoder.weight                     | UNEXPECTED | 
vilt.embeddings.text_embeddings.position_ids | UNEXPECTED | 
classifier.{0, 1, 3}.bias                    | MISSING    | 
classifier.{0, 1, 3}.weight                  | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those 

preprocessor_config.json:   0%|          | 0.00/251 [00:00<?, ?B/s]

The image processor of type `ViltImageProcessor` is now loaded as a fast processor by default, even if the model checkpoint was saved with a slow processor. This is a breaking change and may produce slightly different outputs. To continue using the slow processor, instantiate this class with `use_fast=False`. 


config.json:   0%|          | 0.00/653 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/320 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

üöÄ Starting Curriculum Training...
No checkpoint found. Starting from scratch.
Starting/Resuming at Tier 1

 Entering Tier 1 

Tier 1 - Epoch 1


Training:   0%|          | 0/6142 [02:00<?, ?it/s]


FileNotFoundError: Image not found: /content/drive/MyDrive/Colab Notebooks/FYP/dataset/clevr_kaggle/CLEVR_v1.0/images/train/CLEVR_train_066108.png

In [30]:
%ls

base_model.py          curriculum_service.py  __pycache__/  vilt_adapter.py
checkpoint_service.py  drive/                 sample_data/
clevr_dataset_py.py    evaluation_service.py  services/
