In [1]:
# Cell 1 — Mount Google Drive & create project folders
import os

from google.colab import drive

# All project folders below live under the mounted Drive path.
drive.mount('/content/drive')

PROJ = "/content/drive/MyDrive/VQAScore_Project"

# Project root first, then every sub-folder (parents are listed before children).
DIRS = [f"{PROJ}"] + [
    f"{PROJ}/{sub}"
    for sub in (
        "data",
        "data/generated_images",
        "data/videos",              # optional (for T2V)
        "models",
        "results",
        "results/plots",
        "notebooks",
    )
]

# exist_ok=True makes the cell safe to re-run when the folders already exist.
for d in DIRS:
    os.makedirs(d, exist_ok=True)

print("Project root:", PROJ)
print("Created folders:", *DIRS, sep="\n")

Mounted at /content/drive
Project root: /content/drive/MyDrive/VQAScore_Project
Created folders:
/content/drive/MyDrive/VQAScore_Project
/content/drive/MyDrive/VQAScore_Project/data
/content/drive/MyDrive/VQAScore_Project/data/generated_images
/content/drive/MyDrive/VQAScore_Project/data/videos
/content/drive/MyDrive/VQAScore_Project/models
/content/drive/MyDrive/VQAScore_Project/results
/content/drive/MyDrive/VQAScore_Project/results/plots
/content/drive/MyDrive/VQAScore_Project/notebooks


In [None]:
# Cell 2 — (Optional, recommended) Persist HF cache to Drive to speed up future sessions
# Requires PROJ from Cell 1. Run this before the cells that import
# transformers / diffusers / torch, since cache locations are typically read
# from the environment when those libraries are imported.
import os

HF_CACHE = f"{PROJ}/.cache/huggingface"
TORCH_CACHE = f"{PROJ}/.cache/torch"

# Create both cache roots up front so neither library has to create them lazily.
for cache_dir in (HF_CACHE, TORCH_CACHE):
    os.makedirs(cache_dir, exist_ok=True)

os.environ["HF_HOME"] = HF_CACHE
# Current variable name for the hub download cache.
os.environ["HF_HUB_CACHE"] = HF_CACHE
# Legacy alias of HF_HUB_CACHE, kept for older huggingface_hub releases.
os.environ["HUGGINGFACE_HUB_CACHE"] = HF_CACHE
# TRANSFORMERS_CACHE is intentionally not set: it is deprecated in favour of
# HF_HOME and only triggers a FutureWarning on recent Transformers 4.x releases.
os.environ["TORCH_HOME"] = TORCH_CACHE

print("HF/Transformers cache set to:", HF_CACHE)

HF/Transformers cache set to: /content/drive/MyDrive/VQAScore_Project/.cache/huggingface


In [2]:
# Cell 3 — Install core dependencies (CUDA 12.1-compatible wheels on Colab)
# Upgrade pip itself before installing anything else.
%pip install --upgrade pip
# torch/torchvision: --extra-index-url adds the PyTorch cu121 wheel index as an
# additional source next to PyPI.
# NOTE(review): the recorded output of the import-check cell reports
# "Torch: 2.9.0+cu126", so the unpinned "torch>=2.2" requirement does not appear
# to end up on a cu121 build — confirm, and pin the version if cu121 is required.
%pip install "torch>=2.2" torchvision --extra-index-url https://download.pytorch.org/whl/cu121
# General ML / data / plotting stack (all unpinned).
%pip install transformers accelerate datasets pillow tqdm pandas matplotlib seaborn scipy
# Provides the `open_clip` module imported in the import-check cell.
%pip install open_clip_torch
# diffusers is the only package in this cell pinned to an exact version.
%pip install diffusers==0.30.0

Collecting pip
  Downloading pip-25.3-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.3-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m69.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.3
Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu121
Collecting open_clip_torch
  Downloading open_clip_torch-3.2.0-py3-none-any.whl.metadata (32 kB)
Collecting ftfy (from open_clip_torch)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading open_clip_torch-3.2.0-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m60.2 MB/s[0m  [33m0:00:00[0m
[?25hDownloading ftfy-6.3.1-py3-none-any.whl (44 kB)
Installing collected packages: ftfy, op

In [None]:
# # Cell 4 — (DISABLED: every line in this cell is commented out) Clone and install t2v_metrics persistently in Google Drive (no flash-attn)

# import os

# repo_url = "https://github.com/linzhiqiu/t2v_metrics.git"
# repo_dir = "/content/drive/MyDrive/VQAScore_Project/t2v_metrics"

# # 1) Clone or update the repo inside Drive
# if not os.path.exists(repo_dir):
#     !git clone "$repo_url" "$repo_dir"
# else:
#     %cd "$repo_dir"
#     !git fetch origin
#     !git pull --rebase
#     %cd /content

# # 2) Install minimal dependencies manually (skip heavy extras like flash-attn)
# %pip install --quiet --upgrade pip
# %pip install --quiet einops pandas numpy scipy tqdm pillow
# %pip install --quiet transformers accelerate datasets
# %pip install --quiet open_clip_torch
# %pip install --quiet diffusers==0.30.0

# # 3) Make sure flash-attn is removed (not needed here)
# !pip uninstall -y flash-attn || true

# # 4) Editable install without deps to avoid conflicting wheels
# %cd "$repo_dir"
# !pip install -e . --no-deps
# %cd /content

/content/t2v_metrics
Already up to date.
/content
[0m/content/t2v_metrics
Obtaining file:///content/t2v_metrics
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Collecting torch==2.5.1 (from t2v_metrics==3.0)
  Using cached torch-2.5.1-cp312-cp312-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision==0.20.1 (from t2v_metrics==3.0)
  Using cached torchvision-0.20.1-cp312-cp312-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio==2.5.1 (from t2v_metrics==3.0)
  Using cached torchaudio-2.5.1-cp312-cp312-manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting xformers (from t2v_metrics==3.0)
  Using cached xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.1 kB)
Collecting numpy==1.26.4 (from t2v_metrics==3.0)
  Using cached numpy-1.26.4-cp312-cp312-man

ModuleNotFoundError: No module named 'iopath'

In [3]:
%pip install --quiet --upgrade pip
%pip install --quiet einops pandas numpy scipy tqdm pillow
%pip install --quiet transformers accelerate datasets
%pip install --quiet open_clip_torch
%pip install --quiet diffusers==0.30.0
# 3) Make sure flash-attn is removed (not needed here)
!pip uninstall -y flash-attn || true

[0m

In [4]:
!pip install --force-reinstall --no-deps t2v-metrics

# Install Git-based dependencies
!pip install git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
!pip install git+https://github.com/openai/CLIP.git
!pip install git+https://github.com/linzhiqiu/pytorchvideo.git

# Install flash-attention (CUDA 12.2, Python 3.10)
# !pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.8/flash_attn-2.5.8+cu122torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl

Collecting t2v-metrics
  Downloading t2v_metrics-3.0-py3-none-any.whl.metadata (24 kB)
Downloading t2v_metrics-3.0-py3-none-any.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m3.2 MB/s[0m  [33m0:00:00[0m
[?25hInstalling collected packages: t2v-metrics
Successfully installed t2v-metrics-3.0
Collecting git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
  Cloning https://github.com/LLaVA-VL/LLaVA-NeXT.git to /tmp/pip-req-build-58jlw7ni
  Running command git clone --filter=blob:none --quiet https://github.com/LLaVA-VL/LLaVA-NeXT.git /tmp/pip-req-build-58jlw7ni
  Resolved https://github.com/LLaVA-VL/LLaVA-NeXT.git to commit e9835311c6f515a13702eb7a7750fcd936f65ed8
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: llava
  Building wheel for llava (pyproject.toml) ... [?25l[?25hdone
  C

In [5]:
# Cell 6 — Quick import checks
# Core stack: these imports are not guarded, so a missing package raises here.
import torch
import transformers
import datasets
import pandas as pd
import numpy as np
import PIL
import matplotlib
import seaborn
import scipy
import open_clip
from diffusers import StableDiffusionPipeline
import importlib

# t2v_metrics gets a soft check: a failure is reported but does not abort the cell.
try:
    import t2v_metrics
except Exception as err:
    print("t2v_metrics import FAILED:", err)
else:
    print("t2v_metrics import: OK")

# Runtime report: accelerator availability and library versions.
print("CUDA available:", torch.cuda.is_available())
print("Device:", "CPU" if not torch.cuda.is_available() else torch.cuda.get_device_name(0))
print("Torch:", torch.__version__)
print("Transformers:", transformers.__version__)

t2v_metrics import FAILED: No module named 'fairscale'
CUDA available: True
Device: NVIDIA L4
Torch: 2.9.0+cu126
Transformers: 4.57.1


In [None]:
# Cell 7 — Small utilities (seed, paths, dir check) reused by all notebooks
import json
import random

# Project sub-folders, all derived from the PROJ root; children reuse their parent constant.
DATA_DIR = f"{PROJ}/data"
RES_DIR = f"{PROJ}/results"
IMG_DIR = f"{DATA_DIR}/generated_images"
VID_DIR = f"{DATA_DIR}/videos"
PLOTS_DIR = f"{RES_DIR}/plots"

def set_seed(seed: int = 42):
    """Seed Python's `random`, NumPy and PyTorch with the same value.

    Args:
        seed: Value passed to every seeding call (defaults to 42).

    When CUDA is available, all CUDA devices are seeded as well.
    """
    # Same order as always: stdlib RNG, NumPy global RNG, then torch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

def ensure_dirs():
    """Create the data, image, video, results and plots folders if they are missing."""
    required = (DATA_DIR, IMG_DIR, VID_DIR, RES_DIR, PLOTS_DIR)
    for folder in required:
        # exist_ok=True: already-present folders are left untouched.
        os.makedirs(folder, exist_ok=True)

# Apply the seed and make sure the folders exist, then echo the resolved paths.
set_seed(42)
ensure_dirs()
print("Utils ready. Paths:")
for _label, _path in (
    ("DATA_DIR:", DATA_DIR),
    ("IMG_DIR :", IMG_DIR),
    ("VID_DIR :", VID_DIR),
    ("RES_DIR :", RES_DIR),
    ("PLOTS   :", PLOTS_DIR),
):
    print(_label, _path)

Utils ready. Paths:
DATA_DIR: /content/drive/MyDrive/VQAScore_Project/data
IMG_DIR : /content/drive/MyDrive/VQAScore_Project/data/generated_images
VID_DIR : /content/drive/MyDrive/VQAScore_Project/data/videos
RES_DIR : /content/drive/MyDrive/VQAScore_Project/results
PLOTS   : /content/drive/MyDrive/VQAScore_Project/results/plots


In [None]:
# Cell 8 — Create baseline VQA templates (used in Phase 4; OK to define now)
# Each template is a yes/no question with one "{}" placeholder inside double quotes.
templates = {
    "templates": [
        'Does this figure show "{}"? Please answer yes or no.',
        'Is there "{}" in this image? Please answer yes or no.',
        'Does the photo depict "{}"? Please answer yes or no.',
    ]
}

# Serialise first, then write the text in one go.
with open(f"{DATA_DIR}/templates.json", "w") as f:
    f.write(json.dumps(templates, indent=2))

# f.name is the exact path string that was passed to open() above.
print("Saved templates.json at:", f.name)

Saved templates.json at: /content/drive/MyDrive/VQAScore_Project/data/templates.json


In [None]:
# Cell 9 — Freeze environment to requirements.txt for reproducibility
!pip freeze | sed 's/@.*//' > /content/requirements_colab.txt
!cp /content/requirements_colab.txt "{PROJ}/requirements.txt"
print("Saved requirements to", f"{PROJ}/requirements.txt")

[1;31merror[0m: [1msubprocess-exited-with-error[0m

[31m×[0m [32mgit rev-parse HEAD[0m did not run successfully.
[31m│[0m exit code: [1;36m128[0m
[31m╰─>[0m No available output.

[1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
[0mSaved requirements to /content/drive/MyDrive/VQAScore_Project/requirements.txt


In [None]:
# Cell 10 — Phase 1 completion summary
# Import the submodule explicitly: a plain `import importlib` does not guarantee
# that `importlib.util` is loaded, so `importlib.util.find_spec` could raise AttributeError.
import importlib.util
import json

# Relies on PROJ / DIRS (Cell 1), torch (Cell 6) and DATA_DIR (Cell 7).
summary = {
    "proj_root": PROJ,
    "cuda_available": torch.cuda.is_available(),
    "device_name": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU",
    "folders": DIRS,
    "templates_path": f"{DATA_DIR}/templates.json",
    "requirements_path": f"{PROJ}/requirements.txt",
    # find_spec only checks that the package can be located; it does not import it,
    # so True here does not prove that `import t2v_metrics` succeeds.
    "t2v_metrics_installed": bool(importlib.util.find_spec("t2v_metrics")),
}
print(json.dumps(summary, indent=2))

{
  "proj_root": "/content/drive/MyDrive/VQAScore_Project",
  "cuda_available": true,
  "device_name": "NVIDIA L4",
  "folders": [
    "/content/drive/MyDrive/VQAScore_Project",
    "/content/drive/MyDrive/VQAScore_Project/data",
    "/content/drive/MyDrive/VQAScore_Project/data/generated_images",
    "/content/drive/MyDrive/VQAScore_Project/data/videos",
    "/content/drive/MyDrive/VQAScore_Project/models",
    "/content/drive/MyDrive/VQAScore_Project/results",
    "/content/drive/MyDrive/VQAScore_Project/results/plots",
    "/content/drive/MyDrive/VQAScore_Project/notebooks"
  ],
  "templates_path": "/content/drive/MyDrive/VQAScore_Project/data/templates.json",
  "requirements_path": "/content/drive/MyDrive/VQAScore_Project/requirements.txt",
  "t2v_metrics_installed": false
}

✅ Phase 1 is complete
