# Welcome to Modal notebooks!

Write Python code and collaborate in real time. Your code runs in Modal's
**serverless cloud**, and anyone in the same workspace can join.

This notebook comes with some common Python libraries installed. Run
cells with `Shift+Enter`.

In [1]:
import torch

# Report whether PyTorch can see a CUDA device and, if so, name each one.
if not torch.cuda.is_available():
    print("CUDA (NVIDIA GPU) is not available.")
else:
    print("CUDA (NVIDIA GPU) is available.")
    gpu_count = torch.cuda.device_count()
    print(f"Number of CUDA devices: {gpu_count}")
    for gpu_index in range(gpu_count):
        print(f"  Device {gpu_index} Name: {torch.cuda.get_device_name(gpu_index)}")

CUDA (NVIDIA GPU) is available.
Number of CUDA devices: 1
  Device 0 Name: NVIDIA A100 80GB PCIe


In [2]:
import os

# Remember the directory the kernel started in the first time this cell runs.
# Later cells call os.chdir(), so resolving './GenImage' against the *current*
# directory on a re-run would create a nested GenImage/GenImage (or worse, a
# GenImage inside the cloned repo). Anchoring on NOTEBOOK_ROOT keeps the cell
# idempotent within one kernel session.
if "NOTEBOOK_ROOT" not in globals():
    NOTEBOOK_ROOT = os.getcwd()

target_dir = './GenImage'
workspace_dir = os.path.normpath(os.path.join(NOTEBOOK_ROOT, target_dir))
os.makedirs(workspace_dir, exist_ok=True)
print(f"Created or found directory: {target_dir}")
os.chdir(workspace_dir)
print(f"Current directory: {os.getcwd()}")

Created or found directory: ./GenImage
Current directory: /root/GenImage


In [None]:
import subprocess, os, sys

# NOTE(review): despite its name, repo_url_ssh currently holds an HTTPS URL that
# is identical to repo_url_https. The values are left untouched here; identical
# URLs are de-duplicated below so a failed clone is not retried verbatim.
repo_url_ssh = "https://github.com/Hadayxinchao/WeatherDiffusion.git"
repo_url_https = "https://github.com/Hadayxinchao/WeatherDiffusion.git"
repo_dir = "WeatherDiffusion"

# This cell ends with os.chdir(repo_dir); when it is re-run the kernel is already
# inside the checkout, and a relative existence test would clone a nested copy.
already_inside_repo = (
    os.path.basename(os.getcwd()) == repo_dir and os.path.isdir(".git")
)

if already_inside_repo:
    print(f"Already inside {repo_dir}, skipping clone.")
elif os.path.exists(repo_dir):
    print(f"Repo already exists at {repo_dir}, skipping clone.")
else:
    # dict.fromkeys keeps the first occurrence of each URL, preserving order.
    clone_urls = list(dict.fromkeys([repo_url_ssh, repo_url_https]))
    for attempt, clone_url in enumerate(clone_urls, start=1):
        print(f"Cloning: {clone_url}")
        try:
            # Pass the destination explicitly so the checkout is guaranteed to
            # land in repo_dir, which the os.chdir() below relies on.
            subprocess.check_call(["git", "clone", clone_url, repo_dir])
            break
        except subprocess.CalledProcessError as e:
            if attempt == len(clone_urls):
                raise  # every candidate URL failed; surface the last error
            print(f"Clone failed ({e}); falling back to next URL...")

if not already_inside_repo:
    os.chdir(repo_dir)
print("Now in repo:", os.getcwd())
print("Git remotes:")
subprocess.check_call(["git", "remote", "-v"])


Cloning into 'weatherdiff'...
remote: Enumerating objects: 103, done.[K
remote: Counting objects:   0% (1/103)[Kremote: Counting objects:   1% (2/103)[Kremote: Counting objects:   2% (3/103)[Kremote: Counting objects:   3% (4/103)[Kremote: Counting objects:   4% (5/103)[Kremote: Counting objects:   5% (6/103)[Kremote: Counting objects:   6% (7/103)[Kremote: Counting objects:   7% (8/103)[Kremote: Counting objects:   8% (9/103)[Kremote: Counting objects:   9% (10/103)[Kremote: Counting objects:  10% (11/103)[Kremote: Counting objects:  11% (12/103)[Kremote: Counting objects:  12% (13/103)[Kremote: Counting objects:  13% (14/103)[Kremote: Counting objects:  14% (15/103)[Kremote: Counting objects:  15% (16/103)[Kremote: Counting objects:  16% (17/103)[Kremote: Counting objects:  17% (18/103)[Kremote: Counting objects:  18% (19/103)[Kremote: Counting objects:  19% (20/103)[Kremote: Counting objects:  20% (21/103)[Kremote: Counting objects:  21% 

In [4]:
# Show the entries of the current working directory (the cloned repository).
repo_entries = os.listdir()
print(repo_entries)

['.gitignore', 'LICENSE', 'README.md', 'calculate_psnr_ssim.py', 'configs', 'datasets', 'eval_diffusion.py', 'models', 'train_diffusion.py', 'utils', 'weatherdiff.ipynb']


# Data preparation

In [None]:
# Install gdown for downloading from Google Drive
!pip install -q gdown

import gdown
import os

# Create the data directory (relative to the current working directory).
data_dir = './data/custom_haze'
os.makedirs(data_dir, exist_ok=True)

# Download the dataset folder from Google Drive.
# The folder ID is the last path component of the shared-folder URL.
folder_id = '1bPlY4KsZT-4mebvyYAqvjQhvXEFb79nC'
print("Downloading dataset from Google Drive...")
downloaded_files = gdown.download_folder(id=folder_id, output=data_dir, quiet=False, use_cookies=False)

# gdown.download_folder returns the list of downloaded files, or None when the
# download failed. Fail loudly here instead of printing a success message and
# letting later cells break on missing data.
if not downloaded_files:
    raise RuntimeError(
        f"Google Drive download failed or returned no files for folder id {folder_id}"
    )
print("Download complete.")

Downloading O-HAZE.zip...
Download complete.


In [None]:
# Inspect the directory structure that was downloaded: one line per directory,
# indented by depth, followed by at most five of its files.
print("Dataset structure:")
for current_dir, _child_dirs, filenames in os.walk(data_dir):
    depth = current_dir.replace(data_dir, '').count(os.sep)
    dir_pad = ' ' * 2 * depth
    file_pad = ' ' * 2 * (depth + 1)
    print(f'{dir_pad}{os.path.basename(current_dir)}/')
    # Only the first 5 files are listed; the remainder is summarised.
    for filename in filenames[:5]:
        print(f'{file_pad}{filename}')
    hidden_count = len(filenames) - 5
    if hidden_count > 0:
        print(f'{file_pad}... and {hidden_count} more files')

Extraction complete.


In [None]:
# Locate the "output" directory (expected to hold the train and test folders)
# and show what it contains, or a placeholder when it does not exist.
output_dir = os.path.join(data_dir, 'output')
print(f"Output directory: {output_dir}")
if os.path.exists(output_dir):
    output_listing = os.listdir(output_dir)
else:
    output_listing = 'Directory not found'
print(f"Contents: {output_listing}")

In [None]:
import os
import glob

# Build the list files for the training and test sets.
# In this custom dataset, train_haze holds hazy images without separate ground
# truth, so each training line pairs a hazy image with itself. The test set
# pairs each hazy image with its "origin" counterpart.


def _write_pair_list(list_path, pairs):
    """Write one "input_path gt_path" line per (input, gt) pair to list_path."""
    with open(list_path, 'w') as f:
        for input_path, gt_path in pairs:
            f.write(f"{input_path} {gt_path}\n")
    print(f"Created {list_path}")


def _preview_list(list_path, limit=3):
    """Print the first `limit` lines of list_path, each indented by two spaces."""
    with open(list_path, 'r') as f:
        for i, line in enumerate(f):
            if i >= limit:
                break
            print(f"  {line.strip()}")


train_haze_dir = os.path.join(output_dir, 'train', 'train_haze')
test_haze_dir = os.path.join(output_dir, 'test', 'test_haze')
test_origin_dir = os.path.join(output_dir, 'test', 'test_origin')

# Get all training haze images
train_haze_images = sorted(glob.glob(os.path.join(train_haze_dir, '*.*')))
print(f"Found {len(train_haze_images)} training haze images")

# Get all test images
test_haze_images = sorted(glob.glob(os.path.join(test_haze_dir, '*.*')))
test_origin_images = sorted(glob.glob(os.path.join(test_origin_dir, '*.*')))
print(f"Found {len(test_haze_images)} test haze images")
print(f"Found {len(test_origin_images)} test origin images")

if not train_haze_images:
    # An empty list file would only fail later, far from the real cause.
    print(f"WARNING: no training images found in {train_haze_dir}; train.txt will be empty.")

# Test pairs are matched purely by sorted position. zip() would silently drop
# the surplus when the two folders differ in size, shifting or truncating the
# pairs and corrupting any metric computed from them, so refuse to continue.
# NOTE(review): equal counts do not prove that file names correspond; confirm
# the naming scheme of test_haze and test_origin matches.
if len(test_haze_images) != len(test_origin_images):
    raise ValueError(
        f"Test set is not paired: {len(test_haze_images)} haze images in "
        f"{test_haze_dir} vs {len(test_origin_images)} origin images in "
        f"{test_origin_dir}"
    )

# Create train.txt. Line format: "input_path gt_path".
# Without separate ground truth, the hazy image itself is used as the target;
# adjust the second element here if real ground truth becomes available.
train_list_path = os.path.join(data_dir, 'train.txt')
_write_pair_list(train_list_path, [(haze_img, haze_img) for haze_img in train_haze_images])

# Create test.txt - paired haze and origin images
test_list_path = os.path.join(data_dir, 'test.txt')
_write_pair_list(test_list_path, zip(test_haze_images, test_origin_images))

# Show the first few lines of each list
print("\nFirst 3 lines of train.txt:")
_preview_list(train_list_path)

print("\nFirst 3 lines of test.txt:")
_preview_list(test_list_path)

--- Processing folder: ./GT/ ---
Renamed: 01_outdoor_GT.jpg -> 01_outdoor.jpg
Renamed: 02_outdoor_GT.jpg -> 02_outdoor.jpg
Renamed: 03_outdoor_GT.JPG -> 03_outdoor.jpg
Renamed: 04_outdoor_GT.jpg -> 04_outdoor.jpg
Renamed: 05_outdoor_GT.jpg -> 05_outdoor.jpg
Renamed: 06_outdoor_GT.jpg -> 06_outdoor.jpg
Renamed: 07_outdoor_GT.jpg -> 07_outdoor.jpg
Renamed: 08_outdoor_GT.jpg -> 08_outdoor.jpg
Renamed: 09_outdoor_GT.jpg -> 09_outdoor.jpg
Renamed: 10_outdoor_GT.jpg -> 10_outdoor.jpg
Renamed: 11_outdoor_GT.jpg -> 11_outdoor.jpg
Renamed: 12_outdoor_GT.jpg -> 12_outdoor.jpg
Renamed: 13_outdoor_GT.jpg -> 13_outdoor.jpg
Renamed: 14_outdoor_GT.jpg -> 14_outdoor.jpg
Renamed: 15_outdoor_GT.jpg -> 15_outdoor.jpg
Renamed: 16_outdoor_GT.jpg -> 16_outdoor.jpg
Renamed: 17_outdoor_GT.jpg -> 17_outdoor.jpg
Renamed: 18_outdoor_GT.jpg -> 18_outdoor.jpg
Renamed: 19_outdoor_GT.jpg -> 19_outdoor.jpg
Renamed: 20_outdoor_GT.jpg -> 20_outdoor.jpg
Renamed: 21_outdoor_GT.JPG -> 21_outdoor.jpg
Renamed: 22_outdoor_GT

# Training với Custom Dataset

In [None]:
# Check the final directory layout as seen from the current working directory.
custom_haze_root = os.path.join(os.getcwd(), 'data', 'custom_haze')
print("Current directory:", os.getcwd())
print("\nData directory structure:")
print(f"data_dir in config: {custom_haze_root}")
print(f"Exists: {os.path.exists(custom_haze_root)}")

# Check that both list files are present.
train_txt = os.path.join(custom_haze_root, 'train.txt')
test_txt = os.path.join(custom_haze_root, 'test.txt')
train_txt_found = os.path.exists(train_txt)
test_txt_found = os.path.exists(test_txt)
print(f"\ntrain.txt exists: {train_txt_found}")
print(f"test.txt exists: {test_txt_found}")

In [None]:
import os
import yaml
import subprocess
import sys
import modal
import shutil

# --- CONFIGURATION ---
BASE_CONFIG_PATH = "configs/custom_haze.yml"
VOL_NAME = "weather-diffusion-vol"

# File name of the base model used as the starting point for finetuning.
# It is looked up in ./ckpts first; the code below falls back to the Modal
# Volume when it is not present locally.
BASE_MODEL_NAME = "WeatherDiff64.pth.tar"

# The five experiments. Each entry has a human-readable description and the
# nested overrides that are merged into the base config for that run.
experiments = {
    "case1_baseline": dict(
        desc="Config Goc (No changes)",
        changes={},
    ),
    "case2_highWD": dict(
        desc="Tang Weight Decay -> 0.01",
        changes={"optim": {"weight_decay": 0.01}},
    ),
    "case3_lowLR": dict(
        desc="Giam LR -> 5e-6",
        changes={"optim": {"lr": 5e-6}},
    ),
    "case4_dropout": dict(
        desc="Tang Dropout -> 0.2",
        changes={"model": {"dropout": 0.2}},
    ),
    "case5_combined": dict(
        desc="Ket hop tat ca: WD=0.01, LR=5e-6, Dropout=0.2",
        changes={
            "optim": {"weight_decay": 0.01, "lr": 5e-6},
            "model": {"dropout": 0.2},
        },
    ),
}

# Helper that merges experiment overrides into a config dict
def update_nested_dict(d, u):
    """Recursively merge the overrides in ``u`` into ``d`` and return ``d``.

    ``d`` is modified in place. For every key in ``u``:
    - a dict value is merged into the dict already stored under that key in
      ``d``; when ``d`` has no dict there (key missing, ``None``, or any other
      non-dict value, e.g. an empty YAML section) a fresh dict is started so
      the merge cannot fail on a non-mapping;
    - any other value simply replaces what ``d`` holds under that key.

    The dicts inside ``u`` are never stored in ``d`` directly, so later
    changes to ``d`` do not leak back into the overrides.
    """
    for k, v in u.items():
        if isinstance(v, dict):
            existing = d.get(k)
            if not isinstance(existing, dict):
                existing = {}
            d[k] = update_nested_dict(existing, v)
        else:
            d[k] = v
    return d

# Connect to the Modal Volume (created on first use)
vol = modal.Volume.from_name(VOL_NAME, create_if_missing=True)

# Make sure the base model is available locally; try the Volume as a fallback.
base_model_path = f"./ckpts/{BASE_MODEL_NAME}"
if not os.path.exists(base_model_path):
    print(f"⚠️ CẢNH BÁO: Không tìm thấy './ckpts/{BASE_MODEL_NAME}'.")
    print("   Code sẽ cố gắng tìm trên Volume hoặc train từ đầu (Random Init).")
    os.makedirs("./ckpts", exist_ok=True)
    # Volume.read_file takes only the remote path and yields byte chunks, so the
    # chunks are written out here. They go to a temporary sibling file first so
    # a failed transfer never leaves a truncated checkpoint at base_model_path.
    partial_path = f"{base_model_path}.part"
    try:
        with open(partial_path, "wb") as out:
            for chunk in vol.read_file(f"checkpoints/{BASE_MODEL_NAME}"):
                out.write(chunk)
        os.replace(partial_path, base_model_path)
        print("   ✅ Đã tải Base Model từ Volume về.")
    except Exception as e:
        # Best effort: the run can still proceed without a base model, but say
        # why the fallback did not work instead of failing silently.
        print(f"   ⚠️ Could not fetch the base model from the Volume: {e}")
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)

# --- MAIN LOOP ---
def _download_from_volume(volume, remote_path, local_path):
    """Stream ``remote_path`` from a Modal Volume into ``local_path``.

    ``Volume.read_file`` takes only the remote path and yields byte chunks.
    The chunks are written to a temporary sibling file that is renamed on
    success, so a failed transfer never leaves a truncated file behind.
    Any exception raised by the Volume or the file system propagates.
    """
    os.makedirs(os.path.dirname(local_path) or ".", exist_ok=True)
    partial_path = f"{local_path}.part"
    try:
        with open(partial_path, "wb") as out:
            for chunk in volume.read_file(remote_path):
                out.write(chunk)
        os.replace(partial_path, local_path)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)


completed_cases = []  # experiments whose training process exited with code 0

for case_name, exp_data in experiments.items():
    print(f"\n{'='*60}")
    print(f"🚀 RUNNING EXPERIMENT: {case_name}")
    print(f"📝 {exp_data['desc']}")
    print(f"{'='*60}")

    # 1. Build the per-experiment config from the base config plus overrides.
    with open(BASE_CONFIG_PATH, 'r') as f:
        cfg = yaml.safe_load(f)

    cfg = update_nested_dict(cfg, exp_data["changes"])
    # Give every case its own dataset name (CustomHaze_caseX) so that the
    # checkpoint files of different cases do not overwrite each other.
    # NOTE(review): presumably the training code only uses this name for output
    # file naming; confirm it is not also used to look up a dataset class.
    cfg['data']['dataset'] = f"CustomHaze_{case_name}"

    config_file = f"configs/{case_name}.yml"
    with open(config_file, 'w') as f:
        yaml.dump(cfg, f)

    # 2. Prepare the resume / pretrained checkpoint.
    # Per the original author's note, the training code names its output
    # CustomHaze_caseX_ddpm.pth.tar (defined in ddm.py).
    target_ckpt_name = f"CustomHaze_{case_name}_ddpm.pth.tar"
    local_ckpt_path = f"./ckpts/{target_ckpt_name}"
    remote_path = f"experiments/{case_name}/{target_ckpt_name}"
    os.makedirs("./ckpts", exist_ok=True)

    resume_path = ""

    # Has this case already been (partially) run and saved on the Volume?
    try:
        vol_files = [e.path for e in vol.listdir(f"experiments/{case_name}")]
    except Exception as e:
        # Expected on the first run, when the experiment folder does not exist.
        print(f"ℹ️ No previous checkpoints listed for {case_name} ({e}).")
        vol_files = []

    if target_ckpt_name in [os.path.basename(p) for p in vol_files]:
        print("🔄 Phát hiện checkpoint cũ trên Volume. Đang tải về để RESUME...")
        try:
            _download_from_volume(vol, remote_path, local_ckpt_path)
            resume_path = local_ckpt_path
        except Exception as e:
            # Restarting from the base model here would later overwrite the
            # existing remote checkpoint, so skip this case instead.
            print(f"❌ Could not download the existing checkpoint ({e}); skipping {case_name}.")
            continue

    # No resume file yet: start finetuning from a copy of the base model.
    if not resume_path and os.path.exists(f"./ckpts/{BASE_MODEL_NAME}"):
        print("🆕 Chưa có checkpoint riêng. Copy Base Model để bắt đầu FINETUNE...")
        shutil.copy(f"./ckpts/{BASE_MODEL_NAME}", local_ckpt_path)
        resume_path = local_ckpt_path

    # Remember the checkpoint's timestamp so it is possible to tell afterwards
    # whether training actually wrote a new one.
    if os.path.exists(local_ckpt_path):
        ckpt_stamp_before = os.stat(local_ckpt_path).st_mtime_ns
    else:
        ckpt_stamp_before = None

    # 3. Run training with the interpreter that runs this kernel, so the
    # subprocess sees the same installed packages.
    cmd = [
        sys.executable, 'train_diffusion.py',
        '--config', f'{case_name}.yml',
        '--sampling_timesteps', '25',
        '--image_folder', f'results/{case_name}_patches/'
    ]
    if resume_path:
        cmd.extend(['--resume', resume_path])

    print(f"▶️ Executing: {' '.join(cmd)}")

    process = None
    return_code = None
    recent_lines = []  # tail of the training output, shown if the run fails
    try:
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
        for line in process.stdout:
            recent_lines = (recent_lines + [line.rstrip()])[-20:]
            # Only echo the important lines to keep the log short.
            if any(k in line for k in ["epoch:", "step:", "Error", "Resume"]):
                print(line.strip())
        return_code = process.wait()
    except KeyboardInterrupt:
        print("\n🛑 Dừng bởi người dùng.")
        # Do not leave the training process running in the background.
        if process is not None and process.poll() is None:
            process.terminate()
            try:
                process.wait(timeout=30)
            except subprocess.TimeoutExpired:
                process.kill()
        break  # stop all remaining experiments when the user interrupts
    except Exception as e:
        print(f"❌ Lỗi: {e}")

    if return_code != 0:
        print(f"❌ Training for {case_name} did not finish cleanly (exit code: {return_code}). Last output lines:")
        for tail_line in recent_lines:
            print(f"   {tail_line}")

    # 4. Save the result to the Volume (own folder experiments/caseX).
    print(f"\n💾 Đang lưu model {case_name} lên Volume...")
    ckpt_exists = os.path.exists(local_ckpt_path)
    ckpt_written = ckpt_exists and os.stat(local_ckpt_path).st_mtime_ns != ckpt_stamp_before
    if ckpt_written:
        try:
            # force=True allows replacing the checkpoint of an earlier run.
            with vol.batch_upload(force=True) as batch:
                batch.put_file(local_ckpt_path, remote_path)
            print(f"✅ Đã lưu: {remote_path}")
        except Exception as e:
            print(f"❌ Upload of {remote_path} failed: {e}")
    elif ckpt_exists:
        # The file is still the starting checkpoint; uploading it would store
        # untrained weights as this experiment's result.
        print("⚠️ Checkpoint was not updated by training; skipping upload.")
    else:
        print("⚠️ Không tìm thấy file model output để lưu.")

    if return_code == 0:
        completed_cases.append(case_name)

if len(completed_cases) == len(experiments):
    print(f"\n🎉 HOÀN TẤT TOÀN BỘ {len(experiments)} THÍ NGHIỆM!")
else:
    print(f"\n⚠️ Finished {len(completed_cases)}/{len(experiments)} experiments successfully: {completed_cases}")

Using device: cuda
=> using dataset 'MyDataset'
Data Loaded!
=> creating denoising-diffusion model...
Found 40 images in ./haze_data/train/hazy
Found 5 images in ./haze_data/test/hazy
Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 1e-05
    maximize: False
    weight_decay: 0.001
)
Current learning rate before training: 1e-05
epoch:  0
epoch:  1
epoch:  2
epoch:  3
step: 10, loss: 10449.59375, data time: 3.966639518737793
epoch:  4
epoch:  5
epoch:  6
step: 20, loss: 7910.8818359375, data time: 2.0040950775146484
epoch:  7
epoch:  8
epoch:  9
step: 30, loss: 6066.8623046875, data time: 1.216320514678955
epoch:  10
epoch:  11
epoch:  12
epoch:  13
step: 40, loss: 4578.3701171875, data time: 3.9093542098999023
epoch:  14
epoch:  15
epoch:  16
step: 50, loss: 3654.35595703125, data time: 1.8904286623001099
epoch:  17
epoch:  18
epo