# Data preperation for Swinir

code written by kushwanth

In [1]:
!pip install basicsr  huggingface_hub transformers

Collecting basicsr
  Downloading basicsr-1.4.2.tar.gz (172 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.5/172.5 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting addict (from basicsr)
  Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)
Collecting lmdb (from basicsr)
  Downloading lmdb-1.6.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting tb-nightly (from basicsr)
  Downloading tb_nightly-2.20.0a20250422-py3-none-any.whl.metadata (1.9 kB)
Collecting yapf (from basicsr)
  Downloading yapf-0.43.0-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.8/46.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.7->basicsr)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-c

# tried to down sample and then build hr sr pair

In [5]:
# -----------------------------------------------
# Swin2SR Super-Resolution: 64→256 Training, 256→1024 Inference
# -----------------------------------------------
import os, glob
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.utils import save_image

from transformers import (
    AutoImageProcessor,
    Swin2SRForImageSuperResolution
)
from torch.amp import autocast, GradScaler

# -----------------------------------------------
# Paths & Hyperparameters
# -----------------------------------------------
HR_FULL_DIR    = '/kaggle/input/paintings/resized_dataset'
CHECKPOINT_DIR = '/kaggle/working/checkpoints'
OUTPUT_DIR     = '/kaggle/working/outputs'

MODEL_NAME        = 'caidas/swin2SR-classical-sr-x4-64'
EPOCHS            = 100
BATCH_SIZE        = 8
LR                = 1e-4
NUM_WORKERS       = 2
TRAIN_CROP_SIZE   = 64
INFERENCE_SIZE    = 256
SCALE             = 2

os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# -----------------------------------------------
# Processor & Model
# -----------------------------------------------
processor = AutoImageProcessor.from_pretrained(MODEL_NAME, do_pad=False)
model     = Swin2SRForImageSuperResolution.from_pretrained(MODEL_NAME).to(device)
model.train()

# -----------------------------------------------
# Dataset & Loader
# -----------------------------------------------
class PatchDataset(Dataset):
    def __init__(self, folder, crop_size=64, scale=4):
        self.image_paths = glob.glob(os.path.join(folder, '*.png')) + glob.glob(os.path.join(folder, '*.jpg'))
        self.crop_size = crop_size
        self.scale = scale
        self.hr_transform = transforms.Compose([
            transforms.RandomCrop(crop_size),
            transforms.ToTensor()
        ])

    def __getitem__(self, idx):
        img = Image.open(self.image_paths[idx]).convert('RGB')
        hr  = self.hr_transform(img)
        lr  = transforms.Resize(self.crop_size // self.scale)(hr)
        return {'lr': lr, 'hr': hr}

    def __len__(self):
        return len(self.image_paths)

train_ds = PatchDataset(HR_FULL_DIR, crop_size=TRAIN_CROP_SIZE, scale=SCALE)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)

# -----------------------------------------------
# Training Setup
# -----------------------------------------------
criterion = nn.L1Loss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
scaler    = GradScaler(device='cuda')

# -----------------------------------------------
# Training Loop
# -----------------------------------------------
for epoch in range(EPOCHS):
    model.train()
    epoch_loss = 0.0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")

    for batch in pbar:
        lr = batch['lr'].to(device)
        hr = batch['hr'].to(device)

        pixel_values = processor(images=lr, return_tensors="pt").pixel_values.to(device)

        optimizer.zero_grad()
        with autocast(device_type='cuda'):
            out = model(pixel_values=pixel_values)
            sr  = out.reconstruction
            loss = criterion(sr, hr)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        epoch_loss += loss.item()
        pbar.set_postfix(loss=loss.item())

    torch.save(model.state_dict(), f"{CHECKPOINT_DIR}/swin2sr_epoch{epoch+1}.pt")

# -----------------------------------------------
# Inference on Full Images
# -----------------------------------------------
model.eval()
image_paths = glob.glob(os.path.join(HR_FULL_DIR, '*.png')) + glob.glob(os.path.join(HR_FULL_DIR, '*.jpg'))

with torch.no_grad():
    for path in tqdm(image_paths, desc='Inference'):
        img = Image.open(path).convert("RGB").resize((INFERENCE_SIZE, INFERENCE_SIZE))
        inputs = processor(images=img, return_tensors="pt").pixel_values.to(device)

        with autocast(device_type='cuda'):
            out = model(pixel_values=inputs)
            sr_img = out.reconstruction.clamp(0, 1)

        fname = os.path.basename(path)
        save_image(sr_img, os.path.join(OUTPUT_DIR, fname))


Epoch 1/100:   0%|          | 0/496 [00:00<?, ?it/s]It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.
Epoch 1/100: 100%|██████████| 496/496 [01:54<00:00,  4.33it/s, loss=0.15] 
Epoch 2/100: 100%|██████████| 496/496 [01:54<00:00,  4.34it/s, loss=0.177] 
Epoch 3/100: 100%|██████████| 496/496 [01:54<00:00,  4.35it/s, loss=0.169] 
Epoch 4/100: 100%|██████████| 496/496 [01:53<00:00,  4.36it/s, loss=0.145] 
Epoch 5/100: 100%|██████████| 496/496 [01:51<00:00,  4.43it/s, loss=0.153] 
Epoch 6/100: 100%|██████████| 496/496 [01:54<00:00,  4.34it/s, loss=0.127] 
Epoch 7/100: 100%|██████████| 496/496 [01:51<00:00,  4.45it/s, loss=0.0902]
Epoch 8/100: 100%|██████████| 496/496 [01:54<00:00,  4.34it/s, loss=0.0523]
Epoch 9/100: 100%|██████████| 496/496 [01:52<00:00,  4.40it/s, loss=0.0838]
Epoch 10/100: 100%|██████████| 496/496 [01:52<00:00,  4.43it/s, loss=0.124] 
Epoch 11/100: 

KeyboardInterrupt: 

# Used *4 swin ir to build hr lr data set

In [2]:
# In a Kaggle notebook, install dependencies in a separate cell:
# ```bash
# !pip install transformers pillow tqdm torchvision
# ```

import os
import glob
from PIL import Image
from tqdm import tqdm

import torch
from transformers import Swin2SRImageProcessor, Swin2SRForImageSuperResolution
from torchvision.transforms import ToPILImage

# -----------------------------------------------
# Directories & Device
# -----------------------------------------------
INPUT_DIR  = '/kaggle/input/paintings/resized_dataset'
OUTPUT_DIR = '/kaggle/working/outputs'
MODEL_NAME = 'caidas/swin2sr-classical-sr-x4-64'

os.makedirs(OUTPUT_DIR, exist_ok=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# -----------------------------------------------
# Load Processor & Model
# -----------------------------------------------
# Swin2SRImageProcessor handles normalization & padding
processor = Swin2SRImageProcessor(
    do_rescale=True,
    rescale_factor=1/255.0,
    do_pad=True,
    pad_size=8
)
# Load pretrained Swin2SR model
model = Swin2SRForImageSuperResolution.from_pretrained(MODEL_NAME).to(device)
model.eval()

# Helper to convert tensor to PIL
to_pil = ToPILImage()

# -----------------------------------------------
# Inference Loop
# -----------------------------------------------
image_paths = glob.glob(os.path.join(INPUT_DIR, '*.png')) + glob.glob(os.path.join(INPUT_DIR, '*.jpg'))

with torch.no_grad():
    for img_path in tqdm(image_paths, desc='Super-resolving'):
        # Load low-res image
        img = Image.open(img_path).convert('RGB')

        # Preprocess: returns {'pixel_values': tensor}
        inputs = processor(images=img, return_tensors='pt')
        # Move tensors to device
        pixel_values = inputs['pixel_values'].to(device)

        # Forward pass
        outputs = model(pixel_values=pixel_values)
        # `outputs.reconstruction` is a tensor [1, C, H, W]
        recon = outputs.reconstruction.squeeze(0).cpu().clamp(0, 1)

        # Convert to PIL image
        sr_img = to_pil(recon)

        # Save SR image
        fname = os.path.basename(img_path)
        sr_img.save(os.path.join(OUTPUT_DIR, fname))

print(f"Super-resolution completed. Outputs saved to: {OUTPUT_DIR}")


Super-resolving: 0it [00:00, ?it/s]

Super-resolution completed. Outputs saved to: /kaggle/working/outputs





# Used *2 swin ir to build hr lr pair

In [5]:
# In a Kaggle notebook, first install dependencies:
# ```bash
# !pip install transformers pillow tqdm torchvision
# ```

import os
import glob
from PIL import Image
from tqdm import tqdm

import torch
from transformers import Swin2SRImageProcessor, Swin2SRForImageSuperResolution
from torchvision.transforms import ToPILImage

# -----------------------------------------------
# Directories & Device
# -----------------------------------------------
INPUT_DIR  = '/kaggle/input/paintings/resized_dataset/resized_dataset'
OUTPUT_DIR = '/kaggle/working/outputs_x2'
MODEL_NAME = 'caidas/swin2sr-classical-sr-x2-64'   # ← x2 model instead of x4

os.makedirs(OUTPUT_DIR, exist_ok=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# -----------------------------------------------
# Load Processor & Model
# -----------------------------------------------
processor = Swin2SRImageProcessor(
    do_rescale=True,
    rescale_factor=1/255.0,
    do_pad=True,
    pad_size=8
)
model = Swin2SRForImageSuperResolution.from_pretrained(MODEL_NAME).to(device)
model.eval()

to_pil = ToPILImage()

# -----------------------------------------------
# Inference Loop
# -----------------------------------------------
image_paths = (
    glob.glob(os.path.join(INPUT_DIR, '*.png')) +
    glob.glob(os.path.join(INPUT_DIR, '*.jpg'))
)

with torch.no_grad():
    for img_path in tqdm(image_paths, desc='Super-resolving ×2'):
        # Load
        img = Image.open(img_path).convert('RGB')

        # Preprocess
        inputs = processor(images=img, return_tensors='pt')
        pixel_values = inputs['pixel_values'].to(device)

        # Forward
        outputs = model(pixel_values=pixel_values)
        recon = outputs.reconstruction.squeeze(0).cpu().clamp(0, 1)

        # Save
        sr_img = to_pil(recon)
        fname  = os.path.basename(img_path)
        sr_img.save(os.path.join(OUTPUT_DIR, fname))

print(f"×2 Super-resolution done. Outputs in {OUTPUT_DIR}")


Super-resolving ×2: 100%|██████████| 3961/3961 [57:27<00:00,  1.15it/s]

×2 Super-resolution done. Outputs in /kaggle/working/outputs_x2





In [6]:
import shutil
zip_base = '/kaggle/working/outputs_x2'
shutil.make_archive(zip_base, 'zip', OUTPUT_DIR)
zip_path = zip_base + '.zip'

print(f"Super-resolution completed. Outputs saved to: {OUTPUT_DIR}")
print(f"Zipped outputs to: {zip_path}")


Super-resolution completed. Outputs saved to: /kaggle/working/outputs_x2
Zipped outputs to: /kaggle/working/outputs_x2.zip
