In [12]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
nst_vangogh_local_safe.py
Neural Style Transfer (Gatys) — ローカル＆巨大画像安全版
- 依存: torch torchvision pillow tqdm
- 入力:
    - style:   Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg
    - content: Tokyo-Tower.jpg
- 出力: output.jpg
- ポイント:
    1) Pillow の DecompressionBomb を無効化 + 事前縮小で安全に読込
    2) 進捗表示つき
    3) LBFGS/Adam 切替可能
"""

"""
画風を強くしたい: STYLE_WEIGHT を上げる（例: 1e5 → 3e5）
内容を残したい: CONTENT_WEIGHT を上げる
解像度を上げたい: IMAGE_SIZE を上げる（GPUメモリと実行時間に注意）
収束が荒い場合: NUM_STEPS を増やす、USE_LBFGS=True を推奨（高品質）
"""

import os
from PIL import Image, ImageOps
Image.MAX_IMAGE_PIXELS = None  # 巨大画像の安全上限を解除（必ず事前縮小も併用）

from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms

# ====== 設定 ======
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
IMAGE_SIZE = 1024 if torch.cuda.is_available() else 256   # 768/1024 などに上げると高精細（VRAMに注意）
STYLE_PATH   = "Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg"

# CONTENT_PATH = "yakei.png"
CONTENT_PATH = "Central_Park_in_Shinjuku_Ward_Tokyo_20250824104812_01.png"
OUTPUT_PATH  = "output.jpg"

CONTENT_WEIGHT = 1.0
STYLE_WEIGHT   = 4e5
NUM_STEPS      = 300             # 画質↑: 増やす（GPUなら 500-1000 も可）
USE_LBFGS      = True            # False で Adam に切替（高速デバッグ向け）
LR_ADAM        = 1e-1

# ====== ユーティリティ ======
def require_file(path, label):
    if not os.path.exists(path):
        raise FileNotFoundError(f"[{label}] {path} が見つかりません。同じフォルダに置いてください。")

def safe_load_and_resize(path, imsize=IMAGE_SIZE):
    """
    DecompressionBombError を回避しつつ、事前に強制縮小してからテンソル化。
    ・EXIFの回転を補正
    ・thumbnail で長辺 imsize へクリップ
    """
    with Image.open(path) as im:
        im = ImageOps.exif_transpose(im).convert("RGB")
        im.thumbnail((imsize, imsize), Image.LANCZOS)  # ここで強制縮小
        to_tensor = transforms.ToTensor()
        t = to_tensor(im).unsqueeze(0).to(DEVICE, torch.float)
    return t

# VGG 前処理の平均・分散
cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(DEVICE)
cnn_normalization_std  = torch.tensor([0.229, 0.224, 0.225]).to(DEVICE)

class Normalization(nn.Module):
    def __init__(self, mean, std):
        super().__init__()
        self.mean = mean.clone().detach().view(-1,1,1)
        self.std  = std.clone().detach().view(-1,1,1)
    def forward(self, img):
        return (img - self.mean) / self.std

def unnormalize_and_save(tensor, path):
    x = tensor.detach().cpu().squeeze(0)
    x = x * cnn_normalization_std.view(3,1,1) + cnn_normalization_mean.view(3,1,1)
    x = torch.clamp(x, 0, 1)
    transforms.ToPILImage()(x).save(path)
    print(f"[SAVE] {path}")

def gram_matrix(x):
    b, c, h, w = x.size()
    F = x.view(b, c, h*w)
    return torch.bmm(F, F.transpose(1,2)) / (c*h*w)

# 使う層
content_layers_default = ['conv_4']
style_layers_default   = ['conv_1','conv_2','conv_3','conv_4','conv_5']

class StyleTransferModel(nn.Module):
    """
    VGG を順に通し、指定層の出力を収集。
    """
    def __init__(self, cnn, mean, std,
                 content_layers=content_layers_default,
                 style_layers=style_layers_default):
        super().__init__()
        self.content_layers = content_layers
        self.style_layers   = style_layers
        self.model = nn.Sequential(Normalization(mean, std))

        i = 0
        for layer in cnn.children():
            if isinstance(layer, nn.Conv2d):
                i += 1; name = f'conv_{i}'
            elif isinstance(layer, nn.ReLU):
                name = f'relu_{i}'; layer = nn.ReLU(inplace=False)
            elif isinstance(layer, nn.MaxPool2d):
                name = f'pool_{i}'
            elif isinstance(layer, nn.BatchNorm2d):
                name = f'bn_{i}'
            else:
                name = f'layer_{i}'
            self.model.add_module(name, layer)
            # conv_5 近辺までで十分
            if i >= 5 and isinstance(layer, nn.ReLU):
                break

    def forward(self, x):
        c_feats, s_feats = {}, {}
        for name, layer in self.model._modules.items():
            x = layer(x)
            if name in self.content_layers: c_feats[name] = x
            if name in self.style_layers:   s_feats[name] = x
        return c_feats, s_feats

def run_style_transfer(cnn, mean, std, content_img, style_img, input_img,
                       steps=NUM_STEPS, cw=CONTENT_WEIGHT, sw=STYLE_WEIGHT):
    model = StyleTransferModel(cnn, mean, std).to(DEVICE).eval()
    mse = nn.MSELoss()

    with torch.no_grad():
        c_ref, _ = model(content_img)
        _, s_ref = model(style_img)
        s_grams = {l: gram_matrix(s_ref[l]) for l in s_ref}

    x = input_img.clone().requires_grad_(True)

    if USE_LBFGS:
        opt = optim.LBFGS([x])
        run = [0]
        pbar = tqdm(total=steps, desc="optim(L-BFGS)")
        def closure():
            opt.zero_grad()
            c_out, s_out = model(x)
            c_loss = sum(mse(c_out[l], c_ref[l]) for l in c_out)
            s_loss = sum(mse(gram_matrix(s_out[l]), s_grams[l]) for l in s_out)
            loss = cw*c_loss + sw*s_loss
            loss.backward()
            run[0] += 1
            # ざっくり進捗（LBFGSは内部で複数回呼ばれるため10刻みで更新）
            if pbar.n < steps:
                pbar.update(min(10, steps - pbar.n))
            return loss
        opt.step(closure)
        pbar.close()
    else:
        opt = optim.Adam([x], lr=LR_ADAM)
        pbar = tqdm(range(steps), desc="optim(Adam)")
        for _ in pbar:
            opt.zero_grad()
            c_out, s_out = model(x)
            c_loss = sum(mse(c_out[l], c_ref[l]) for l in c_out)
            s_loss = sum(mse(gram_matrix(s_out[l]), s_grams[l]) for l in s_out)
            loss = cw*c_loss + sw*s_loss
            loss.backward()
            opt.step()
    return x.detach()

def main():
    require_file(STYLE_PATH,   "style")
    require_file(CONTENT_PATH, "content")

    content_img = safe_load_and_resize(CONTENT_PATH, IMAGE_SIZE)
    style_img   = safe_load_and_resize(STYLE_PATH,   IMAGE_SIZE)

    print("[INFO] VGG19 features をロード")
    vgg = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1).features.to(DEVICE).eval()

    input_img = content_img.clone()
    output = run_style_transfer(vgg, cnn_normalization_mean, cnn_normalization_std,
                                content_img, style_img, input_img,
                                steps=NUM_STEPS, cw=CONTENT_WEIGHT, sw=STYLE_WEIGHT)

    unnormalize_and_save(output, OUTPUT_PATH)
    print("[DONE] 出力:", OUTPUT_PATH)

    Image.open(OUTPUT_PATH).show()

if __name__ == "__main__":
    main()


[INFO] VGG19 features をロード


optim(L-BFGS):  67%|██████▋   | 200/300 [00:21<00:10,  9.09it/s]

[SAVE] output.jpg
[DONE] 出力: output.jpg



