In [None]:
# Mount Google Drive
# Every path used by later cells lives under /content/drive/MyDrive, so this
# cell must run first. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
from PIL import Image
import os


# Source folder with the original photos and destination for the resized copies.
# NOTE(review): the metric cells further down read `real_images`, not
# `real_images_preprocessed` -- confirm which folder the evaluation should use.
input_dir = '/content/drive/MyDrive/Fabric2Garment_Evaluation/real_images'
output_dir = '/content/drive/MyDrive/Fabric2Garment_Evaluation/real_images_preprocessed'

os.makedirs(output_dir, exist_ok=True)

# Output size as (width, height). Images are stretched to exactly this size,
# so the original aspect ratio is not preserved.
target_size = (512, 512)

# Process each image in a deterministic (sorted) order
for filename in sorted(os.listdir(input_dir)):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    img_path = os.path.join(input_dir, filename)
    # The context manager releases the file handle even if decoding fails.
    with Image.open(img_path) as img:
        # Resize with LANCZOS to preserve texture
        resized = img.convert("RGB").resize(target_size, Image.LANCZOS)

    # Every output is written as PNG regardless of the input extension.
    save_path = os.path.join(output_dir, os.path.splitext(filename)[0] + ".png")
    # PNG is lossless and its writer has no `quality` option, so none is passed.
    resized.save(save_path, format="PNG")

print("Preprocessing complete. Images saved to:", output_dir)


✅ Preprocessing complete. Images saved to: /content/drive/MyDrive/Fabric2Garment_Evaluation/real_images_preprocessed


In [None]:
# Install essential libraries
!pip install pytorch-fid lpips transformers ftfy
!pip install git+https://github.com/openai/CLIP.git


Collecting pytorch-fid
  Downloading pytorch_fid-0.3.0-py3-none-any.whl.metadata (5.3 kB)
Collecting lpips
  Downloading lpips-0.1.4-py3-none-any.whl.metadata (10 kB)
Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.0.1->pytorch-fid)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.0.1->pytorch-fid)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.0.1->pytorch-fid)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.0.1->pytorch-fid)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.0.1->pyt

In [None]:
!pip install torchmetrics[image]


Collecting torchmetrics[image]
  Downloading torchmetrics-1.7.0-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics[image])
  Downloading lightning_utilities-0.14.2-py3-none-any.whl.metadata (5.6 kB)
Collecting torch-fidelity<=0.4.0 (from torchmetrics[image])
  Downloading torch_fidelity-0.3.0-py3-none-any.whl.metadata (2.0 kB)
Downloading lightning_utilities-0.14.2-py3-none-any.whl (28 kB)
Downloading torch_fidelity-0.3.0-py3-none-any.whl (37 kB)
Downloading torchmetrics-1.7.0-py3-none-any.whl (960 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m960.9/960.9 kB[0m [31m42.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lightning-utilities, torchmetrics, torch-fidelity
Successfully installed lightning-utilities-0.14.2 torch-fidelity-0.3.0 torchmetrics-1.7.0


In [None]:
!pip install torch-fidelity




# **MODEL 1 - DreamShaper**

# FID & KID

In [None]:
import os
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
import torch
import lpips
from torchvision.models import inception_v3
from torchmetrics.image.fid import FrechetInceptionDistance
from torchmetrics.image.kid import KernelInceptionDistance
import clip


In [None]:
def load_images(path, size=(256, 256)):
    """Load every image in a folder into a single float tensor.

    Files are visited in sorted filename order, so index ``i`` of two batches
    loaded from folders with matching filenames refers to the same item.

    Args:
        path: Directory containing the images.
        size: ``(width, height)`` every image is resized to.

    Returns:
        A tensor of shape ``(N, 3, H, W)`` with values in ``[0, 1]``.

    Raises:
        ValueError: If the folder contains no image files.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.webp', '.tif', '.tiff')
    # Skip stray entries (hidden files, checkpoint folders, ...) that would
    # otherwise make Image.open fail.
    names = [
        name for name in sorted(os.listdir(path))
        if name.lower().endswith(image_exts)
    ]
    if not names:
        raise ValueError(f"No image files found in {path!r}")

    to_tensor = transforms.ToTensor()
    imgs = []
    for img_name in names:
        img_path = os.path.join(path, img_name)
        # Close the file handle as soon as the pixels have been converted.
        with Image.open(img_path) as img:
            imgs.append(to_tensor(img.convert('RGB').resize(size)))
    return torch.stack(imgs)


In [None]:
!pip install torch-fidelity




In [None]:
# Load both sets with load_images' default 256x256 size; the metrics below
# compare the two sets as distributions, not image-by-image.
# NOTE(review): the generated folder holds only a handful of images, so the
# FID/KID estimates are very noisy -- treat them as rough indicators only.
real = load_images('/content/drive/MyDrive/Fabric2Garment_Evaluation/real_images')
gen = load_images('/content/drive/MyDrive/Fabric2Garment_Evaluation/generated_dreamshaper')

# normalize=True tells torchmetrics the inputs are floats in [0, 1] rather than uint8.
# The metric is left on the CPU here (no .to(device)).
fid = FrechetInceptionDistance(normalize=True)
fid.update(real, real=True)
fid.update(gen, real=False)
print("FID:", fid.compute().item())

# Convert to uint8
# KID is created without normalize=True, so it is fed uint8 images in [0, 255].
real_uint8 = (real * 255).clamp(0, 255).byte()
gen_uint8 = (gen * 255).clamp(0, 255).byte()

# compute KID
# subset_size must not exceed the number of images in either set.
kid = KernelInceptionDistance(subset_size=10)
kid.update(real_uint8, real=True)
kid.update(gen_uint8, real=False)
# compute() returns (mean, std); only the mean is reported.
print("KID:", kid.compute()[0].item())


Downloading: "https://github.com/toshas/torch-fidelity/releases/download/v0.2.0/weights-inception-2015-12-05-6726825d.pth" to /root/.cache/torch/hub/checkpoints/weights-inception-2015-12-05-6726825d.pth
100%|██████████| 91.2M/91.2M [00:00<00:00, 243MB/s]


FID: 411.885986328125




KID: 0.1816464364528656


# SSIM (Structural Similarity Index)

In [None]:
!pip install scikit-image




In [None]:
from skimage.metrics import structural_similarity as ssim
import cv2
import numpy as np
import os

def compute_ssim(real_folder, gen_folder):
    """Return the mean SSIM over image pairs that share a filename.

    Each filename in ``real_folder`` is looked up under the same name in
    ``gen_folder``. Both images are resized to 256x256 and compared with
    multichannel SSIM.

    Args:
        real_folder: Directory with the reference images.
        gen_folder: Directory with the generated images.

    Returns:
        The arithmetic mean of the per-pair SSIM scores.

    Raises:
        ValueError: If no filename could be read from both folders.
    """
    scores = []

    # Sorted so the accumulation order (and thus the float sum) is reproducible.
    for filename in sorted(os.listdir(real_folder)):
        real_img = cv2.imread(os.path.join(real_folder, filename))
        gen_img = cv2.imread(os.path.join(gen_folder, filename))

        # cv2.imread returns None for missing or undecodable files.
        if real_img is None or gen_img is None:
            continue

        real_img = cv2.resize(real_img, (256, 256))
        gen_img = cv2.resize(gen_img, (256, 256))

        scores.append(ssim(real_img, gen_img, channel_axis=2))

    if not scores:
        raise ValueError(
            f"No readable image pairs shared between {real_folder!r} and {gen_folder!r}"
        )
    return sum(scores) / len(scores)

# Example usage:
# real_dir / gen_dir are module-level names that the LPIPS and CLIP cells
# below also read, so this cell must run before them.
real_dir = '/content/drive/MyDrive/Fabric2Garment_Evaluation/real_images'
gen_dir = '/content/drive/MyDrive/Fabric2Garment_Evaluation/generated_dreamshaper'

# Pairs are matched by identical filename in both folders.
ssim_score = compute_ssim(real_dir, gen_dir)
print("SSIM:", ssim_score)


SSIM: 0.39331531359851074


# LPIPS (Learned Perceptual Image Patch Similarity)

In [None]:
!pip install lpips




In [None]:
import lpips
import torch
from PIL import Image
from torchvision import transforms

# AlexNet-backed LPIPS model; it is left on the CPU (no .to(device) here).
loss_fn = lpips.LPIPS(net='alex')

# Resize to 256x256 and convert to a float tensor in [0, 1].
# NOTE(review): LPIPS expects inputs in [-1, 1] unless normalize=True is passed
# when the model is called; this transform does not rescale.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

def compute_lpips(real_folder, gen_folder):
    """Return the mean LPIPS distance over image pairs that share a filename.

    Uses the module-level ``transform`` (resize + ToTensor, values in [0, 1])
    and the module-level ``loss_fn`` LPIPS model.

    Args:
        real_folder: Directory with the reference images.
        gen_folder: Directory with the generated images.

    Returns:
        The arithmetic mean of the per-pair LPIPS distances (lower = more similar).

    Raises:
        ValueError: If no filename exists as a file in both folders.
    """
    scores = []
    for filename in sorted(os.listdir(real_folder)):
        real_path = os.path.join(real_folder, filename)
        gen_path = os.path.join(gen_folder, filename)

        # Only score regular files that have a same-named counterpart.
        if not os.path.isfile(real_path) or not os.path.isfile(gen_path):
            continue

        with Image.open(real_path) as real_pil, Image.open(gen_path) as gen_pil:
            real_img = transform(real_pil.convert("RGB")).unsqueeze(0)
            gen_img = transform(gen_pil.convert("RGB")).unsqueeze(0)

        # The tensors are in [0, 1]; normalize=True makes LPIPS rescale them to
        # the [-1, 1] range the network was trained on. no_grad avoids building
        # an autograd graph for a pure evaluation.
        with torch.no_grad():
            score = loss_fn(real_img, gen_img, normalize=True).item()
        scores.append(score)

    if not scores:
        raise ValueError(
            f"No image pairs shared between {real_folder!r} and {gen_folder!r}"
        )
    return sum(scores) / len(scores)

# real_dir / gen_dir come from the SSIM cell above (DreamShaper outputs).
lpips_score = compute_lpips(real_dir, gen_dir)
print("LPIPS:", lpips_score)


Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]


Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:02<00:00, 106MB/s] 


Loading model from: /usr/local/lib/python3.11/dist-packages/lpips/weights/v0.1/alex.pth
LPIPS: 0.6075943410396576


# CLIP Score (CLIP-S)

In [None]:
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git


Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-9s9_7n7x
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-9s9_7n7x
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
import clip
from PIL import Image

# Use the GPU when available. `torch` is not imported in this cell; it relies
# on the import from an earlier cell.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Returns the CLIP model and its matching image preprocessing pipeline.
model, preprocess = clip.load("ViT-B/32", device=device)

def compute_clip_score(gen_folder, prompts_dict):
    """Return the mean CLIP image-text cosine similarity for a folder.

    Uses the module-level ``model``, ``preprocess`` and ``device`` created by
    ``clip.load``. The value is the raw cosine similarity between the L2-normalised
    image and text embeddings (no rescaling or clipping is applied).

    Args:
        gen_folder: Directory with the generated images.
        prompts_dict: Maps an image filename to its text prompt. Files without
            an entry are skipped.

    Returns:
        The arithmetic mean of the per-image similarities.

    Raises:
        ValueError: If no file in ``gen_folder`` has a prompt.
    """
    scores = []

    # Sorted so the accumulation order is reproducible across runs.
    for filename in sorted(os.listdir(gen_folder)):
        prompt = prompts_dict.get(filename, None)
        if prompt is None:
            continue

        image_path = os.path.join(gen_folder, filename)
        # Release the file handle once the image has been preprocessed.
        with Image.open(image_path) as pil_img:
            image = preprocess(pil_img).unsqueeze(0).to(device)
        text = clip.tokenize([prompt]).to(device)

        with torch.no_grad():
            image_features = model.encode_image(image)
            text_features = model.encode_text(text)

            # Unit-normalise so the dot product is a cosine similarity.
            image_features /= image_features.norm(dim=-1, keepdim=True)
            text_features /= text_features.norm(dim=-1, keepdim=True)

            scores.append((image_features @ text_features.T).item())

    if not scores:
        raise ValueError(
            f"No file in {gen_folder!r} has a matching entry in prompts_dict"
        )
    return sum(scores) / len(scores)

# prompts dictionary (filename: prompt)
# Keys must match the filenames in gen_dir exactly; compute_clip_score skips
# any file that has no entry here.
prompts = {
    "fabric_01.png": "A checkered shirt made from this fabric",
    "fabric_02.png": "A denim skirt made from this fabric",
    "fabric_03.png": "A chino pant made from this fabric",
    "fabric_04.png": "A dress shirt made from this fabric",
    "fabric_05.png": "A maxi skirt made from this fabric",
    "fabric_06.png": "A hoodie made from this fabric",
    "fabric_07.png": "A floral dress made from this fabric",
    "fabric_08.png": "A puffer jacket made from this fabric",
    "fabric_09.png": "A sleeveless dress with buttons made from this fabric",
    "fabric_10.png": "A polo shirt made from this fabric",
}

# gen_dir still points at the DreamShaper outputs set in the SSIM cell.
clip_s = compute_clip_score(gen_dir, prompts)
print("CLIP-S:", clip_s)


100%|████████████████████████████████████████| 338M/338M [00:02<00:00, 126MiB/s]


CLIP-S: 0.26879929900169375


In [None]:
import os

print(os.listdir(gen_dir))


['fabric_01.png', 'fabric_02.png', 'fabric_03.png', 'fabric_04.png', 'fabric_05.png', 'fabric_06.png', 'fabric_07.png', 'fabric_08.png', 'fabric_09.png', 'fabric_10.png']


# **MODEL 2 - RealisticVision**


# FID & KID

In [None]:
import os
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
import torch
import lpips
from torchvision.models import inception_v3
from torchmetrics.image.fid import FrechetInceptionDistance
from torchmetrics.image.kid import KernelInceptionDistance
import clip
from skimage.metrics import structural_similarity as ssim


device = "cuda" if torch.cuda.is_available() else "cpu"

# we load images with consistent transform
def load_images(path, size=(256, 256)):
    """Load every image in a folder into a single float tensor.

    Files are visited in sorted filename order, so index ``i`` of two batches
    loaded from folders with matching filenames refers to the same item.

    Args:
        path: Directory containing the images.
        size: ``(width, height)`` every image is resized to.

    Returns:
        A tensor of shape ``(N, 3, H, W)`` with values in ``[0, 1]``.

    Raises:
        ValueError: If the folder contains no image files.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.webp', '.tif', '.tiff')
    # Skip stray entries (hidden files, checkpoint folders, ...) that would
    # otherwise make Image.open fail.
    names = [
        name for name in sorted(os.listdir(path))
        if name.lower().endswith(image_exts)
    ]
    if not names:
        raise ValueError(f"No image files found in {path!r}")

    to_tensor = transforms.ToTensor()
    imgs = []
    for img_name in names:
        img_path = os.path.join(path, img_name)
        # Close the file handle as soon as the pixels have been converted.
        with Image.open(img_path) as img:
            imgs.append(to_tensor(img.convert('RGB').resize(size)))
    return torch.stack(imgs)

# load image folders
# Both sets use load_images' default 256x256 size.
# NOTE(review): with only a handful of images per set the FID/KID estimates
# are very noisy -- treat them as rough indicators only.
real = load_images('/content/drive/MyDrive/Fabric2Garment_Evaluation/real_images')
rv = load_images('/content/drive/MyDrive/Fabric2Garment_Evaluation/generated_realisticvision')

# FID
# normalize=True tells torchmetrics the inputs are floats in [0, 1].
# Metric and inputs are moved to the same device before update().
fid = FrechetInceptionDistance(normalize=True).to(device)
fid.update(real.to(device), real=True)
fid.update(rv.to(device), real=False)
print("FID (Realistic Vision):", fid.compute().item())

# KID
# KID is created without normalize=True, so it is fed uint8 images in [0, 255].
real_uint8 = (real * 255).clamp(0, 255).byte()
rv_uint8 = (rv * 255).clamp(0, 255).byte()

# subset_size must not exceed the number of images in either set.
kid = KernelInceptionDistance(subset_size=10).to(device)
kid.update(real_uint8.to(device), real=True)
kid.update(rv_uint8.to(device), real=False)
# compute() returns (mean, std); only the mean is reported.
print("KID (Realistic Vision):", kid.compute()[0].item())


FID (Realistic Vision): 362.1177673339844




KID (Realistic Vision): 0.089350625872612


# LPIPS

In [None]:
# LPIPS
lpips_model = lpips.LPIPS(net='alex').to(device)


def _load_rgb_images(folder):
    """Open every file in `folder` (sorted by name) and return RGB PIL images."""
    images = []
    for name in sorted(os.listdir(folder)):
        # convert() yields an in-memory copy, so the file can be closed right away.
        with Image.open(os.path.join(folder, name)) as img:
            images.append(img.convert('RGB'))
    return images


# These two lists are also read by the SSIM cell that follows.
rv_images = _load_rgb_images('/content/drive/MyDrive/Fabric2Garment_Evaluation/generated_realisticvision')
real_images = _load_rgb_images('/content/drive/MyDrive/Fabric2Garment_Evaluation/real_images')

# Pairs are matched by position in the sorted listings; zip() would silently
# drop unmatched images while the mean below divides by len(real_images).
if not real_images or len(real_images) != len(rv_images):
    raise ValueError(
        f"Expected the same non-zero number of real and generated images, "
        f"got {len(real_images)} and {len(rv_images)}"
    )

# Built once instead of on every iteration. Normalize maps [0, 1] to [-1, 1],
# the input range LPIPS expects.
transform_lpips = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

total_lpips = 0
# Evaluation only: no autograd graph is needed.
with torch.no_grad():
    for r_img, g_img in zip(real_images, rv_images):
        r_tensor = transform_lpips(r_img).unsqueeze(0).to(device)
        g_tensor = transform_lpips(g_img).unsqueeze(0).to(device)
        dist = lpips_model(r_tensor, g_tensor)
        total_lpips += dist.item()

print("LPIPS (Realistic Vision):", total_lpips / len(real_images))

Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /usr/local/lib/python3.11/dist-packages/lpips/weights/v0.1/alex.pth
LPIPS (Realistic Vision): 0.6628468275070191


# SSIM

In [None]:
# SSIM
# real_images / rv_images are the lists of RGB PIL images built in the LPIPS
# cell above; pairs are matched by position.
if not real_images or len(real_images) != len(rv_images):
    # zip() would silently truncate while the mean divides by len(real_images).
    raise ValueError(
        f"Expected the same non-zero number of real and generated images, "
        f"got {len(real_images)} and {len(rv_images)}"
    )

ssim_total = 0
for r_img, g_img in zip(real_images, rv_images):
    r_np = np.array(r_img.resize((256, 256)))
    g_np = np.array(g_img.resize((256, 256)))
    # The last axis of the H x W x 3 array holds the colour channels.
    s = ssim(r_np, g_np, channel_axis=-1)

    ssim_total += s

print("SSIM (Realistic Vision):", ssim_total / len(real_images))

SSIM (Realistic Vision): 0.4459733678809178


# CLIP-S

In [None]:
import clip
from PIL import Image

# we set generation directory for Model 2
# Rebinding gen_dir changes the folder read by any later cell that uses it.
gen_dir = '/content/drive/MyDrive/Fabric2Garment_Evaluation/generated_realisticvision'

device = "cuda" if torch.cuda.is_available() else "cpu"
# Loads the same ViT-B/32 checkpoint as the Model 1 CLIP cell.
model, preprocess = clip.load("ViT-B/32", device=device)

def compute_clip_score(gen_folder, prompts_dict):
    """Return the mean CLIP image-text cosine similarity for a folder.

    Uses the module-level ``model``, ``preprocess`` and ``device`` created by
    ``clip.load``. The value is the raw cosine similarity between the L2-normalised
    image and text embeddings (no rescaling or clipping is applied).

    Args:
        gen_folder: Directory with the generated images.
        prompts_dict: Maps an image filename to its text prompt. Files without
            an entry are skipped.

    Returns:
        The arithmetic mean of the per-image similarities.

    Raises:
        ValueError: If no file in ``gen_folder`` has a prompt.
    """
    scores = []

    # Sorted so the accumulation order is reproducible across runs.
    for filename in sorted(os.listdir(gen_folder)):
        prompt = prompts_dict.get(filename, None)
        if prompt is None:
            continue

        image_path = os.path.join(gen_folder, filename)
        # Release the file handle once the image has been preprocessed.
        with Image.open(image_path) as pil_img:
            image = preprocess(pil_img).unsqueeze(0).to(device)
        text = clip.tokenize([prompt]).to(device)

        with torch.no_grad():
            image_features = model.encode_image(image)
            text_features = model.encode_text(text)

            # Unit-normalise so the dot product is a cosine similarity.
            image_features /= image_features.norm(dim=-1, keepdim=True)
            text_features /= text_features.norm(dim=-1, keepdim=True)

            scores.append((image_features @ text_features.T).item())

    if not scores:
        raise ValueError(
            f"No file in {gen_folder!r} has a matching entry in prompts_dict"
        )
    return sum(scores) / len(scores)

# Filename -> prompt. Keys must match the filenames in gen_dir exactly;
# compute_clip_score skips any file that has no entry here.
prompts = {
    "fabric_01.png": "A checkered shirt made from this fabric",
    "fabric_02.png": "A denim skirt made from this fabric",
    "fabric_03.png": "A chino pant made from this fabric",
    "fabric_04.png": "A dress shirt made from this fabric",
    "fabric_05.png": "A maxi skirt made from this fabric",
    "fabric_06.png": "A hoodie made from this fabric",
    "fabric_07.png": "A floral dress made from this fabric",
    "fabric_08.png": "A puffer jacket made from this fabric",
    "fabric_09.png": "A sleeveless dress with buttons made from this fabric",
    "fabric_10.png": "A polo shirt made from this fabric",
}

# gen_dir was set to the RealisticVision folder earlier in this cell.
clip_s = compute_clip_score(gen_dir, prompts)
print("CLIP-S (Realistic Vision):", clip_s)


CLIP-S (Realistic Vision): 0.2828796565532684


# **MODEL 3 - MajicMix v5**


# FID & KID

In [None]:
def load_images(path, size=(256, 256)):
    """Load every image in a folder into a single float tensor.

    Files are visited in sorted filename order, so index ``i`` of two batches
    loaded from folders with matching filenames refers to the same item.

    Args:
        path: Directory containing the images.
        size: ``(width, height)`` every image is resized to.

    Returns:
        A tensor of shape ``(N, 3, H, W)`` with values in ``[0, 1]``.

    Raises:
        ValueError: If the folder contains no image files.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.webp', '.tif', '.tiff')
    # Skip stray entries (hidden files, checkpoint folders, ...) that would
    # otherwise make Image.open fail.
    names = [
        name for name in sorted(os.listdir(path))
        if name.lower().endswith(image_exts)
    ]
    if not names:
        raise ValueError(f"No image files found in {path!r}")

    to_tensor = transforms.ToTensor()
    imgs = []
    for img_name in names:
        img_path = os.path.join(path, img_name)
        # Close the file handle as soon as the pixels have been converted.
        with Image.open(img_path) as img:
            imgs.append(to_tensor(img.convert('RGB').resize(size)))
    return torch.stack(imgs)

# NOTE: `real_images` is rebound here to a tensor batch. The Model 2 LPIPS/SSIM
# cells bound the same name to a list of PIL images, so re-running those cells
# after this one requires re-running the Model 2 LPIPS cell first.
real_images = load_images('/content/drive/MyDrive/Fabric2Garment_Evaluation/real_images')
gen_majic = load_images('/content/drive/MyDrive/Fabric2Garment_Evaluation/generated_majicmix')


In [None]:
from torchmetrics.image.fid import FrechetInceptionDistance
from torchmetrics.image.kid import KernelInceptionDistance

# Convert to uint8
# Only KID uses the uint8 copies; FID below is fed the float tensors.
real_uint8 = (real_images * 255).clamp(0, 255).byte()
gen_uint8 = (gen_majic * 255).clamp(0, 255).byte()

# normalize=True tells torchmetrics the inputs are floats in [0, 1].
# `device` is not defined in this cell; it comes from an earlier cell.
# NOTE(review): with only a handful of images per set the FID/KID estimates
# are very noisy -- treat them as rough indicators only.
fid = FrechetInceptionDistance(normalize=True).to(device)
fid.update(real_images.to(device), real=True)
fid.update(gen_majic.to(device), real=False)
print("FID (MajicMix):", fid.compute().item())

# subset_size must not exceed the number of images in either set.
kid = KernelInceptionDistance(subset_size=10).to(device)
kid.update(real_uint8.to(device), real=True)
kid.update(gen_uint8.to(device), real=False)
# compute() returns (mean, std); only the mean is reported.
print("KID (MajicMix):", kid.compute()[0].item())


FID (MajicMix): 387.6136169433594
KID (MajicMix): 0.17328110337257385


# SSIM & LPIPS

In [None]:
from skimage.metrics import structural_similarity as ssim
import lpips

# real_images / gen_majic are the float tensor batches in [0, 1] produced by
# load_images; pairs are matched by index.
if len(real_images) == 0 or len(real_images) != len(gen_majic):
    raise ValueError(
        f"Expected the same non-zero number of real and generated images, "
        f"got {len(real_images)} and {len(gen_majic)}"
    )

ssim_total = 0
lpips_fn = lpips.LPIPS(net='alex').to(device)
lpips_total = 0

# Built once instead of on every iteration.
to_pil = transforms.ToPILImage()
resize_256 = transforms.Resize((256, 256))

# Evaluation only: no autograd graph is needed.
with torch.no_grad():
    for i in range(len(real_images)):
        r_img = to_pil(real_images[i])
        g_img = to_pil(gen_majic[i])

        # SSIM
        r_np = np.array(r_img.resize((256, 256)))
        g_np = np.array(g_img.resize((256, 256)))
        s = ssim(r_np, g_np, channel_axis=-1)
        ssim_total += s

        # LPIPS
        r_tensor = resize_256(real_images[i].unsqueeze(0)).to(device)
        g_tensor = resize_256(gen_majic[i].unsqueeze(0)).to(device)
        # The tensors are in [0, 1]; normalize=True makes LPIPS rescale them to
        # the [-1, 1] range it expects, matching the Model 2 cell which applies
        # Normalize((0.5,), (0.5,)) before calling LPIPS.
        d = lpips_fn(r_tensor, g_tensor, normalize=True).item()
        lpips_total += d

print("SSIM (MajicMix):", ssim_total / len(real_images))
print("LPIPS (MajicMix):", lpips_total / len(real_images))


Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /usr/local/lib/python3.11/dist-packages/lpips/weights/v0.1/alex.pth
SSIM (MajicMix): 0.33650602704451316
LPIPS (MajicMix): 0.6432096660137177


# CLIP-S

In [None]:
# Point gen_dir at the MajicMix outputs for the CLIP score below.
gen_dir = '/content/drive/MyDrive/Fabric2Garment_Evaluation/generated_majicmix'

# Filename -> prompt. Keys must match the filenames in gen_dir exactly;
# compute_clip_score skips any file that has no entry here.
prompts = {
    "fabric_01.png": "A checkered shirt made from this fabric",
    "fabric_02.png": "A denim skirt made from this fabric",
    "fabric_03.png": "A chino pant made from this fabric",
    "fabric_04.png": "A dress shirt made from this fabric",
    "fabric_05.png": "A maxi skirt made from this fabric",
    "fabric_06.png": "A hoodie made from this fabric",
    "fabric_07.png": "A floral dress made from this fabric",
    "fabric_08.png": "A puffer jacket made from this fabric",
    "fabric_09.png": "A sleeveless dress with buttons made from this fabric",
    "fabric_10.png": "A polo shirt made from this fabric",
}

def compute_clip_score(gen_folder, prompts_dict):
    """Return the mean CLIP image-text cosine similarity for a folder.

    Uses the module-level ``model``, ``preprocess`` and ``device`` created by
    ``clip.load``. The value is the raw cosine similarity between the L2-normalised
    image and text embeddings (no rescaling or clipping is applied).

    Args:
        gen_folder: Directory with the generated images.
        prompts_dict: Maps an image filename to its text prompt. Files with a
            missing or empty prompt are skipped.

    Returns:
        The arithmetic mean of the per-image similarities.

    Raises:
        ValueError: If no file in ``gen_folder`` has a usable prompt.
    """
    scores = []
    for filename in sorted(os.listdir(gen_folder)):
        prompt = prompts_dict.get(filename)
        if not prompt:
            continue

        image_path = os.path.join(gen_folder, filename)
        # Release the file handle once the image has been preprocessed.
        with Image.open(image_path) as pil_img:
            image = preprocess(pil_img).unsqueeze(0).to(device)
        text = clip.tokenize([prompt]).to(device)

        with torch.no_grad():
            image_features = model.encode_image(image)
            text_features = model.encode_text(text)

            # Unit-normalise so the dot product is a cosine similarity.
            image_features /= image_features.norm(dim=-1, keepdim=True)
            text_features /= text_features.norm(dim=-1, keepdim=True)

            scores.append((image_features @ text_features.T).item())

    if not scores:
        raise ValueError(
            f"No file in {gen_folder!r} has a matching entry in prompts_dict"
        )
    return sum(scores) / len(scores)

# `model`, `preprocess` and `device` are not created in this cell; the call
# relies on the CLIP model loaded by an earlier cell.
clip_s = compute_clip_score(gen_dir, prompts)
print("CLIP-S (MajicMix):", clip_s)


CLIP-S (MajicMix): 0.2661693707108498
