In [9]:
import os, random, json
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from google.colab import drive
from google.colab import files

In [2]:
# ================== CONFIG ==================
from google.colab import drive
drive.mount('/content/drive')

# Clean destination
!rm -rf /content/jianjin008
!mkdir -p /content/jianjin008

# Quiet unzip into a temp folder
!unzip -q "/content/drive/MyDrive/Samsung_Datasets/jianjin008.zip" -d /content/jianjin008_raw

# Move all contents of the icip folder up one level
!rsync -a /content/jianjin008_raw/icip/ /content/jianjin008/

# Check that the structure is now flattened
!ls -R /content/jianjin008 | head -20

Mounted at /content/drive
/content/jianjin008:
jnd_img
mos.txt
paper.pdf
README.txt
ref_img

/content/jianjin008/jnd_img:
I01_02_01.png
I01_06_01.png
I01_08_01.png
I01_09_01.png
I01_10_01.png
I01_16_01.png
I01_16_02.png
I01_17_01.png
I01_17_02.png
I01_19_01.png
I01_19_02.png
I01_20_01.png


In [3]:
# ========== CONFIG ==========
# Input locations inside the unpacked dataset folder.
DATASET_PATH = "/content/jianjin008"
REF_PATH = os.path.join(DATASET_PATH, "ref_img")
DIST_PATH = os.path.join(DATASET_PATH, "jnd_img")
MOS_FILE = os.path.join(DATASET_PATH, "mos.txt")

# Output locations: everything is written below OUTPUT_PATH.
OUTPUT_PATH = "/content/jianjin008_patches"
PATCHES_DIR = os.path.join(OUTPUT_PATH, "patches")

SEED = 42           # fixed seed so file selection and crop positions are reproducible
NUM_SAMPLES = 1000  # maximum number of MOS entries taken after shuffling
PATCH_SIZE = 20     # side length (pixels) of the square crop
CENTER_SIZE = 12    # side length (pixels) of the central square that fuse_patch rewrites
ALPHA = 0.5  # blending factor
FUSION_METHOD = "blend" # or copy  (NOTE(review): not referenced by the cells shown here)

# Seed Python's RNG once, before any shuffle / randint call, so that a
# Restart & Run All reproduces the same selection and the same crop origins.
random.seed(SEED)

os.makedirs(PATCHES_DIR, exist_ok=True)

In [4]:
# ================== LOAD MOS ==================
def load_mos(mos_file):
    """Parse a MOS text file into a ``{name_without_extension: score}`` dict.

    Each useful line holds exactly two whitespace-separated tokens: a file
    name and a numeric score. Lines with any other token count (blank lines
    included) are ignored. The file extension, if present, is stripped from
    the name before it is used as the key; the score is stored as ``float``.

    Raises:
        ValueError: if the second token of a two-token line is not a number.
    """
    scores = {}
    with open(mos_file, "r") as handle:
        for raw_line in handle:
            tokens = raw_line.strip().split()
            if len(tokens) != 2:
                continue
            image_name, raw_score = tokens
            stem, _extension = os.path.splitext(image_name)
            scores[stem] = float(raw_score)
    return scores

# Read the MOS table, then pick a random subset of its entries: the key list
# is shuffled in place and at most NUM_SAMPLES names are kept from the front.
mos_dict = load_mos(MOS_FILE)
all_files = [name for name in mos_dict]
random.shuffle(all_files)
selected_files = all_files[0:NUM_SAMPLES]

In [5]:
# ================== HELPERS ==================
def compute_psnr(img1, img2):
    """Return the PSNR in dB between two equally shaped images.

    Both inputs are converted to float32 arrays; the peak value is fixed at
    255. When the inputs are identical (zero mean squared error) the result
    is ``float("inf")``.
    """
    difference = np.array(img1, dtype=np.float32) - np.array(img2, dtype=np.float32)
    mse = np.mean(np.square(difference))
    if mse != 0:
        return 10 * np.log10((255 ** 2) / mse)
    return float("inf")

def extract_patch(img, x, y, size=20):
    """Crop a ``size`` x ``size`` square from ``img`` with top-left corner (x, y).

    The box handed to ``img.crop`` is ``(left, top, right, bottom)``.
    """
    right = x + size
    bottom = y + size
    box = (x, y, right, bottom)
    return img.crop(box)

def fuse_patch(clean_patch, dist_patch, mode="blend", alpha=0.5, center_size=None):
    """Fuse the central square of a clean patch into a distorted patch.

    The result starts as a copy of ``dist_patch``; only a centred square of
    side ``center_size`` is rewritten.

    Args:
        clean_patch: Reference patch (anything ``np.array`` accepts, e.g. a
            PIL image).
        dist_patch: Distorted patch with the same shape as ``clean_patch``.
        mode: ``"copy"`` replaces the centre with the clean pixels; any other
            value blends the two patches.
        alpha: Weight of the clean pixels when blending; the distorted pixels
            get ``1 - alpha``.
        center_size: Side of the central square in pixels. Defaults to the
            module-level ``CENTER_SIZE``.

    Returns:
        A PIL image holding the fused patch as uint8.

    Raises:
        ValueError: if the two patches do not have the same shape.
    """
    if center_size is None:
        center_size = CENTER_SIZE

    clean_arr = np.array(clean_patch).astype(np.float32)
    dist_arr = np.array(dist_patch).astype(np.float32)
    if clean_arr.shape != dist_arr.shape:
        raise ValueError(
            f"Patch shapes differ: clean {clean_arr.shape} vs distorted {dist_arr.shape}"
        )

    # Only the first two axes are needed, so 2-D (single-channel) patches
    # work as well as H x W x C ones.
    h, w = clean_arr.shape[:2]
    cx, cy = w // 2, h // 2
    half = center_size // 2
    # Clamp the window to the patch so an oversized center_size cannot turn
    # into negative (wrap-around) slice indices.
    x1, y1 = max(cx - half, 0), max(cy - half, 0)
    x2, y2 = min(x1 + center_size, w), min(y1 + center_size, h)

    fused = dist_arr.copy()
    if mode == "copy":
        fused[y1:y2, x1:x2] = clean_arr[y1:y2, x1:x2]
    else:  # blend
        fused[y1:y2, x1:x2] = alpha * clean_arr[y1:y2, x1:x2] + (1 - alpha) * dist_arr[y1:y2, x1:x2]

    # Round to the nearest level and clip before the uint8 cast: a bare cast
    # truncates blended values (e.g. 100.5 -> 100) and wraps around for
    # anything outside 0..255 (possible when alpha is outside [0, 1]).
    fused = np.clip(np.rint(fused), 0, 255).astype(np.uint8)
    return Image.fromarray(fused)

In [6]:
# ================== MAIN LOOP (Samsung format, Dual Patch) ==================
metadata = []
psnr_values = []

DATASET_NAME = "Jianjin008"

for idx, fname in enumerate(selected_files, start=1):
    # Parse distorted filename
    parts = fname.lower().split("_")
    if len(parts) < 3:
        print(f"Skipping {fname}, invalid filename format")
        continue

    ref_id = parts[0].upper()
    distortion_type = parts[1]
    distortion_level = parts[2].split(".")[0]

    # ----- Reference lookup -----
    clean_file = None
    for ext in [".BMP", ".bmp", ".PNG", ".png"]:
        candidate = os.path.join(REF_PATH, f"{ref_id}{ext}")
        if os.path.exists(candidate):
            clean_file = candidate
            break
    if clean_file is None:
        print(f"Skipping {fname}, reference not found")
        continue

    # ----- Distorted lookup -----
    dist_file = None
    for ext in [".BMP", ".bmp", ".PNG", ".png"]:
        candidate = os.path.join(DIST_PATH, os.path.splitext(fname)[0] + ext)
        if os.path.exists(candidate):
            dist_file = candidate
            break
    if dist_file is None:
        print(f"Skipping {fname}, distorted not found")
        continue

    # ----- Load images -----
    clean_img = Image.open(clean_file).convert("RGB")
    dist_img = Image.open(dist_file).convert("RGB")
    w, h = clean_img.size

    # Random coords
    x = random.randint(0, w - PATCH_SIZE)
    y = random.randint(0, h - PATCH_SIZE)

    clean_patch = extract_patch(clean_img, x, y, PATCH_SIZE)
    dist_patch = extract_patch(dist_img, x, y, PATCH_SIZE)

    # ----- Generate ref + new patches -----
    for fusion_mode in ["copy", "blend"]:
        method_label = "ref" if fusion_mode == "copy" else "new"

        fused_patch = fuse_patch(clean_patch, dist_patch, mode=fusion_mode, alpha=ALPHA)
        psnr = compute_psnr(clean_patch, fused_patch)
        psnr_values.append(psnr)

        out_name = f"{method_label}_patch_{idx:04}.png"
        fused_patch.save(os.path.join(PATCHES_DIR, out_name))

        # Get MOS
        mos_score = mos_dict.get(fname)
        if mos_score is None:
            continue
        normalized_score = round(mos_score / 9, 3)

        # Assign Samsung-style IDs
        clean_id = 6000000 + idx
        dist_id = 7000000 + idx
        bp_map = dist_id + 1

        # Bin MOS → color and score
        if normalized_score >= 0.66:
            color = "GREEN"
            binned_score = 1
        elif normalized_score >= 0.33:
            color = "ORANGE"
            binned_score = 0.5
        else:
            color = "RED"
            binned_score = 0

        # Build metadata entry
        metadata.append({
            "unique_sample_id": f"{method_label}_{DATASET_NAME}_case{distortion_type}_{y}_{x}",
            "clean_image": clean_id,
            "distorted_image": dist_id,
            "score": binned_score,
            "metadata": {
                "crop_id": dist_id,
                "image_source": DATASET_NAME,
                "bp_case": f"case{distortion_type}",
                "method": method_label,
                "color": color,
                "score": binned_score,
                "roi": [x, y, PATCH_SIZE, PATCH_SIZE],
                "bp_map": bp_map,
                "IQE_workspace_number": 990,
                "mos_original": mos_score,
                "mos_normalized": normalized_score
            }
        })


Skipping I33_17_02, distorted not found


This sample is skipped because `mos.txt` lists `I33_17_02`, but no image with that name exists in the dataset's `jnd_img` folder.

In [7]:
# ================== SAVE RESULTS ==================
# Write the collected metadata as JSON and a histogram of the finite PSNR
# values (infinite entries, i.e. identical patches, are left out of the plot).
os.makedirs(OUTPUT_PATH, exist_ok=True)

metadata_file = os.path.join(OUTPUT_PATH, "metadata.json")
with open(metadata_file, "w") as out_handle:
    json.dump(metadata, out_handle, indent=2)

finite_psnr = [value for value in psnr_values if value != float("inf")]
histogram_file = os.path.join(OUTPUT_PATH, "psnr_histogram.png")

plt.hist(finite_psnr, bins=30)
plt.title("Histogram of PSNR values")
plt.xlabel("PSNR (dB)")
plt.ylabel("Frequency")
plt.savefig(histogram_file)
plt.close()

In [10]:
!zip -qr jianjin008_patches.zip jianjin008_patches
files.download("jianjin008_patches.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>