In [65]:
import json
import os
import random
import shutil

import matplotlib.pyplot as plt
import numpy as np
from google.colab import files
from PIL import Image

In [66]:
# ================== CONFIG ==================
from google.colab import drive
drive.mount('/content/drive')

# Install pv for progress visualization
!apt-get -qq install unrar pv

# Create target directory
!mkdir -p /content/tid2013

# Extract with progress bar (quiet mode for unrar, pv shows progress)
!unrar x -y -idq "/content/drive/MyDrive/Samsung_Datasets/tid2013.rar" /content/tid2013/ | pv -l >/dev/null

DATASET_PATH = "/content/tid2013"
REF_PATH = os.path.join(DATASET_PATH, "reference_images")
DIST_PATH = os.path.join(DATASET_PATH, "distorted_images")
MOS_FILE = os.path.join(DATASET_PATH, "mos_with_names.txt")
OUTPUT_PATH = "/content/tid2013_patches"
PATCHES_DIR = os.path.join(OUTPUT_PATH, "patches")

NUM_SAMPLES = 1000
PATCH_SIZE = 20
CENTER_SIZE = 12
ALPHA = 0.5

os.makedirs(PATCHES_DIR, exist_ok=True)

!du -sh /content/tid2013

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
0.00  0:00:54 [0.00 /s] [<=>                                                   ]
1.7G	/content/tid2013


In [67]:
# ================== LOAD MOS ==================
def load_mos_with_names(mos_file):
    """Parse a two-column MOS text file into a ``{filename: score}`` dict.

    Each usable line holds exactly two whitespace-separated tokens, in either
    order: a file name and its score. A first token ending in ``.bmp``
    (case-insensitive) is taken as the file name; otherwise the first token is
    the score. Lines with any other token count are ignored.

    Args:
        mos_file: Path of the text file to read.

    Returns:
        dict mapping file name (as written in the file) to its float score.

    Raises:
        ValueError: if the token treated as the score is not a valid float.
    """
    scores_by_name = {}
    with open(mos_file, "r") as handle:
        for raw_line in handle:
            tokens = raw_line.strip().split()
            if len(tokens) != 2:
                continue
            first, second = tokens
            # Decide which column is the file name by its extension
            name_comes_first = first.lower().endswith('.bmp')
            image_name = first if name_comes_first else second
            score_text = second if name_comes_first else first
            scores_by_name[image_name] = float(score_text)
    return scores_by_name


In [68]:
# ================== HELPERS ==================
def compute_psnr(img1, img2):
    """Return the PSNR in dB between two equally shaped images.

    Both inputs are converted to float32 arrays; the peak value is 255.
    Identical inputs (zero mean squared error) yield ``float("inf")``.
    """
    first = np.array(img1, dtype=np.float32)
    second = np.array(img2, dtype=np.float32)
    difference = first - second
    mse = np.mean(difference ** 2)
    if mse == 0:
        return float("inf")
    peak_squared = 255 ** 2
    return 10 * np.log10(peak_squared / mse)

def extract_patch(img, x, y, size=20):
    """Crop the ``size`` x ``size`` square whose top-left corner is ``(x, y)``.

    ``img`` must provide a ``crop((left, upper, right, lower))`` method; the
    value returned by that call is passed straight back.
    """
    box = (x, y, x + size, y + size)
    return img.crop(box)

def fuse_patch(clean_patch, dist_patch, mode="blend", alpha=0.5, center_size=None):
    """Fuse the central window of a clean patch into a distorted patch.

    The result starts as a copy of ``dist_patch``; a centred square window is
    then either replaced by the clean pixels or blended with them.

    Args:
        clean_patch: Clean patch, convertible with ``np.array`` (2-D grayscale
            or 3-D with a trailing channel axis).
        dist_patch: Distorted patch with the same shape as ``clean_patch``.
        mode: ``"copy"`` overwrites the window with the clean pixels; any other
            value blends the two patches inside the window.
        alpha: Weight of the clean pixels when blending; the distorted pixels
            get ``1 - alpha``.
        center_size: Side length of the window in pixels. Defaults to the
            module-level ``CENTER_SIZE``.

    Returns:
        PIL image built from the fused uint8 array.
    """
    clean_arr = np.array(clean_patch).astype(np.float32)
    dist_arr = np.array(dist_patch).astype(np.float32)
    if center_size is None:
        center_size = CENTER_SIZE

    # shape[:2] works for both (h, w) and (h, w, channels) arrays
    h, w = clean_arr.shape[:2]
    cx, cy = w // 2, h // 2
    half = center_size // 2
    # Clamp the window to the patch so an oversized window cannot produce
    # negative slice bounds
    x1, y1 = max(cx - half, 0), max(cy - half, 0)
    x2, y2 = min(x1 + center_size, w), min(y1 + center_size, h)

    fused = dist_arr.copy()
    if mode == "copy":
        fused[y1:y2, x1:x2] = clean_arr[y1:y2, x1:x2]
    else:  # blend
        fused[y1:y2, x1:x2] = alpha * clean_arr[y1:y2, x1:x2] + (1 - alpha) * dist_arr[y1:y2, x1:x2]

    # Round to nearest before the uint8 cast; a bare cast truncates and biases
    # blended pixels downward. Clip guards against alpha outside [0, 1].
    fused = np.clip(np.rint(fused), 0, 255)
    return Image.fromarray(fused.astype(np.uint8))

In [69]:
# ================== LOAD AND SELECT FILES ==================
# Read the MOS table, then draw a random subset of distorted image names.
mos_dict = load_mos_with_names(MOS_FILE)

# Shuffle a list of the file names so its head is a random sample
all_files = [name for name in mos_dict]
random.shuffle(all_files)

# The first NUM_SAMPLES shuffled names are the images patches are cut from
selected_files = all_files[:NUM_SAMPLES]

print(
    f"Loaded {len(mos_dict)} MOS entries",
    f"Selected {len(selected_files)} files for patch extraction",
    sep="\n",
)
print("First few examples:", selected_files[:5])


Loaded 3000 MOS entries
Selected 1000 files for patch extraction
First few examples: ['i07_16_3.bmp', 'i12_21_4.bmp', 'i19_05_2.bmp', 'i02_22_5.bmp', 'i25_12_3.bmp']


In [70]:
# Peek at a few file names from each image folder to compare their naming
for _label, _folder in (("Reference", REF_PATH), ("Distorted", DIST_PATH)):
    print(f"{_label} sample:", os.listdir(_folder)[:5])

Reference sample: ['I12.BMP', 'I25.BMP', 'I14.BMP', 'I06.BMP', 'I01.BMP']
Distorted sample: ['i14_24_5.bmp', 'i18_10_3.bmp', 'i17_10_5.bmp', 'i07_04_3.bmp', 'i22_01_2.bmp']


**Reference image filenames are uppercase, while distorted image filenames are lowercase.**

In [71]:
# Sorted listing of the reference folder, to check the file-name casing
refs = os.listdir(REF_PATH)
refs.sort()
print("Reference images found:", refs)
print("Count:", len(refs))

Reference images found: ['I01.BMP', 'I02.BMP', 'I03.BMP', 'I04.BMP', 'I05.BMP', 'I06.BMP', 'I07.BMP', 'I08.BMP', 'I09.BMP', 'I10.BMP', 'I11.BMP', 'I12.BMP', 'I13.BMP', 'I14.BMP', 'I15.BMP', 'I16.BMP', 'I17.BMP', 'I18.BMP', 'I19.BMP', 'I20.BMP', 'I21.BMP', 'I22.BMP', 'I23.BMP', 'I24.BMP', 'I25.BMP', 'i25.bmp']
Count: 26


**The last file, 'i25.bmp', is lowercase unlike the rest. Rename it to 'I25.BMP' to match (this replaces the 'I25.BMP' already in the listing):**

In [72]:
# Rename the stray lower-case reference file to the upper-case naming used by
# the other reference images. Done in Python behind an existence check so that
# re-running the cell after the rename is a silent no-op instead of an `mv`
# "No such file" error.
_stray_ref = os.path.join(REF_PATH, "i25.bmp")
if os.path.exists(_stray_ref):
    # os.replace overwrites an existing I25.BMP, as the original `mv` did
    os.replace(_stray_ref, os.path.join(REF_PATH, "I25.BMP"))

In [73]:
# ================== MAIN LOOP (Samsung format, Dual Patch) ==================
# For each selected distorted image: locate it and its reference, crop one
# random PATCH_SIZE x PATCH_SIZE patch at the same position from both, write a
# "ref" patch (centre copied from the clean patch) and a "new" patch (centre
# blended), and append one metadata entry per written patch.
metadata = []
psnr_values = []

DATASET_NAME = "TID2013"
IMAGE_EXTENSIONS = (".BMP", ".bmp", ".PNG", ".png")  # tried in this order


def _find_image(directory, stem):
    """Return the first existing ``<directory>/<stem><ext>`` path, or None."""
    for ext in IMAGE_EXTENSIONS:
        candidate = os.path.join(directory, stem + ext)
        if os.path.exists(candidate):
            return candidate
    return None


def _load_rgb(path):
    """Load an image as RGB, closing the underlying file handle afterwards."""
    with Image.open(path) as img:
        # convert() returns a fully loaded copy that outlives the closed file
        return img.convert("RGB")


for idx, fname in enumerate(selected_files, start=1):
    # Parse distorted filename: <ref id>_<distortion type>_<level>.<ext>
    parts = fname.lower().split("_")
    if len(parts) < 3:
        print(f"Skipping {fname}, invalid filename format")
        continue

    ref_id = parts[0].upper()  # reference files use upper-case names
    distortion_type = parts[1]
    distortion_level = parts[2].split(".")[0]  # parsed but not written to the metadata

    # ----- Reference lookup -----
    clean_file = _find_image(REF_PATH, ref_id)
    if clean_file is None:
        print(f"Skipping {fname}, reference not found")
        continue

    # ----- Distorted lookup -----
    dist_file = _find_image(DIST_PATH, os.path.splitext(fname)[0])
    if dist_file is None:
        print(f"Skipping {fname}, distorted not found")
        continue

    # ----- MOS lookup -----
    # Checked before anything is written so a sample without a score leaves
    # neither patch files nor PSNR values behind without a metadata entry.
    mos_score = mos_dict.get(fname)
    if mos_score is None:
        print(f"Skipping {fname}, no MOS entry")
        continue

    # ----- Load images -----
    clean_img = _load_rgb(clean_file)
    dist_img = _load_rgb(dist_file)
    w, h = clean_img.size
    if dist_img.size != clean_img.size or w < PATCH_SIZE or h < PATCH_SIZE:
        print(f"Skipping {fname}, image size unsuitable for {PATCH_SIZE}px patches")
        continue

    # Random coords (top-left corner), shared by the clean and distorted crop
    x = random.randint(0, w - PATCH_SIZE)
    y = random.randint(0, h - PATCH_SIZE)

    clean_patch = extract_patch(clean_img, x, y, PATCH_SIZE)
    dist_patch = extract_patch(dist_img, x, y, PATCH_SIZE)

    # ----- Per-sample values, identical for both fused patches -----
    normalized_score = round(mos_score / 9, 3)

    # Assign Samsung-style IDs
    clean_id = 6000000 + idx
    dist_id = 7000000 + idx
    # NOTE(review): bp_map equals the dist_id of the next sample - confirm intended.
    bp_map = dist_id + 1

    # Bin MOS -> color and score
    if normalized_score >= 0.66:
        color = "GREEN"
        binned_score = 1
    elif normalized_score >= 0.33:
        color = "ORANGE"
        binned_score = 0.5
    else:
        color = "RED"
        binned_score = 0

    # ----- Generate ref + new patches -----
    for fusion_mode in ["copy", "blend"]:
        method_label = "ref" if fusion_mode == "copy" else "new"

        fused_patch = fuse_patch(clean_patch, dist_patch, mode=fusion_mode, alpha=ALPHA)
        psnr = compute_psnr(clean_patch, fused_patch)
        psnr_values.append(psnr)

        out_name = f"{method_label}_patch_{idx:04}.png"
        fused_patch.save(os.path.join(PATCHES_DIR, out_name))

        # Build metadata entry
        # NOTE(review): unique_sample_id uses only method, distortion type and
        # coordinates, so two images could in principle collide - confirm.
        metadata.append({
            "unique_sample_id": f"{method_label}_{DATASET_NAME}_case{distortion_type}_{y}_{x}",
            "clean_image": clean_id,
            "distorted_image": dist_id,
            "score": binned_score,
            "metadata": {
                "crop_id": dist_id,
                "image_source": DATASET_NAME,
                "bp_case": f"case{distortion_type}",
                "method": method_label,
                "color": color,
                "score": binned_score,
                "roi": [x, y, PATCH_SIZE, PATCH_SIZE],
                "bp_map": bp_map,
                "IQE_workspace_number": 990,
                "mos_original": mos_score,
                "mos_normalized": normalized_score
            }
        })


In [74]:
# ================== SAVE RESULTS ==================
# Write the metadata records as indented JSON next to the patches folder
metadata_path = os.path.join(OUTPUT_PATH, "metadata.json")
with open(metadata_path, "w") as f:
    json.dump(metadata, f, indent=2)

# Histogram of the finite PSNR values (infinite entries are left out),
# saved to disk and closed without being displayed
finite_psnr = [value for value in psnr_values if value != float("inf")]
fig, ax = plt.subplots()
ax.hist(finite_psnr, bins=30)
ax.set_xlabel("PSNR (dB)")
ax.set_ylabel("Frequency")
ax.set_title("Histogram of PSNR values")
fig.savefig(os.path.join(OUTPUT_PATH, "psnr_histogram.png"))
plt.close(fig)

In [75]:
# Pack the output folder into <OUTPUT_PATH>.zip and hand it to the browser.
# make_archive is given absolute paths, so the cell does not depend on the
# kernel's working directory, and it rewrites the archive from scratch, so
# files deleted since an earlier run are not carried along.
archive_path = shutil.make_archive(
    OUTPUT_PATH,
    "zip",
    root_dir=os.path.dirname(OUTPUT_PATH),
    base_dir=os.path.basename(OUTPUT_PATH),
)
files.download(archive_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>