In [16]:
import os
import random
import json
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from google.colab import files

In [17]:
# ================== CONFIG ==================
from google.colab import drive
drive.mount('/content/drive')

!apt-get install unrar -y
!unrar x "/content/drive/MyDrive/Samsung_Datasets/tid2013.rar" /content/tid2013/

DATASET_PATH = "/content/tid2013"
REF_PATH = os.path.join(DATASET_PATH, "reference_images")
DIST_PATH = os.path.join(DATASET_PATH, "distorted_images")
MOS_FILE = os.path.join(DATASET_PATH, "mos_with_names.txt")
OUTPUT_PATH = "/content/tid2013_patches"

NUM_SAMPLES = 1000
PATCH_SIZE = 20
CENTER_SIZE = 12
ALPHA = 0.5
PATCHES_DIR = os.path.join(OUTPUT_PATH, "patches")

os.makedirs(PATCHES_DIR, exist_ok=True)

!du -sh /content/tid2013

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
unrar is already the newest version (1:6.1.5-1ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.

UNRAR 6.11 beta 1 freeware      Copyright (c) 1993-2022 Alexander Roshal


Extracting from /content/drive/MyDrive/Samsung_Datasets/tid2013.rar


Would you like to replace the existing file /content/tid2013/distorted_images/I01_01_1.bmp
589878 bytes, modified on 2012-11-19 17:00
with a new one
589878 bytes, modified on 2012-11-19 17:00

[Y]es, [N]o, [A]ll, n[E]ver, [R]ename, [Q]uit 
User break

User break
1.7G	/content/tid2013


In [18]:
# ================== LOAD MOS ==================
def load_mos_with_names(mos_file):
    mos_dict = {}
    with open(mos_file, "r") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) == 2:
                # check if first token is a number or a filename
                if parts[0].lower().endswith('.bmp'):
                    fname, score = parts
                else:
                    score, fname = parts
                mos_dict[fname] = float(score)
    return mos_dict


In [19]:
# ================== HELPERS ==================
def compute_psnr(img1, img2):
    arr1, arr2 = np.array(img1, dtype=np.float32), np.array(img2, dtype=np.float32)
    mse = np.mean((arr1 - arr2) ** 2)
    if mse == 0:
        return float("inf")
    return 10 * np.log10((255 ** 2) / mse)

def extract_patch(img, x, y, size=20):
    return img.crop((x, y, x + size, y + size))

def fuse_patch(clean_patch, dist_patch, mode="blend", alpha=0.5):
    clean_arr = np.array(clean_patch).astype(np.float32)
    dist_arr = np.array(dist_patch).astype(np.float32)
    h, w, _ = clean_arr.shape
    cx, cy = w // 2, h // 2
    half = CENTER_SIZE // 2
    x1, y1 = cx - half, cy - half
    x2, y2 = x1 + CENTER_SIZE, y1 + CENTER_SIZE

    fused = dist_arr.copy()
    if mode == "copy":
        fused[y1:y2, x1:x2] = clean_arr[y1:y2, x1:x2]
    else:  # blend
        fused[y1:y2, x1:x2] = alpha * clean_arr[y1:y2, x1:x2] + (1 - alpha) * dist_arr[y1:y2, x1:x2]

    return Image.fromarray(np.uint8(fused))

In [20]:
# ================== LOAD AND SELECT FILES ==================
mos_dict = load_mos_with_names(MOS_FILE)
all_files = list(mos_dict.keys())

# shuffle for randomness
random.shuffle(all_files)

# pick N distorted images
selected_files = all_files[:NUM_SAMPLES]

print(f"Loaded {len(mos_dict)} MOS entries")
print(f"Selected {len(selected_files)} files for patch extraction")
print("First few examples:", selected_files[:5])


Loaded 3000 MOS entries
Selected 1000 files for patch extraction
First few examples: ['i16_18_3.bmp', 'i10_24_5.bmp', 'i06_03_4.bmp', 'i01_23_2.bmp', 'i19_14_2.bmp']


In [21]:
import os
print("Reference sample:", os.listdir(REF_PATH)[:5])
print("Distorted sample:", os.listdir(DIST_PATH)[:5])

Reference sample: ['I12.BMP', 'I14.BMP', 'I06.BMP', 'I01.BMP', 'I21.BMP']
Distorted sample: ['i14_24_5.bmp', 'i18_10_3.bmp', 'i17_10_5.bmp', 'i07_04_3.bmp', 'i22_01_2.bmp']


**Reference images are Uppercase while Distorted images are Lowercase.**

In [27]:
import os
refs = sorted(os.listdir(REF_PATH))
print("Reference images found:", refs)
print("Count:", len(refs))

Reference images found: ['I01.BMP', 'I02.BMP', 'I03.BMP', 'I04.BMP', 'I05.BMP', 'I06.BMP', 'I07.BMP', 'I08.BMP', 'I09.BMP', 'I10.BMP', 'I11.BMP', 'I12.BMP', 'I13.BMP', 'I14.BMP', 'I15.BMP', 'I16.BMP', 'I17.BMP', 'I18.BMP', 'I19.BMP', 'I20.BMP', 'I21.BMP', 'I22.BMP', 'I23.BMP', 'I24.BMP', 'i25.bmp']
Count: 25


**The last image file 'i25.BMP' is not Uppercase like the rest - rename it to match:**

In [28]:
mv /content/tid2013/reference_images/i25.bmp /content/tid2013/reference_images/I25.BMP

In [29]:
# ================== MAIN LOOP ==================
metadata = []
psnr_values = []

for idx, fname in enumerate(selected_files, start=1):
    ref_id = int(fname[1:3])  # from distorted filename, e.g. i07_04_3.bmp → 07

    # Reference image: always uppercase
    clean_file = os.path.join(REF_PATH, f"I{ref_id:02}.BMP")

    # Distorted image: check case variations
    dist_file = os.path.join(DIST_PATH, fname)
    if not os.path.exists(dist_file):
        alt_fname = fname[0].upper() + fname[1:]  # change first letter i->I
        dist_file = os.path.join(DIST_PATH, alt_fname)
        if not os.path.exists(dist_file):
            print(f"Skipping {fname}, missing files: {clean_file} or {dist_file}")
            continue

    if not os.path.exists(clean_file):
        print(f"Skipping {fname}, missing reference: {clean_file}")
        continue

    clean_img = Image.open(clean_file).convert("RGB")
    dist_img = Image.open(dist_file).convert("RGB")
    w, h = clean_img.size

    # Random coords for patch
    x = random.randint(0, w - PATCH_SIZE)
    y = random.randint(0, h - PATCH_SIZE)

    clean_patch = extract_patch(clean_img, x, y, PATCH_SIZE)
    dist_patch = extract_patch(dist_img, x, y, PATCH_SIZE)
    fused_patch = fuse_patch(clean_patch, dist_patch, mode="blend", alpha=ALPHA)

    psnr = compute_psnr(clean_patch, fused_patch)
    psnr_values.append(psnr)

    out_name = f"patch_{idx:04}.png"
    fused_patch.save(os.path.join(PATCHES_DIR, out_name))

    mos_score = mos_dict[fname]
    normalized_score = round(mos_score / 9, 3)  # scale to [0,1]

    metadata.append({
        "unique_sample_id": f"patch_{idx:04}",
        "clean_image": os.path.basename(clean_file).replace(".BMP", ".png"),
        "distorted_image": os.path.basename(dist_file).replace(".BMP", ".png"),
        "score": normalized_score,
        "metadata": {
            "region": [x, y, PATCH_SIZE, PATCH_SIZE],
            "method": f"blend_alpha_{ALPHA}",
            "src_image": fname,
            "mos": mos_score
        }
    })


In [30]:
# ================== SAVE RESULTS ==================
with open(os.path.join(OUTPUT_PATH, "metadata.json"), "w") as f:
    json.dump(metadata, f, indent=2)

plt.hist([p for p in psnr_values if p != float("inf")], bins=30)
plt.xlabel("PSNR (dB)")
plt.ylabel("Frequency")
plt.title("Histogram of PSNR values")
plt.savefig(os.path.join(OUTPUT_PATH, "psnr_histogram.png"))
plt.close()

In [31]:
from google.colab import files
!zip -r tid2013_patches.zip tid2013_patches
files.download("tid2013_patches.zip")

updating: tid2013_patches/ (stored 0%)
updating: tid2013_patches/psnr_histogram.png (deflated 20%)
updating: tid2013_patches/metadata.json (deflated 92%)
updating: tid2013_patches/patches/ (stored 0%)
updating: tid2013_patches/patches/patch_0677.png (stored 0%)
updating: tid2013_patches/patches/patch_0030.png (stored 0%)
updating: tid2013_patches/patches/patch_0939.png (stored 0%)
updating: tid2013_patches/patches/patch_0067.png (stored 0%)
updating: tid2013_patches/patches/patch_0751.png (stored 0%)
updating: tid2013_patches/patches/patch_0499.png (stored 0%)
updating: tid2013_patches/patches/patch_0591.png (stored 0%)
updating: tid2013_patches/patches/patch_0781.png (stored 0%)
updating: tid2013_patches/patches/patch_0183.png (stored 0%)
updating: tid2013_patches/patches/patch_0473.png (stored 0%)
updating: tid2013_patches/patches/patch_0821.png (stored 0%)
updating: tid2013_patches/patches/patch_0495.png (stored 0%)
updating: tid2013_patches/patches/patch_0078.png (stored 0%)
updati

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>