In [18]:
# !pip install datasets torchvision pandas scikit-image

import torch
import torch.nn.functional as F
from torchvision import transforms
from datasets import load_dataset

import random
import numpy as np
import pandas as pd
from skimage.metrics import structural_similarity as ssim
from tqdm.auto import tqdm

# ===========================
# 1. Dataset + light transform
# ===========================

# Side length (pixels) every image is resized to before evaluation.
image_size = 128

# Preprocessing pipeline: square resize -> single-channel grayscale ->
# float tensor scaled to [0, 1].
to_tensor = transforms.Compose(
    [
        transforms.Resize(size=(image_size, image_size)),
        transforms.Grayscale(),  # default is one output channel
        transforms.ToTensor(),
    ]
)

# Training split of the Alzheimer MRI dataset from the Hugging Face Hub.
hf_train = load_dataset(path="Falah/Alzheimer_MRI", split="train")
print("Số ảnh train:", len(hf_train))

def get_image_tensor(idx):
    """Return training image ``idx`` after the ``to_tensor`` preprocessing.

    Args:
        idx: Index into the module-level ``hf_train`` dataset.

    Returns:
        The transformed image tensor (expected shape ``[1, H, W]``).
    """
    sample = hf_train[idx]
    return to_tensor(sample["image"])


# ===========================
# 2. Two sharpening methods
# ===========================

class UnsharpMask(torch.nn.Module):
    """Sharpen single-channel images with an unsharp mask.

    The output is ``clamp(x + amount * (x - gaussian_blur(x)), 0, 1)``.
    The blur uses zero padding, so pixels near the border are blurred
    against an implicit black frame.

    Args:
        kernel_size: Side length of the square Gaussian kernel. Must be a
            positive odd integer so that ``padding = kernel_size // 2``
            keeps the spatial size unchanged.
        sigma: Standard deviation of the Gaussian, in pixels. Must be > 0.
        amount: Weight applied to the high-frequency residual ``x - blur``.

    Raises:
        ValueError: If ``kernel_size`` is not a positive odd integer or
            ``sigma`` is not positive.
    """

    def __init__(self, kernel_size=5, sigma=1.0, amount=0.5):
        super().__init__()
        # An even kernel would make the blurred image one pixel larger than
        # the input and fail later with an opaque shape mismatch.
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError(
                f"kernel_size must be a positive odd integer, got {kernel_size}"
            )
        # sigma == 0 divides by zero and yields a NaN kernel.
        if sigma <= 0:
            raise ValueError(f"sigma must be positive, got {sigma}")
        self.kernel_size = kernel_size
        self.sigma = sigma
        self.amount = amount

    def _gaussian_kernel(self, device, dtype=None):
        """Build a normalized Gaussian kernel of shape ``[1, 1, k, k]``.

        Args:
            device: Device on which to create the kernel.
            dtype: Floating dtype of the kernel; defaults to torch's default
                dtype when omitted.
        """
        if dtype is None:
            dtype = torch.get_default_dtype()
        k = self.kernel_size
        half = k // 2
        # Symmetric integer offsets centred on zero: -half .. +half.
        ax = torch.arange(-half, half + 1, device=device, dtype=dtype)
        xx, yy = torch.meshgrid(ax, ax, indexing="ij")
        kernel = torch.exp(-(xx**2 + yy**2) / (2. * self.sigma**2))
        kernel = kernel / kernel.sum()  # weights sum to 1
        return kernel.view(1, 1, k, k)

    def forward(self, x):
        """Sharpen ``x``.

        Args:
            x: Image tensor of shape ``[1, H, W]`` or ``[B, 1, H, W]`` with
                values in ``[0, 1]``.

        Returns:
            Sharpened tensor with the same shape and dtype as ``x``.
        """
        unbatched = x.dim() == 3
        if unbatched:
            x = x.unsqueeze(0)  # [1, 1, H, W]

        # Build the kernel in the input's dtype/device so float64, half or
        # CUDA inputs do not fail inside conv2d.
        kernel = self._gaussian_kernel(x.device, x.dtype)
        blur = F.conv2d(x, kernel, padding=self.kernel_size // 2)
        sharp = torch.clamp(x + self.amount * (x - blur), 0.0, 1.0)

        # Drop the batch axis only if it was added here, so a real batch of
        # size 1 keeps its 4D shape.
        return sharp.squeeze(0) if unbatched else sharp


class LaplacianSharpen(torch.nn.Module):
    """Sharpen single-channel images by adding a scaled Laplacian response.

    The output is ``clamp(x + alpha * conv(x, K), 0, 1)`` where ``K`` is the
    4-neighbour kernel ``[[0,-1,0],[-1,4,-1],[0,-1,0]]``. The convolution
    uses zero padding.

    Args:
        alpha: Weight applied to the Laplacian response.
    """

    def __init__(self, alpha=0.3):
        super().__init__()
        self.alpha = alpha
        k = torch.tensor([[0., -1., 0.],
                          [-1., 4., -1.],
                          [0., -1., 0.]]).view(1, 1, 3, 3)
        # Registered as a buffer so it follows the module across .to()/.cuda().
        self.register_buffer("kernel", k)

    def forward(self, x):
        """Sharpen ``x``.

        Args:
            x: Image tensor of shape ``[1, H, W]`` or ``[B, 1, H, W]`` with
                values in ``[0, 1]``.

        Returns:
            Sharpened tensor with the same shape and dtype as ``x``.
        """
        unbatched = x.dim() == 3
        if unbatched:
            x = x.unsqueeze(0)  # [1, 1, H, W]

        # Match the input's dtype/device so float64, half or CUDA inputs do
        # not fail inside conv2d (no copy is made when they already match).
        kernel = self.kernel.to(device=x.device, dtype=x.dtype)
        lap = F.conv2d(x, kernel, padding=1)
        sharp = torch.clamp(x + self.alpha * lap, 0.0, 1.0)

        # Drop the batch axis only if it was added here, so a real batch of
        # size 1 keeps its 4D shape.
        return sharp.squeeze(0) if unbatched else sharp


def gradient_magnitude(x):
    """Return the mean Sobel gradient magnitude of a single-channel image.

    Horizontal and vertical 3x3 Sobel responses are computed with zero
    padding, combined as ``sqrt(gx**2 + gy**2)`` and averaged over every
    element (including the batch axis when a batch is given).

    Args:
        x: Floating-point tensor of shape ``[1, H, W]`` or ``[B, 1, H, W]``.

    Returns:
        The mean gradient magnitude as a Python float.
    """
    if x.dim() == 3:
        x = x.unsqueeze(0)   # [1, 1, H, W]

    # Create the kernels with the input's dtype and device; otherwise a
    # float64 or CUDA input fails inside conv2d.
    kx = torch.tensor([[-1., 0., 1.],
                       [-2., 0., 2.],
                       [-1., 0., 1.]], dtype=x.dtype, device=x.device)
    ky = torch.tensor([[-1., -2., -1.],
                       [ 0.,  0.,  0.],
                       [ 1.,  2.,  1.]], dtype=x.dtype, device=x.device)
    kx = kx.view(1, 1, 3, 3)
    ky = ky.view(1, 1, 3, 3)

    gx = F.conv2d(x, kx, padding=1)
    gy = F.conv2d(x, ky, padding=1)
    gmag = torch.sqrt(gx**2 + gy**2)
    return gmag.mean().item()


def ssim_img(x_ref, x):
    """Return the SSIM between a reference tensor and a candidate tensor.

    Both tensors have their size-1 axes squeezed away and are converted to
    NumPy arrays on the CPU before scoring; intensities are treated as
    spanning a range of 1.0.

    Args:
        x_ref: Reference image tensor.
        x: Image tensor compared against ``x_ref``.

    Returns:
        The value returned by ``structural_similarity``.
    """
    reference, candidate = (
        tensor.squeeze().cpu().numpy() for tensor in (x_ref, x)
    )
    return ssim(reference, candidate, data_range=1.0)


# ===========================
# 3. Parameter grid
# ===========================

# (amount, sigma) pairs for the unsharp mask: an amount sweep at sigma=1.0
# followed by a sigma sweep at amount=0.5.
unsharp_grid = [
    (0.3, 1.0), (0.5, 1.0), (0.7, 1.0),
    (0.5, 0.8), (0.5, 1.2),
]

# alpha values for the Laplacian sharpener.
laplacian_grid = [0.1, 0.2, 0.3, 0.4]

# One dict per configuration; parameters that do not apply to a method are
# stored as NaN so every config shares the same keys.
configs = []

for amount, sigma in unsharp_grid:
    configs.append(dict(
        method="unsharp",
        amount=amount,
        sigma=sigma,
        alpha=np.nan,
        model=UnsharpMask(kernel_size=5, sigma=sigma, amount=amount),
    ))

for alpha in laplacian_grid:
    configs.append(dict(
        method="laplacian",
        amount=np.nan,
        sigma=np.nan,
        alpha=alpha,
        model=LaplacianSharpen(alpha=alpha),
    ))

print("Số cấu hình sharpen:", len(configs))

# ===========================
# 4. Run the evaluation
# ===========================

N_IMAGES = 500  # can be increased if desired

# Never request more images than the dataset holds.
max_images = min(N_IMAGES, len(hf_train))
sample_indices = [*range(max_images)]   # the first max_images indices, in order
print("Số ảnh dùng để R&D:", len(sample_indices))

# One row per (image, configuration) pair.
records = []

for idx in tqdm(sample_indices, desc="Evaluating grid"):
    img = get_image_tensor(idx)      # [1, H, W]
    base_edge = gradient_magnitude(img)  # edge strength of the unsharpened image

    for cfg in configs:
        model = cfg["model"]
        x_aug = model(img)
        edge = gradient_magnitude(x_aug)
        sim = ssim_img(img, x_aug)

        records.append({
            "idx": idx,
            # Copy the descriptive fields of the configuration verbatim.
            **{key: cfg[key] for key in ("method", "amount", "sigma", "alpha")},
            "edge_strength": edge,
            # Relative edge strength; a flat image (no edges) is reported as 1.0.
            "edge_gain": (edge / base_edge) if base_edge > 0 else 1.0,
            "ssim_with_original": sim,
        })

print("Số bản ghi thu được:", len(records))
df = pd.DataFrame(records)
print(df.head())
print("Số dòng df:", len(df))

# ===========================
# 5. Group & sort the results
# ===========================

group_cols = ["method", "amount", "sigma", "alpha"]

# dropna=False keeps groups whose key contains NaN; every configuration has
# NaN in the parameters that belong to the other method.
df_group = df.groupby(group_cols, dropna=False).agg(
    edge_strength_mean=pd.NamedAgg(column="edge_strength", aggfunc="mean"),
    edge_strength_std=pd.NamedAgg(column="edge_strength", aggfunc="std"),
    edge_gain_mean=pd.NamedAgg(column="edge_gain", aggfunc="mean"),
    edge_gain_std=pd.NamedAgg(column="edge_gain", aggfunc="std"),
    ssim_mean=pd.NamedAgg(column="ssim_with_original", aggfunc="mean"),
    ssim_std=pd.NamedAgg(column="ssim_with_original", aggfunc="std"),
).reset_index()

# Within each method, list the configurations with the largest mean edge
# gain first.
df_group_sorted = df_group.sort_values(
    by=["method", "edge_gain_mean"],
    ascending=[True, False],
)

print(df_group_sorted)


Số ảnh train: 5120
Số cấu hình sharpen: 9
Số ảnh dùng để R&D: 500


Evaluating grid:   0%|          | 0/500 [00:00<?, ?it/s]

Số bản ghi thu được: 4500
   idx   method  amount  sigma  alpha  edge_strength  edge_gain  \
0    0  unsharp     0.3    1.0    NaN       0.260553   1.085302   
1    0  unsharp     0.5    1.0    NaN       0.274818   1.144719   
2    0  unsharp     0.7    1.0    NaN       0.289442   1.205637   
3    0  unsharp     0.5    0.8    NaN       0.265999   1.107986   
4    0  unsharp     0.5    1.2    NaN       0.281571   1.172849   

   ssim_with_original  
0            0.996736  
1            0.991661  
2            0.984927  
3            0.994499  
4            0.989283  
Số dòng df: 4500
      method  amount  sigma  alpha  edge_strength_mean  edge_strength_std  \
3  laplacian     NaN    NaN    0.4            0.516023           0.049689   
2  laplacian     NaN    NaN    0.3            0.482873           0.046576   
1  laplacian     NaN    NaN    0.2            0.449642           0.043221   
0  laplacian     NaN    NaN    0.1            0.417439           0.039875   
8    unsharp     0.7    1