In [3]:
import kagglehub

# Download the training dataset through kagglehub and keep the returned value
# as train_path (presumably the local download directory — confirm against the
# kagglehub documentation).
# NOTE: the output recorded for this cell shows a ~20 GB download that aborted
# with "OSError: [Errno 28] No space left on device"; make sure enough disk
# space is available before re-running.
train_path = kagglehub.dataset_download("shreyansjain04/ai-vs-real-image-dataset")

# Download the companion test dataset the same way.
# NOTE(review): later cells read from Path("data") and "sml/..." rather than
# from train_path / test_path — confirm which location is intended.
test_path = kagglehub.dataset_download("shreyansjain04/ai-vs-real-image-test-dataset")

  from .autonotebook import tqdm as notebook_tqdm


Resuming download from 187695104 bytes (21370244987 bytes left)...
Resuming download from https://www.kaggle.com/api/v1/datasets/download/shreyansjain04/ai-vs-real-image-dataset?dataset_version_number=1 (187695104/21557940091) bytes left.


 18%|█▊        | 3.53G/20.1G [02:33<12:37, 23.4MB/s] 


OSError: [Errno 28] No space left on device

In [1]:
import cv2
import numpy as np

def extract_ct_exact_fast(image, kernel_size=3, max_iter=10):
    """Estimate a per-channel neighbourhood prediction kernel for an RGB image.

    For each colour channel, a weight vector ``k`` is fitted so that every
    pixel is predicted as a linear combination of the other pixels in its
    ``kernel_size`` x ``kernel_size`` neighbourhood (centre excluded). The fit
    is an iteratively re-weighted least squares loop: residuals of the current
    prediction are turned into Gaussian weights, and ``k`` is re-solved with
    those weights.

    Args:
        image: Array-like of shape (H, W, 3) with values on a 0-255 scale
            (it is divided by 255 before fitting).
        kernel_size: Side length of the square neighbourhood.
        max_iter: Number of re-weighting iterations.

    Returns:
        1-D float32 array of length ``3 * (kernel_size**2 - 1)``: the three
        per-channel kernels concatenated in channel order.

    Raises:
        ValueError: If ``image`` is not a 3-D array with exactly 3 channels.
    """
    # Floor for the residual variance. Without it a perfectly predicted
    # channel (e.g. an all-black image) gives sigma2 == 0 and 0/0 = NaN weights.
    sigma2_floor = 1e-12

    def em_channel(channel):
        alpha = kernel_size // 2
        # numpy's "symmetric" mode mirrors including the edge pixel, which is
        # the same border rule as cv2.BORDER_REFLECT.
        padded = np.pad(channel, alpha, mode="symmetric")
        h, w = channel.shape
        d = kernel_size**2 - 1

        # Neighbour offsets relative to the centre pixel (centre excluded).
        patch_offsets = [
            (i - alpha, j - alpha)
            for i in range(kernel_size)
            for j in range(kernel_size)
            if not (i == alpha and j == alpha)
        ]

        # Each column of A is the padded image shifted by one offset and
        # flattened row-major, so row r holds the neighbours of pixel r.
        A = np.empty((h * w, d), dtype=np.float32)
        for col, (dy, dx) in enumerate(patch_offsets):
            top, left = alpha + dy, alpha + dx
            A[:, col] = padded[top:top + h, left:left + w].ravel()
        b = np.asarray(channel, dtype=np.float32).ravel()

        k = np.zeros(d, dtype=np.float32)
        for _ in range(max_iter):
            sq_residuals = (b - A @ k) ** 2
            sigma2 = max(float(np.mean(sq_residuals)), sigma2_floor)
            weights = np.exp(-sq_residuals / (2 * sigma2))

            # Row-wise scaling is equivalent to multiplying by diag(weights)
            # without materialising the N x N matrix.
            Aw = A * weights[:, np.newaxis]
            bw = b * weights
            k = np.linalg.pinv(A.T @ Aw) @ (A.T @ bw)

        return k

    image = np.asarray(image)
    if image.ndim != 3 or image.shape[2] != 3:
        raise ValueError("Image must be RGB")

    image = image.astype(np.float32) / 255.0
    return np.concatenate([em_channel(image[..., c]) for c in range(3)])


In [2]:
import cv2
import numpy as np

from fastai.vision.all import *
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from pathlib import Path

def get_balanced_subset(path, max_per_class=500):
    """Return at most ``max_per_class`` image files for each class under ``path``.

    Files are bucketed by their lower-cased parent-folder label, and the first
    ``max_per_class`` files of every bucket (in discovery order) are kept.

    Args:
        path: Root directory handed to ``get_image_files``.
        max_per_class: Upper bound on the number of files kept per label.

    Returns:
        A flat list of the selected files, grouped label by label in the order
        the labels were first encountered.
    """
    by_label = {}
    for img_file in get_image_files(path):
        key = parent_label(img_file).lower()
        if key not in by_label:
            by_label[key] = []
        by_label[key].append(img_file)

    # dicts iterate in insertion order, so labels come out in first-seen order.
    return [
        img_file
        for bucket in by_label.values()
        for img_file in bucket[:max_per_class]
    ]

# --- Load Data using FastAI ---
path = Path("data")

dblock = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=partial(get_balanced_subset, max_per_class=500),
    get_y=parent_label,
    splitter=RandomSplitter(seed=42)
)

dls = dblock.dataloaders(path, bs=16)


# --- Extract CT Features and Labels ---
def ct_features(dataset, kernel_size=3):
    """Return (features, labels) lists for every (image, label) pair in ``dataset``.

    Each image is converted to a numpy array before being handed to
    ``extract_ct_exact_fast``, which indexes ``image.shape[2]`` and therefore
    needs an (H, W, 3) array rather than the dataset's image object.
    """
    features, labels = [], []
    for img, label in dataset:
        features.append(extract_ct_exact_fast(np.array(img), kernel_size=kernel_size))
        labels.append(int(label))
    return features, labels


# The notebook defines `extract_ct_exact_fast` only; the earlier calls to
# `extract_ct` raised NameError on a fresh kernel.
X, y = ct_features(dls.train_ds)
X_valid, y_valid = ct_features(dls.valid_ds)

# --- Train Random Forest ---
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X, y)

# --- Evaluate ---
y_pred = rf.predict(X_valid)
# Take the class names from the DataLoaders vocab so the report rows always
# match the integer labels, whatever the folder names are.
class_names = [str(c) for c in dls.vocab]
print(classification_report(
    y_valid,
    y_pred,
    labels=list(range(len(class_names))),
    target_names=class_names,
))

TypeError: 'NoneType' object is not iterable

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import euclidean
from scipy.stats import pearsonr
import numpy as np

def compare_ct_vectors(vec1, vec2, verbose=True):
    """
    Score how similar two CT vectors are under three metrics.

    Both inputs are converted to numpy arrays and flattened to a single row
    before comparison.

    Args:
        vec1, vec2: array-likes holding the same number of elements
        verbose: when True, print each score on its own line

    Returns:
        dict with keys "cosine_similarity", "euclidean_distance" and
        "pearson_correlation"

    Raises:
        AssertionError: if the flattened vectors differ in length
    """
    row_a = np.array(vec1).reshape(1, -1)
    row_b = np.array(vec2).reshape(1, -1)

    assert row_a.shape == row_b.shape, "CT vectors must have the same shape"

    flat_a, flat_b = row_a[0], row_b[0]

    # sklearn works on 2-D row matrices; the scipy helpers take 1-D vectors.
    cosine_sim = cosine_similarity(row_a, row_b)[0][0]
    euclid = euclidean(flat_a, flat_b)
    corr, _ = pearsonr(flat_a, flat_b)

    scores = {
        "cosine_similarity": cosine_sim,
        "euclidean_distance": euclid,
        "pearson_correlation": corr
    }

    if verbose:
        print(f"🔍 Cosine Similarity:     {scores['cosine_similarity']:.4f}")
        print(f"📏 Euclidean Distance:    {scores['euclidean_distance']:.4f}")
        print(f"📈 Pearson Correlation:   {scores['pearson_correlation']:.4f}")

    return scores


In [None]:
def load_rgb_image(image_path, size=(256, 256)):
    """Read an image from disk, convert it from BGR to RGB and resize it.

    Args:
        image_path: Path of the image file to read.
        size: Target size passed to ``cv2.resize``.

    Returns:
        The resized RGB image as returned by ``cv2.resize``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read the file (it
            returns ``None`` instead of raising).
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return cv2.resize(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB), size)


img1 = load_rgb_image("sml/real/6.jpg")
img2 = load_rgb_image("sml/ai/6.jpg")

# The notebook defines `extract_ct_exact_fast` only; `extract_ct_exact` does
# not exist and raised NameError on a fresh kernel.
vec1 = extract_ct_exact_fast(img1, kernel_size=3)
vec2 = extract_ct_exact_fast(img2, kernel_size=3)

compare_ct_vectors(vec1, vec2)
