In [1]:
!pip install fastkaggle fastai kagglehub



In [2]:
import timm

from fastkaggle import *

# Competition identifier; reused at the end of the notebook when submitting.
comp = 'hack-rush-deep-fake-detection'

# fastkaggle helper that prepares the competition; its return value is kept in
# `path`. NOTE(review): the `install` string presumably lists packages to
# install when running on Kaggle - confirm against the fastkaggle docs.
path = setup_comp(comp, install='fastai "timm>=0.6.2.dev0"')

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import kagglehub

# Fetch the labelled training images; the returned value is the local location
# of the downloaded dataset and is used below as the source for resizing.
train_path = kagglehub.dataset_download("shreyansjain04/ai-vs-real-image-dataset")

# Same for the unlabelled test images that predictions are generated for.
test_path = kagglehub.dataset_download("shreyansjain04/ai-vs-real-image-test-dataset")

In [4]:
from fastai.vision.all import *
# Listing of the competition folder (result is not displayed because this is
# not the last expression of the cell).
path.ls()

# Show where kagglehub placed the two datasets, one path per line.
print(train_path, test_path, sep='\n')

# Destination folders for the down-scaled copies of the train / test images.
trn_path, tst_path = Path('mic'), Path('mic-test')


/home/andreas/.cache/kagglehub/datasets/shreyansjain04/ai-vs-real-image-dataset/versions/1
/home/andreas/.cache/kagglehub/datasets/shreyansjain04/ai-vs-real-image-test-dataset/versions/2


In [19]:
# Write resized copies (max_size=128) of every training image found under
# train_path, including subfolders, into trn_path using 7 worker processes.
# NOTE(review): this redoes all the work on every run of the cell.
resize_images(train_path, dest=trn_path, max_size=128, recurse=True, max_workers=7)



In [20]:
# Same down-scaling for the test images, written into tst_path.
# NOTE(review): uses 8 workers while the training cell uses 7 - presumably
# unintentional; confirm.
resize_images(test_path, dest=tst_path, max_size=128, recurse=True, max_workers=8)



In [5]:
def train(arch, size, item=Resize(480, method='squish'), accum=1, finetune=True, epochs=12):
    """Train `arch` on the images under `trn_path`; when fine-tuning, also write `submission.csv`.

    Args:
        arch: Architecture handed to `vision_learner` (the notebook passes a model-name string).
        size: Value forwarded to `aug_transforms(size=...)` for the batch transforms.
        item: Item transform applied before batching.
        accum: Gradient-accumulation divisor. The batch size is `64 // accum` and a
            `GradientAccumulation(64)` callback is attached. A falsy value (0 / None)
            disables accumulation and uses a batch size of 64.
        finetune: If True, run `fine_tune`, plot the confusion matrix and top losses,
            predict the files under `tst_path` with test-time augmentation and write
            `submission.csv`. If False, unfreeze the model and run `fit_one_cycle` only
            (no predictions are produced in that case).
        epochs: Number of epochs passed to `fine_tune` / `fit_one_cycle`.

    Returns:
        The trained learner, so that callers assigning the result get something usable.
    """
    # A falsy `accum` means "no accumulation"; dividing by it would raise before
    # the callback selection below is ever reached.
    bs = 64 // accum if accum else 64
    dls = ImageDataLoaders.from_folder(
        trn_path,
        valid_pct=0.2,
        item_tfms=item,
        batch_tfms=aug_transforms(size=size, min_scale=0.75),
        bs=bs,
    )
    cbs = GradientAccumulation(64) if accum else []
    learn = vision_learner(dls, arch, metrics=error_rate, cbs=cbs).to_fp16()
    if finetune:
        learn.fine_tune(epochs, 0.01)
        tst_files = get_image_files(tst_path)

        # Diagnostics computed by ClassificationInterpretation for this learner.
        interp = ClassificationInterpretation.from_learner(learn)
        interp.plot_confusion_matrix()
        interp.plot_top_losses(9)

        # Test-time-augmented predictions for the test files.
        test_dl = learn.dls.test_dl(tst_files)
        preds, _ = learn.tta(dl=test_dl)

        # "class" is the index of the highest-scoring column of `preds`.
        submission = pd.DataFrame({
            "filename": [f.name for f in test_dl.items],
            "class": preds.argmax(dim=1).numpy()
        })
        submission.to_csv("submission.csv", index=False)

    else:
        learn.unfreeze()
        learn.fit_one_cycle(epochs, 0.01)

    return learn

In [36]:
import cv2
from scipy.signal import convolve2d
import numpy as np
import math



def estimate_noise(I):
  """Return a scalar noise estimate for a 2-D (single-channel) image array.

  The image is convolved with a 3x3 Laplacian-difference kernel, the absolute
  responses are summed, and the total is scaled by
  sqrt(pi / 2) / (6 * (W - 2) * (H - 2)), where H, W = I.shape.

  NOTE(review): convolve2d is called with its default mode, so the response
  also covers the zero-padded border, while the normaliser counts only the
  (W - 2) * (H - 2) interior pixels. This looks like Immerkaer's fast noise
  estimator - confirm whether the border contribution is intended. Behaviour
  is left unchanged here.

  Raises:
    ValueError: if `I.shape` does not unpack into exactly two dimensions.
  """
  height, width = I.shape

  kernel = np.array([
      [1, -2, 1],
      [-2, 4, -2],
      [1, -2, 1],
  ])

  abs_response_total = np.abs(convolve2d(I, kernel)).sum()
  interior_pixels = (width - 2) * (height - 2)

  return abs_response_total * math.sqrt(0.5 * math.pi) / (6 * interior_pixels)

# Estimate the noise level of every .jpg found (recursively) under the
# original, un-resized test set.

noise_estimates = []


for img_path in Path(test_path).glob('**/*.jpg'):
  # str(): not every OpenCV build accepts pathlib.Path for the filename.
  img = cv2.imread(str(img_path))
  if img is None:
    # cv2.imread reports an unreadable file by returning None; fail here with
    # the offending path instead of with an opaque error inside cvtColor.
    raise ValueError(f'Could not read image: {img_path}')
  # estimate_noise expects a single-channel array, so convert to grayscale.
  img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  noise = estimate_noise(img_gray)
  noise_estimates.append({'filename': img_path.name, 'noise': noise})


In [51]:
import matplotlib.pyplot as plt

# Label each image via a fixed cut-off on its noise estimate (stored under the
# 'ai' key) and collect the columns of the submission in the same pass.
filenames, labels = [], []
for entry in noise_estimates:
    entry['ai'] = entry['noise'] > 20
    filenames.append(entry['filename'])
    labels.append(int(entry['ai']))

submission = pd.DataFrame({"filename": filenames, "class": labels})
submission.to_csv("submission-test.csv", index=False)

In [12]:
# Fine-tune a ConvNeXt-small model on the resized images.
# NOTE(review): size=244 is larger than the 128x128 item resize and may have
# been meant as 224 - confirm.
result = train(
    'convnext_small_in22k',
    244,
    item=Resize((128, 128)),
    accum=1,
    epochs=10,
)

epoch,train_loss,valid_loss,error_rate,time


KeyboardInterrupt: 

In [12]:
# Peek at the first lines of the CNN submission file as a sanity check.
!head submission.csv

filename,class
4403.jpg,0
4222.jpg,0
2158.jpg,0
2257.jpg,0
2688.jpg,1
691.jpg,1
95.jpg,0
1931.jpg,0
2844.jpg,1


In [52]:
# Upload the noise-threshold submission through the Kaggle API, but only when
# the notebook is not itself running on Kaggle.
if not iskaggle:
    from kaggle import api

    api.competition_submit_cli(
        'submission-test.csv',  # file to upload
        'noise',                # presumably the submission message - confirm
        comp,                   # competition identifier defined at the top
    )

100%|██████████| 52.6k/52.6k [00:00<00:00, 57.9kB/s]
