In [1]:
import numpy as np 
import pandas as pd
import tensorflow as tf

import cv2
import os
import shutil

from glob import glob
from pathlib import Path

In [2]:
!pip install wolta

  pid, fd = os.forkpty()


Collecting wolta
  Downloading wolta-0.3.5-py3-none-any.whl.metadata (960 bytes)
Collecting imblearn (from wolta)
  Downloading imblearn-0.0-py2.py3-none-any.whl.metadata (355 bytes)
Downloading wolta-0.3.5-py3-none-any.whl (17 kB)
Downloading imblearn-0.0-py2.py3-none-any.whl (1.9 kB)
Installing collected packages: imblearn, wolta
Successfully installed imblearn-0.0 wolta-0.3.5


In [3]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.48-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.48-py3-none-any.whl (898 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m898.8/898.8 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.13-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.48 ultralytics-thop-2.0.13


In [4]:
os.environ['WANDB_MODE'] = 'disabled'

# Data Analysis

In [5]:
for dirname, _, _ in os.walk('/kaggle/input'):
    print(dirname)

/kaggle/input
/kaggle/input/test
/kaggle/input/test/FAKE
/kaggle/input/test/REAL
/kaggle/input/train
/kaggle/input/train/FAKE
/kaggle/input/train/REAL


In [6]:
p_paths = glob('/kaggle/input/*')
d_paths = []

for p_path in p_paths:
    d_paths.extend(glob('{}/*'.format(p_path)))
print(d_paths)

['/kaggle/input/test/FAKE', '/kaggle/input/test/REAL', '/kaggle/input/train/FAKE', '/kaggle/input/train/REAL']


In [7]:
i_paths = []

for d_path in d_paths:
    i_paths.extend(glob('{}/*'.format(d_path)))

print(len(i_paths))

120000


In [8]:
from wolta.visual_tools import get_extensions

get_extensions(i_paths)

{'jpg': 120000}

In [9]:
from wolta.visual_tools import dataset_size_same

dataset_size_same(i_paths)

True

In [10]:
temp_img = cv2.imread(i_paths[0])
ratio = temp_img.shape[1] / temp_img.shape[0]

print('Width: {}'.format(temp_img.shape[1]))
print('Height: {}'.format(temp_img.shape[0]))
print('Ratio: {}'.format(ratio))

Width: 32
Height: 32
Ratio: 1.0


# Image Stacking

In [11]:
os.makedirs('/kaggle/working/raw')

In [12]:
for d_path in d_paths:
    current_dir = Path(d_path).name
    current_path = '/kaggle/working/raw/{}'.format(current_dir) 
    os.makedirs(current_path, exist_ok=True)

    i_paths = glob('{}/*'.format(d_path))

    for i_path in i_paths:
        shutil.copy(i_path, current_path)

# Image Splitting

In [13]:
from wolta.visual_tools import dir_split

dir_split('/kaggle/working/raw', '/kaggle/working/data', test_size=0.2, val_size=0.2)

In [14]:

from tqdm import tqdm

def add_gaussian_noise(image, mean=0, std=25):
    """Add Gaussian noise to an image."""
    noise = np.random.normal(mean, std, image.shape).astype(np.uint8)
    noisy_image = cv2.add(image, noise)
    return noisy_image

def process_and_save_images(input_path, output_path):
    """Process images, add noise, and save to the output path."""
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    
    for img_name in tqdm(os.listdir(input_path)):
        img_path = os.path.join(input_path, img_name)
        if os.path.isfile(img_path):
            # Read image
            image = cv2.imread(img_path)
            if image is None:
                continue
            # Add Gaussian noise
            noisy_image = add_gaussian_noise(image)
            # add _1 at the last of image name
            img_name = img_name.split('.')[0] + '_1.' + img_name.split('.')[1]
            # Save the noisy image to the output folder
            output_img_path = os.path.join(output_path, img_name)
            cv2.imwrite(output_img_path, noisy_image)
base_dir = "/kaggle/working/data"  # Replace with the root path of your dataset
subsets = [ "test"]
categories = ["REAL", "FAKE"]

for subset in subsets:
    real_path = os.path.join(base_dir, subset, "REAL")
    fake_path = os.path.join(base_dir, subset, "FAKE")

    print(f"Processing {subset}/REAL...")
    process_and_save_images(real_path, fake_path)


Processing test/REAL...


100%|██████████| 10000/10000 [00:02<00:00, 4319.70it/s]


In [15]:
from wolta.visual_tools import cls_img_counter

cls_img_counter('/kaggle/working/data')

{'REAL': 50000, 'FAKE': 68394}

# YOLO

In [16]:
from ultralytics import YOLO

model = YOLO(model='yolo11x-cls.pt')
results = model.train(data='/kaggle/working/data', epochs=5, imgsz=32, verbose= True)


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-cls.pt to 'yolo11x-cls.pt'...


100%|██████████| 56.9M/56.9M [00:00<00:00, 220MB/s]


Ultralytics 8.3.48 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=classify, mode=train, model=yolo11x-cls.pt, data=/kaggle/working/data, epochs=5, time=None, patience=100, batch=16, imgsz=32, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_

2024-12-09 05:47:39,157	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-12-09 05:47:39,600	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


Overriding model.yaml nc=80 with nc=2

                   from  n    params  module                                       arguments                     
  0                  -1  1      2784  ultralytics.nn.modules.conv.Conv             [3, 96, 3, 2]                 
  1                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  2                  -1  2    389760  ultralytics.nn.modules.block.C3k2            [192, 384, 2, True, 0.25]     
  3                  -1  1   1327872  ultralytics.nn.modules.conv.Conv             [384, 384, 3, 2]              
  4                  -1  2   1553664  ultralytics.nn.modules.block.C3k2            [384, 768, 2, True, 0.25]     
  5                  -1  1   5309952  ultralytics.nn.modules.conv.Conv             [768, 768, 3, 2]              
  6                  -1  2   5022720  ultralytics.nn.modules.block.C3k2            [768, 768, 2, True]           
  7                  -1  1   5309952  ultralytics

100%|██████████| 5.35M/5.35M [00:00<00:00, 73.6MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /kaggle/working/data/train... 66000 images, 0 corrupt: 100%|██████████| 66000/66000 [00:31<00:00, 2092.59it/s]


[34m[1mtrain: [0mNew cache created: /kaggle/working/data/train.cache


  self.pid = os.fork()
[34m[1mval: [0mScanning /kaggle/working/data/val... 22000 images, 0 corrupt: 100%|██████████| 22000/22000 [00:09<00:00, 2216.28it/s]


[34m[1mval: [0mNew cache created: /kaggle/working/data/val.cache
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 82 weight(decay=0.0), 83 weight(decay=0.0005), 83 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 32 train, 32 val
Using 2 dataloader workers
Logging results to [1mruns/classify/train[0m
Starting training for 5 epochs...

      Epoch    GPU_mem       loss  Instances       Size


        1/5     0.803G      1.007         16         32:   0%|          | 2/4125 [00:00<25:56,  2.65it/s]

Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...


        1/5     0.803G     0.9091         16         32:   0%|          | 8/4125 [00:01<07:44,  8.86it/s]
100%|██████████| 755k/755k [00:00<00:00, 15.9MB/s]
        1/5     0.816G     0.5159         16         32: 100%|██████████| 4125/4125 [04:51<00:00, 14.15it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 688/688 [00:13<00:00, 52.02it/s]

                   all      0.848          1






      Epoch    GPU_mem       loss  Instances       Size


        2/5      0.81G     0.3858         16         32: 100%|██████████| 4125/4125 [04:18<00:00, 15.98it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 688/688 [00:13<00:00, 52.68it/s]

                   all      0.852          1






      Epoch    GPU_mem       loss  Instances       Size


        3/5     0.814G      0.344         16         32: 100%|██████████| 4125/4125 [04:06<00:00, 16.74it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 688/688 [00:13<00:00, 51.74it/s]

                   all      0.887          1






      Epoch    GPU_mem       loss  Instances       Size


        4/5      0.81G      0.307         16         32: 100%|██████████| 4125/4125 [04:02<00:00, 17.03it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 688/688 [00:12<00:00, 53.17it/s]

                   all       0.91          1






      Epoch    GPU_mem       loss  Instances       Size


        5/5     0.805G     0.2632         16         32: 100%|██████████| 4125/4125 [04:00<00:00, 17.17it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 688/688 [00:13<00:00, 52.72it/s]


                   all      0.912          1

5 epochs completed in 0.376 hours.
Optimizer stripped from runs/classify/train/weights/last.pt, 57.0MB
Optimizer stripped from runs/classify/train/weights/best.pt, 57.0MB

Validating runs/classify/train/weights/best.pt...
Ultralytics 8.3.48 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
YOLO11x-cls summary (fused): 227 layers, 28,334,978 parameters, 0 gradients, 110.3 GFLOPs
[34m[1mtrain:[0m /kaggle/working/data/train... found 66000 images in 2 classes ✅ 
[34m[1mval:[0m /kaggle/working/data/val... found 22000 images in 2 classes ✅ 
[34m[1mtest:[0m /kaggle/working/data/test... found 30394 images in 2 classes ✅ 


               classes   top1_acc   top5_acc: 100%|██████████| 688/688 [00:10<00:00, 68.74it/s]


                   all      0.912          1
Speed: 0.0ms preprocess, 0.4ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns/classify/train[0m


In [17]:
test_results = model.val(data='/kaggle/working/data', imgsz=32, split="test")

Ultralytics 8.3.48 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
YOLO11x-cls summary (fused): 227 layers, 28,334,978 parameters, 0 gradients, 110.3 GFLOPs
[34m[1mtrain:[0m /kaggle/working/data/train... found 66000 images in 2 classes ✅ 
[34m[1mval:[0m /kaggle/working/data/val... found 22000 images in 2 classes ✅ 
[34m[1mtest:[0m /kaggle/working/data/test... found 30394 images in 2 classes ✅ 


[34m[1mtest: [0mScanning /kaggle/working/data/test... 30394 images, 0 corrupt: 100%|██████████| 30394/30394 [00:13<00:00, 2233.19it/s]


[34m[1mtest: [0mNew cache created: /kaggle/working/data/test.cache


  self.pid = os.fork()
               classes   top1_acc   top5_acc: 100%|██████████| 1900/1900 [00:21<00:00, 86.96it/s]


                   all      0.797          1
Speed: 0.0ms preprocess, 0.7ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns/classify/train2[0m


  self.pid = os.fork()


In [18]:
# Save the trained model
model.save('trained_yolo_model.pt')
shutil.rmtree('/kaggle/working/data')
shutil.rmtree('/kaggle/working/raw')