# Generation of the results presented in "Class-aware data augmentation by GAN specialization to improve endoscopic images classification"

## Data

The following cells download the data. Please take a look at the [README](https://github.com/PlathC/GanAugmentedCNN) to understand how to apply the naming modifications to Hyper-Kvasir (available [here](https://datasets.simula.no/hyper-kvasir/)) so that this script works.

In [None]:
import os

# Google Drive file id of the dataset archive (empty placeholder here;
# presumably to be filled in before running the download cell).
GOOGLE_DRIVE_DATASET_ID = ''
# Local file name the downloaded archive is saved under.
GOOGLE_DRIVE_DATASET_NAME = 'v2-training-set-full.zip'
# Root folder for training outputs and zipped datasets.
OUTPUT_DIR = '../results/'
# Folder holding the generator checkpoints (*.pkl) referenced further down.
CHECKPOINT_DIR = '../checkpoints/'

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import os
import cv2
from skimage import io
import matplotlib.pyplot as plt

def directory_find(atom, root='.'):
    """Return the path of the first directory named `atom` found under `root`.

    The tree is traversed with `os.walk`; the first parent directory that
    lists `atom` among its sub-directories wins. An empty string is returned
    when no such directory exists.
    """
    matches = (
        os.path.join(parent, atom)
        for parent, subdirs, _ in os.walk(root)
        if atom in subdirs
    )
    return next(matches, '')

# Import the PyDrive wrapper and the Google API helper libraries.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from googleapiclient.http import MediaIoBaseDownload
import io

# Authenticate and create the PyDrive client.
# Authenticate the current user through the google.colab `auth` helper.
auth.authenticate_user()
gauth = GoogleAuth()
# Hand the application-default Google credentials to PyDrive.
gauth.credentials = GoogleCredentials.get_application_default()
# Global PyDrive client; `downloadextract` reads this `drive` name.
drive = GoogleDrive(gauth)

def downloadextract(id, file):
    """Download a Google Drive file to `file`, then extract it as a zip archive.

    The archive is extracted into the current working directory ('.').
    Relies on the module-level PyDrive client `drive`.

    Args:
        id: Google Drive file id of the archive. The name shadows the
            builtin but is kept so existing callers keep working.
        file: Local path the archive is written to.

    Raises:
        Any exception raised by the Drive API or by `zipfile`. When the
        download itself fails, the partially written file is removed first.
    """
    import zipfile

    remote = drive.CreateFile({'id': id})
    remote.FetchMetadata(fetch_all=True)
    # Use the id reported by the fetched metadata for the media request.
    remote_id = remote.metadata.get('id')
    request = drive.auth.service.files().get_media(fileId=remote_id)

    try:
        # The builtin `open` avoids depending on the name `io`, which this
        # notebook binds to more than one module; `with` closes the handle
        # even when a chunk download raises.
        with open(file, 'wb') as local_file:
            downloader = MediaIoBaseDownload(local_file, request, chunksize=2048*50240)
            done = False
            while not done:
                _, done = downloader.next_chunk()
    except BaseException:
        # A truncated archive would otherwise look like a finished download
        # to callers that only check whether the file exists.
        if os.path.exists(file):
            os.remove(file)
        raise

    print('Starting extraction...')
    with zipfile.ZipFile(file) as zip_ref:
        zip_ref.extractall('.')
    print(f'Extraction of {file} done.')


# Download and extract the dataset archive only when it is not already
# present in the working directory.
if not os.path.exists(GOOGLE_DRIVE_DATASET_NAME):
    downloadextract(GOOGLE_DRIVE_DATASET_ID, GOOGLE_DRIVE_DATASET_NAME)

# Import the Colab helper under an alias: a plain
# `from google.colab import drive` would rebind the global `drive`
# (the PyDrive client that `downloadextract` reads) to the Colab module,
# breaking any later download in the same session.
from google.colab import drive as colab_drive
colab_drive.mount('/content/gdrive')

## Repository setup

In [None]:
# Install the `ninja` package into the notebook's Python environment.
%pip install ninja

In [None]:
# Clone the project repository into the current working directory.
!git clone https://github.com/PlathC/GanAugmentedCNN.git

In [None]:
# Work from inside the cloned repository; the relative paths used in the
# following cells (e.g. ../training-set-full) are resolved from here.
%cd /content/GanAugmentedCNN

In [None]:
# Fetch origin/main and hard-reset the checkout to it.
# NOTE: `reset --hard` discards any local modification of tracked files.
!git fetch origin main
!git reset --hard FETCH_HEAD

## Results generation

Feel free to adapt the dataset paths to your needs.

### Raw results

In [None]:
# Reload edited project modules automatically and render plots inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline

# Build the raw full Hyper-Kvasir dataset in ../fullhk_dataset_raw.
# --checkpoints receives 23 empty strings, one per class listed in
# --class_folders, and --generate_number is 0 (presumably: no synthetic
# image is generated -- confirm in generate_dataset.py).
%run generate_dataset.py --checkpoints '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' \
  --class_folders '{"cecum": ["../training-set-full/lower-gi-tract/lgi-anatomical-landmarks/cecum"], "ileum": ["../training-set-full/lower-gi-tract/lgi-anatomical-landmarks/ileum"], "retroflex-rectum": ["../training-set-full/lower-gi-tract/lgi-anatomical-landmarks/retroflex-rectum"], "hemorrhoids": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/hemorrhoids"], "polyps": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/polyps"], "ulcerative-colitis-grade-0-1": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-1/ulcerative-colitis-grade-0-1"], "ulcerative-colitis-grade-1": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-1/ulcerative-colitis-grade-1"], "ulcerative-colitis-grade-1-2": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/unused-uc/ulcerative-colitis-grade-1-2"], "ulcerative-colitis-grade-2": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-2/ulcerative-colitis-grade-2"], "ulcerative-colitis-grade-2-3": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-3/ulcerative-colitis-grade-2-3"], "ulcerative-colitis-grade-3": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-3/ulcerative-colitis-grade-3"], "bbps-0-1": ["../training-set-full/lower-gi-tract/lgi-quality-of-mucosal-views/bbps-0-1"], "bbps-2-3": ["../training-set-full/lower-gi-tract/lgi-quality-of-mucosal-views/bbps-2-3"], "impacted-stool": ["../training-set-full/lower-gi-tract/lgi-quality-of-mucosal-views/impacted-stool"], "dyed-lifted-polyps": ["../training-set-full/lower-gi-tract/lgi-therapeutic-interventions/dyed-lifted-polyps"], "dyed-resection-margins": ["../training-set-full/lower-gi-tract/lgi-therapeutic-interventions/dyed-resection-margins"], "pylorus": ["../training-set-full/upper-gi-tract/ugi-anatomical-landmarks/pylorus"], "retroflex-stomach": ["../training-set-full/upper-gi-tract/ugi-anatomical-landmarks/retroflex-stomach"], 
"z-line": ["../training-set-full/upper-gi-tract/ugi-anatomical-landmarks/z-line"], "barretts": ["../training-set-full/upper-gi-tract/ugi-pathological-findings/barretts"], "barretts-short-segment": ["../training-set-full/upper-gi-tract/ugi-pathological-findings/barretts-short-segment"], "esophagitis-a": ["../training-set-full/upper-gi-tract/ugi-pathological-findings/esophagitis-a"], "esophagitis-b-d": ["../training-set-full/upper-gi-tract/ugi-pathological-findings/esophagitis-b-d"] }'\
  --generate_number 0 --output_dir ../fullhk_dataset_raw  \
  --split_file ../training-set-full/splits/hk_2_fold_split_with_paths.csv

In [None]:
# Reload edited project modules automatically and render plots inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline

# One checkpoint per class of the two-class dataset built below.
non_pathological_checkpoint = os.path.join(CHECKPOINT_DIR, '2.sg2ada_non_pathological.pkl')
pathological_checkpoint = os.path.join(CHECKPOINT_DIR, '3.sg2ada_pathological.pkl')
# Build the raw two-class dataset in ../custom_uc_raw; `$name` is expanded
# by IPython to the value of the Python variable. --generate_number is 0
# (presumably: no synthetic image is generated -- confirm in
# generate_dataset.py).
%run generate_dataset.py --checkpoints $non_pathological_checkpoint $pathological_checkpoint \
  --class_folders '{"non_pathological": ["../training-set-full/lower-gi-tract/lgi-quality-of-mucosal-views/bbps-2-3", "../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-1"], "pathological": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-2", "../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-3"]}'\
  --generate_number 0 --output_dir ../custom_uc_raw  \
  --split_file ../training-set-full/splits/hk_2_fold_split_with_paths.csv

No synthetic data, no pretraining

In [None]:
# ResNet-50 on ../fullhk_dataset_raw; no --pretrained flag is passed.
output = os.path.join(OUTPUT_DIR, 'ResNet50/FHKRawNoPretrain')
%run main.py --batch_size 128  --dataset ../fullhk_dataset_raw --da True --output_dir $output --architecture resnet50

In [None]:
# ResNet-50 on ../custom_uc_raw; no --pretrained flag is passed.
output = os.path.join(OUTPUT_DIR, 'ResNet50/CUCRawNoPretrain')
%run main.py --batch_size 128  --dataset ../custom_uc_raw --da True --output_dir $output --architecture resnet50

In [None]:
# DenseNet-161 on ../fullhk_dataset_raw; no --pretrained flag is passed.
output = os.path.join(OUTPUT_DIR, 'DenseNet161/FHKRawNoPretrain')
%run main.py --batch_size 64  --dataset ../fullhk_dataset_raw --da True --output_dir $output --architecture densenet161

In [None]:
# DenseNet-161 on ../custom_uc_raw; no --pretrained flag is passed.
output = os.path.join(OUTPUT_DIR, 'DenseNet161/CUCRawNoPretrain')
%run main.py --batch_size 64  --dataset ../custom_uc_raw --da True --output_dir $output --architecture densenet161

No synthetic data, ImageNet pretraining

In [None]:
# ResNet-50 on ../fullhk_dataset_raw, run with --pretrained ImageNet.
output = os.path.join(OUTPUT_DIR, 'ResNet50/FHKRawImageNet')
%run main.py --batch_size 128  --dataset ../fullhk_dataset_raw --da True --output_dir $output --architecture resnet50 --pretrained ImageNet

In [None]:
# ResNet-50 on ../custom_uc_raw, run with --pretrained ImageNet.
output = os.path.join(OUTPUT_DIR, 'ResNet50/CUCRawImageNet')
%run main.py --batch_size 128  --dataset ../custom_uc_raw --da True --output_dir $output --architecture resnet50 --pretrained ImageNet

In [None]:
# DenseNet-161 on ../fullhk_dataset_raw, run with --pretrained ImageNet.
output = os.path.join(OUTPUT_DIR, 'DenseNet161/FHKRawImageNet')
%run main.py --batch_size 64  --dataset ../fullhk_dataset_raw --da True --output_dir $output --architecture densenet161 --pretrained ImageNet

In [None]:
# DenseNet-161 on ../custom_uc_raw, run with --pretrained ImageNet.
output = os.path.join(OUTPUT_DIR, 'DenseNet161/CUCRawImageNet')
%run main.py --batch_size 64  --dataset ../custom_uc_raw --da True --output_dir $output --architecture densenet161 --pretrained ImageNet

### Augmented


Create and save augmented datasets

In [None]:
import os

# Folder under OUTPUT_DIR that receives the zipped augmented dataset.
out_dir = os.path.join(OUTPUT_DIR, 'datasets')
# Selects which class layout / checkpoint set is used below.
dataset_name = 'FHK' # CUC or FHK
# Value forwarded to generate_dataset.py as --generate_number.
generation_nb = 700

if dataset_name == 'FHK':
    # Full Hyper-Kvasir layout: 23 classes, each mapped to its source folder.
    class_folders = {
        "cecum": ["../training-set-full/lower-gi-tract/lgi-anatomical-landmarks/cecum"],
        "ileum": ["../training-set-full/lower-gi-tract/lgi-anatomical-landmarks/ileum"],
        "retroflex-rectum": ["../training-set-full/lower-gi-tract/lgi-anatomical-landmarks/retroflex-rectum"],
        "hemorrhoids": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/hemorrhoids"],
        "polyps": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/polyps"],
        "ulcerative-colitis-grade-0-1": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-1/ulcerative-colitis-grade-0-1"],
        "ulcerative-colitis-grade-1": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-1/ulcerative-colitis-grade-1"],
        "ulcerative-colitis-grade-1-2": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/unused-uc/ulcerative-colitis-grade-1-2"],
        "ulcerative-colitis-grade-2": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-2/ulcerative-colitis-grade-2"],
        "ulcerative-colitis-grade-2-3": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-3/ulcerative-colitis-grade-2-3"],
        "ulcerative-colitis-grade-3": ["../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-3/ulcerative-colitis-grade-3"],
        "bbps-0-1": ["../training-set-full/lower-gi-tract/lgi-quality-of-mucosal-views/bbps-0-1"],
        "bbps-2-3": ["../training-set-full/lower-gi-tract/lgi-quality-of-mucosal-views/bbps-2-3"],
        "impacted-stool": ["../training-set-full/lower-gi-tract/lgi-quality-of-mucosal-views/impacted-stool"],
        "dyed-lifted-polyps": ["../training-set-full/lower-gi-tract/lgi-therapeutic-interventions/dyed-lifted-polyps"],
        "dyed-resection-margins": ["../training-set-full/lower-gi-tract/lgi-therapeutic-interventions/dyed-resection-margins"],
        "pylorus": ["../training-set-full/upper-gi-tract/ugi-anatomical-landmarks/pylorus"],
        "retroflex-stomach": ["../training-set-full/upper-gi-tract/ugi-anatomical-landmarks/retroflex-stomach"],
        "z-line": ["../training-set-full/upper-gi-tract/ugi-anatomical-landmarks/z-line"],
        "barretts": ["../training-set-full/upper-gi-tract/ugi-pathological-findings/barretts"],
        "barretts-short-segment": ["../training-set-full/upper-gi-tract/ugi-pathological-findings/barretts-short-segment"],
        "esophagitis-a": ["../training-set-full/upper-gi-tract/ugi-pathological-findings/esophagitis-a"],
        "esophagitis-b-d": ["../training-set-full/upper-gi-tract/ugi-pathological-findings/esophagitis-b-d"],
    }
    # One entry per class, in the same order as `class_folders`. '_' is
    # passed through unchanged (presumably "no generator for this class" --
    # confirm in generate_dataset.py); every other entry is a file name
    # resolved against CHECKPOINT_DIR.
    checkpoints = [
        entry if entry == '_' else os.path.join(CHECKPOINT_DIR, entry)
        for entry in (
            '_',                                  # cecum
            '2.sg2ada_ileum.pkl',                 # ileum
            '_',                                  # retroflex-rectum
            '2.sg2ada_hemorrhoids.pkl',           # hemorrhoids
            '_',                                  # polyps
            '_',                                  # ulcerative-colitis-grade-0-1
            '2.sg2ada_uc1.pkl',                   # ulcerative-colitis-grade-1
            '_',                                  # ulcerative-colitis-grade-1-2
            '_',                                  # ulcerative-colitis-grade-2
            '_',                                  # ulcerative-colitis-grade-2-3
            '2.sg2ada_uc3.pkl',                   # ulcerative-colitis-grade-3
            '_',                                  # bbps-0-1
            '_',                                  # bbps-2-3
            '2.sg2ada_impacted-stool.pkl',        # impacted-stool
            '_',                                  # dyed-lifted-polyps
            '_',                                  # dyed-resection-margins
            '_',                                  # pylorus
            '_',                                  # retroflex-stomach
            '_',                                  # z-line
            '2.sg2ada_barrets.pkl',               # barretts
            '2.sg2ada_barrets-short-segment.pkl', # barretts-short-segment
            '2.sg2ada_esophagitis-a.pkl',         # esophagitis-a
            '2.sg2ada_esophagitis-b-d.pkl',       # esophagitis-b-d
        )
    ]
else:
    # Two-class layout: each class merges two source folders.
    class_folders = {
        "non_pathological": [
            "../training-set-full/lower-gi-tract/lgi-quality-of-mucosal-views/bbps-2-3",
            "../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-1",
        ],
        "pathological": [
            "../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-2",
            "../training-set-full/lower-gi-tract/lgi-pathological-findings/uc-grade-3",
        ],
    }
    checkpoints = [
        os.path.join(CHECKPOINT_DIR, '2.sg2ada_non_pathological.pkl'),
        os.path.join(CHECKPOINT_DIR, '3.sg2ada_pathological.pkl'),
    ]

import json

split_file = '../training-set-full/splits/hk_2_fold_split_with_paths.csv'
# Folder the generated dataset is written to, e.g. ../FHK_700.
dataset_dir = os.path.join('../', f'{dataset_name}_{generation_nb}')

# Encode twice on purpose: the inner dump is the JSON payload, the outer one
# wraps it in double quotes (escaping the inner ones) so that it is passed
# to generate_dataset.py as a single command-line argument.
class_folders = json.dumps(json.dumps(class_folders))
# The former `class_names = ' '.join(class_names)` statement was dropped:
# `class_names` is never defined in this notebook and the joined value was
# not used by the command below.
checkpoints = ' '.join(checkpoints)

# Programmatic equivalent of a `%run generate_dataset.py ...` line, used so
# the arguments can be assembled from Python values.
get_ipython().run_line_magic('run', f'generate_dataset.py --checkpoints {checkpoints} '
  f'--class_folders {class_folders} '
  f'--generate_number {generation_nb} --output_dir {dataset_dir} '
  f'--split_file {split_file}')

import os
import zipfile
    
def zipdir(path, ziph):
    """Add every file found under `path` to the open zip handle `ziph`.

    Archive names are taken relative to the parent of `path`, so the
    archive contains the top-level folder itself (e.g. 'data/a.txt').
    """
    parent_dir = os.path.join(path, '..')
    for current_dir, _, filenames in os.walk(path):
        for filename in filenames:
            full_path = os.path.join(current_dir, filename)
            ziph.write(full_path, os.path.relpath(full_path, parent_dir))
      
from shutil import copyfile, rmtree

zip_name = f'{dataset_name}_{generation_nb}.zip'
local_zip_path = os.path.join('..', zip_name)
# The context manager finalises and closes the archive even if zipping fails,
# instead of leaving an open handle behind.
with zipfile.ZipFile(local_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipdir(dataset_dir, zipf)

# `copyfile` does not create missing parent folders, so make sure the
# destination exists before copying the archive there.
os.makedirs(out_dir, exist_ok=True)
copyfile(local_zip_path, os.path.join(out_dir, zip_name))

# Only the copy under `out_dir` is kept: remove the generated folder and the
# local archive.
rmtree(dataset_dir)
os.remove(local_zip_path)

Load dataset

In [None]:
# Which stored dataset to load: <dataset_name>_<generation_nb>.zip under
# OUTPUT_DIR/datasets.
generation_nb = 700
dataset_name = 'FHK' # or CUC
out_dir = os.path.join(OUTPUT_DIR, 'datasets')

out_folder = f'{dataset_name}_{generation_nb}'
zip_name = f'{out_folder}.zip'
zip_path = os.path.join(out_dir, zip_name)

# Fail early when the archive has not been generated yet.
if not os.path.exists(zip_path):
    raise FileNotFoundError(f'{zip_path} does not exist.')

from shutil import copyfile
import zipfile

# Copy the archive next to the repository, then unpack it into '..'.
local_zip_path = os.path.join('../', zip_name)
copyfile(zip_path, local_zip_path)

print('Starting extraction...')
with zipfile.ZipFile(local_zip_path) as zip_ref:
    zip_ref.extractall('..')
print(f'Extraction of {local_zip_path} done.')

In [None]:
# Label used in the output folder names of the training cells (e.g. 'FHK700').
# NOTE: re-running this cell alone appends the number again; re-run the
# "Load dataset" cell first to reset `dataset_name`.
dataset_name = dataset_name + str(generation_nb)
# The archive is extracted into the parent directory ('..'), so the dataset
# lives in ../<out_folder>, not in the current working directory.
dataset_path = os.path.join('..', out_folder)

In [None]:
# ResNet-50 on the loaded dataset; no --pretrained flag is passed.
output = os.path.join(OUTPUT_DIR, f'ResNet50/{dataset_name}NoPretrain')
%run main.py --batch_size 128 --dataset $dataset_path --da True --output_dir $output --architecture resnet50 

In [None]:
# DenseNet-161 on the loaded dataset; no --pretrained flag is passed.
output = os.path.join(OUTPUT_DIR, f'DenseNet161/{dataset_name}NoPretrain')
%run main.py --batch_size 64  --dataset $dataset_path --da True --output_dir $output --architecture densenet161

In [None]:
# ResNet-50 on the loaded dataset, run with --pretrained ImageNet.
output = os.path.join(OUTPUT_DIR, f'ResNet50/{dataset_name}ImageNet')
%run main.py --batch_size 128 --dataset $dataset_path --da True --output_dir $output --architecture resnet50 --pretrained ImageNet

In [None]:
# DenseNet-161 on the loaded dataset, run with --pretrained ImageNet.
output = os.path.join(OUTPUT_DIR, f'DenseNet161/{dataset_name}ImageNet')
%run main.py --batch_size 64  --dataset $dataset_path --da True --output_dir $output --architecture densenet161 --pretrained ImageNet 