In [1]:
import pandas as pd
import numpy as np
from skimage import io
from collections import OrderedDict, namedtuple
from torch.optim import lr_scheduler
from glob import glob  # find all pathnames matching certain patterns
from sklearn.model_selection import GroupKFold
import joblib
import torch
import torch.nn as nn
import os
import random
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2  # for converting image to tensor
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler  # return indices in sequence/random order
import cv2
from sklearn.metrics import *
from tqdm.notebook import tqdm
import transformers
from transformers import AdamW, get_linear_schedule_with_warmup, get_constant_schedule
import warnings
warnings.filterwarnings("ignore")

SEED = 42


def seed_everything(seed):
    """Seed every RNG the notebook uses so that runs are reproducible.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Read at interpreter start-up, so this only influences interpreters
    # launched after this point, not the hashing of the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different kernels from run
    # to run, which defeats the deterministic setting above.
    torch.backends.cudnn.benchmark = False


seed_everything(SEED)



In [2]:
!pip install efficientnet_pytorch
# import efficientnet_pytorch
from efficientnet_pytorch import EfficientNet

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.6.3.tar.gz (16 kB)
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.6.3-py3-none-any.whl size=12419 sha256=dfe42f3dc885726c5269864375bb659a547aa3b6d8f63aceab4cf598ec4d533f
  Stored in directory: /root/.cache/pip/wheels/90/6b/0c/f0ad36d00310e65390b0d4c9218ae6250ac579c92540c9097a
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.6.3


In [3]:
# def get_net0():
#     net = EfficientNet.from_pretrained('efficientnet-b0')
#     net._fc = nn.Linear(in_features=1280, out_features=4, bias=True)
#     return net

# mx0 = get_net0()

In [4]:
def get_net2(model_name='efficientnet-b2', num_classes=4):
    """Build a pretrained EfficientNet with a fresh classification head.

    Args:
        model_name: Name passed to ``EfficientNet.from_pretrained``.
        num_classes: Number of outputs of the replacement ``_fc`` layer.

    Returns:
        The network with its ``_fc`` layer replaced by a new ``nn.Linear``.
    """
    net = EfficientNet.from_pretrained(model_name)
    # Take the input width from the head being replaced instead of hard-coding
    # it (1408 for efficientnet-b2), so other variants work unchanged.
    net._fc = nn.Linear(in_features=net._fc.in_features, out_features=num_classes, bias=True)
    return net

mx2 = get_net2()

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth" to /root/.cache/torch/checkpoints/efficientnet-b2-8bb594d6.pth


HBox(children=(FloatProgress(value=0.0, max=36804509.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b2


In [5]:
# Root folder of the competition data; the dataset classes in this notebook read
# images from "<root>/<kind>/<image_name>" and "<root>/Test/<image_name>".
DATA_ROOT_PATH = "../input/alaska2-image-steganalysis"

In [6]:
# Build the (kind, image_name, label) table that the validation predictions are
# made for; `label` is the position of `kind` in the list below (Cover == 0).
# %%time
# File names are listed once from the Cover folder and reused for every kind.
# NOTE(review): this assumes each stego folder contains files with the same
# names as Cover -- confirm against the dataset layout.
# The listing is sorted because glob order is filesystem-dependent; without it
# the seeded sample below would not pick the same rows on every run.
cover_names = sorted(
    os.path.basename(path) for path in glob(f"{DATA_ROOT_PATH}/Cover/*.jpg")
)
data = pd.DataFrame([
    {"kind": kind, "image_name": image_name, "label": label}
    for label, kind in enumerate(["Cover", "JMiPOD", "JUNIWARD", "UERD"])
    for image_name in cover_names
]).sample(60000, random_state=SEED)
data.shape

(60000, 3)

In [7]:
def get_valid_transforms():
    """Return the albumentations pipeline applied to validation images.

    Steps, in order: resize to 512x512, horizontal flip and vertical flip
    (each applied with probability 0.5), then conversion with ToTensorV2.
    """
    steps = [
        A.Resize(height=512, width=512, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)

In [8]:

class DatasetEnsembleRetriever(Dataset):
    """Dataset yielding ``(image_name, image)`` for images stored per kind.

    Each image is read from ``{DATA_ROOT_PATH}/{kind}/{name}``, converted from
    BGR to RGB, cast to float32 and divided by 255 before the optional
    transforms are applied.

    Args:
        image_names: Sequence of file names, indexable by position.
        image_kinds: Sequence of sub-folder names, aligned with ``image_names``.
        transforms: Optional callable invoked as ``transforms(image=...)`` that
            returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, image_names, image_kinds, transforms=None):
        super().__init__()
        self.image_names = image_names
        self.image_kinds = image_kinds
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Load, normalise and (optionally) transform the image at ``index``.

        Raises:
            FileNotFoundError: If OpenCV could not read the image file.
        """
        image_name = self.image_names[index]
        image_kind = self.image_kinds[index]
        image_path = f'{DATA_ROOT_PATH}/{image_kind}/{image_name}'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the path instead of with an opaque cvtColor error.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        if self.transforms:
            image = self.transforms(image=image)['image']

        return image_name, image

    def __len__(self) -> int:
        # len() works for NumPy arrays as well as plain lists / Series.
        return len(self.image_names)

    def get_names(self):
        """Return the image names in dataset order."""
        return self.image_names


In [9]:
# Wrap the sampled table in a Dataset; names and kinds are passed as aligned
# NumPy arrays taken from the same rows of `data`.
dataset = DatasetEnsembleRetriever(
    data["image_name"].values,
    data["kind"].values,
    transforms=get_valid_transforms(),
)

# shuffle=False and drop_last=False: every row is visited once, in the row
# order of `data`, so predictions can be matched back to it by position.
data_loader = DataLoader(dataset, batch_size=8, shuffle=False, num_workers=4, drop_last=False)

In [10]:
# Run on the first CUDA device. nn.Module.to() moves the module in place and
# returns it, so `model2` and `mx2` refer to the same object.
device = torch.device('cuda', 0)
model2 = mx2.to(device)
# model0 = mx0.to(device)

In [11]:
# Load the trained checkpoints whose weights are swapped into the model below.

####################################    (Efficientnet b2)     #########################################
# Alex
# map_location="cpu" keeps the checkpoint contents in host memory regardless of
# the device they were saved from; load_state_dict() copies the weights into
# the model's own (GPU) parameters when they are applied.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source.
cp1 = torch.load("../input/alaska2-public-baseline/best-checkpoint-023epoch.bin", map_location="cpu")
cp2 = torch.load("../input/alaska2-public-baseline/best-checkpoint-033epoch.bin", map_location="cpu")

# # Sid

# cp3 = torch.load("../input/sid-epochs88/epoch_0_val_loss_6.07_auc_0.867.pth")
# cp4 = torch.load("../input/sid-epochs88/epoch_1_val_loss_6.12_auc_0.864.pth")

# # ####################################    (Efficientnet b0)    ##########################################

# cp5 = torch.load("../input/alaska2-efficientnet-trained-model-weights/efficientnetb0_lb0.867.pth")
# cp6 = torch.load("../input/alaska2-efficientnet-trained-model-weights/efficientnetb0_lb0.870.pth")
# cp7 = torch.load("../input/alaska2-efficientnet-trained-model-weights/efficientnetb0_lb0.871.pth")
# cp8 = torch.load("../input/alaska2-efficientnet-trained-model-weights/efficientnetb0_lb0.876.pth")
# cp9 = torch.load("../input/alaska2-efficientnet-trained-model-weights/efficientnetb0_lb0.881.pth")

In [12]:
# Collects one list of per-image scores for every (pass, checkpoint) run of the
# validation loop. Re-run this cell before re-running that loop, otherwise the
# new runs are appended after the old ones.
results = []

In [13]:
# Alex
# Every pass runs both checkpoints over the whole loader and appends one list
# of scores per (pass, checkpoint) to `results`. The loader's transforms flip
# at random, so repeated passes see differently flipped images.
for _ in range(5):
    for cp in [cp1, cp2]:
        model2.load_state_dict(cp['model_state_dict'])
        # load_state_dict() does not change the train/eval flag; without
        # eval() BatchNorm would use per-batch statistics and dropout would
        # stay active during inference.
        model2.eval()
        result = []
        with torch.no_grad():  # inference only: no autograd graph needed
            for _, images in data_loader:
                y_pred = model2(images.to(device))
                # Score = 1 - softmax probability of class 0 (label 0 is "Cover").
                y_pred = 1 - nn.functional.softmax(y_pred, dim=1).cpu().numpy()[:, 0]
                result.extend(y_pred)
        results.append(result)

    print("=" * 25)



In [14]:
# Transpose so that each row is an image and each column one (pass, checkpoint)
# run, then attach the label column; rows follow the order of `data` because
# the loader was not shuffled.
valid_data = pd.DataFrame(results).transpose().assign(label=data["label"].values)
valid_data.to_csv("ensemble_req.csv")

# Test Data

In [15]:
def test_transforms():
    """Return the albumentations pipeline applied to test images.

    Test images go through exactly the same steps as validation images
    (512x512 resize, random horizontal/vertical flips, ToTensorV2), so the
    pipeline is built by get_valid_transforms() rather than duplicated here.
    """
    return get_valid_transforms()

In [16]:
def get_net():
    """Return a pretrained EfficientNet-B2 with a 4-output head.

    This is the same network get_net2() builds, so it delegates to it instead
    of repeating the construction.
    """
    return get_net2()

# NOTE(review): `mx` is not used by any cell visible in this notebook (the test
# predictions run on `model2`); confirm whether this second load is needed.
mx = get_net()

Loaded pretrained weights for efficientnet-b2


In [17]:
class DatasetSubmissionRetriever(Dataset):
    """Dataset yielding ``(image_name, image)`` for the images in the Test folder.

    Each image is read from ``{DATA_ROOT_PATH}/Test/{name}``, converted from
    BGR to RGB, cast to float32 and divided by 255 before the optional
    transforms are applied.

    Args:
        image_names: Sequence of file names, indexable by position.
        transforms: Optional callable invoked as ``transforms(image=...)`` that
            returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, image_names, transforms=None):
        super().__init__()
        self.image_names = image_names
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Load, normalise and (optionally) transform the image at ``index``.

        Raises:
            FileNotFoundError: If OpenCV could not read the image file.
        """
        image_name = self.image_names[index]
        image_path = f'{DATA_ROOT_PATH}/Test/{image_name}'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the path instead of with an opaque cvtColor error.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        if self.transforms:
            image = self.transforms(image=image)['image']

        return image_name, image

    def __len__(self) -> int:
        # len() works for NumPy arrays as well as plain lists / Series.
        return len(self.image_names)

    def get_names(self):
        """Return the image names in dataset order."""
        return self.image_names

In [18]:
# NOTE: `dataset` and `data_loader` are rebound here; from this cell on they
# refer to the test images, not the validation sample.
# File names are listed from the same root the dataset reads from, and sorted
# so that the row order of the saved predictions is stable between runs.
dataset = DatasetSubmissionRetriever(
    image_names=np.array(sorted(
        os.path.basename(path) for path in glob(f'{DATA_ROOT_PATH}/Test/*.jpg')
    )),
    transforms=test_transforms(),
)


data_loader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=False,  # keep loader order aligned with dataset.get_names()
    num_workers=4,
    drop_last=False,
)

In [19]:
# Test
# Every pass runs both checkpoints over the whole loader and appends one list
# of scores per (pass, checkpoint) to `test_results`. The loader's transforms
# flip at random, so repeated passes see differently flipped images.
test_results = []
for _ in range(5):
    for cp in [cp1, cp2]:
        model2.load_state_dict(cp['model_state_dict'])
        # load_state_dict() does not change the train/eval flag; without
        # eval() BatchNorm would use per-batch statistics and dropout would
        # stay active during inference.
        model2.eval()
        result = []
        with torch.no_grad():  # inference only: no autograd graph needed
            for _, images in data_loader:
                y_pred = model2(images.to(device))
                # Score = 1 - softmax probability of output class 0.
                y_pred = 1 - nn.functional.softmax(y_pred, dim=1).cpu().numpy()[:, 0]
                result.extend(y_pred)
        test_results.append(result)

    print("=" * 25)



In [20]:
# Transpose so that each row is a test image and each column one
# (pass, checkpoint) run, then attach the file names in dataset order.
test_data = pd.DataFrame(test_results).transpose().assign(Id=list(dataset.get_names()))
test_data.to_csv("test_req.csv")