Install Kaggle Library

In [0]:
# Colab library to upload files to notebook
from google.colab import files

# Install Kaggle library
!pip install -q kaggle
!pip uninstall -y kaggle
!pip install --upgrade pip
!pip install kaggle==1.5.6

Uninstalling kaggle-1.5.6:
  Successfully uninstalled kaggle-1.5.6
Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/43/84/23ed6a1796480a6f1a2d38f2802901d078266bda38388954d01d3f2e821d/pip-20.1.1-py2.py3-none-any.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 2.6MB/s 
[?25hInstalling collected packages: pip
  Found existing installation: pip 19.3.1
    Uninstalling pip-19.3.1:
      Successfully uninstalled pip-19.3.1
Successfully installed pip-20.1.1
Collecting kaggle==1.5.6
  Downloading kaggle-1.5.6.tar.gz (58 kB)
[K     |████████████████████████████████| 58 kB 1.7 MB/s 
Building wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.6-py3-none-any.whl size=72859 sha256=4fa450d4a24cf4b0352102c6f40174bbd99b4f6658e17a7abc1344b54c39ef3e
  Stored in directory: /root/.cache/pip/wheels/01/3e/ff/77407ebac3ef71a79b9166a8382aecf88415a0bcbe3c095a01
Successfully bu

In [0]:
# Upload kaggle API key file
uploaded = files.upload()

! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
#! kaggle datasets list

Saving kaggle.json to kaggle.json


Download and Unzip the data

In [0]:
!kaggle competitions download -c alaska2-image-steganalysis

Downloading alaska2-image-steganalysis.zip to /content
100% 30.0G/30.0G [12:56<00:00, 56.5MB/s]
100% 30.0G/30.0G [12:57<00:00, 41.4MB/s]


In [0]:
!unzip \*.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Cover/69426.jpg         
  inflating: Cover/69428.jpg         
  inflating: Cover/69429.jpg         
  inflating: Cover/69431.jpg         
  inflating: Cover/69432.jpg         
  inflating: Cover/69433.jpg         
  inflating: Cover/69434.jpg         
  inflating: Cover/69435.jpg         
  inflating: Cover/69436.jpg         
  inflating: Cover/69438.jpg         
  inflating: Cover/69439.jpg         
  inflating: Cover/69440.jpg         
  inflating: Cover/69441.jpg         
  inflating: Cover/69442.jpg         
  inflating: Cover/69443.jpg         
  inflating: Cover/69444.jpg         
  inflating: Cover/69446.jpg         
  inflating: Cover/69447.jpg         
  inflating: Cover/69448.jpg         
  inflating: Cover/69449.jpg         
  inflating: Cover/69450.jpg         
  inflating: Cover/69451.jpg         
  inflating: Cover/69452.jpg         
  inflating: Cover/69453.jpg         
  inflating: Cover/6945

Create the model

In [0]:
import os
import torch
import numpy as np
import torch.nn.functional as F

from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, AvgPool2d, Module, Softmax, BatchNorm2d, Dropout, AdaptiveAvgPool2d

def seed_everything(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

seed_everything()

class AlaskaNet(Module):
  def __init__(self, inputs, outputs, kernel_size=(3,3), dropout_rate = 0):
    super(AlaskaNet, self).__init__()

    self.lt1 = Sequential(
      Conv2d(inputs, outputs, kernel_size),
      BatchNorm2d(outputs),
      ReLU(),
      Dropout(dropout_rate)
    )

    self.lt2 = Sequential(
      self.lt1(inputs, outputs, kernel_size, dropout_rate),
      Conv2d(inputs, outputs, kernel_size),
      BatchNorm2d(outputs),
      Dropout(dropout_rate)
    )

    self.lt3_branch1 = Sequential(
      self.lt1(inputs, outputs, kernel_size, dropout_rate),
      Conv2d(inputs, outputs, kernel_size),
      BatchNorm2d(outputs),
      AvgPool2d(kernel_size, stride=2),
      Dropout(dropout_rate)
    )

    self.lt3branch2 = Sequential(
      Conv2d(inputs, outputs, kernel_size, stride=2),
      BatchNorm2d(outputs),
      Dropout(dropout_rate)
    )

    self.lt4 = Sequential(
      self.lt1(inputs, outputs, kernel_size, dropout_rate),
      Conv2d(inputs, outputs, kernel_size),
      BatchNorm2d(outputs),
      Dropout(dropout_rate),
      AdaptiveAvgPool2d(1)
    )

  def forward(self, x):
    x = self.lt1(x)
    x1 = self.lt1(x)

    x2_1 = self.lt2(x1)
    x2 = add(x1, x2_1)
    x2_2 = self.lt2(x2)
    x2 = add(x2, x2_2)
    x2_3 = self.lt2(x2)
    x2 = add(x2, x2_3)
    x2_4 = self.lt2(x2)
    x2 = add(x2, x2_4)
    x2_5 = self.lt2(x2)
    x2 = add(x2, x2_5)

    x3_1_1 = self.lt3_branch1(x2)
    x3_1_2 = self.lt3_branch2(x2)
    x3 = add(x3_1_1, x3_1_2)
    x3_2_1 = self.lt3_branch1(x3)
    x3_2_2 = self.lt3_branch2(x3)
    x3 = add(x3_2_1, x3_2_2)
    x3_3_1 = self.lt3_branch1(x3)
    x3_3_2 = self.lt3_branch2(x3)
    x3 = add(x3_3_1, x3_3_2)
    x3_4_1 = self.lt3_branch1(x3)
    x3_4_2 = self.lt3_branch2(x3)
    x3 = add(x3_4_1, x3_4_2)

    x4 = self.lt4(x3)

    x = Linear(x4)
    x = Softmax(x)


Getting the Quality Factor

In [4]:
! git clone https://github.com/dwgoon/jpegio
!pip install jpegio/.

import numpy as np
import jpegio as jpio

Cloning into 'jpegio'...
remote: Enumerating objects: 75, done.[K
remote: Counting objects: 100% (75/75), done.[K
remote: Compressing objects: 100% (64/64), done.[K
remote: Total 1229 (delta 33), reused 41 (delta 11), pack-reused 1154[K
Receiving objects: 100% (1229/1229), 231.39 MiB | 14.43 MiB/s, done.
Resolving deltas: 100% (608/608), done.
Processing ./jpegio
Building wheels for collected packages: jpegio
  Building wheel for jpegio (setup.py) ... [?25l[?25hdone
  Created wheel for jpegio: filename=jpegio-0.2.1-cp36-cp36m-linux_x86_64.whl size=920419 sha256=fc7bbfa1b39bfadcbcdf5b748b985fcb57ed3b3a5370cf4e9a598bb7f9d71504
  Stored in directory: /tmp/pip-ephem-wheel-cache-aacd80ld/wheels/a0/49/1c/0fdb3abe06f1084f60188eb6fb7378ca04ba273f39fa4fd5d4
Successfully built jpegio
Installing collected packages: jpegio
Successfully installed jpegio-0.2.1


In [0]:
def JPEGdecompressYCbCr(jpegStruct):
    
    nb_colors=len(jpegStruct.coef_arrays)
        
    [Col,Row] = np.meshgrid( range(8) , range(8) )
    T = 0.5 * np.cos(np.pi * (2*Col + 1) * Row / (2 * 8))
    T[0,:] = T[0,:] / np.sqrt(2)
    
    sz = np.array(jpegStruct.coef_arrays[0].shape)
    
    imDecompressYCbCr = np.zeros([sz[0], sz[1], nb_colors]);
    szDct = (sz/8).astype('int')
    
    
    
    for ColorChannel in range(nb_colors):
        tmpPixels = np.zeros(sz)
    
        DCTcoefs = jpegStruct.coef_arrays[ColorChannel];
        if ColorChannel==0:
            QM = jpegStruct.quant_tables[ColorChannel];
        else:
            QM = jpegStruct.quant_tables[1];
        
        for idxRow in range(szDct[0]):
            for idxCol in range(szDct[1]):
                D = DCTcoefs[idxRow*8:(idxRow+1)*8 , idxCol*8:(idxCol+1)*8]
                tmpPixels[idxRow*8:(idxRow+1)*8 , idxCol*8:(idxCol+1)*8] = np.dot( np.transpose(T) , np.dot( QM * D , T ) )
        imDecompressYCbCr[:,:,ColorChannel] = tmpPixels;
    return imDecompressYCbCr

Splitting Dataset

In [6]:
import os
import glob
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

col_names =  ['ImageFileName', 'Label', 'Stego', 'QF']
df  = pd.DataFrame(columns = col_names)

folder_names = ['Cover', 'JMiPOD', 'JUNIWARD', 'UERD']

for dir in folder_names:
  for file in glob.glob(dir+"/*.jpg"):
    
    jpegStruct = jpio.read(file)
    imDecompressYCbCr = JPEGdecompressYCbCr(jpegStruct)

    if (jpegStruct.quant_tables[0][0,0]==2):
        print('Quality Factor is 95')
        qf = 95
    elif (jpegStruct.quant_tables[0][0,0]==3):
        print('Quality Factor is 90')
        qf = 90
    elif (jpegStruct.quant_tables[0][0,0]==8):
        print('Quality Factor is 75')
        qf = 75
    
    print(file.replace('/',''))
    if dir == 'Cover':
      label = 0
    elif dir == 'JMiPOD':
      if qf == 95:
        label = 1
      elif qf == 90:
        label = 2
      elif qf == 75:
        label = 3
    elif dir == 'JUNIWARD':
      if qf == 95:
        label = 4
      elif qf == 90:
        label = 5
      elif qf == 75:
        label = 6
    elif dir == 'UERD':
      if qf == 95:
        label = 7
      elif qf == 90:
        label = 8
      elif qf == 75:
        label = 9

    new_row = {'ImageFileName':file.replace('/',''), 'Label':label, 'Stego': dir, 'QF':qf}
    df = df.append(new_row, ignore_index=True)

df

# save the combined df 
df.to_csv('df.csv', index=False)

Unnamed: 0,ImageFileName,Label,Stego,QF


In [7]:
for lbl in range(10):
    df_lbl = df[df['Label']==lbl]
    df_qf_tr, df_qf_val_test = train_test_split(df_lbl, test_size=0.3, random_state=1234, stratify=df_lbl['Label'].values)
    df_qf_val, df_qf_test  = train_test_split(df_qf_val_test, test_size=0.2, random_state=1234, stratify=df_qf_val_test['Label'].values)
    print(f'Split for quality factor of {qf}...')
    #print(df_qf_tr['Label'].value_counts())
    #print(df_qf_val['Label'].value_counts())
    #print(df_qf_test['Label'].value_counts())
    print('Shape of train split: ', df_qf_tr.shape)
    print('Shape of valid split: ', df_qf_val.shape)
    print('Shape of val_test split: ', df_qf_test.shape)
    print('*'*35)
    
    #save the splits
    df_qf_tr.to_csv(f'train_split_qf_{qf}.csv', index=False)
    df_qf_val.to_csv(f'valid_split_qf_{qf}.csv', index=False)
    df_qf_test.to_csv(f'test_val_split_qf_{qf}.csv', index=False)

ValueError: ignored

In [0]:
!ls

sample_data


To keep runtime from disconnecting

function ClickConnect(){
console.log("Working"); 
document.querySelector("colab-toolbar-button").click() 
}setInterval(ClickConnect,60000)