## **Hyperspectral image classification on Indian Pines dataset**

This notebook implements the HybridSN architecture to achieve state-of-the-art results on the Indian Pines dataset, with a test accuracy of 98.80%.

In [1]:
# Colab-only setup cell: mount Google Drive (the data-loading cell reads the
# .mat files from under /content/drive) and install the two extra packages
# imported below (`spectral` and `RMDL`).
# The TensorFlow version magic is left disabled; the version cell further
# down prints 1.15.0.
# %tensorflow_version 2.1
from google.colab import drive
drive.mount('/content/drive')
# NOTE(review): neither install is version-pinned; the captured output shows
# spectral 0.20 being installed.
! pip install spectral
! pip install RMDL;

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
Collecting spectral
[?25l  Downloading https://files.pythonhosted.org/packages/48/8e/db1d750fb0153027e4e945f91f04b72a3b8b9a0cfdc2c8a33bedcb27740d/spectral-0.20.tar.gz (143kB)
[K     |████████████████████████████████| 153kB 2.8MB/s 
Building wheels for collected packages: spectral
  Building wheel for spectral (setup.py) ... [?25l[?25hdone
  Created wheel for spectral: filename=spectral-0.20-cp36-none-any.whl size=183917 sha256=cccf957985635fa5f6c7c3fd3c3e7

In [5]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Conv2D, Conv3D, Flatten, Dense, Reshape, BatchNormalization
from tensorflow.keras.layers import Dropout, Input, LeakyReLU, AveragePooling3D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical

from RMDL import RMDL_Image as RMDL

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, cohen_kappa_score

from plotly.offline import init_notebook_mode

from operator import truediv

import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
import os
import spectral

init_notebook_mode(connected=True)
%matplotlib inline

In [6]:
# Record the TensorFlow version this run used.
print(tf.__version__)

1.15.0


#### Data loading

In [0]:
# Global Variables
# Dataset tag (presumably "Indian Pines" — TODO confirm); not referenced by
# any other visible cell.
dataset = 'IP'
# Fraction of the labelled samples held out as the test set.
test_ratio = 0.5
# Side length, in pixels, of the square patch extracted around each pixel.
windowSize = 25

In [0]:
def loadData(data_dir='/content/drive/My Drive/Colab Notebooks/Datasets/Indian_Pines'):
    """Load the Indian Pines hyperspectral cube and its ground-truth label map.

    Args:
        data_dir: Directory containing ``Indian_pines_corrected.mat`` and
            ``Indian_pines_gt.mat``. Defaults to the Google Drive folder this
            notebook was written against, so ``loadData()`` behaves as before.

    Returns:
        Tuple ``(data, labels)``: the arrays stored under the
        ``indian_pines_corrected`` and ``indian_pines_gt`` keys of the two
        MATLAB files.
    """
    data = sio.loadmat(os.path.join(data_dir, 'Indian_pines_corrected.mat'))['indian_pines_corrected']
    labels = sio.loadmat(os.path.join(data_dir, 'Indian_pines_gt.mat'))['indian_pines_gt']
    return data, labels

In [0]:
def splitTrainTestSet(X, y, testRatio, randomState=0):
    """Split samples and labels into stratified train and test subsets.

    The split is stratified on ``y``: the proportion of each value in the
    produced subsets is the same as its proportion in ``y``. For example, if
    ``y`` is a binary categorical variable with 25% zeros and 75% ones, the
    random split also has 25% zeros and 75% ones.

    Args:
        X: Samples to split.
        y: Labels for ``X``; also used as the stratification target.
        testRatio: Passed to ``train_test_split`` as ``test_size``.
        randomState: Seed passed to ``train_test_split`` as ``random_state``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    split_options = {
        'test_size': testRatio,
        'random_state': randomState,
        'stratify': y,
    }
    train_X, test_X, train_y, test_y = train_test_split(X, y, **split_options)
    return train_X, test_X, train_y, test_y

In [0]:
def applyPCA(X, numComponents=75):
    """Reduce the last axis of an image cube with whitened PCA.

    The cube is flattened to one row per pixel, PCA is fitted on those rows,
    and the projected rows are folded back into the original two leading
    dimensions.

    Whitening (off by default in ``PCA``) multiplies the ``components_``
    vectors by the square root of ``n_samples`` and divides them by the
    singular values, giving uncorrelated outputs with unit component-wise
    variances. It removes some information from the transformed signal (the
    relative variance scales of the components) but can sometimes improve the
    predictive accuracy of downstream estimators by making their data respect
    some hard-wired assumptions.

    Args:
        X: Array indexed as ``(rows, cols, bands)``.
        numComponents: Number of principal components to keep.

    Returns:
        Tuple ``(newX, pca)``: the ``(rows, cols, numComponents)`` projected
        cube and the fitted ``PCA`` object.
    """
    height, width, bands = X.shape[0], X.shape[1], X.shape[2]
    pixels = np.reshape(X, (-1, bands))
    pca = PCA(n_components=numComponents, whiten=True)
    reduced = pca.fit_transform(pixels)
    return np.reshape(reduced, (height, width, numComponents)), pca

In [0]:
def padWithZeros(X, margin=2):
    """Surround the first two axes of ``X`` with a zero border.

    Args:
        X: Array indexed as ``(rows, cols, bands)``.
        margin: Number of zero rows/columns added on every side.

    Returns:
        A new float64 array (the ``np.zeros`` default dtype) of shape
        ``(rows + 2*margin, cols + 2*margin, bands)`` with ``X`` copied into
        its centre.
    """
    height, width, depth = X.shape[:3]
    padded = np.zeros((height + 2 * margin, width + 2 * margin, depth))
    row_span = slice(margin, margin + height)
    col_span = slice(margin, margin + width)
    padded[row_span, col_span, :] = X
    return padded

In [0]:
def createImageCubes(X, y, windowSize=5, removeZeroLabels=True):
    """Cut a square spatial patch around pixels of an image cube.

    ``X`` is zero-padded so that border pixels also get a full-size patch.
    Patches are emitted in row-major pixel order.

    Args:
        X: Array indexed as ``(rows, cols, bands)``.
        y: Label map of shape ``(rows, cols)``.
        windowSize: Side length of each patch. Must be a positive odd number
            so that the patch has a centre pixel.
        removeZeroLabels: When true, only pixels whose label is ``> 0`` are
            kept and the returned labels are shifted down by one so that they
            start at 0. When false, every pixel is kept and labels are
            returned unshifted.

    Returns:
        Tuple ``(patchesData, patchesLabels)``: a float64 array of shape
        ``(n, windowSize, windowSize, bands)`` and a float64 vector of
        length ``n``.

    Raises:
        ValueError: If ``windowSize`` is not a positive odd number, or if the
            shape of ``y`` does not match the first two axes of ``X``.
    """
    # An even window has no centre pixel: 2*margin + 1 would not equal
    # windowSize, and the patch would be broadcast into a wrong-sized slot.
    if windowSize < 1 or windowSize % 2 == 0:
        raise ValueError(
            "windowSize must be a positive odd number, got %r" % (windowSize,))

    labelMap = np.asarray(y)
    if labelMap.shape != tuple(X.shape[:2]):
        raise ValueError(
            "y has shape %r but X has spatial shape %r"
            % (labelMap.shape, tuple(X.shape[:2])))

    margin = (windowSize - 1) // 2
    zeroPaddedX = padWithZeros(X, margin=margin)

    # Split patches
    print("X.shape", X.shape)

    # Select the wanted pixels up front so storage is allocated only for the
    # patches that are returned, not for every pixel in the image.
    if removeZeroLabels:
        keep = labelMap > 0
    else:
        keep = np.ones(labelMap.shape, dtype=bool)
    centers = np.argwhere(keep)  # row-major order

    patchesData = np.zeros((len(centers), windowSize, windowSize, X.shape[2]))
    patchesLabels = np.zeros(len(centers))

    for patchIndex, (r, c) in enumerate(centers):
        # Pixel (r, c) sits at (r + margin, c + margin) in the padded cube,
        # so its window starts at (r, c) there.
        patchesData[patchIndex] = zeroPaddedX[r:r + windowSize, c:c + windowSize]
        patchesLabels[patchIndex] = labelMap[r, c]

    if removeZeroLabels:
        patchesLabels -= 1

    return patchesData, patchesLabels

In [13]:
# Load the hyperspectral cube (X) and its ground-truth label map (y).
X, y = loadData()

# Echo both shapes as the cell output.
X.shape, y.shape

((145, 145, 200), (145, 145))

In [0]:
K = X.shape[2] # Number of bands (overwritten with the PCA component count in the next cell)

In [15]:
# We'll apply PCA to get only 30 components
# K is reused below as the channel count of the model input shape.
K = 30
# X is rebound to the reduced cube; the fitted PCA object is kept in `pca`.
X, pca = applyPCA(X, numComponents = K)
X.shape

(145, 145, 30)

In [16]:
# Cut a windowSize x windowSize patch around every labelled pixel.
# NOTE: X and y are rebound here, so re-running this cell on its own would
# feed the patches back into createImageCubes.
X, y = createImageCubes(X, y, windowSize = windowSize)
# The next line is not displayed: only a cell's last expression is echoed.
X.shape, y.shape
# Shrink the dtypes of the patches and labels.
X = X.astype('float32')
y = y.astype('uint8')
# It's picking a 25x25 patch size, which I feel is a little too large.

X.shape (145, 145, 30)


In [0]:
# Inference: There are a total of 145x145 = 21,025 pixels, and none is lost
# during creation of patches due to padding.
# We lose (21025 - 10249 = 10,776) pixels as they were 0, meaning unclassified.

In [18]:
# Min-max rescale the whole patch array into [0, 1]. This runs before the
# split, so the min and max are taken over train and test samples together.
# NOTE(review): np.interp returns float64, which undoes the float32 cast made
# in the previous cell — confirm this is intended.
X = np.interp(X, (X.min(), X.max()), (0, +1))
# Stratified split using the global test_ratio.
X_train, X_test, y_train, y_test = splitTrainTestSet(X, y, test_ratio)
print("X_train shape: ", X_train.shape)
print("X_test shape: ", X_test.shape)
print("y_train shape: ", y_train.shape)
print("y_test shape: ", y_test.shape)
# NOTE(review): test_ratio is 0.5, so half of the labelled samples are held
# out. Not sure why the test split is so large.

X_train shape:  (5124, 25, 25, 30)
X_test shape:  (5125, 25, 25, 30)
y_train shape:  (5124,)
y_test shape:  (5125,)


In [19]:
# One-hot encoding is left disabled: the labels stay as integer class ids,
# and the RMDL call below is made with sparse_categorical=True.
# y_train = to_categorical(y_train)  # One hot encoding basically
y_train.shape
# y_test = to_categorical(y_test)
y_test.shape

(5125,)

In [20]:
# Sanity check of the test patch array's shape.
X_test.shape

(5125, 25, 25, 30)

In [21]:
# Force both arrays to (n, windowSize, windowSize, K). The shapes printed
# after the split already match this layout, so the reshape does not change
# them here.
X_train = X_train.reshape(-1, windowSize, windowSize, K)
X_train.shape
X_test = X_test.reshape(-1, windowSize, windowSize, K)
X_test.shape

(5125, 25, 25, 30)

In [0]:
# Number of label classes (presumably the 16 Indian Pines classes — TODO
# confirm). Not passed to the visible RMDL call.
num_of_classes = 16

In [0]:
# Per-sample input shape handed to RMDL: one windowSize x windowSize patch
# with K channels. Derived from windowSize rather than a hard-coded 25 so it
# cannot drift from the patch size used when the cubes were created.
shape = (windowSize, windowSize, K)
# Mini-batch size passed to the RMDL training call.
batch_size = 64

In [0]:
# Labels are integer class ids rather than one-hot vectors.
sparse_categorical = True
# Passed to RMDL as `epochs`; presumably one entry per model family
# (DNN, RNN, CNN) — TODO confirm against the RMDL documentation.
n_epochs = [100,100,100]
# Passed to RMDL as `random_deep`; presumably the number of models of each
# family in the ensemble — TODO confirm against the RMDL documentation.
random_deep = [3,3,3]

In [26]:
# Check the label dtype before handing the labels to RMDL.
y_train.dtype

dtype('uint8')

In [0]:
# Train the RMDL ensemble on the patch cubes. X_test / y_test are used as the
# validation set during training (the log reports "validate on 5125 samples").
# sparse_categorical comes from the config cell instead of a duplicated
# literal, so the flag is controlled in one place.
RMDL.Image_Classification(X_train, y_train, X_test, y_test, shape,
                          batch_size=batch_size,
                          sparse_categorical=sparse_categorical,
                          random_deep=random_deep,
                          epochs=n_epochs)

DNN  0 


(25, 25, 30)



Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
<keras.optimizers.Adagrad object at 0x7fa8d9bdef60>


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Train on 5124 samples, validate on 5125 samples
Epoch 1/100





 - 10s - loss: 12.1604 - acc: 0.2358 - val_loss: 12.2560 - val_acc: 0.2396

Epoch 00001: val_acc improved from -inf to 0.23961, saving model to weights\weights_DNN_0.hdf5
Epoch 2/100
 - 3s - loss: 12.2584 - acc: 0.2395 - val_loss: 12.2560 - val_acc: 0.2396

Epoch 00002: val_acc did not improve from 0.23961
Epoch 3/100
 - 3s - loss: 12.2584 - acc: 0.2395 - val_loss: 12.2560 - val_acc: 0.2396

Epoch 00003: val_acc did not improve from 0.23961
Epoch 4/100
 - 3s - loss: 12.2584 - acc: 0.2395 - val_loss: 12.2560 - val_acc: 0.2396

Epoch 00004: val_acc did not improve from 0.23961
Epoch 5/100
 - 3s - loss: 12.2584 - acc: 0.2395 - val_loss: 1

In [0]:
# Inference: Only the CNN worked well; all others failed. Hence, it didn't make any difference.