In [0]:
#from google.colab import files
#uploaded = files.upload()

**Import dependencies**

In [0]:
import scipy.io as si         # for inputing matlab files
import numpy as np            # Linear Algebra tools
from random import shuffle    # for shuffling dataset
import pandas as pd

**Load Dataset**

We have performed our experiments on the [Indian Pines Dataset.](http://www.ehu.eus/ccwintco/index.php?title=Hyperspectral_Remote_Sensing_Scenes)
We downloaded MATLAB data files.








In [0]:
#mat_x = si.loadmat('Indian_pines.mat')['indian_pines']        # shape 145*145*220
#mat_y = si.loadmat('Indian_pines_gt.mat')['indian_pines_gt']  # shape 145*145

**Mahesh Sir Data**

In [0]:
 
# Read csv file
# data.csv: one row per pixel, one column per spectral band.
# ref.csv : one reference label per pixel.
x_csv = pd.read_csv("/content/data.csv")
y_csv = pd.read_csv("/content/ref.csv")

# Convert dataframe to np array
x_np = np.array(x_csv)
y_np = np.array(y_csv)

# Spatial size of the scene; the CSV rows are the image flattened row by row.
IMAGE_ROWS, IMAGE_COLS = 330, 307

# Reshape np_array to original picture
mat_y = y_np.reshape(IMAGE_ROWS, IMAGE_COLS)

# x_np is (pixels, bands) in row-major pixel order, so a single reshape gives
# the (rows, cols, bands) cube. The band count comes from the data (-1) instead
# of a hard-coded 6, and the CSV dtype is kept: copying into a uint8 buffer
# would silently wrap/truncate any value outside 0..255.
mat_x = x_np.reshape(IMAGE_ROWS, IMAGE_COLS, -1)
  


In [0]:
# Label image -> 1-D label vector, one entry per pixel in row-major (C) order.
# For the 330 x 307 image this has 330*307 = 101310 entries.
# flatten() returns a copy, so later edits to y do not touch mat_y.
y = mat_y.flatten(order='C')

In [0]:
# To see the number of entries in each class
#  np.unique(y,return_counts = True)

**Define Global Variables**

In [0]:
# Image geometry is read from the data cube: (rows, columns, spectral bands).
HEIGHT, WIDTH, BAND = mat_x.shape

P_S = 5                # Patch size: each sample is a P_S x P_S spatial window
OUTPUT_CLASSES = 7     # Number of classes kept for training; label 0 is later
                       # remapped to this value and filtered out
NO_PATCHES = 2000      # Training patches drawn per class
TEST_FRACTION = 0.25   # Share of each class held out for testing (rest is training)

**Remove class 0**

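Class 0 has far more examples than the other classes, so it tends to dominate training. We therefore relabel it to `OUTPUT_CLASSES` (so that it is skipped when patches are grouped by class) and shift the remaining labels down by one so that they start at 0.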

In [0]:
# Relabel from the untouched label image instead of mutating y in place, so
# re-running this cell yields the same labels rather than shifting them again.
labels_raw = mat_y.flatten('C')

# Label 0 -> OUTPUT_CLASSES (dropped later by the `< OUTPUT_CLASSES` filter);
# every other label k -> k - 1, so the kept classes are numbered from 0.
# Vectorised: one NumPy pass instead of a Python loop over every pixel.
y = np.where(labels_raw == 0, OUTPUT_CLASSES, labels_raw - 1)

**Normalization**

Band Max Normalization adopted for Indian Pines DataSet.
https://arxiv.org/ftp/arxiv/papers/1710/1710.02939.pdf

In [0]:
# Band-max normalisation: divide every band by its own maximum so that each
# band's largest value becomes 1.
mat_x = mat_x.astype(float)

# Per-band maximum over both spatial axes; keepdims gives shape (1, 1, bands)
# so it broadcasts against the (rows, cols, bands) cube.
band_max = mat_x.max(axis=(0, 1), keepdims=True)

# A band whose maximum is 0 would produce 0/0 = NaN for every pixel;
# divide such bands by 1 instead so they are left unchanged.
band_max[band_max == 0] = 1.0

mat_x = mat_x / band_max

**Perform the Padding Operation for extracting the patches**

In [0]:
"""
padding is useful for extracting patches of corner pixels
"""

# Half of the patch size (rounded down) is added as a zero border on every
# side of the two spatial axes; the band axis is not padded.
pad_width = int( (P_S - 1) / 2 )
spatial_pad = (pad_width, pad_width)
padded_x = np.pad(mat_x, [spatial_pad, spatial_pad, (0, 0)], mode='constant')

**Functions  to extract Patches for each class with label as list index**


In [0]:
# Function to extract the patch at h_index, w_index

def patch_at_index(h_index,w_index):
  """Return one flattened patch taken from the padded image.

  The patch is the P_S x P_S window of `padded_x` whose top-left corner is
  (h_index, w_index), including every band.

  Args:
    h_index: row of the window's top-left corner in `padded_x`.
    w_index: column of the window's top-left corner in `padded_x`.

  Returns:
    1-D array holding the window flattened in row-major (C) order.
  """
  row_stop = h_index + P_S
  col_stop = w_index + P_S
  window = padded_x[h_index:row_stop, w_index:col_stop, :]
  # The model input is a vector, so collapse (rows, cols, bands) to 1-D.
  return window.flatten(order='C')

In [0]:
# Function to extract all the patches from hyperspectral image

def extract_all_patches():
  """Build the patch matrix for the whole image.

  Returns:
    Array of shape (HEIGHT*WIDTH, P_S*P_S*BAND). Row WIDTH*i + j holds the
    flattened patch returned by patch_at_index(i, j), i.e. pixels are
    visited row by row.
  """
  n_pixels = HEIGHT * WIDTH
  feature_len = P_S * P_S * BAND
  # Uninitialised buffer; every row is overwritten in the loop below.
  patches = np.empty((n_pixels, feature_len))
  for flat_idx in range(n_pixels):
    row, col = divmod(flat_idx, WIDTH)
    patches[flat_idx] = patch_at_index(row, col)
  return patches

In [0]:
# Function to group the patches by class label

def split_each_class_patches():
  """Group every pixel's patch by its label.

  Returns:
    List of OUTPUT_CLASSES lists; entry c holds the patches of all pixels
    whose label in `y` equals c. Pixels whose label is not below
    OUTPUT_CLASSES are left out.
  """
  all_patches = extract_all_patches()
  per_class = [[] for _ in range(OUTPUT_CLASSES)]
  for pixel_idx, patch in enumerate(all_patches):
    label = y[pixel_idx]
    if label < OUTPUT_CLASSES:
      per_class[label].append(patch)
  return per_class

**Splitting Training and Testing Data Set**

In [0]:
# function to split training and test data

def train_test_split():
  """Split the per-class patches into a balanced training set and a test set.

  For every class the patches are shuffled; the first
  int(len * TEST_FRACTION) go to the test set and the rest form the training
  pool. The training pool is then resampled to exactly NO_PATCHES patches:
  repeated as often as needed when it is smaller, subsampled when it is
  larger.

  A class with no training patches cannot be resampled; it contributes no
  training samples or labels, and a warning is issued.

  Returns:
    (x_train, y_train, x_test, y_test) as numpy arrays. Samples are ordered
    class by class, and the label arrays are aligned with the sample arrays.
  """
  import warnings  # local import: only needed for the empty-class warning

  classes = split_each_class_patches()
  x_train, y_train, x_test, y_test = [], [], [], []

  for c in range(OUTPUT_CLASSES):
    class_patches = list(classes[c])
    shuffle(class_patches)

    test_set_size = int( len(class_patches) * TEST_FRACTION )
    x_test.extend( class_patches[:test_set_size] )
    y_test.extend( [c] * test_set_size )

    train_patches = class_patches[test_set_size:]
    if not train_patches:
      warnings.warn("class %d has no training patches; skipping it" % c)
      continue

    # Repeat the pool just often enough to reach NO_PATCHES (ceiling division).
    # List multiplication builds a NEW list; the previous `lst += alias_of_lst`
    # doubled the list on every iteration and grew exponentially.
    repeats = -(-NO_PATCHES // len(train_patches))
    pool = train_patches * repeats
    shuffle(pool)

    x_train.extend( pool[:NO_PATCHES] )
    y_train.extend( [c] * NO_PATCHES )

  # actual data set should be one array of samples * feature size
  return (np.array(x_train), np.array(y_train, dtype = int),
          np.array(x_test), np.array(y_test, dtype = int))
    