In [1]:
from scipy.io import loadmat
import numpy as np
import pandas as pd
from time import time
import logging
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# Echo the module docstring of the running module.
print(__doc__)

# Emit log records of level INFO and above, each prefixed with a timestamp.
# Note: with no `stream`/`filename` argument, logging.basicConfig attaches a
# StreamHandler that writes to stderr (not stdout).
LOG_FORMAT = '%(asctime)s %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

Automatically created module for IPython interactive environment


In [2]:
import warnings
from sklearn.exceptions import DataConversionWarning

# Suppress every DataConversionWarning raised from here on.
# NOTE(review): presumably these come from passing the (n, 1) label arrays
# where scikit-learn expects a 1-D target vector -- confirm, and prefer
# flattening the labels over hiding the warning.
warnings.filterwarnings('ignore', category=DataConversionWarning)

In [3]:
# Read the MATLAB .mat file; the result is indexed by variable name below.
MAT_FILE = 'AV_org_GRSL_Baily.mat'
mat = loadmat(MAT_FILE)

In [4]:
# mat

## load data

In [5]:
# Pull the feature matrices and their label arrays out of the loaded file.
X_train, X_test = mat['train'], mat['test']
y_train, y_test = mat['train_label'], mat['test_label']

# Display the four shapes as a sanity check (features, labels per partition).
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

((1121, 220), (1121, 1), (1006, 220), (1006, 1))

## Data concatenation : train + test = Data

In [6]:
# Stack the provided train and test partitions row-wise into a single data
# set. Features and labels are stacked in the same order, so row i of `Data`
# still corresponds to row i of `label`.
Data = np.vstack((X_train, X_test))
label = np.vstack((y_train, y_test))

Data.shape, label.shape

((2127, 220), (2127, 1))

## Divide the data into three segments based on spectral wavelength

In [7]:
# Column indices where the feature matrix is cut, giving the three column
# ranges [0, 32), [32, 72) and [72, end).
BAND_EDGES = [32, 72]
D1, D2, D3 = np.split(Data, BAND_EDGES, axis=1)

D1.shape, D2.shape, D3.shape

((2127, 32), (2127, 40), (2127, 148))

## PCA: Data

In [8]:
# Whitened PCA over the full feature matrix `Data`, keeping 220 components.
n_components = 220

t0 = time()
# svd_solver='randomized' is a stochastic algorithm; pinning random_state
# makes the fitted components (and everything derived from them) repeatable
# across kernel restarts.
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=True, random_state=121).fit(Data)
print("done in %0.3fs" % (time() - t0))

# Project the same rows onto the fitted components; timed separately from the fit.
t0 = time()
Data_pca = pca.transform(Data)
print("Shape of Data PCA is: {}".format(Data_pca.shape))
print("done in %0.3fs" % (time() - t0))

done in 0.232s
Shape of Data PCA is: (2127, 220)
done in 0.012s


## PCA: D1

In [9]:
# Whitened PCA over the first column segment `D1`, reduced to 9 components.
n_components = 9

t0 = time()
# svd_solver='randomized' is a stochastic algorithm; pinning random_state
# makes the fitted components repeatable across kernel restarts.
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=True, random_state=121).fit(D1)
print("done in %0.3fs" % (time() - t0))

# Project the same rows onto the fitted components; timed separately from the fit.
t0 = time()
D1_pca = pca.transform(D1)
print("Shape of D1 PCA is: {}".format(D1_pca.shape))
print("done in %0.3fs" % (time() - t0))

done in 0.009s
Shape of D1 PCA is: (2127, 9)
done in 0.001s


## PCA: D2

In [10]:
# Whitened PCA over the second column segment `D2`, reduced to 6 components.
n_components = 6

t0 = time()
# svd_solver='randomized' is a stochastic algorithm; pinning random_state
# makes the fitted components repeatable across kernel restarts.
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=True, random_state=121).fit(D2)
print("done in %0.3fs" % (time() - t0))

# Project the same rows onto the fitted components; timed separately from the fit.
t0 = time()
D2_pca = pca.transform(D2)
print("Shape of D2 PCA is: {}".format(D2_pca.shape))
print("done in %0.3fs" % (time() - t0))

done in 0.008s
Shape of D2 PCA is: (2127, 6)
done in 0.001s


## PCA: D3

In [11]:
# Whitened PCA over the third column segment `D3`, reduced to 5 components.
n_components = 5

t0 = time()
# svd_solver='randomized' is a stochastic algorithm; pinning random_state
# makes the fitted components repeatable across kernel restarts.
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=True, random_state=121).fit(D3)
print("done in %0.3fs" % (time() - t0))

# Project the same rows onto the fitted components; timed separately from the fit.
t0 = time()
D3_pca = pca.transform(D3)
print("Shape of D3 PCA is: {}".format(D3_pca.shape))
print("done in %0.3fs" % (time() - t0))

done in 0.025s
Shape of D3 PCA is: (2127, 5)
done in 0.003s


# Concatenate all PCA outputs: D1 + D2 + D3 = spectrally segmented PCA

In [12]:
# Place the three per-segment PCA projections side by side (column-wise), so
# each row carries the components of all three segments.
S_segement_pca = np.hstack((D1_pca, D2_pca, D3_pca))

print("Shape of segement_pca PCA is: {}".format(S_segement_pca.shape))

Shape of segement_pca PCA is: (2127, 20)


# SVM on the segmented PCA features

## train test split

In [13]:
from sklearn.model_selection import train_test_split

# Hold out 46% of the rows for testing; the fixed seed keeps the split
# repeatable. This rebinds y_train / y_test, which previously held the label
# arrays read from the .mat file.
# NOTE(review): the PCA projections in S_segement_pca were fitted on all rows
# before this split, so the held-out rows influenced the features -- confirm
# this is intended.
split = train_test_split(S_segement_pca, label, test_size=0.46, random_state=121)
X_train_pca, X_test_pca, y_train, y_test = split

tuple(part.shape for part in split)

((1148, 20), (979, 20), (1148, 1), (979, 1))

## Train the SVM with a grid search

In [14]:
#############################################################################
# Train an RBF-kernel SVM classifier, selecting C and gamma with a 5-fold
# cross-validated grid search over the training split.

print("Fitting the classifier to the training set")
t0 = time()
# NOTE(review): the recorded run picked C=1e4 and gamma=0.001, both the
# largest values offered here -- consider widening the grid.
param_grid = {'C': [1e3, 1e4],
              'gamma': [0.0001,  0.001], }
clf = GridSearchCV(
    SVC(kernel='rbf', class_weight='balanced'), param_grid, cv=5
)
# scikit-learn expects a 1-D target vector, but y_train has shape (n, 1).
# Flatten it explicitly instead of relying on an implicit conversion that
# raises DataConversionWarning.
clf = clf.fit(X_train_pca, y_train.ravel())
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)

Fitting the classifier to the training set
done in 0.497s
Best estimator found by grid search:
SVC(C=10000.0, class_weight='balanced', gamma=0.001)


## Test the model

In [15]:

# #############################################################################
# Quantitative evaluation of the model quality on the test set

print("Predicting HSI on the test set")
t0 = time()
y_pred = clf.predict(X_test_pca)
print("done in %0.3fs" % (time() - t0))

# Display names for the classes, listed in ascending order of class id.
label_names = [
    'Alfalfa', 'Corn-notill', 'Corn-mintill', 'Corn', 'Grass-pasture',
    'Grass-trees', 'Grass-pasture-mowed', 'Hay-windrowed', 'Oats',
    'Soybean-notill', 'Soybean-mintill', 'Soybean-clean', 'Wheat', 'Wood',
]

# y_test has shape (n, 1); the metrics expect 1-D targets.
y_true = y_test.ravel()

# Take the class ids from the data instead of hard-coding range(14): if the
# ids are not exactly 0..13 (e.g. 1-based ids), a fixed range would drop the
# last class and add an empty row/column. The sorted union of true and
# predicted ids is the same label set classification_report uses by default,
# so the matrix rows line up with the report rows.
class_ids = np.unique(np.concatenate((y_true, y_pred)))

print(classification_report(y_true, y_pred, target_names=label_names))
print(confusion_matrix(y_true, y_pred, labels=class_ids))

Predicting HSI on the test set
done in 0.018s
                     precision    recall  f1-score   support

            Alfalfa       0.99      1.00      1.00       132
        Corn-notill       0.98      0.91      0.94        93
       Corn-mintill       0.99      0.99      0.99       251
               Corn       1.00      1.00      1.00        39
      Grass-pasture       1.00      1.00      1.00        86
        Grass-trees       0.94      0.99      0.96       125
Grass-pasture-mowed       1.00      1.00      1.00        83
      Hay-windrowed       1.00      1.00      1.00        44
               Oats       0.90      0.97      0.94        39
     Soybean-notill       1.00      0.79      0.88        19
    Soybean-mintill       1.00      1.00      1.00        25
      Soybean-clean       1.00      0.93      0.96        14
              Wheat       1.00      1.00      1.00        12
               Wood       0.82      0.82      0.82        17

           accuracy                  

## Training and Cross Validation

In [16]:
# Split the segmented-PCA features again, this time holding out 20% of the
# rows. This rebinds X_train_pca / X_test_pca / y_train / y_test, replacing
# the values from the 46% split.
split = train_test_split(S_segement_pca, label, test_size=0.2, random_state=121)
X_train_pca, X_test_pca, y_train, y_test = split

tuple(part.shape for part in split)

((1701, 20), (426, 20), (1701, 1), (426, 1))

In [17]:

# RBF-kernel SVM with the hyper-parameters reported by the grid search
# (C=1e4, gamma=1e-3). Only the non-default arguments are spelled out; the
# remaining ones in the original call were SVC's defaults.
clf = SVC(C=10000.0, kernel='rbf', gamma=0.001, class_weight='balanced')

# 5-fold cross-validation on the training split. y_train has shape (n, 1), so
# it is flattened to the 1-D target vector scikit-learn expects.
all_accuracies = cross_val_score(
    estimator=clf, X=X_train_pca, y=y_train.ravel(), cv=5
)

In [18]:
# Show the per-fold scores returned by cross_val_score.
print("{}".format(all_accuracies))

[0.98240469 0.98823529 0.98235294 0.97352941 0.99411765]
