In [2]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import warnings
warnings.filterwarnings('ignore')

import os
from os import listdir
from os.path import isfile, join

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from scipy.interpolate import interp1d
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.cluster import OPTICS
from sklearn.mixture import BayesianGaussianMixture 

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Input, Reshape, Dense, BatchNormalization
from tensorflow.keras.layers import Activation, LeakyReLU

import keras_radam
from keras_radam import RAdam
from tensorflow.keras.utils import plot_model

import numpy as np
import joblib 
import matplotlib.pyplot as plt

from CRISMrelatedMethods.preprocessing import *
from CRISMrelatedMethods.dataRead import *
from CRISMrelatedMethods.score import *

In [3]:
augmentedData_X=joblib.load('AugmentedPrep_X.z')
augmentedData_Y=joblib.load('AugmentedPrep_Y.z')
labelledData_X=joblib.load('plebaniMICA_testPrep_X.z')
labelledData_Y=joblib.load('plebaniMICA_testPrep_Y.z')

------------------------------------------------------------------------------------------------------------------
Ordering Points To Identify Clustering Structure (OPTICS)

In [53]:
plebaniMICA_X_smCRSS_copy,plebaniMICA_Y_copy=np.copy(labelledData_X['crsmCRSS']),np.copy(labelledData_Y)
plebaniMICA_X_smCRSS_copy,plebaniMICA_Y_copy=shuffle_in_unison(plebaniMICA_X_smCRSS_copy,plebaniMICA_Y_copy)
noOfFolds=8
individual_mineral,mineral_group,groupwisePerf=[],[],{i:[] for i in range(len(mineralGroupNames))}
for f in range(noOfFolds):
    testX=plebaniMICA_X_smCRSS_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1),:]
    testY=plebaniMICA_Y_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1)]

    clustering = OPTICS(min_samples=.01, algorithm='brute',xi=.001,metric='correlation',min_cluster_size=.01).fit(testX)
    labels=clustering.labels_

    labels-=np.min(labels)
    labelPairing= getLabelPairing(labels, testY)
    c_label=np.array([labelPairing[labels[l]] for l in range(labels.shape[0])])
    
    individual_mineral.append(np.sum(np.where(c_label==testY,True,False))/testY.shape[0])
    mineral_group.append(np.sum(np.where(getGroupPred(c_label)==getGroupPred(testY),True,False))/testY.shape[0])
    
    G=getClassWiseAccuracy(getGroupPred(c_label),getGroupPred(testY))
    for g in G:
        groupwisePerf[g].append(G[g])
    
print('mineral_group',np.round(np.mean(mineral_group),4),np.round(np.std(mineral_group),3))
print('groupwisePerf',[(mineralGroupNames[g],np.round(np.mean(groupwisePerf[g]),4),np.round(np.std(groupwisePerf[g]),3)) for g in groupwisePerf])

mineral_group 0.6561 0.054
groupwisePerf [('Iron oxides and primary silicates', 0.688, 0.069), ('Ices', 0.8512, 0.107), ('Sulfates', 0.6848, 0.176), ('Phyllosilicates', 0.7121, 0.307), ('Carbonates', 0.3124, 0.123), ('Hydrated silicates and halides', 0.5651, 0.16)]


------------------------------------------------------------------------------------------------------------------
linear discriminant analysis (LDA)

In [47]:
plebaniMICA_X_smCRSS_copy,plebaniMICA_Y_copy=np.copy(labelledData_X['crsmCRSS']),np.copy(labelledData_Y)
plebaniMICA_X_smCRSS_copy,plebaniMICA_Y_copy=shuffle_in_unison(plebaniMICA_X_smCRSS_copy,plebaniMICA_Y_copy)
noOfFolds=10
individual_mineral,mineral_group,groupwisePerf=[],[],{i:[] for i in range(len(mineralGroupNames))}
for f in range(noOfFolds):
    testX=plebaniMICA_X_smCRSS_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1),:]
    testY=plebaniMICA_Y_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1)]

    X_train,X_test,y_train,y_test = train_test_split(testX,testY,test_size=0.3,random_state=20)
    lda = LDA(shrinkage=1,solver='lsqr').fit(X_train, y_train)

    y_=lda.predict(X_test)
    
    individual_mineral.append(np.sum(np.where(y_==y_test,True,False))/y_test.shape[0])
    mineral_group.append(np.sum(np.where(getGroupPred(y_)==getGroupPred(y_test),True,False))/y_test.shape[0])
    
    G=getClassWiseAccuracy(getGroupPred(y_),getGroupPred(y_test))
    for g in G:
        groupwisePerf[g].append(G[g])
    
print('mineral_group',np.round(np.mean(mineral_group),4),np.round(np.std(mineral_group),3))
print('groupwisePerf',[(mineralGroupNames[g],np.round(np.mean(groupwisePerf[g]),4),np.round(np.std(groupwisePerf[g]),3)) for g in groupwisePerf])

mineral_group 0.8315 0.029
groupwisePerf [('Iron oxides and primary silicates', 0.8917, 0.039), ('Ices', 0.9431, 0.076), ('Sulfates', 0.851, 0.051), ('Phyllosilicates', 0.8108, 0.076), ('Carbonates', 0.6954, 0.18), ('Hydrated silicates and halides', 0.7991, 0.055)]


------------------------------------------------------------------------------------------------------------------
Bayesian Gaussian Dirichlet Process Mixture (DPM)

In [5]:
plebaniMICA_X_smCRSS_copy,plebaniMICA_Y_copy=np.copy(labelledData_X['crsmCRSS']),np.copy(labelledData_Y)
plebaniMICA_X_smCRSS_copy,plebaniMICA_Y_copy=shuffle_in_unison(plebaniMICA_X_smCRSS_copy,plebaniMICA_Y_copy)
noOfFolds=10
individual_mineral,mineral_group,groupwisePerf=[],[],{i:[] for i in range(len(mineralGroupNames))}
for f in range(noOfFolds):
    testX=plebaniMICA_X_smCRSS_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1),:]
    testY=plebaniMICA_Y_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1)]
    
    pca=PCA(.95)
    pca.fit(testX)

    X_train,X_test,y_train,y_test = train_test_split(testX,testY,test_size=0.2) #,random_state=20
    dpm = BayesianGaussianMixture(weight_concentration_prior_type='dirichlet_process',n_components=np.unique(y_train).shape[0], random_state=1,warm_start=True).fit(pca.transform(X_train), y_train)

    y_=dpm.predict(pca.transform(X_test))
    labelPairing= getLabelPairing(y_, y_test)
    c_label=np.array([labelPairing[y_[l]] for l in range(y_.shape[0])])

    individual_mineral.append(np.sum(np.where(c_label==y_test,True,False))/y_test.shape[0])
    mineral_group.append(np.sum(np.where(getGroupPred(c_label)==getGroupPred(y_test),True,False))/y_test.shape[0])
    
    G=getClassWiseAccuracy(getGroupPred(c_label),getGroupPred(y_test))
    for g in G:
        groupwisePerf[g].append(G[g])
    
print('mineral_group',np.round(np.mean(mineral_group),4),np.round(np.std(mineral_group),3))
print('groupwisePerf',[(mineralGroupNames[g],np.round(np.mean(groupwisePerf[g]),4),np.round(np.std(groupwisePerf[g]),3)) for g in groupwisePerf])

mineral_group 0.7143 0.031
groupwisePerf [('Iron oxides and primary silicates', 0.778, 0.096), ('Ices', 0.8421, 0.165), ('Sulfates', 0.7607, 0.088), ('Phyllosilicates', 0.6981, 0.209), ('Carbonates', 0.5212, 0.2), ('Hydrated silicates and halides', 0.6524, 0.163)]


------------------------------------------------------------------------------------------------------------------
CNN

In [5]:
warnings.filterwarnings('ignore')

plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy=np.copy(labelledData_X['crsmCRSS']),np.copy(labelledData_Y)
plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy=shuffle_in_unison(plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy)
noOfFolds=10

model=load_model('models/CNN_crsmCRSS.h5', custom_objects={'RAdam': RAdam})
    
individual_mineral,mineral_group,groupwisePerf=[],[],{i:[] for i in range(len(mineralGroupNames))}
for f in range(noOfFolds):
    testX=np.expand_dims(plebaniMICA_X_smCRSSFE_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1),:],axis=2)
    testY=plebaniMICA_Y_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1)]

    Ppredict=(model.predict(testX))
    Plabel=np.array([mineralIndexMap[np.argmax(Ppredict[p])] for p in range(Ppredict.shape[0])])    
    
    individual_mineral.append(np.sum(np.where(Plabel==testY,True,False))/testY.shape[0])
    mineral_group.append(np.sum(np.where(getGroupPred(Plabel)==getGroupPred(testY),True,False))/testY.shape[0])
    
    G=getClassWiseAccuracy(getGroupPred(Plabel),getGroupPred(testY))
    for g in G:
        groupwisePerf[g].append(G[g])
        
print('mineral_group',np.round(np.mean(mineral_group),4),np.round(np.std(mineral_group),3))
print('groupwisePerf',[(mineralGroupNames[g],np.round(np.mean(groupwisePerf[g]),4),np.round(np.std(groupwisePerf[g]),3)) for g in groupwisePerf])

mineral_group 0.8162 0.014
groupwisePerf [('Iron oxides and primary silicates', 0.8542, 0.047), ('Ices', 0.9757, 0.043), ('Sulfates', 1.0194, 0.022), ('Phyllosilicates', 0.7163, 0.035), ('Carbonates', 0.7716, 0.07), ('Hydrated silicates and halides', 0.5889, 0.042)]


------------------------------------------------------------------------------------------------------------------
ANN

In [14]:
plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy=np.copy(labelledData_X['crsmCRSSFE']),np.copy(labelledData_Y)
plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy=shuffle_in_unison(plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy)
noOfFolds=10

model=load_model('models/ANN_crsmCRSSFE.h5', custom_objects={'RAdam': RAdam})
    
individual_mineral,mineral_group,groupwisePerf=[],[],{i:[] for i in range(len(mineralGroupNames))}
for f in range(noOfFolds):
    testX=plebaniMICA_X_smCRSSFE_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1),:]
    testY=plebaniMICA_Y_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1)]

    Ppredict=(model.predict(testX))
    Plabel=np.array([mineralIndexMap[np.argmax(Ppredict[p])] for p in range(Ppredict.shape[0])])    
    
    individual_mineral.append(np.sum(np.where(Plabel==testY,True,False))/testY.shape[0])
    mineral_group.append(np.sum(np.where(getGroupPred(Plabel)==getGroupPred(testY),True,False))/testY.shape[0])
    
    G=getClassWiseAccuracy(getGroupPred(Plabel),getGroupPred(testY))
    for g in G:
        groupwisePerf[g].append(G[g])
        
print('mineral_group',np.round(np.mean(mineral_group),4),np.round(np.std(mineral_group),3))
print('groupwisePerf',[(mineralGroupNames[g],np.round(np.mean(groupwisePerf[g]),4),np.round(np.std(groupwisePerf[g]),3)) for g in groupwisePerf])

mineral_group 0.8225 0.021
groupwisePerf [('Iron oxides and primary silicates', 0.8282, 0.035), ('Ices', 0.9029, 0.056), ('Sulfates', 0.9422, 0.021), ('Phyllosilicates', 0.8926, 0.026), ('Carbonates', 0.6339, 0.067), ('Hydrated silicates and halides', 0.5942, 0.072)]


------------------------------------------------------------------------------------------------------------------
RFC

In [35]:
plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy=np.copy(labelledData_X['crsmCRSSFE']),np.copy(labelledData_Y)
plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy=shuffle_in_unison(plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy)
noOfFolds=10

model=joblib.load('models/RFC_crsmCRSSFE.z')
    
individual_mineral,mineral_group,groupwisePerf=[],[],{i:[] for i in range(len(mineralGroupNames))}
for f in range(noOfFolds):
    testX=plebaniMICA_X_smCRSSFE_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1),:]
    testY=plebaniMICA_Y_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1)]
  
    Plabel=(model.predict(testX))
    
    individual_mineral.append(np.sum(np.where(Plabel==testY,True,False))/testY.shape[0])
    mineral_group.append(np.sum(np.where(getGroupPred(Plabel)==getGroupPred(testY),True,False))/testY.shape[0])
    
    G=getClassWiseAccuracy(getGroupPred(Plabel),getGroupPred(testY))
    for g in G:
        groupwisePerf[g].append(G[g])
        
print('mineral_group',np.round(np.mean(mineral_group),4),np.round(np.std(mineral_group),3))
print('groupwisePerf',[(mineralGroupNames[g],np.round(np.mean(groupwisePerf[g]),4),np.round(np.std(groupwisePerf[g]),3)) for g in groupwisePerf])

mineral_group 0.7967 0.008
groupwisePerf [('Iron oxides and primary silicates', 0.817, 0.027), ('Ices', 0.5437, 0.018), ('Sulfates', 0.9625, 0.008), ('Phyllosilicates', 0.886, 0.012), ('Carbonates', 0.645, 0.091), ('Hydrated silicates and halides', 0.58, 0.056)]


------------------------------------------------------------------------------------------------------------------
SVC

In [32]:
plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy=np.copy(labelledData_X['crsmCRSSFE']),np.copy(labelledData_Y)
plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy=shuffle_in_unison(plebaniMICA_X_smCRSSFE_copy,plebaniMICA_Y_copy)
noOfFolds=10

pca=joblib.load('models/PCA_crsmCRSSFE.z')
model=joblib.load('models/SVC_crsmCRSSFE.z')
    
individual_mineral,mineral_group,groupwisePerf=[],[],{i:[] for i in range(len(mineralGroupNames))}
for f in range(noOfFolds):
    testX=pca.transform(plebaniMICA_X_smCRSSFE_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1),:])
    testY=plebaniMICA_Y_copy[(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*f:(int(plebaniMICA_Y_copy.shape[0]/noOfFolds))*(f+1)]
  
    Plabel=(model.predict(testX))
    
    individual_mineral.append(np.sum(np.where(Plabel==testY,True,False))/testY.shape[0])
    mineral_group.append(np.sum(np.where(getGroupPred(Plabel)==getGroupPred(testY),True,False))/testY.shape[0])
    
    G=getClassWiseAccuracy(getGroupPred(Plabel),getGroupPred(testY))
    for g in G:
        groupwisePerf[g].append(G[g])
        
print('mineral_group',np.round(np.mean(mineral_group),4),np.round(np.std(mineral_group),3))
print('groupwisePerf',[(mineralGroupNames[g],np.round(np.mean(groupwisePerf[g]),4),np.round(np.std(groupwisePerf[g]),3)) for g in groupwisePerf])

mineral_group 0.8039 0.019
groupwisePerf [('Iron oxides and primary silicates', 0.8112, 0.049), ('Ices', 0.8152, 0.047), ('Sulfates', 0.9497, 0.018), ('Phyllosilicates', 0.9375, 0.022), ('Carbonates', 0.422, 0.055), ('Hydrated silicates and halides', 0.5532, 0.033)]
