In [1]:
# %pip install scikit-optimize
import warnings
warnings.filterwarnings('ignore')

from scipy.io import loadmat
import umap

from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize
from sklearn import cluster
from sklearn.manifold import TSNE, Isomap, SpectralEmbedding, LocallyLinearEmbedding
from sklearn.metrics import v_measure_score

from sklearn.preprocessing import minmax_scale

In [2]:
# Read the hyperspectral array and the ground-truth label array from the
# two .mat files that sit next to the notebook.
data_array = loadmat('./Indian_pines_corrected.mat')['indian_pines_corrected']
gt = loadmat('./Indian_pines_gt.mat')['indian_pines_gt']

# Merge the first two axes into a single sample axis; -1 lets NumPy infer the
# size of the remaining (feature) axis.
data_reshaped = data_array.reshape(data_array.shape[0] * data_array.shape[1], -1)

# Rescale every column to [0, 1]. feature_range=(0, 1) and axis=0 are the
# defaults of minmax_scale, so only copy=False has to be spelled out.
data_reshaped = minmax_scale(data_reshaped, copy=False)

# Flatten the label array to 1-D (row-major, like the reshape above).
gt_reshaped = gt.flatten()



In [None]:
# t-SNE reducer producing a two-component embedding, seeded for repeatability
model = TSNE(n_components=2, random_state=1)

# Hyperparameters the optimizer is allowed to vary, with their integer ranges.
# NOTE(review): recent scikit-learn releases rename TSNE's `n_iter` to
# `max_iter` -- confirm against the installed version.
search_space = [
    Integer(10, 50, name='perplexity'),
    Integer(250, 1000, name='n_iter'),
]

# KMeans partitions the embedded points into 17 clusters (a clustering model,
# not a classifier); the fixed seed keeps its initialisation repeatable.
clusterer = cluster.KMeans(n_clusters=17, random_state=1)

# Objective for the optimizer: each call scores one hyperparameter setting
@use_named_args(search_space)
def evaluate_model(**params):
    """Score one t-SNE configuration by clustering its embedding.

    The shared ``model`` is reconfigured with ``params``, ``data_reshaped`` is
    embedded, the embedding is clustered with ``clusterer`` and the resulting
    labels are compared with ``gt_reshaped`` using the V-measure. No
    cross-validation is involved.

    Args:
        **params: Keyword arguments named after the dimensions of
            ``search_space``; forwarded to ``model.set_params``.

    Returns:
        ``1 - v_measure``, so that a better clustering yields a smaller value.
    """
    model.set_params(**params)
    embedding = model.fit_transform(data_reshaped)
    cluster_ids = clusterer.fit_predict(embedding)
    return 1 - v_measure_score(gt_reshaped, cluster_ids)


# Run the Gaussian-process optimizer over the search space. random_state pins
# the optimizer's own sampling so that repeated runs propose the same points
# (the reducer and the clusterer are already seeded with the same value).
result = gp_minimize(evaluate_model, search_space, n_jobs=-1, n_calls=100, random_state=1)

# Report the best configuration. The objective returns 1 - V-measure, so the
# score is converted back here; it is a V-measure, not an accuracy.
print('Best V-measure: %.3f' % (1.0 - result.fun))
print('Best Parameters: perplexity=%d, n_iter=%d' % (result.x[0], result.x[1]))

In [3]:
# Isomap reducer producing a two-component embedding
model = Isomap(n_components=2)

# Single hyperparameter explored by the optimizer: the neighbourhood size
search_space = [
    Integer(1, 100, name='n_neighbors'),
]

# KMeans partitions the embedded points into 17 clusters (a clustering model,
# not a classifier); the fixed seed keeps its initialisation repeatable.
clusterer = cluster.KMeans(n_clusters=17, random_state=1)

# Number of objective evaluations made so far; incremented by evaluate_model
count = 0
# Objective for the optimizer: each call scores one hyperparameter setting
@use_named_args(search_space)
def evaluate_model(**params):
    """Score one Isomap configuration by clustering its embedding.

    The shared ``model`` is reconfigured with ``params``, ``data_reshaped`` is
    embedded, the embedding is clustered with ``clusterer`` and the resulting
    labels are compared with ``gt_reshaped`` using the V-measure. No
    cross-validation is involved.

    Side effects:
        Increments the module-level ``count`` and prints one progress line.

    Args:
        **params: Keyword arguments named after the dimensions of
            ``search_space``; forwarded to ``model.set_params``.

    Returns:
        ``1 - v_measure``, so that a better clustering yields a smaller value.
    """
    global count
    count += 1

    model.set_params(**params)
    embedding = model.fit_transform(data_reshaped)
    cluster_ids = clusterer.fit_predict(embedding)
    score = v_measure_score(gt_reshaped, cluster_ids)

    # The evaluation counter reads as a percentage only because the search
    # is launched with 100 calls.
    print(count, "percent done", score)
    return 1 - score

# Run the Gaussian-process optimizer over the search space. random_state pins
# the optimizer's own sampling so that repeated runs propose the same points.
result = gp_minimize(evaluate_model, search_space, n_jobs=-1, n_calls=100, random_state=1)

# Report the best configuration. The objective returns 1 - V-measure, so the
# score is converted back here; it is a V-measure, not an accuracy.
print('Best V-measure: %.3f' % (1.0 - result.fun))
print('Best Parameters: n_neighbours=%d' % (result.x[0]))

In [None]:
# Spectral embedding into two components, built on a nearest-neighbours
# affinity graph; seeded and allowed to use every available core.
model = SpectralEmbedding(
    n_components=2,
    affinity='nearest_neighbors',
    random_state=1,
    n_jobs=-1,
)

# Single hyperparameter explored by the optimizer: the neighbourhood size
search_space = [
    Integer(1, 100, name='n_neighbors'),
]

# KMeans partitions the embedded points into 17 clusters (a clustering model,
# not a classifier); the fixed seed keeps its initialisation repeatable.
clusterer = cluster.KMeans(n_clusters=17, random_state=1)

# Number of objective evaluations made so far; incremented by evaluate_model
count = 0
# Objective for the optimizer: each call scores one hyperparameter setting
@use_named_args(search_space)
def evaluate_model(**params):
    """Score one spectral-embedding configuration by clustering its output.

    The shared ``model`` is reconfigured with ``params``, ``data_reshaped`` is
    embedded, the embedding is clustered with ``clusterer`` and the resulting
    labels are compared with ``gt_reshaped`` using the V-measure. No
    cross-validation is involved.

    Side effects:
        Increments the module-level ``count`` and prints one progress line.

    Args:
        **params: Keyword arguments named after the dimensions of
            ``search_space``; forwarded to ``model.set_params``.

    Returns:
        ``1 - v_measure``, so that a better clustering yields a smaller value.
    """
    global count
    count += 1

    model.set_params(**params)
    embedding = model.fit_transform(data_reshaped)
    cluster_ids = clusterer.fit_predict(embedding)
    score = v_measure_score(gt_reshaped, cluster_ids)

    # The evaluation counter reads as a percentage only because the search
    # is launched with 100 calls.
    print(count, "percent done", score)
    return 1 - score

# Run the Gaussian-process optimizer over the search space. random_state pins
# the optimizer's own sampling so that repeated runs propose the same points.
result = gp_minimize(evaluate_model, search_space, n_jobs=-1, n_calls=100, random_state=1)

# Report the best configuration. The objective returns 1 - V-measure, so the
# score is converted back here; it is a V-measure, not an accuracy.
print('Best V-measure: %.3f' % (1.0 - result.fun))
print('Best Parameters: n_neighbours=%d' % (result.x[0]))

In [None]:
# Modified locally-linear embedding into two components; seeded and allowed
# to use every available core.
model = LocallyLinearEmbedding(
    n_components=2,
    method='modified',
    random_state=1,
    n_jobs=-1,
)

# Hyperparameters explored by the optimizer, with their ranges
search_space = [
    Integer(2, 20, name='n_neighbors'),
    Real(0.0001, 0.01, name='reg'),
    Integer(50, 200, name='max_iter'),
]

# KMeans partitions the embedded points into 17 clusters (a clustering model,
# not a classifier); the fixed seed keeps its initialisation repeatable.
clusterer = cluster.KMeans(n_clusters=17, random_state=1)

# Number of objective evaluations made so far; incremented by evaluate_model
count = 0
# Objective for the optimizer: each call scores one hyperparameter setting
@use_named_args(search_space)
def evaluate_model(**params):
    """Score one LLE configuration by clustering its embedding.

    The shared ``model`` is reconfigured with ``params``, ``data_reshaped`` is
    embedded, the embedding is clustered with ``clusterer`` and the resulting
    labels are compared with ``gt_reshaped`` using the V-measure. No
    cross-validation is involved.

    A configuration that raises during fitting or scoring is reported and
    given the worst possible objective value instead of aborting the search.

    Side effects:
        Increments the module-level ``count`` and prints one progress (or
        failure) line.

    Args:
        **params: Keyword arguments named after the dimensions of
            ``search_space``; forwarded to ``model.set_params``.

    Returns:
        ``1 - v_measure`` for a successful evaluation, ``1.0`` on failure.
    """
    global count
    count += 1
    try:
        model.set_params(**params)
        embedding = model.fit_transform(data_reshaped)
        cluster_ids = clusterer.fit_predict(embedding)
        score = v_measure_score(gt_reshaped, cluster_ids)
    except Exception as exc:
        # Catch Exception rather than everything: a bare `except:` would also
        # trap KeyboardInterrupt/SystemExit, making the search impossible to
        # stop. The failure is printed so that a bad configuration (or a real
        # bug) is not mistaken for a legitimately poor score.
        print(count, "evaluation failed for", params, "->", repr(exc))
        return 1.0

    # The evaluation counter reads as a percentage only because the search
    # is launched with 100 calls.
    print(count, "percent done", score)
    return 1 - score


# Run the Gaussian-process optimizer over the search space. random_state pins
# the optimizer's own sampling so that repeated runs propose the same points.
result = gp_minimize(evaluate_model, search_space, n_jobs=-1, n_calls=100, random_state=1)

# Report the best configuration. The objective returns 1 - V-measure, so the
# score is converted back here; it is a V-measure, not an accuracy.
print('Best V-measure: %.3f' % (1.0 - result.fun))
# One conversion specifier per value: the previous string mixed a literal
# '{}' into %-formatting, leaving three arguments for two specifiers, which
# raises TypeError. `reg` is a float, so it is printed with %g.
print('Best Parameters: n_neighbours=%d, reg=%g, max_iter=%d' % (result.x[0], result.x[1], result.x[2]))

In [None]:
# Distance metrics offered to the optimizer as a categorical choice.
# NOTE(review): 'mahalanobis' and 'wminkowski' usually need extra metric
# arguments (a covariance / weight vector) -- confirm they behave as intended
# when passed to UMAP by name only.
cat = [
    'euclidean',
    'manhattan',
    'chebyshev',
    'minkowski',
    'canberra',
    'braycurtis',
    'mahalanobis',
    'wminkowski',
]

# UMAP reducer producing a two-component embedding; seeded and allowed to use
# every available core.
model = umap.UMAP(n_components=2, random_state=1, n_jobs=-1)

# Hyperparameters explored by the optimizer, with their ranges
search_space = [
    Integer(3, 20, name='n_neighbors'),
    Real(0.001, 0.5, name='min_dist'),
    Real(0.5, 3, name='spread'),
    Categorical(cat, name='metric'),
]

# KMeans partitions the embedded points into 17 clusters (a clustering model,
# not a classifier); the fixed seed keeps its initialisation repeatable.
clusterer = cluster.KMeans(n_clusters=17, random_state=1)

# Number of objective evaluations made so far; incremented by evaluate_model
count = 0
# Objective for the optimizer: each call scores one hyperparameter setting
@use_named_args(search_space)
def evaluate_model(**params):
    """Score one UMAP configuration by clustering its embedding.

    The shared ``model`` is reconfigured with ``params``, ``data_reshaped`` is
    embedded, the embedding is clustered with ``clusterer`` and the resulting
    labels are compared with ``gt_reshaped`` using the V-measure. No
    cross-validation is involved.

    A configuration that raises during fitting or scoring is reported and
    given the worst possible objective value instead of aborting the search.

    Side effects:
        Increments the module-level ``count`` and prints one progress (or
        failure) line.

    Args:
        **params: Keyword arguments named after the dimensions of
            ``search_space``; forwarded to ``model.set_params``.

    Returns:
        ``1 - v_measure`` for a successful evaluation, ``1.0`` on failure.
    """
    global count
    count += 1
    try:
        model.set_params(**params)
        embedding = model.fit_transform(data_reshaped)
        cluster_ids = clusterer.fit_predict(embedding)
        score = v_measure_score(gt_reshaped, cluster_ids)
    except Exception as exc:
        # Catch Exception rather than everything: a bare `except:` would also
        # trap KeyboardInterrupt/SystemExit, making the search impossible to
        # stop. The failure is printed so that an unsupported setting (or a
        # real bug) is not mistaken for a legitimately poor score.
        print(count, "evaluation failed for", params, "->", repr(exc))
        return 1.0

    # The evaluation counter reads as a percentage only because the search
    # is launched with 100 calls.
    print(count, "percent done", score)
    return 1 - score

# Run the Gaussian-process optimizer over the search space. random_state pins
# the optimizer's own sampling so that repeated runs propose the same points.
result = gp_minimize(evaluate_model, search_space, n_jobs=-1, n_calls=100, random_state=1)

# Report the best configuration. The objective returns 1 - V-measure, so the
# score is converted back here; it is a V-measure, not an accuracy.
print('Best V-measure: %.3f' % (1.0 - result.fun))
# Specifiers match the value types: min_dist and spread are floats (%d would
# truncate them to integers) and metric is a string (%d would raise
# TypeError), so they are printed with %.3f and %s respectively.
print('Best Parameters: n_neighbours=%d, min_dist=%.3f, spread=%.3f, metric=%s' % (result.x[0], result.x[1], result.x[2], result.x[3]))