In [113]:
# Authors: Dr. Min Wang, Noah Gardner

In [114]:
from pathlib import Path

import numpy as np
import scipy.io as sio
from fcmeans import FCM
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import accuracy_score, cluster
from sklearn.metrics import adjusted_rand_score
from sklearn.model_selection import train_test_split

In [115]:
from juliacall import Main as jl
jl.seval('include("./datasets.jl")')
jl.seval('ENV["DATADEPS_ALWAYS_ACCEPT"] = "true"')

│   name = "IndianPinesData"
└ @ DataDeps ~/.julia/packages/DataDeps/ooWXe/src/registration.jl:15
│   name = "IndianPinesLabels"
└ @ DataDeps ~/.julia/packages/DataDeps/ooWXe/src/registration.jl:15
│   name = "WashingtonMall"
└ @ DataDeps ~/.julia/packages/DataDeps/ooWXe/src/registration.jl:15


'true'

In [116]:
# Constants
# number of experiments
noexp = 3

# number of classes
labellist = [2, 5, 10, 11]
L = len(labellist)

RANDOM_STATE = int(np.random.uniform()*100)
TEST_SIZE = 0.2
print('random_state:', RANDOM_STATE)

random_state: 77


In [117]:
ip_data_path = Path(jl.seval('datadep"IndianPinesData"')) / 'Indian_pines.mat'
ip_labels_path = Path(jl.seval('datadep"IndianPinesLabels"')) / 'Indian_pines_gt.mat'
ip_data = sio.loadmat(ip_data_path)['indian_pines']
ip_labels = sio.loadmat(ip_labels_path)['indian_pines_gt']

ip_data = ip_data.reshape(ip_data.shape[0] * ip_data.shape[1], ip_data.shape[2])
ip_labels = ip_labels.reshape(ip_labels.shape[0] * ip_labels.shape[1])

In [118]:
idx = np.where(np.isin(ip_labels, labellist))
ip_data = ip_data[idx]
ip_labels = ip_labels[idx]
print(ip_data.shape)
print(ip_labels.shape)

(5338, 220)
(5338,)


In [119]:
# convert labels from 2,5,10,11 to 0,1,2,3
ip_labels[np.where(ip_labels == 2)] = 0
ip_labels[np.where(ip_labels == 5)] = 1
ip_labels[np.where(ip_labels == 10)] = 2
ip_labels[np.where(ip_labels == 11)] = 3

In [120]:
print(np.unique(ip_labels, return_counts=True))

(array([0, 1, 2, 3], dtype=uint8), array([1428,  483,  972, 2455]))


In [121]:
X_train, X_test, y_train, y_test = train_test_split(
    ip_data,
    ip_labels,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

In [122]:
def train(X, L, max_iter=100, m=2):
    fcm = FCM(
        n_clusters=L,
        max_iter=max_iter,
        m=m,
        random_state=RANDOM_STATE,
    )
    fcm.fit(X)
    return fcm

In [123]:
def predict(X, fcm):
    return fcm.predict(X)

In [124]:
def checkFCM(preds, labels):
    # create a mapping list: class i --> pred j
    c = cluster.contingency_matrix(labels, preds, eps=1, dtype=int)
    remapped_centers = linear_sum_assignment(c, maximize=True)
    
    # need to convert labellist to [0,3]
    remapped_preds = np.array([remapped_centers[1][pred] for pred in preds])
    
    print('new experiment')
    print('prediction frequency count: ', np.unique(preds, return_counts=True))
    print('contigency matrix:\n', c)
    print('re-mapped centers: ', remapped_centers)
    print('accuracy: ', accuracy_score(remapped_preds, labels))

    # for debugging, adjusted rand score will be the same before and after remapping
    print('adjusted rand score: ', adjusted_rand_score(preds, labels))
    print('adjusted rand score (after remap): ', adjusted_rand_score(remapped_preds, labels))
    return accuracy_score(remapped_preds, labels)

In [125]:
fcm = train(X_train, L)
preds = predict(X_test, fcm)

In [126]:
checkFCM(preds, y_test)

new experiment
prediction frequency count:  (array([0, 1, 2, 3]), array([ 75, 346, 470, 177]))
contigency matrix:
 [[  2 155  87  41]
 [ 75   3  13  11]
 [  1  39 119  67]
 [  1 153 255  62]]
re-mapped centers:  (array([0, 1, 2, 3]), array([1, 0, 3, 2]))
accuracy:  0.5131086142322098
adjusted rand score:  0.12553261633558424
adjusted rand score (after remap):  0.12553261633558424


0.5131086142322098