In [None]:
# Spectral clustering 
import numpy as np
from sklearn.cluster import  SpectralClustering
from sklearn.metrics import confusion_matrix, f1_score, normalized_mutual_info_score, precision_score, recall_score
import pandas as pd
import timeit
import matplotlib as plt

In [None]:
# Both helpers below iterate over the same key sequence, so the ground-truth labels stay in the same order as the data values.
def ground_truth_set(key):
    """Build the binary ground-truth vector for a sequence of sample names.

    A name containing the substring "Palm" is labelled 1; every other name
    is labelled 0. Labels follow the iteration order of ``key``.

    Returns a numpy array with one label per entry of ``key``.
    """
    labels = [1 if "Palm" in name else 0 for name in key]
    return np.array(labels)

def data_set(key, dict):
    """Look up every entry of ``key`` in ``dict`` and stack the values.

    The values are collected in the iteration order of ``key`` and returned
    as a single numpy array.
    """
    return np.array([dict[name] for name in key])

def get_truth_and_data(fileName):
    """Load the mapping stored in ``fileName`` and split it into labels and data.

    The file is read with ``np.load`` and unwrapped with ``.item()``; the
    resulting object must provide ``keys()`` and item lookup.

    Returns ``(ground_truth, data_values)``. Both arrays are built from the
    same keys view, so entry ``i`` of each refers to the same key.
    """
    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
    # files from a trusted source.
    features = np.load(fileName, allow_pickle=True).item()
    names = features.keys()
    return ground_truth_set(names), data_set(names, features)

def accuracy(truth, pred):
    """Return the fraction of predictions that equal the ground truth.

    For binary labels this is the usual (tn + tp) / (tn + fp + fn + tp).
    It is computed directly from element-wise agreement, so it also works
    when only one class is present; unpacking a confusion matrix into
    ``tn, fp, fn, tp`` fails in that case because the matrix is 1x1.

    Parameters:
        truth: array-like of ground-truth labels.
        pred: array-like of predicted labels, same length as ``truth``.

    Returns:
        The accuracy as a numpy float in [0, 1].

    Raises:
        ValueError: if the inputs are empty or differ in length.
    """
    truth_arr = np.asarray(truth).ravel()
    pred_arr = np.asarray(pred).ravel()
    if truth_arr.shape != pred_arr.shape:
        raise ValueError(
            f"truth and pred must have the same length, got "
            f"{truth_arr.size} and {pred_arr.size}"
        )
    if truth_arr.size == 0:
        raise ValueError("accuracy is undefined for empty input")
    return np.mean(truth_arr == pred_arr)


In [None]:
# Per-experiment-name [precision, recall, f1] maxima; filled in by record_results.
p_r_f1 = dict()

In [None]:
def experiment(truth, data, random_state=None):
    """Cluster ``data`` into two groups and score the clusters against ``truth``.

    Parameters:
        truth: array-like of 0/1 ground-truth labels, one per row of ``data``.
        data: feature matrix passed to ``SpectralClustering.fit_predict``.
        random_state: optional seed forwarded to ``SpectralClustering`` so a
            run can be reproduced. ``None`` (the default) keeps the
            estimator's unseeded behaviour.

    Returns:
        ``(precision, recall, f1)``, each weighted over the labels 0 and 1.
    """
    spectral_estimator = SpectralClustering(n_clusters=2, random_state=random_state)
    labels = np.asarray(spectral_estimator.fit_predict(data))

    # Cluster ids are arbitrary: cluster 0 is not necessarily class 0. With two
    # clusters there are only two possible mappings, so keep whichever one
    # agrees with the ground truth more often before scoring.
    truth_arr = np.asarray(truth)
    flipped = 1 - labels
    if np.count_nonzero(flipped == truth_arr) > np.count_nonzero(labels == truth_arr):
        labels = flipped

    precision = precision_score(truth, labels, labels=[0, 1], average='weighted')
    recall = recall_score(truth, labels, labels=[0, 1], average='weighted')
    f1 = f1_score(truth, labels, labels=[0, 1], average='weighted')
    return precision, recall, f1

In [None]:
import csv 
import os 
def record_results(truth, data, dict, name, experi ="spectral"):
    """Run one experiment, append its scores to a CSV file and track the maxima.

    Parameters:
        truth: ground-truth labels forwarded to ``experiment``.
        data: feature matrix forwarded to ``experiment``.
        dict: mutable mapping from experiment name to a
            ``[precision, recall, f1]`` list holding the best value seen so
            far for each metric. It is updated in place.
        name: experiment name; used as the key in ``dict`` and as the CSV
            file stem.
        experi: suffix of the output directory ``results_{experi}``.

    Side effects:
        Appends one row to ``results_{experi}/{name}.csv`` (creating the
        directory and the header row when needed) and prints the scores.
        A stored maximum is printed only when this run did not reach it.
    """
    precision, recall, f1 = experiment(truth, data)
    if name not in dict:
        dict[name] = [precision, recall, f1]
    best = dict[name]

    # Create the output directory up front; open() does not create parents.
    out_dir = f"results_{experi}"
    os.makedirs(out_dir, exist_ok=True)
    csv_path = f"{out_dir}/{name}.csv"

    # A new (or empty) file needs the header row before the first result.
    needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    with open(csv_path, 'a', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(("precision", "recall", "f1"))
        writer.writerow((precision, recall, f1))

    print(name)

    # Slot order in ``best`` matches the CSV columns: precision, recall, f1.
    tracked = (
        ("max precision: ", precision),
        ("max recall: ", recall),
        ("max f1 : ", f1),
    )
    for slot, (message, score) in enumerate(tracked):
        if best[slot] <= score:
            best[slot] = score
        else:
            # Report the maximum of the same metric that was compared.
            print(message, best[slot])

    print("f1 : ", f1 , "\nprecision: ", precision,"\nrecall: ", recall)

In [None]:
# Load the (labels, features) pair for every descriptor file.
# The "_n" files are the variants stored next to each plain descriptor file.
glcm_truth_n, glcm_data_n = get_truth_and_data("GLCM_n.npy")
glcm_truth, glcm_data = get_truth_and_data("GLCM.npy")

hist_truth_n, hist_data_n = get_truth_and_data("hist_n.npy")
hist_truth, hist_data = get_truth_and_data("hist.npy")

hog_truth_n, hog_data_n = get_truth_and_data("hog_n.npy")
hog_truth, hog_data = get_truth_and_data("hog.npy")

lbp_truth, lbp_data = get_truth_and_data("lbp.npy")
lbp_truth_n, lbp_data_n = get_truth_and_data("lbp_n.npy")

In [None]:
# Ten repeated runs over the feature sets listed below, in this order.
feature_runs = (
    (glcm_truth_n, glcm_data_n, "glcm_n"),
    (glcm_truth, glcm_data, "glcm"),
    (hog_truth, hog_data, "hog"),
    (hist_truth, hist_data, "hist"),
    (lbp_truth, lbp_data, "lbp"),
    (lbp_truth_n, lbp_data_n, "lbp_n"),
)
for run in range(10):
    for run_truth, run_data, run_name in feature_runs:
        record_results(run_truth, run_data, p_r_f1, run_name)
# Recorded wall time for this cell: 49min 11.2s


In [34]:
# hog_n is run in its own cell; see the recorded timings below.
for run in range(3):
    record_results(hog_truth_n, hog_data_n, dict=p_r_f1, name="hog_n")
# Recorded wall time: 115m 59.4s

# More than 748m for two results:
# f1 :  0.7167622554453932
# precision:  0.8164723191982974
# recall:  0.6706206474324634

KeyboardInterrupt: 

In [None]:
# Single run on the hist_n feature set.
record_results(hist_truth_n, hist_data_n, dict=p_r_f1, name="hist_n")

In [None]:
# the PCA section 
# Load the (labels, features) pair for every "*_pca.npy" descriptor file.
GLCM_truth_n_pca, GLCM_data_n_pca = get_truth_and_data("GLCM_n_pca.npy")
GLCM_truth_pca, GLCM_data_pca = get_truth_and_data("GLCM_pca.npy")

hog_truth_n_pca, hog_data_n_pca = get_truth_and_data("hog_n_pca.npy")
hog_truth_pca, hog_data_pca = get_truth_and_data("hog_pca.npy")

lbp_truth_pca, lbp_data_pca = get_truth_and_data("lbp_pca.npy")
lbp_truth_n_pca, lbp_data_n_pca = get_truth_and_data("lbp_n_pca.npy")

hist_truth_n_pca, hist_data_n_pca = get_truth_and_data("hist_n_pca.npy")
hist_truth_pca, hist_data_pca = get_truth_and_data("hist_pca.npy")

In [None]:
# Ten repeated runs over the PCA feature sets listed below, in this order.
pca_runs = (
    (GLCM_truth_n_pca, GLCM_data_n_pca, "glcm_n_pca"),
    (GLCM_truth_pca, GLCM_data_pca, "glcm_pca"),
    (hist_truth_pca, hist_data_pca, "hist_pca"),
    (lbp_truth_pca, lbp_data_pca, "lbp_pca"),
    (lbp_truth_n_pca, lbp_data_n_pca, "lbp_n_pca"),
)
for run in range(10):
    for run_truth, run_data, run_name in pca_runs:
        record_results(run_truth, run_data, p_r_f1, run_name)
# Recorded wall time for this cell: 41m 38.8s

In [None]:
# The HOG runs below take about 194 minutes.
hog_pca_runs = (
    (hog_truth_n_pca, hog_data_n_pca, "hog_n_pca"),
    (hog_truth_pca, hog_data_pca, "hog_pca"),
)
for run in range(10):
    for run_truth, run_data, run_name in hog_pca_runs:
        record_results(run_truth, run_data, p_r_f1, run_name)

In [None]:
# The hist_n_pca run does not complete properly.
record_results(
    hist_truth_n_pca,
    hist_data_n_pca,
    dict=p_r_f1,
    name="hist_n_pca",
)