# Mutual Information score

In [1]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import adjusted_mutual_info_score as AMI
from sklearn.metrics import normalized_mutual_info_score as NMI
%matplotlib inline
%run nifti_tools.ipynb

In [26]:
# Sample NMI and AMI score

# Allen Reference Atlas annotation, read through nifti_to_vector
allen_path = '/data/bioprotean/ABA/PCA/80_variance/allen_annot200.nii'
reference = nifti_to_vector(allen_path)

# One example K-means labelling (the 594-cluster file), flattened to 1-D
cluster_path = '/data/bioprotean/ABA/PCA/80_variance/Kmeans_labels/594_clusters.npy'
cluster = np.load(cluster_path).flatten()

# First line printed is the AMI, second line is the NMI
print(AMI(cluster, reference), NMI(cluster, reference), sep='\n')

0.581191676493668
0.6209993297758435


In [4]:
# AMI score for PCA + K-means clusters
# (label files read from PCA/80_variance/Kmeans_labels)

# Allen Reference Atlas annotation that every clustering is compared against
allen_path = '/data/bioprotean/ABA/PCA/80_variance/allen_annot200.nii'
reference = nifti_to_vector(allen_path)

# K values to score: 1..49, then 50..550 in steps of 50, plus 594
numbers_list = list(range(1, 50)) + list(range(50, 551, 50)) + [594]

# One AMI value per K, in the same order as numbers_list
labels_dir = '/data/bioprotean/ABA/PCA/80_variance/Kmeans_labels/'
AMI_list = []
for k in numbers_list:
    cluster_path = labels_dir + '{}_clusters.npy'.format(k)
    cluster = np.load(cluster_path).flatten()
    AMI_list.append(AMI(reference, cluster))

# Score table; the 'Silhouette' column is declared but not filled in this cell
score_df = pd.DataFrame(columns=['K', 'AMI', 'Silhouette'])
score_df['K'] = numbers_list
score_df['AMI'] = AMI_list

score_df
# # Saving to CSV
# score_df.to_csv('/data/bioprotean/ABA/PCA/80_variance/PC_80v_score.csv')

Unnamed: 0,K,AMI,Silhouette
0,1,-8.950789e-17,
1,2,3.029351e-01,
2,3,3.818624e-01,
3,4,4.206111e-01,
4,5,4.619584e-01,
...,...,...,...
56,400,5.872368e-01,
57,450,5.903770e-01,
58,500,5.847377e-01,
59,550,5.848705e-01,


In [5]:
# AMI score for the K-means clusterings stored under SFT/Kmeans_rc
#
# NOTE(review): the recorded output of this cell shows AMI ~0.3765 at K=1.
# A labelling with a single cluster would be expected to score ~0, so the
# K=1 file presumably holds more than one distinct label (e.g. a separate
# background label) -- confirm before comparing these scores with other runs.

# Loading the Allen Reference Atlas
allen_path = '/data/bioprotean/ABA/PCA/80_variance/allen_annot200.nii'
reference = nifti_to_vector(allen_path)

# K values to score: 1..49, then 50..550 in steps of 50, plus 594
numbers_list = list(range(1, 50)) + list(range(50, 551, 50)) + [594]

# AMI between the atlas and each clustering, in the order of numbers_list
labels_dir = '/data/bioprotean/ABA/SFT/Kmeans_rc/'
AMI_list = []
for k in numbers_list:
    cluster_path = labels_dir + str(k) + '_clusters.npy'
    cluster = np.load(cluster_path).flatten()
    AMI_list.append(AMI(reference, cluster))

# Score table; the 'Silhouette' column is declared but not filled in this cell
score_df = pd.DataFrame(columns=['K', 'AMI', 'Silhouette'])
score_df['K'] = numbers_list
score_df['AMI'] = AMI_list

score_df
# # Saving to CSV
# score_df.to_csv('/data/bioprotean/ABA/SFT/Kmeans_rc/SFT_AMI_score.csv')

Unnamed: 0,K,AMI,Silhouette
0,1,0.376514,
1,2,0.424769,
2,3,0.494535,
3,4,0.516553,
4,5,0.554352,
...,...,...,...
56,400,0.720065,
57,450,0.718631,
58,500,0.718213,
59,550,0.717659,


In [11]:
# AMI score for the K-means clusterings stored under KernelPCA/poly2/Kmeans_rc
#
# NOTE(review): the recorded output of this cell shows AMI ~0.3765 at K=1.
# A labelling with a single cluster would be expected to score ~0, so the
# K=1 file presumably holds more than one distinct label (e.g. a separate
# background label) -- confirm before comparing these scores with other runs.

# Loading the Allen Reference Atlas
allen_path = '/data/bioprotean/ABA/PCA/80_variance/allen_annot200.nii'
reference = nifti_to_vector(allen_path)

# K values to score: 1..49, then 50..550 in steps of 50, plus 594
numbers_list = list(range(1, 50)) + list(range(50, 551, 50)) + [594]

# AMI between the atlas and each clustering, in the order of numbers_list
labels_dir = '/data/bioprotean/ABA/KernelPCA/poly2/Kmeans_rc/'
AMI_list = []
for k in numbers_list:
    cluster_path = labels_dir + str(k) + '_clusters.npy'
    cluster = np.load(cluster_path).flatten()
    AMI_list.append(AMI(reference, cluster))

# Score table; the 'Silhouette' column is declared but not filled in this cell
score_df = pd.DataFrame(columns=['K', 'AMI', 'Silhouette'])
score_df['K'] = numbers_list
score_df['AMI'] = AMI_list

score_df
# # Saving to CSV
# # (The target previously pointed at SFT/Kmeans_rc/SFT_AMI_score.csv, which
# # would have overwritten the SFT scores; the file name below is a suggestion.)
# score_df.to_csv('/data/bioprotean/ABA/KernelPCA/poly2/Kmeans_rc/KPCA_poly2_AMI_score.csv')

Unnamed: 0,K,AMI,Silhouette
0,1,0.376514,
1,2,0.448081,
2,3,0.483203,
3,4,0.486858,
4,5,0.501683,
...,...,...,...
56,400,0.705926,
57,450,0.702058,
58,500,0.703292,
59,550,0.703849,


In [7]:
# AMI score for DLSC + K-means clusters
# (label files read from DLSC/pos_std/Kmeans_labels)

# Allen Reference Atlas annotation that every clustering is compared against
allen_path = '/data/bioprotean/ABA/PCA/80_variance/allen_annot200.nii'
reference = nifti_to_vector(allen_path)

# K values to score: 1..49, then 50..550 in steps of 50, plus 594
numbers_list = list(range(1, 50)) + list(range(50, 551, 50)) + [594]

# One AMI value per K, in the same order as numbers_list
labels_dir = '/data/bioprotean/ABA/DLSC/pos_std/Kmeans_labels/'
AMI_list = []
for k in numbers_list:
    cluster_path = labels_dir + '{}_clusters.npy'.format(k)
    cluster = np.load(cluster_path).flatten()
    AMI_list.append(AMI(reference, cluster))

# Score table; the 'Silhouette' column is declared but not filled in this cell
score_df = pd.DataFrame(columns=['K', 'AMI', 'Silhouette'])
score_df['K'] = numbers_list
score_df['AMI'] = AMI_list

score_df
# # Saving to CSV
# score_df.to_csv('/data/bioprotean/ABA/DLSC/pos_std/DLSC_100_score.csv')

Unnamed: 0,K,AMI,Silhouette
0,1,-8.950789e-17,
1,2,2.075557e-02,
2,3,1.560250e-01,
3,4,1.950712e-01,
4,5,2.748161e-01,
...,...,...,...
56,400,5.901659e-01,
57,450,5.918110e-01,
58,500,5.862475e-01,
59,550,5.855717e-01,
