In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [None]:
from os import listdir
from os.path import isfile, join

In [None]:
from pathlib import Path

In [None]:
from sklearn.cluster import SpectralBiclustering
from sklearn import metrics

In [None]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt

In [None]:
from sklearn.manifold import TSNE

In [None]:
# Input directory, resolved against the current user's home directory.
# The path is expanded here instead of being kept as a literal "~/..." string
# so that it also works with consumers that do not expand "~" themselves.
# The trailing "" keeps the trailing path separator of the original value.
InDir = join(Path.home(), "Biclustering_Tests", "Data", "")

In [None]:
# Output directory for exported figures, resolved against the user's home
# directory. A literal "~/..." string is not expanded when the path is handed
# to fig.write_image, so the home directory is resolved up front.
# The trailing "" keeps the trailing path separator of the original value.
outdir = join(Path.home(), "Biclustering_Tests", "Output", "CSV", "")

## Data With Labels

In [None]:
# Labelled dataset for this section. To analyse the movement data instead,
# point the path at "movement_normalised.csv".
labelled_csv = join(InDir, "dementia_normalized.csv")
df = pd.read_csv(labelled_csv)
df.head()

In [None]:
# The first column of the frame is used as the reference labelling that the
# row clusters are compared against.
label_column = df.columns[0]
labels_1 = df[label_column]

### Reference cell

metrics.adjusted_rand_score(labels_true, labels_pred)
metrics.mutual_info_score(labels_true, labels_pred)
metrics.homogeneity_score(labels_true, labels_pred)
metrics.completeness_score(labels_true, labels_pred)
metrics.v_measure_score(labels_true, labels_pred)

from sklearn.metrics import pairwise_distances
metrics.silhouette_score(X, labels, metric='euclidean')
    

In [None]:
# Everything after the first (label) column becomes the matrix to bicluster.
feature_columns = df.columns[1:]
data = df[feature_columns].to_numpy()

In [None]:
# Accumulator for the labelled-data sweep: one independent empty list per
# validation index plus the two cluster-count columns. Key order is kept as
# written because it becomes the insertion order of the dict.
CVI_Scores = {
    column: []
    for column in (
        "ARI",
        "Homogeneity",
        "MI",
        "Completeness",
        "VMeasure",
        "Silhouette",
        "N Clusters Row",
        "N Clusters Col",
    )
}

In [None]:
# Sweep over (row, column) cluster counts, fit a SpectralBiclustering model for
# each pair and record how well its row labels agree with the reference labels
# (ARI, MI, homogeneity, completeness, V-measure) plus the silhouette score.

# Empty the accumulator lists first so that re-running this cell replaces the
# previous results instead of appending a second copy of every score.
for scores in CVI_Scores.values():
    scores.clear()

# Metrics that compare the reference labels with the predicted row labels.
external_metrics = {
    "ARI": metrics.adjusted_rand_score,
    "MI": metrics.mutual_info_score,
    "Homogeneity": metrics.homogeneity_score,
    "Completeness": metrics.completeness_score,
    "VMeasure": metrics.v_measure_score,
}

# NOTE(review): rows are swept over 2..10 but columns only over 2..9, while the
# sweep for the data without labels uses 2..10 for both -- confirm that this
# asymmetry is intended.
for itr1 in range(2, 11):
    for itr2 in range(2, 10):
        model = SpectralBiclustering(n_clusters=(itr1, itr2), random_state=0)
        model.fit(data)
        labels_2 = model.row_labels_
        for metric_name, scorer in external_metrics.items():
            CVI_Scores[metric_name].append(scorer(labels_1, labels_2))
        CVI_Scores["Silhouette"].append(
            metrics.silhouette_score(data, labels_2, metric='euclidean'))
        CVI_Scores["N Clusters Row"].append(itr1)
        CVI_Scores["N Clusters Col"].append(itr2)

In [None]:
# Tabulate the collected scores: one column per CVI_Scores key, one row per
# recorded sweep entry.
df_CVI = pd.DataFrame(CVI_Scores)

In [None]:
# Display the table of validation scores built from CVI_Scores.
df_CVI

In [None]:
# Length of the ARI score list, i.e. how many sweep results have been recorded.
len_Indices = len(CVI_Scores["ARI"])

In [None]:
# Line chart with one trace per validation index, plotted against the row
# position of each (row, column) cluster-count combination in df_CVI, then
# shown inline and exported as a PNG.
fig = go.Figure()

# (df_CVI column, legend label) pairs, in drawing order.
cvi_traces = [
    ("ARI", "ARI"),
    ("MI", "MI"),
    ("Homogeneity", "Homogeneity"),
    ("Completeness", "Completeness"),
    ("VMeasure", "V Measure"),
    ("Silhouette", "Silhouette"),
]
for cvi_column, legend_label in cvi_traces:
    fig.add_trace(
        go.Scatter(
            x=df_CVI.index,
            y=df_CVI[cvi_column],
            mode='lines',
            name=legend_label,
        )
    )

fig.update_layout(
    title='CVI vs #Clusters for Dementia',
    xaxis_title='#index',
    yaxis_title='CVI Value',
    font=dict(family="Georgia", size=18, color="#000000"),
)

fig.show()
fig.write_image(join(outdir, "SpecBi_CVI_Data.png"))

In [None]:
# Locate the first sweep entry with the highest ARI and read back the
# row/column cluster counts that produced it.
best_ari = max(df_CVI["ARI"])
indexMax = CVI_Scores["ARI"].index(best_ari)
nclustRow, nclustCol = (
    CVI_Scores[count_key][indexMax]
    for count_key in ("N Clusters Row", "N Clusters Col")
)

In [None]:
# Show the cluster counts picked by the best-ARI search. The name `nclust`
# used here before is never assigned in the cells above, so evaluating it
# raised NameError; the selection is stored in nclustRow / nclustCol.
nclustRow, nclustCol

In [None]:
# Manual override: both cluster counts are pinned to 8, replacing whatever the
# best-ARI lookup above assigned to these names.
nclustRow = nclustCol = 8

In [None]:
# Display the row-cluster count currently in effect.
nclustRow

In [None]:
# Fit the final biclustering with the chosen cluster counts and keep the
# per-row / per-column cluster ids together with the distinct ids that occur.
model = SpectralBiclustering(
    n_clusters=(nclustRow, nclustCol),
    random_state=0,
).fit(data)

row_labels, col_labels = model.row_labels_, model.column_labels_
unique_labels_row, unique_labels_col = np.unique(row_labels), np.unique(col_labels)

In [None]:
# Reorder the columns so that members of the same column cluster sit next to
# each other, and build a matching "<column name>#<cluster number>" label for
# every reordered column (cluster numbers are shown 1-based).
col_sort_index = []
new_col_labels = []
for lbl in unique_labels_col:
    members = np.flatnonzero(col_labels == lbl)
    col_sort_index.extend(members)
    # +1 on the position skips the first column of df, which is not part of `data`.
    new_col_labels.extend(f"{df.columns[pos+1]}#{lbl+1}" for pos in members)

col_shuffled_data = data[:, col_sort_index]

In [None]:
# Load the previously saved 2-D embedding from the working directory.
# NOTE(review): this notebook never writes 'tseData.npy'; presumably it was
# produced by a separate t-SNE run -- confirm it matches the current data.
shuffled_data_embedded = np.load('tseData.npy')

In [None]:
# Inspect the dimensions of the loaded embedding array.
shuffled_data_embedded.shape

In [None]:
# Scatter plot of the loaded embedding, one marker trace per row-cluster id.
fig = go.Figure()

for clust in range(row_labels.max() + 1):
    members = np.flatnonzero(row_labels == clust)
    fig.add_trace(
        go.Scatter(
            x=shuffled_data_embedded[members, 0],
            y=shuffled_data_embedded[members, 1],
            mode='markers',
            marker={'size': 15},
            name=f'Cluster#{clust+1}',
        )
    )

# Page background, titles, font and fixed canvas size in one layout update.
# ('paper_bgcolor' is deliberately left at its default.)
fig.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    title='tSNE Projection of Data With Spectral Biclustering',
    xaxis_title='Axis#1',
    yaxis_title='Axis#2',
    font=dict(family="Georgia", size=18, color="#000000"),
    autosize=False,
    width=1000,
    height=800,
)

# Outside ticks and a mirrored black frame line, applied to both axes.
axis_frame = dict(
    ticks="outside", tickwidth=1, tickcolor='black', ticklen=10,
    showline=True, linewidth=1, linecolor='black', mirror=True,
)
fig.update_xaxes(**axis_frame)
fig.update_yaxes(**axis_frame)

fig.show()

In [None]:
import umap

# 2-D UMAP embedding of the column-reordered data matrix.
# random_state is fixed (0, the same seed used for SpectralBiclustering in
# this notebook) so that the embedding, and the scatter plot built from it,
# is the same on every run instead of changing between executions.
reducer = umap.UMAP(
    n_neighbors=15,
    n_components=2,
    random_state=0,
)

embedding = reducer.fit_transform(col_shuffled_data)
embedding.shape

In [None]:
# Scatter plot of the UMAP embedding, one marker trace per row-cluster id.
fig = go.Figure()

for clust in range(row_labels.max() + 1):
    members = np.flatnonzero(row_labels == clust)
    fig.add_trace(
        go.Scatter(
            x=embedding[members, 0],
            y=embedding[members, 1],
            mode='markers',
            marker={'size': 15},
            name=f'Cluster#{clust+1}',
        )
    )

# Titles, font and fixed canvas size in one layout update.
# (The transparent 'plot_bgcolor' setting is intentionally not applied here.)
fig.update_layout(
    title='UMAP Projection of Data with Spectral Biclustering',
    xaxis_title='Axis#1',
    yaxis_title='Axis#2',
    font=dict(family="Georgia", size=18, color="#000000"),
    autosize=False,
    width=1000,
    height=800,
)

fig.show()

## Data Without Labels

In [None]:
# Second dataset (no reference labels). This rebinds `df`, so the labelled
# frame loaded earlier is no longer reachable under that name.
unlabelled_csv = join(InDir, "Data_Without_Labels.csv")
df = pd.read_csv(unlabelled_csv)
df.head()

In [None]:
# Display every column of the new frame except the first one.
df[df.columns[1:]]

In [None]:
# NOTE(review): `data` is only rebuilt from the new frame in the next cell, so
# at this point it still holds the matrix derived from the labelled dataset.
data

In [None]:
# Column-wise min-max scaling of the feature matrix to the range [0, 1].
shift = 1  # the first `shift` column(s) of df are skipped
raw_features = df[df.columns[shift:]].to_numpy()

data_max = np.max(raw_features, axis=0)
data_min = np.min(raw_features, axis=0)

# A constant column has max == min; dividing by that zero range would fill the
# whole column with NaN. Divide such columns by 1 instead so they scale to 0.
data_range = data_max - data_min
data_range = np.where(data_range == 0, 1, data_range)

data = (raw_features - data_min) / data_range

In [None]:
# Accumulator for the unlabelled-data sweep: one independent empty list per
# internal validation index plus the two cluster-count columns. Key order is
# kept as written because it becomes the insertion order of the dict.
CVI_Scores = {
    column: []
    for column in (
        "CHI",
        "DBI",
        "Silhouette",
        "N Clusters Row",
        "N Clusters Col",
    )
}

In [None]:
# Sweep over (row, column) cluster counts from 2 to 10, fit a
# SpectralBiclustering model for each pair and score its row labels with the
# Calinski-Harabasz, Davies-Bouldin and silhouette indices.

# Empty the accumulator lists first so that re-running this cell replaces the
# previous results instead of appending a second copy of every score.
for scores in CVI_Scores.values():
    scores.clear()

for itr1 in range(2, 11):
    for itr2 in range(2, 11):
        model = SpectralBiclustering(n_clusters=(itr1, itr2), random_state=0)
        model.fit(data)
        labels_2 = model.row_labels_
        CVI_Scores["CHI"].append(metrics.calinski_harabasz_score(data, labels_2))
        CVI_Scores["DBI"].append(metrics.davies_bouldin_score(data, labels_2))
        CVI_Scores["Silhouette"].append(
            metrics.silhouette_score(data, labels_2, metric='euclidean'))
        CVI_Scores["N Clusters Row"].append(itr1)
        CVI_Scores["N Clusters Col"].append(itr2)

In [None]:
# Tabulate the collected scores (one column per CVI_Scores key) and show them.
df_CVI = pd.DataFrame(CVI_Scores)
df_CVI

In [None]:
from plotly.subplots import make_subplots

# Two stacked panels: CHI and DBI share the top panel, Silhouette gets the
# bottom one. The figure is shown inline and exported as a PNG.
fig = make_subplots(rows=2, cols=1)

# (df_CVI column, subplot row) pairs, in drawing order; the legend label is
# the column name itself.
panel_traces = [("CHI", 1), ("DBI", 1), ("Silhouette", 2)]
for cvi_column, panel_row in panel_traces:
    fig.add_trace(
        go.Scatter(
            x=df_CVI.index,
            y=df_CVI[cvi_column],
            mode='lines',
            name=cvi_column,
        ),
        row=panel_row,
        col=1,
    )

fig.update_layout(
    title='CVI vs #Clusters for Data Without Labels',
    yaxis_title='CVI Value',
)

# Axis titles for the bottom panel.
fig.update_xaxes(title_text='#index', row=2, col=1)
fig.update_yaxes(title_text='CVI Value', row=2, col=1)

fig.show()
fig.write_image(join(outdir, "SpecBic_CVI_Data_Without_Labels.png"))