In [1]:
from sklearn.datasets import fetch_20newsgroups

In [2]:
### Fetch the dataset ####
categories = ['comp.sys.ibm.pc.hardware','comp.graphics','comp.sys.mac.hardware','comp.os.ms-windows.misc',
              'rec.autos', 'rec.motorcycles','rec.sport.baseball','rec.sport.hockey']
dataset = fetch_20newsgroups(subset= 'all', categories=categories,shuffle=True, random_state=42)


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(min_df=3, stop_words='english')
X = vectorizer.fit_transform(dataset.data)

In [4]:
X.shape

(7882, 27768)

In [5]:
from sklearn.cluster import KMeans
km = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)
km

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=1000,
    n_clusters=2, n_init=30, n_jobs=1, precompute_distances='auto',
    random_state=0, tol=0.0001, verbose=0)

In [6]:
km.fit(X)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=1000,
    n_clusters=2, n_init=30, n_jobs=1, precompute_distances='auto',
    random_state=0, tol=0.0001, verbose=0)

In [7]:
X_predict = km.fit_predict(X)

In [8]:
X_predict.shape

(7882,)

In [9]:
for x in range(0,7882):
    if dataset.target[x] > 3 : 
        dataset.target[x] = 1
    else:
        dataset.target[x] = 0
       

In [10]:
for i in range(0,10):
    print("Predicted label for document %d : %d"%(i,X_predict[i]))
    print("Actual    label for document %d : %d"%(i,dataset.target[i]))
    

Predicted label for document 0 : 1
Actual    label for document 0 : 0
Predicted label for document 1 : 1
Actual    label for document 1 : 0
Predicted label for document 2 : 1
Actual    label for document 2 : 0
Predicted label for document 3 : 1
Actual    label for document 3 : 0
Predicted label for document 4 : 0
Actual    label for document 4 : 1
Predicted label for document 5 : 1
Actual    label for document 5 : 1
Predicted label for document 6 : 1
Actual    label for document 6 : 0
Predicted label for document 7 : 0
Actual    label for document 7 : 1
Predicted label for document 8 : 1
Actual    label for document 8 : 0
Predicted label for document 9 : 1
Actual    label for document 9 : 1


In [11]:

from sklearn.metrics.cluster import contingency_matrix
cntg_matrix = contingency_matrix(dataset.target,X_predict )

print("Contingency Matrix:")
print (cntg_matrix)

plt.matshow(cntg_matrix,cmap=plt.cm.Blues)
plt.colorbar()
plt.title('Contingency matrix for r = 1')
plt.show()

Contingency Matrix:
[[   4 3899]
 [1718 2261]]


NameError: name 'plt' is not defined

In [None]:
from sklearn import metrics
print("Homogeneity: %0.3f" % metrics.homogeneity_score(dataset.target, km.labels_))
print("Completeness: %0.3f" % metrics.completeness_score(dataset.target, km.labels_))
print("V-measure: %0.3f" % metrics.v_measure_score(dataset.target, km.labels_))
print("Adjusted Rand-Index: %.3f"
      % metrics.adjusted_rand_score(dataset.target, km.labels_))
print("Adjusted mutual information score: %0.3f"
      % metrics.adjusted_mutual_info_score(dataset.target, km.labels_))

print()

In [None]:
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(1000, random_state=0)
X_reduced = svd.fit(X)


In [None]:
import numpy as np
from matplotlib import pyplot as plt

dimensions = np.arange(1000)

exp_var = X_reduced.explained_variance_ratio_.cumsum()

    
## Now plot the # of variance against the dimensions
plt.plot(dimensions,exp_var)
plt.xlabel("Number of Dimensions")
plt.ylabel("Percentage of Explained Variance")
plt.show()
print("Total percentage Variance for all 1000 dimension is: {}% ".format(svd.explained_variance_ratio_.sum()*100 ))


         

## K-Means on Truncated SVD

In [None]:
from sklearn.metrics import confusion_matrix
r = [1,2,3,5,10,20,50,100,300]

#Metric buckets
cont_matrix = []
homogenity = []
completeness = []
vmeasure = [] 
adjrand = []
adjmutualinfo = []

X_SVD_reduced = svd.transform(X)
y = dataset.target

for iter in r:
    print('Current r value sweeping = %d' %iter)
    
    ykm_svd = km.fit_predict(X_SVD_reduced[:,:iter])


    mat = contingency_matrix(y,ykm_svd )
    cont_matrix.append(mat)
    

    plt.matshow(mat,cmap=plt.cm.Blues)
    plt.colorbar()
    plt.title('Contingency matrix for r = 1')
    plt.show()
    
    homogenity.append(metrics.homogeneity_score(y, ykm_svd ))
    completeness.append(metrics.completeness_score(y, ykm_svd ))
    vmeasure.append(metrics.v_measure_score(y, ykm_svd ))
    adjrand.append(metrics.adjusted_rand_score(y, ykm_svd ))
    adjmutualinfo.append(metrics.adjusted_mutual_info_score(y, ykm_svd ))
    
    

In [None]:
sp = plt.scatter(x= r,y= homogenity,color='r',marker='x')
plt.ylabel('Homogenity')
plt.xlabel('r Principal components')
plt.title('Homogenity Score for all r dimensions in SVD')
plt.grid(True)
plt.show()
#plot the completeness score
plt.scatter(x= r,y= completeness,color='r',marker='x')
plt.ylabel('Completeness')
plt.xlabel('r Principal components')
plt.title('Completeness Score for all r dimensions in SVD')
plt.grid(True)
plt.show()
#plot vmeasure score
plt.scatter(x= r,y= vmeasure,color='r',marker='x')
plt.ylabel('V-Measure')
plt.xlabel('r Principal components')
plt.title('V-Measure Score for all r dimensions in SVD')
plt.grid(True)
plt.show()
#plot rand score
plt.scatter(x= r,y= adjrand,color='r',marker='x')
plt.ylabel('Rand')
plt.xlabel('r Principal components')
plt.title('Rand Score for all r dimensions in SVD')
plt.grid(True)
plt.show()
#plot mutual information score
plt.scatter(x= r,y= adjmutualinfo,color='r',marker='x')
plt.ylabel('Mutual info')
plt.xlabel('r Principal components')
plt.title('Mutual info score Score for all r dimensions in SVD')
plt.grid(True)
plt.show()


## K-Means on NMF

In [None]:
from sklearn.decomposition import NMF
#Metric buckets
cont_matrix = []
homogenity = []
completeness = []
vmeasure = [] 
adjrand = []
adjmutualinfo = []


for iter in r:
    print('Current r value sweeping = %d' %iter)
# NMF dim reduction
    nmf = NMF(n_components=iter, init='random', random_state=0)
    X_NMF_reduced = nmf.fit_transform(X)

    km2 = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)
    #km.fit(X_SVD_reduced)
    ykm_nmf = km2.fit_predict(X_NMF_reduced)

   # Conf Matrix ( y= dataset.target)
    mat = contingency_matrix(y, ykm_nmf )  
    cont_matrix.append(mat)
   # Plots 
    plt.matshow(mat,cmap=plt.cm.Blues)
    plt.colorbar()
    plt.title('Contingency matrix for r = %d' %iter)
    plt.show()
    #http://scikit-learn.org/stable/auto_examples/text/document_clustering.html   
    homogenity.append(metrics.homogeneity_score(y, ykm_nmf ))
    completeness.append(metrics.completeness_score(y, ykm_nmf ))
    vmeasure.append(metrics.v_measure_score(y, ykm_nmf ))
    adjrand.append(metrics.adjusted_rand_score(y, ykm_nmf ))
    adjmutualinfo.append(metrics.adjusted_mutual_info_score(y, ykm_nmf ))


In [None]:
#Plot metrics
#plot the homogenity score
sp = plt.scatter(x= r,y= homogenity,color='r',marker='x')
plt.ylabel('Homogenity')
plt.xlabel('r Principal components')
plt.title('Homogenity Score for all r dimensions in NMF')
plt.grid(True)
plt.show()
#plot the completeness score
plt.scatter(x= r,y= completeness,color='r',marker='x')
plt.ylabel('Completeness')
plt.xlabel('r Principal components')
plt.title('Completeness Score for all r dimensions in NMF')
plt.grid(True)
plt.show()
#plot vmeasure score
plt.scatter(x= r,y= vmeasure,color='r',marker='x')
plt.ylabel('V-Measure')
plt.xlabel('r Principal components')
plt.title('V-Measure Score for all r dimensions in NMF')
plt.grid(True)
plt.show()
#plot rand score
plt.scatter(x= r,y= adjrand,color='r',marker='x')
plt.ylabel('Rand')
plt.xlabel('r Principal components')
plt.title('Rand Score for all r dimensions in NMF')
plt.grid(True)
plt.show()
#plot mutual information score
plt.scatter(x= r,y= adjmutualinfo,color='r',marker='x')
plt.ylabel('Mutual info')
plt.xlabel('r Principal components')
plt.title('Mutual info score Score for all r dimensions in NMF')
plt.grid(True)
plt.show()


In [None]:
X_SVD_reduced.shape
km = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)

vis_best_r = km.fit_predict(X_SVD_reduced[:,:2])

In [None]:
# The below code plots
plt.scatter(X_SVD_reduced[:,0],X_SVD_reduced[:,1],c = vis_best_r)
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.title('SVD Best value with clustering label')
plt.grid(True)
plt.colorbar()
plt.show()


In [None]:
# The below code plots
plt.scatter(X_SVD_reduced[:,0],X_SVD_reduced[:,1],c = dataset.target)
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.title('SVD Best value with Ground Truth label')
plt.grid(True)
plt.colorbar()
plt.show()

In [None]:
nmf = NMF(n_components=2, init='random', random_state=0)
X_NMF_reduced = nmf.fit_transform(X)

km2 = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)
#km.fit(X_SVD_reduced)
ykm_nmf = km2.fit_predict(X_NMF_reduced)


In [None]:
# The below code plots
plt.scatter(X_NMF_reduced[:,0],X_NMF_reduced[:,1],c = ykm_nmf)
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.title('NMF Best value with clustering label')
plt.grid(True)
plt.colorbar()
plt.show()


In [None]:
# The below code plots
plt.scatter(X_NMF_reduced[:,0],X_NMF_reduced[:,1],c = dataset.target)
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.title('NMF Best value for Ground Truth label')
plt.grid(True)
plt.colorbar()
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
scaler = StandardScaler()
scal_matrix = scaler.fit_transform(X_SVD_reduced[:,:2])
km = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)
vis_best_r = km.fit_predict(scal_matrix)


#Visualising the results 
plt.scatter(X_SVD_reduced[:,0],X_SVD_reduced[:,1],c = vis_best_r)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('SVD Normalization with Cluster label')
plt.grid(True)
plt.colorbar()
plt.show()

plt.scatter(X_SVD_reduced[:,0],X_SVD_reduced[:,1],c = dataset.target)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('SVD Normalization with Ground Truth Label')
plt.grid(True)
plt.colorbar()
plt.show()

#Metrics
print('Metrics post SVD \n')
print("Homogeneity: %0.4f" % metrics.homogeneity_score(dataset.target, vis_best_r))
print("Completeness: %0.4f" % metrics.completeness_score(dataset.target, vis_best_r))
print("V-measure: %0.4f" % metrics.v_measure_score(dataset.target, vis_best_r))
print("Adjusted Rand-Index: %.4f"
      % metrics.adjusted_rand_score(dataset.target, vis_best_r))
print("Adjusted Mutual Info score: %.4f"
      % metrics.adjusted_mutual_info_score(dataset.target, vis_best_r))
plt.matshow(confusion_matrix(dataset.target, vis_best_r),cmap=plt.cm.Blues)
plt.colorbar()
plt.grid(True)
plt.title('Contingency matrix after SVD normalization')
plt.show()

In [None]:
#SVD Tranformation
svd_log_matrix = np.log1p(X_SVD_reduced[:,:2])
km = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)
vis_best_r = km.fit_predict(svd_log_matrix)


#Visualising the results 
plt.scatter(X_SVD_reduced[:,0],X_SVD_reduced[:,1],c = vis_best_r)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('SVD Transformation with Cluster label')
plt.grid(True)
plt.colorbar()
plt.show()

plt.scatter(X_SVD_reduced[:,0],X_SVD_reduced[:,1],c = dataset.target)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('SVD Transformation with Ground Truth Label')
plt.grid(True)
plt.colorbar()
plt.show()

#Metrics
print('Metrics post SVD \n')
print("Homogeneity: %0.4f" % metrics.homogeneity_score(dataset.target, vis_best_r))
print("Completeness: %0.4f" % metrics.completeness_score(dataset.target, vis_best_r))
print("V-measure: %0.4f" % metrics.v_measure_score(dataset.target, vis_best_r))
print("Adjusted Rand-Index: %.4f"
      % metrics.adjusted_rand_score(dataset.target, vis_best_r))
print("Adjusted Mutual Info score: %.4f"
      % metrics.adjusted_mutual_info_score(dataset.target, vis_best_r))
plt.matshow(confusion_matrix(dataset.target, vis_best_r),cmap=plt.cm.Blues)
plt.colorbar()
plt.grid(True)
plt.title('Contingency matrix after SVD normalization')
plt.show()

In [None]:
# The below code does log transform + Normalize
scaler = StandardScaler()
scal_matrix = scaler.fit_transform(svd_log_matrix)
km = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)
vis_best_r = km.fit_predict(scal_matrix)

#Visualising the results 
plt.scatter(X_SVD_reduced[:,0],X_SVD_reduced[:,1],c = vis_best_r)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('SVD Normal. + Trans. with cluster label')
plt.grid(True)
plt.colorbar()
plt.show()

plt.scatter(X_SVD_reduced[:,0],X_SVD_reduced[:,1],c = dataset.target)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('SVD Normal. + Trans. with Ground Truth label')
plt.grid(True)
plt.colorbar()
plt.show()

#Metrics
print('Metrics post SVD \n')
print("Homogeneity: %0.4f" % metrics.homogeneity_score(dataset.target, vis_best_r))
print("Completeness: %0.4f" % metrics.completeness_score(dataset.target, vis_best_r))
print("V-measure: %0.4f" % metrics.v_measure_score(dataset.target, vis_best_r))
print("Adjusted Rand-Index: %.4f"
      % metrics.adjusted_rand_score(dataset.target, vis_best_r))
print("Adjusted Mutual Info score: %.4f"
      % metrics.adjusted_mutual_info_score(dataset.target, vis_best_r))
plt.matshow(confusion_matrix(dataset.target, vis_best_r),cmap=plt.cm.Blues)
plt.colorbar()
plt.grid(True)
plt.title('Contingency matrix after SVD normalization')
plt.show()

In [None]:
#The below code does Normalize + log transform

scal_matrix -= scal_matrix.min()
svd_log_matrix = np.log1p(scal_matrix)
km = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)
vis_best_r = km.fit_predict(svd_log_matrix)

#Visualising the results 
plt.scatter(X_SVD_reduced[:,0],X_SVD_reduced[:,1],c = vis_best_r)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('SVD Trans. + Norm. with Cluster label')
plt.grid(True)
plt.colorbar()
plt.show()


plt.scatter(X_SVD_reduced[:,0],X_SVD_reduced[:,1],c = dataset.target)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('SVD Trans. + Norm. with Ground Truth label')
plt.grid(True)
plt.colorbar()
plt.show()

#Metrics
print('Metrics post SVD \n')
print("Homogeneity: %0.4f" % metrics.homogeneity_score(dataset.target, vis_best_r))
print("Completeness: %0.4f" % metrics.completeness_score(dataset.target, vis_best_r))
print("V-measure: %0.4f" % metrics.v_measure_score(dataset.target, vis_best_r))
print("Adjusted Rand-Index: %.4f"
      % metrics.adjusted_rand_score(dataset.target, vis_best_r))
print("Adjusted Mutual Info score: %.4f"
      % metrics.adjusted_mutual_info_score(dataset.target, vis_best_r))
plt.matshow(confusion_matrix(dataset.target, vis_best_r),cmap=plt.cm.Blues)
plt.colorbar()
plt.grid(True)
plt.title('Contingency matrix after SVD normalization')
plt.show()

## Visualization For NMF

In [None]:
scaler = StandardScaler()
scal_matrix = scaler.fit_transform(X_NMF_reduced)
km = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)
vis_best_r = km.fit_predict(scal_matrix)


#Visualising the results 
plt.scatter(X_NMF_reduced[:,0],X_NMF_reduced[:,1],c = vis_best_r)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('NMF Normalization with cluster label')
plt.grid(True)
plt.colorbar()
plt.show()

plt.scatter(X_NMF_reduced[:,0],X_NMF_reduced[:,1],c = dataset.target)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('NMF Normalization with Ground Truth label')
plt.grid(True)
plt.colorbar()
plt.show()

#Metrics
print('Metrics post NMF \n')
print("Homogeneity: %0.4f" % metrics.homogeneity_score(dataset.target, vis_best_r))
print("Completeness: %0.4f" % metrics.completeness_score(dataset.target, vis_best_r))
print("V-measure: %0.4f" % metrics.v_measure_score(dataset.target, vis_best_r))
print("Adjusted Rand-Index: %.4f"
      % metrics.adjusted_rand_score(dataset.target, vis_best_r))
print("Adjusted Mutual Info score: %.4f"
      % metrics.adjusted_mutual_info_score(dataset.target, vis_best_r))
plt.matshow(confusion_matrix(dataset.target, vis_best_r),cmap=plt.cm.Blues)
plt.colorbar()
plt.grid(True)
plt.title('Contingency matrix after NMF normalization')
plt.show()

In [None]:
nmf_log_matrix = np.log1p(X_NMF_reduced)
km = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)
vis_best_r = km.fit_predict(nmf_log_matrix)

#Visualising the results 
plt.scatter(X_NMF_reduced[:,0],X_NMF_reduced[:,1],c = vis_best_r)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('NMF Transformation with Cluster Label')
plt.grid(True)
plt.colorbar()
plt.show()

plt.scatter(X_NMF_reduced[:,0],X_NMF_reduced[:,1],c = dataset.target)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('NMF Transformation with Ground Truth Label')
plt.grid(True)
plt.colorbar()
plt.show()

#Metrics
print('Metrics post NMF \n')
print("Homogeneity: %0.4f" % metrics.homogeneity_score(dataset.target, vis_best_r))
print("Completeness: %0.4f" % metrics.completeness_score(dataset.target, vis_best_r))
print("V-measure: %0.4f" % metrics.v_measure_score(dataset.target, vis_best_r))
print("Adjusted Rand-Index: %.4f"
      % metrics.adjusted_rand_score(dataset.target, vis_best_r))
print("Adjusted Mutual Info score: %.4f"
      % metrics.adjusted_mutual_info_score(dataset.target, vis_best_r))
plt.matshow(confusion_matrix(dataset.target, vis_best_r),cmap=plt.cm.Blues)
plt.colorbar()
plt.grid(True)
plt.title('Contingency matrix after NMF normalization')
plt.show()

In [None]:
# The below code does log transform + Normalize
scaler = StandardScaler()
scal_matrix = scaler.fit_transform(nmf_log_matrix)
km = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)
vis_best_r = km.fit_predict(scal_matrix)

#Visualising the results 
plt.scatter(X_NMF_reduced[:,0],X_NMF_reduced[:,1],c = vis_best_r)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('NMF Tranform + Normal. with Cluster Label')
plt.grid(True)
plt.colorbar()
plt.show()

plt.scatter(X_NMF_reduced[:,0],X_NMF_reduced[:,1],c = dataset.target)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('NMF Tranform + Normal. with Ground Truth Label')
plt.grid(True)
plt.colorbar()
plt.show()

#Metrics
print('Metrics post NMF \n')
print("Homogeneity: %0.4f" % metrics.homogeneity_score(dataset.target, vis_best_r))
print("Completeness: %0.4f" % metrics.completeness_score(dataset.target, vis_best_r))
print("V-measure: %0.4f" % metrics.v_measure_score(dataset.target, vis_best_r))
print("Adjusted Rand-Index: %.4f"
      % metrics.adjusted_rand_score(dataset.target, vis_best_r))
print("Adjusted Mutual Info score: %.4f"
      % metrics.adjusted_mutual_info_score(dataset.target, vis_best_r))
plt.matshow(confusion_matrix(dataset.target, vis_best_r),cmap=plt.cm.Blues)
plt.colorbar()
plt.grid(True)
plt.title('Contingency matrix after NMF normalization')
plt.show()



In [None]:
#The below code does Normalize + log transform

scal_matrix -= scal_matrix.min()
nmf_log_matrix = np.log1p(scal_matrix)
km = KMeans(n_clusters=2, init='k-means++', max_iter=1000, random_state = 0,n_init = 30)
vis_best_r = km.fit_predict(nmf_log_matrix)

#Visualising the results 
plt.scatter(X_NMF_reduced[:,0],X_NMF_reduced[:,1],c = vis_best_r)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('NMF Normal + Transform Cluster Label')
plt.grid(True)
plt.colorbar()
plt.show()

#Visualising the results 
plt.scatter(X_NMF_reduced[:,0],X_NMF_reduced[:,1],c = dataset.target)
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('NMF Normal + Transform Ground Truth Label')
plt.grid(True)
plt.colorbar()
plt.show()


#Metrics
print('Metrics post NMF \n')
print("Homogeneity: %0.4f" % metrics.homogeneity_score(dataset.target, vis_best_r))
print("Completeness: %0.4f" % metrics.completeness_score(dataset.target, vis_best_r))
print("V-measure: %0.4f" % metrics.v_measure_score(dataset.target, vis_best_r))
print("Adjusted Rand-Index: %.4f"
      % metrics.adjusted_rand_score(dataset.target, vis_best_r))
print("Adjusted Mutual Info score: %.4f"
      % metrics.adjusted_mutual_info_score(dataset.target, vis_best_r))
plt.matshow(confusion_matrix(dataset.target, vis_best_r),cmap=plt.cm.Blues)
plt.colorbar()
plt.grid(True)
plt.title('Contingency matrix after NMF normalization')
plt.show()



