In [1]:
# Unnormalized spectral clustering example
# Graph nodes: {a, b, c, d, e, f, g, h}

import numpy as np
from scipy.linalg import eigh
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

In [2]:
import numpy as np

names = ['a','b','c','d','e','f','g','h']

# The graph is four disjoint undirected edges of unit weight:
# a-b, c-d, e-f, g-h.
pairs = [('a', 'b'), ('c', 'd'), ('e', 'f'), ('g', 'h')]
index_of = {label: pos for pos, label in enumerate(names)}

# Symmetric weight (adjacency) matrix; row/column order follows `names`.
W = np.zeros((len(names), len(names)), dtype=int)
for u, v in pairs:
    W[index_of[u], index_of[v]] = 1
    W[index_of[v], index_of[u]] = 1

print(W)


[[0 1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [0 0 1 0 0 0 0 0]
 [0 0 0 0 0 1 0 0]
 [0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0]]


In [3]:
# Degree matrix: the row sums of W (total edge weight at each node)
# placed on the diagonal, zeros elsewhere.
degrees = W.sum(axis=1)
D = np.diag(degrees)
print(D)


[[1 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [0 0 0 0 1 0 0 0]
 [0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 1]]


In [4]:
# Unnormalized graph Laplacian: degree matrix minus weight matrix.
L = np.subtract(D, W)
print(L)

[[ 1 -1  0  0  0  0  0  0]
 [-1  1  0  0  0  0  0  0]
 [ 0  0  1 -1  0  0  0  0]
 [ 0  0 -1  1  0  0  0  0]
 [ 0  0  0  0  1 -1  0  0]
 [ 0  0  0  0 -1  1  0  0]
 [ 0  0  0  0  0  0  1 -1]
 [ 0  0  0  0  0  0 -1  1]]


In [5]:

# Eigendecomposition of the symmetric Laplacian L.
# eigh hands back the eigenvalues sorted in ascending order, together with
# a matrix of eigenvectors.
eigvals, eigvecs = eigh(L)

# Round only for display; eigvals itself keeps full precision.
print("Eigen values: ", np.round(eigvals, decimals=4))


Eigen values:  [0. 0. 0. 0. 2. 2. 2. 2.]


In [6]:
# Show the eigenvector matrix to 3 decimals (display only; eigvecs is unchanged).
np.round(eigvecs, decimals=3)

array([[ 0.707,  0.   ,  0.   ,  0.   , -0.707,  0.   ,  0.   ,  0.   ],
       [ 0.707,  0.   ,  0.   ,  0.   ,  0.707,  0.   ,  0.   ,  0.   ],
       [ 0.   ,  0.707,  0.   ,  0.   ,  0.   ,  0.   , -0.707,  0.   ],
       [ 0.   ,  0.707,  0.   ,  0.   ,  0.   ,  0.   ,  0.707,  0.   ],
       [ 0.   ,  0.   ,  0.707,  0.   ,  0.   , -0.707,  0.   ,  0.   ],
       [ 0.   ,  0.   ,  0.707,  0.   ,  0.   ,  0.707,  0.   ,  0.   ],
       [ 0.   ,  0.   ,  0.   ,  0.707,  0.   ,  0.   ,  0.   , -0.707],
       [ 0.   ,  0.   ,  0.   ,  0.707,  0.   ,  0.   ,  0.   ,  0.707]])

## Eigen Cluster

In [7]:
import numpy as np

# Graph size
n = 6

# Directed (i, j) entries for the two tightly connected triangles.
# Group 1 is nodes 0,1,2 and group 2 is nodes 3,4,5; every edge has weight 1.
edges_group1 = [(0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1)]
edges_group2 = [(3, 4), (4, 3), (3, 5), (5, 3), (4, 5), (5, 4)]

# Start from an empty adjacency matrix and fill every strong edge in one
# indexed assignment instead of looping over the pairs.
A = np.zeros((n, n))
rows, cols = zip(*(edges_group1 + edges_group2))
A[list(rows), list(cols)] = 1

# Weak bridges between the groups (weight 0.1), written symmetrically:
# node 2 links to nodes 3 and 4.
for i, j in [(2, 3), (2, 4)]:
    A[i, j] = A[j, i] = 0.1

# Degree matrix: row sums of A on the diagonal
D = np.diag(A.sum(axis=1))

# Unnormalized graph Laplacian
L = D - A

print("Adjacency matrix A:\n", A)
print("\nDegree matrix D:\n", D)
print("\nGraph Laplacian L:\n", L)


Adjacency matrix A:
 [[0.  1.  1.  0.  0.  0. ]
 [1.  0.  1.  0.  0.  0. ]
 [1.  1.  0.  0.1 0.1 0. ]
 [0.  0.  0.1 0.  1.  1. ]
 [0.  0.  0.1 1.  0.  1. ]
 [0.  0.  0.  1.  1.  0. ]]

Degree matrix D:
 [[2.  0.  0.  0.  0.  0. ]
 [0.  2.  0.  0.  0.  0. ]
 [0.  0.  2.2 0.  0.  0. ]
 [0.  0.  0.  2.1 0.  0. ]
 [0.  0.  0.  0.  2.1 0. ]
 [0.  0.  0.  0.  0.  2. ]]

Graph Laplacian L:
 [[ 2.  -1.  -1.   0.   0.   0. ]
 [-1.   2.  -1.   0.   0.   0. ]
 [-1.  -1.   2.2 -0.1 -0.1  0. ]
 [ 0.   0.  -0.1  2.1 -1.  -1. ]
 [ 0.   0.  -0.1 -1.   2.1 -1. ]
 [ 0.   0.   0.  -1.  -1.   2. ]]


In [8]:
# Eigendecomposition of the Laplacian built in the previous cell.
eigvals, eigvecs = eigh(L)

# Round only for display; eigvals itself keeps full precision.
print("Eigen values: ", np.round(eigvals, decimals=4))


Eigen values:  [0.    0.126 3.    3.    3.1   3.174]


In [9]:
# Show the eigenvector matrix to 3 decimals (display only; eigvecs is unchanged).
np.round(eigvecs, decimals=3)

array([[-0.408,  0.426,  0.73 ,  0.   ,  0.   ,  0.345],
       [-0.408,  0.426, -0.639, -0.354, -0.   ,  0.345],
       [-0.408,  0.372, -0.091,  0.354,  0.   , -0.749],
       [-0.408, -0.399, -0.091,  0.354,  0.707,  0.202],
       [-0.408, -0.399, -0.091,  0.354, -0.707,  0.202],
       [-0.408, -0.426,  0.183, -0.707, -0.   , -0.345]])

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Toy corpus: four short sentences, one document per list entry.
corpus = [
    'This is the first document.',
    'This document is the second document.',
    'And this is the third one.',
    'Is this the first document?',
]

# Build a TF-IDF vectorizer with default settings, then fit it on the corpus
# and encode the same documents in a single call.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(corpus)

# Terms (words) reported by the fitted vectorizer
feature_names = vectorizer.get_feature_names_out()

# Dense copy of the result so it can be printed as a plain array
tfidf_dense = tfidf_matrix.toarray()

print("Feature names:\n", feature_names)
print("\nTF-IDF matrix:\n", np.round(tfidf_dense, decimals=2))

Feature names:
 ['and' 'document' 'first' 'is' 'one' 'second' 'the' 'third' 'this']

TF-IDF matrix:
 [[0.   0.47 0.58 0.38 0.   0.   0.38 0.   0.38]
 [0.   0.69 0.   0.28 0.   0.54 0.28 0.   0.28]
 [0.51 0.   0.   0.27 0.51 0.   0.27 0.51 0.27]
 [0.   0.47 0.58 0.38 0.   0.   0.38 0.   0.38]]
