# Spectral Clustering in scikit-learn

In [14]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import SpectralClustering, KMeans
from sklearn.datasets import make_moons
from sklearn.preprocessing import StandardScaler

%matplotlib inline

## Affinity data

In [15]:
# Build a symmetric 16x16 0/1 matrix from the pair list stored in 'clus3.dat'.
# Each non-blank line is expected to hold two whitespace-separated integers;
# both are shifted down by one to obtain 0-based row/column indices.
n3 = np.zeros((16,16), dtype=int)
# The context manager guarantees the file handle is closed, even on a parse error.
with open('clus3.dat',"r") as fin:
    for line in fin:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        if not line.strip():
            continue
        a,b = line.split()
        i = int(a)-1
        j = int(b)-1
        # Mark the pair in both directions so the matrix stays symmetric.
        n3[i,j] = 1
        n3[j,i] = 1
n3

array([[0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0]])

## Feature Vector Data
### Moons Synthetic Dataset
1,000 samples, 2 classes.

In [16]:
# Generate the two-moons sample (1000 points, noise=0.07, fixed seed) and
# keep the raw (coordinates, labels) tuple around as `moons_data`.
moons_data = make_moons(n_samples=1000, noise=0.07, random_state=0)
coords, truth = moons_data

# Tabular view: the two coordinates plus the generator's own class label.
moons = pd.DataFrame(data=coords, columns=['x', 'y'])
moons['label_truth'] = truth

# Quick look at the raw, uncoloured point cloud.
moons.plot.scatter(x='x', y='y', figsize=(6,4), s=10, alpha=0.7);

### Clustering using *k*-Means

In [17]:
# Two-cluster k-means on the (x, y) coordinates only; the seed and the
# number of restarts are pinned so the fit is repeatable.
km = KMeans(
    n_clusters=2,
    n_init=10,
    random_state=1,
)
km.fit(moons[['x', 'y']])

In [18]:
# Map each k-means cluster id to a plotting colour.
# (A greyscale alternative would be {0 : 'silver', 1 : 'black'}.)
cm = {0 : 'g', 1 : 'pink'}

# One colour per sample, in row order.
label_color = [cm[cluster_id] for cluster_id in km.labels_]

# Same scatter as before, now coloured by the k-means assignment.
moons.plot.scatter(x='x', y='y', figsize=(6,4), s=10, c=label_color)

<Axes: xlabel='x', ylabel='y'>

### Clustering using Spectral Clustering

In [19]:
# Spectral clustering of the moons into two groups, using a
# k-nearest-neighbours affinity graph with 7 neighbours per point.
# random_state is pinned so repeated runs give the same labelling
# (the estimator is otherwise randomly initialised).
sclust = SpectralClustering(
    n_clusters=2,
    affinity='nearest_neighbors',
    n_neighbors=7,
    random_state=42,
)

sclust.fit(moons[['x', 'y']]);

# Colour each sample by its spectral-clustering label.
# Note: this rebinds `cm` and `label_color` from the k-means cell.
cm = {0 : 'r', 1 : 'b'}
label_color = [cm[l] for l in sclust.labels_]

moons.plot(
    kind='scatter',
    x='x',
    y='y',
    figsize=(6,4),
    s=10,
    c=label_color
)

<Axes: xlabel='x', ylabel='y'>

In [20]:
# Show the moons frame: x/y coordinates plus the 'label_truth' column.
moons

Unnamed: 0,x,y,label_truth
0,2.029687,0.501357,1
1,1.695055,-0.174127,1
2,-0.274894,0.820801,0
3,-0.052078,0.177802,1
4,0.494296,-0.394137,1
...,...,...,...
995,1.537826,-0.202705,1
996,0.325163,0.951352,0
997,0.330385,-0.369431,1
998,0.109319,0.110873,1


## Harry Potter Data

In [21]:
# Load the Harry Potter character table from CSV and preview the first rows.
TT_df = pd.read_csv('HarryPotterTT.csv')
TT_df.head()

Unnamed: 0,Name,Magic,Cunning,Courage,Wisdom,Temper
0,'Harry Potter',62,21,42,26,7
1,'Hermione Granger',60,16,40,73,2
2,'Ron Weasley',45,14,40,22,4
3,'Prof. Dumbledore',105,24,39,82,0
4,'Prof. Snape',85,24,19,71,7


In [22]:
# Numeric attribute columns used for clustering. The names are written with
# a leading space, which is how they are keyed in TT_df.
features = [
    ' Magic',
    ' Cunning',
    ' Courage',
    ' Wisdom',
    ' Temper',
]

# Raw feature matrix, one row per character.
X = TT_df[features].values

# Standardised copy of X, used as the clustering input.
scaler = StandardScaler()
X_scal = scaler.fit_transform(X)

X.shape

(22, 5)

In [23]:
# Spectral clustering of the standardised attributes into four groups.
# The affinity is a 7-nearest-neighbours graph; 'rbf' is the alternative
# affinity noted for experimentation.
sclust = SpectralClustering(
    affinity='nearest_neighbors',
    n_neighbors=7,
    n_clusters=4,
    random_state=42,
)
sclust.fit(X_scal);

In [24]:
# Store each row's cluster label in a new 'Group' column (modifies TT_df in place).
TT_df['Group'] = sclust.labels_

In [25]:
# Display the table ordered by cluster label so group members sit together.
TT_df.sort_values(by='Group')

Unnamed: 0,Name,Magic,Cunning,Courage,Wisdom,Temper,Group
1,'Hermione Granger',60,16,40,73,2,0
18,'Cedric Diggory',58,23,40,55,2,0
3,'Prof. Dumbledore',105,24,39,82,0,0
4,'Prof. Snape',85,24,19,71,7,0
5,'Prof. McGonagail',95,19,29,76,5,0
6,'Prof. Moody',82,20,35,69,5,0
10,'Arthur Weasley',62,5,29,60,2,1
17,'Cho Chang',40,8,25,31,3,1
15,'Parvati Patil',24,11,23,15,2,1
14,'Padma Patil',24,9,23,13,1,1


In [26]:
# Print the fitted affinity matrix as text: one matrix row per output line,
# entries separated by a single space.
# The loop bounds come from the matrix itself rather than a hard-coded 22,
# so the cell keeps working if the number of rows in the data changes.
affinity = sclust.affinity_matrix_
n_rows, n_cols = affinity.shape
for i in range(n_rows):
    for j in range(n_cols):
        print(affinity[i, j], end = ' ')
    print()

1.0 0.0 1.0 0.0 0.5 0.0 0.5 0.0 1.0 0.5 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.5 1.0 0.0 0.0 
0.0 1.0 0.5 0.5 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.5 0.0 1.0 0.0 0.0 0.0 
1.0 0.5 1.0 0.0 0.0 0.0 0.0 0.5 1.0 1.0 0.0 0.0 0.0 0.0 0.5 0.5 1.0 1.0 0.5 0.5 1.0 0.0 
0.0 0.5 0.0 1.0 0.5 1.0 0.5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.5 0.0 1.0 0.0 0.0 0.0 
0.5 0.0 0.0 0.5 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.5 0.0 0.0 0.0 0.5 0.0 0.0 0.5 0.0 1.0 
0.0 1.0 0.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.5 0.0 0.0 0.5 
0.5 1.0 0.0 0.5 1.0 1.0 1.0 0.0 0.5 0.5 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.5 
0.0 0.0 0.5 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.5 1.0 0.5 1.0 0.0 0.5 0.0 0.0 1.0 0.0 
1.0 0.0 1.0 0.0 0.0 0.0 0.5 0.0 1.0 1.0 0.5 0.0 0.0 0.0 0.0 0.5 0.5 1.0 0.0 0.0 0.0 0.0 
0.5 0.0 1.0 0.0 0.0 0.0 0.5 0.0 1.0 1.0 0.5 0.0 0.0 0.0 0.0 0.0 0.5 0.5 0.0 0.5 0.0 0.0 
0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.5 0.5 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 
0.0 0.0 0.0 0.0 0.0 0