# FateZ Clustering 

This notebook demonstrates how to apply clustering methods to the representations produced by FateZ.

In [1]:
import os
import torch
import numpy as np
from torch.utils.data import DataLoader
from pkg_resources import resource_filename
from sklearn import cluster
import fatez.test as test
import fatez.model as model

# Silence every Python warning for the rest of the notebook. The motivation is
# the warning emitted when using LazyLinear, but note that this blanket filter
# also hides warnings raised by all other libraries.
import warnings
warnings.filterwarnings('ignore')

### Initialize testing model first.

In [2]:
# Build the FateZ test helper and run its full-model test; the returned model
# is kept as `testM` and is used later to produce the encoded representations.
faker = test.Faker()
testM = faker.test_full_model()
# Disabled on purpose: these calls would presumably persist the GAT and the
# full model to disk via model.Save (TODO confirm before re-enabling).
# model.Save(faker.test_gat(), '../data/ignore/gat.model')
# model.Save(testM, '../data/ignore/trainer.model')

Testing Full Model.

	Pre-Trainer Green.

	Fine-Tuner Green.

	Explainer Green.



### Prepare flattened data for clustering

In [3]:
# Fetch the whole dataset behind the faker's data loader as one single batch.
dataset = faker.make_data_loader().dataset
# batch_size == len(dataset) makes the loader yield exactly one batch, so take
# it directly instead of relying on variables leaking out of a for-loop.
x, labels = next(iter(DataLoader(dataset, batch_size = len(dataset))))
all_fea_mat = x[0]
all_adj_mat = x[1]

# Baseline input: every sample's feature matrix flattened into a 1-D list.
origin = [ele.reshape(-1).tolist() for ele in all_fea_mat]

# The encoded representations made by GAT -> BERT encoder, flattened the same
# way. Only the values are needed here, so skip autograd bookkeeping.
# NOTE(review): if testM contains dropout layers, calling testM.eval() first
# may also be required for deterministic encodings - confirm.
with torch.no_grad():
    encoded_batch = testM.get_encoder_output(all_fea_mat, all_adj_mat)
encode = [ele.reshape(-1).tolist() for ele in encoded_batch]

print(labels)

tensor([0, 0, 0, 1, 1, 1, 1, 1, 1, 1])


### Set up clustering models and fit them on the original data

In [4]:
# Clustering hyper-parameters shared by the cells below.
eps = 0.5                            # DBSCAN neighbourhood radius
n_clusters = len(np.unique(labels))  # one cluster per distinct value in `labels`

dbscan = cluster.DBSCAN(eps = eps)
# Fixed seed: KMeans initialisation is random, so without it the printed
# assignment changes on every run of the notebook.
kmeans = cluster.KMeans(n_clusters = n_clusters, random_state = 0)

# Baseline: cluster the flattened original feature matrices.
dbscan.fit(origin)
kmeans.fit(origin)

# Get labels (for DBSCAN, -1 marks samples treated as noise)
print(dbscan.labels_.astype(int))
print(kmeans.labels_.astype(int))

[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
[1 0 0 0 0 0 0 0 1 0]


### Reset models and fit with encoded representations

In [5]:
# Fresh estimators so nothing fitted on the original data carries over.
dbscan = cluster.DBSCAN(eps = eps)
# Fixed seed: KMeans initialisation is random, so without it the printed
# assignment changes on every run of the notebook.
kmeans = cluster.KMeans(n_clusters = n_clusters, random_state = 0)

# Cluster the flattened encoder outputs instead of the raw features.
dbscan.fit(encode)
kmeans.fit(encode)

# Get labels (for DBSCAN, -1 marks samples treated as noise)
print(dbscan.labels_.astype(int))
print(kmeans.labels_.astype(int))

[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
[0 1 0 0 1 0 1 0 0 0]
