In [35]:
import torch
import numpy as np
from torch import nn 
import torch.nn.functional as F 
from collections import OrderedDict
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import numpy as np
import cv2

In [61]:
import numpy as np
import argparse
from tqdm import tqdm 
from sklearn.datasets import load_wine
from sklearn import preprocessing
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.cluster import normalized_mutual_info_score
from torch import nn,optim
import torch

In [51]:
class GraphEncoder(nn.Module):
    def __init__(self,layers,clusters):
        super(GraphEncoder,self).__init__()
        
        self.layers = nn.Sequential(OrderedDict({
            "lin1": nn.Linear(layers[0],layers[1]),
            "sig1": nn.Sigmoid(),
            "lin2": nn.Linear(layers[1],layers[2]),
            "sig2": nn.Sigmoid(),
            "lin3": nn.Linear(layers[2],layers[3]),
            "sig3": nn.Sigmoid(),
            "lin4": nn.Linear(layers[3],layers[4]),
            "sig4": nn.Sigmoid(),
        }))
        
        
        self.clusters = clusters
        self.outputs = {}
        
        self.layers[0].register_forward_hook(self.get_activation("lin1"))
        self.layers[2].register_forward_hook(self.get_activation("lin2"))
        self.layers[4].register_forward_hook(self.get_activation("lin3"))
        
    def get_activation(self,name):
        def hook(module,input,output):
            self.outputs[name] = output
        return hook
    
    def forward(self,X):
        output = self.layers(X)
        return output
    
    def layer_activations(self,layername):
       # print(torch.sigmoid(self.outputs[layername]).shape)
        return torch.mean(torch.sigmoid(self.outputs[layername]),dim=0)
    
    def sparse_result(self,rho,layername):
        rho_hat = self.layer_activations(layername)
        return rho * np.log(rho) - rho * torch.log(rho_hat) + (1 - rho) * np.log(1 - rho) \
                - (1 - rho) * torch.log(1 - rho_hat)
    
    def kl_div(self,rho):
        first = torch.mean(self.sparse_result(rho,"lin1"))
        second = torch.mean(self.sparse_result(rho,"lin2"))
        return first + second
    
    def get_index_by_name(self,name):
        return list(dict(self.layers.named_children()).keys()).index(name)
    
    def loss(self,x_hat,x,beta,rho):
        loss = F.mse_loss(x_hat,x) + beta*self.kl_div(rho)
        return loss 
    
    def get_cluster(self):
        kmeans = KMeans(n_clusters = self.clusters).fit(self.outputs["lin2"].detach().cpu().numpy())
        self.centroids = kmeans.cluster_centers_
        return kmeans.labels_
    
        
                

In [90]:
class SAE(nn.Module):
    def __init__(self,layers):
        super(SAE,self).__init__()
        self.layers = layers
        
        self.net = nn.Sequential(OrderedDict({
            "lin1": nn.Linear(layers[0],layers[1]),
            "sig1":nn.Sigmoid(),
            "lin2": nn.Linear(layers[1],layers[2]),
            "sig2":nn.Sigmoid()
        }))
        
        self.outputs = {}
        
        self.net[0].register_forward_hook(self.get_activation("lin1"))
        
        
    def get_activation(self,name):
        def hook(module,input,output):
            self.outputs[name] = output
        return hook

    def forward(self,X):
        output = self.net(X)
        return output


    def layer_activations(self,layername):
        return torch.mean(torch.sigmoid(self.outputs[layername]),dim=0)

    def sparse_result(self,rho,layername):
        rho_hat = self.layer_activations(layername)
        return rho * np.log(rho) - rho * torch.log(rho_hat) + (1 - rho) * np.log(1 - rho) \
            - (1 - rho) * torch.log(1 - rho_hat)

    def kl_div(self,rho):
        return torch.mean(self.sparse_result(rho,"lin1"))

    def loss(self,x_hat,x,rho,beta):
        loss = F.mse_loss(x_hat,x) + beta*self.kl_div(rho)
        return loss
                    

In [47]:
args = {
    "dataset": "wine",
    "layers":[128,64,128],
    "beta": 0.01,
    "rho":0.5,
    "lr": 0.01,
    "epoch": 200,
    "device":"gpu"
}

device = torch.device("cuda" if args["device"] == "gpu" else "cpu")

                      

In [62]:
def main():
    if args["dataset"].lower() == "wine":
        data = load_wine()
    else:
        raise Exception("Invalid Dataset")
    
    X  = data.data
    y = data.target
    k = len(np.unique(y))
    
    min_max_scaler = preprocessing.MinMaxScaler()
    X = min_max_scaler.fit_transform(X)
    
    S = cosine_similarity(X,X)
    
    D  = np.diag(1.0/np.sqrt(S.sum(axis=1)))
    
    X_train = torch.tensor(D.dot(S).dot(D)).float().to(device)
    
    layers = [len(X_train)] + args["layers"] + [len(X_train)]
    
    model = GraphEncoder(layers,k).to(device)
    optimizer  = optim.Adam(model.parameters(),lr=args["lr"])
    
    with tqdm(total= args["epoch"]) as tq:
        for epoch in range(1,args["epoch"] +1):
            optimizer.zero_grad()
            X_hat = model(X_train)
            loss = model.loss(X_hat,X_train, args["beta"],args["rho"])
            nmi = normalized_mutual_info_score(model.get_cluster(),y,average_method="arithmetic")
            loss.backward()
            optimizer.step()
            
            tq.set_postfix(loss=loss,nmi ="{:.3f}".format(nmi))
            tq.update()
        print(model.get_cluster())
    return model

if __name__ == "__main__":
    model = main()

100%|██████████| 200/200 [00:06<00:00, 30.69it/s, loss=tensor(3.1616e-05, device='cuda:0', grad_fn=<AddBackward0>), nmi=0.491]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 2 1 0 2 0 0 2 0 1 2 0 0 0 0
 2 0 0 0 2 0 2 0 0 1 2 0 0 0 0 0 0 0 1 0 2 2 2 0 2 0 0 0 0 0 0 1 0 0 0 0 2
 0 0 0 0 2 0 0 1 0 0 0 0 2 2 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]





In [63]:
data = load_wine()
X = data.data
y = data.target
k = len(np.unique(y))
model = GraphEncoder([178,128,64,128,178],k).to(device)
sae  = SAE([178,128,178]).to(device)


In [64]:
X_train = cosine_similarity(X,X)
X_train = torch.Tensor(X_train).to(device)
output = model(X_train)
out = sae(X_train)


torch.Size([178, 128])

In [50]:
model.layer_activations("lin1").shape

torch.Size([178, 128])


torch.Size([128])

In [81]:
hyper = {
    "beta": 0.01,
    "rho":0.5,
    "lr": 0.01,
    "epoch": 200,

}

data = load_wine()
X = data.data
y = data.target
k = len(np.unique(y))
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)
S = cosine_similarity(X,X)
D  = np.diag(1.0/np.sqrt(S.sum(axis=1)))
X_train = torch.tensor(D.dot(S).dot(D)).float().to(device)

layers = [128,64,32]
for layer in layers:
    model = sae(layer)
    with tqdm(total=hyper["epoch"]) as tq:
        for epoch in range(1,range(hyper["epoch"]+1)):
            x_hat = model(X_train)
            loss = model.loss()
    

AttributeError: 'int' object has no attribute 'dim'

In [95]:
hyper = {
    "beta": 0.01,
    "rho":0.5,
    "lr": 0.01,
    "epoch": 200,

}

data = load_wine()
X = data.data
y = data.target
k = len(np.unique(y))
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)
S = cosine_similarity(X,X)
D  = np.diag(1.0/np.sqrt(S.sum(axis=1)))
X_train = torch.tensor(D.dot(S).dot(D)).float().to(device)



saet = SAE([178,128,178]).to(device)
optimizer = optim.Adam(saet.parameters(),lr=hyper["lr"])
with tqdm(total=hyper["epoch"]) as tq:
        for epoch in range(1,hyper["epoch"]+1):
            optimizer.zero_grad()
            x_hat = saet(X_train)
            loss = saet.loss(x_hat,X_train,hyper["rho"],hyper["beta"])
            loss.backward()
            optimizer.step()
            tq.set_postfix(loss=loss)
            tq.update()

            
            
    

100%|██████████| 200/200 [00:00<00:00, 307.04it/s, loss=tensor(2.4287e-05, device='cuda:0', grad_fn=<AddBackward0>)]


In [91]:
output = saet(X_train)


loss = saet.loss(output,X_train,hyper["rho"],hyper["beta"])


In [93]:
loss