In [2]:
import  torch
from    torch import nn
from    torch import optim
from    torch.nn import functional as F
import  numpy as np
from    data import load_data, preprocess_features, preprocess_adj, load_data_all
from    utils import masked_loss, masked_acc, sparse_dropout
import warnings
# import faiss

warnings.filterwarnings("ignore")

In [3]:
import  argparse
# Every tunable hyper-parameter, listed as (flag, add_argument keyword arguments)
# in the order the options should be registered on the parser.
option_table = [
    ('--dataset', dict(default='cora')),
    ('--model', dict(default='gcn')),
    ('--learning_rate', dict(type=float, default=0.03)),
    ('--epochs', dict(type=int, default=2000)),
    ('--hidden', dict(type=int, default=32)),
    ('--weight_decay', dict(type=float, default=5e-4)),
    ('--dropout', dict(type=float, default=0.0)),
]

parser = argparse.ArgumentParser()
for flag, settings in option_table:
    parser.add_argument(flag, **settings)

# parse_args receives an explicit argument list, so sys.argv is not consulted.
args = parser.parse_args(['--dataset', 'cora'])
print(args)

Namespace(dataset='cora', model='gcn', learning_rate=0.03, epochs=2000, hidden=32, weight_decay=0.0005, dropout=0.0)


In [5]:
import numpy as np
from scipy.cluster.vq import kmeans, vq
from scipy.stats import mode
import torch
from sklearn.cluster import KMeans

def run_kmeans(x, y, num_cluster, niter=100):
    """Cluster ``x`` with k-means and return the centroids indexed by class label.

    Each cluster is mapped to the most frequent label among its members and its
    centroid is stored at that label's row of the result.

    Args:
        x: array of shape (n_samples, n_features) handed to ``KMeans.fit_predict``.
        y: 1-D array-like of integer class labels, one per row of ``x``.
        num_cluster: number of clusters to fit; also the number of rows returned.
        niter: currently unused (KMeans runs with its own default iteration
            limit); kept so existing callers keep working.

    Returns:
        Array shaped like ``KMeans.cluster_centers_``. Row ``c`` holds the
        centroid of a cluster whose majority label is ``c``. If several clusters
        share a majority label, the cluster with the highest index wins and the
        rows of labels that no cluster maps to stay all-zero.
    """
    print('performing kmeans clustering')

    y = np.asarray(y)

    estimator = KMeans(n_clusters=num_cluster, random_state=0)
    cluster_assignments = estimator.fit_predict(x)
    centroids = estimator.cluster_centers_

    # Map each cluster to its most frequent class label and reorder the centroids.
    # Caveat: if two clusters have the same majority label, the later centroid
    # overwrites the earlier one.
    reordered_centroids = np.zeros_like(centroids)
    for cluster in range(num_cluster):
        members = np.flatnonzero(cluster_assignments == cluster)
        if members.size == 0:
            continue
        # np.unique returns sorted values, so argmax picks the smallest label on
        # ties -- the same tie-break as scipy.stats.mode, without depending on
        # the shape of mode()'s result (scalar in recent SciPy, array in older).
        values, counts = np.unique(y[members], return_counts=True)
        cluster_label = int(values[np.argmax(counts)])
        # Only labels that are valid row indices can be stored.
        if 0 <= cluster_label < num_cluster:
            reordered_centroids[cluster_label] = centroids[cluster]

    return reordered_centroids

In [6]:
def one_hot_encode(labels, num_classes):
    """Convert integer class labels into a dense one-hot matrix.

    Args:
        labels: NumPy integer array of class indices (expected to be 1-D, length N).
        num_classes: number of columns in the encoding.

    Returns:
        Float array of shape (N, num_classes) with a 1 at ``[n, labels[n]]``
        and 0 elsewhere.
    """
    n_samples = labels.shape[0]
    encoded = np.zeros((n_samples, num_classes))
    # Pair every row index with its label so one assignment writes all the ones.
    row_ids = np.arange(n_samples)
    encoded[row_ids, labels] = 1
    return encoded


def get_mutual_information(p_1, p_2, p_12, eps=1e-10):
    """Mutual information (in nats) of a discrete joint distribution.

    Evaluates ``sum(p_12 * log(p_12 / (p_1 outer p_2)))`` after adding ``eps``
    to both the joint and the product of marginals.

    Args:
        p_1: marginal distribution of the first variable, length A.
        p_2: marginal distribution of the second variable, length B.
        p_12: joint distribution, shape (A, B).
        eps: small constant that keeps the logarithm and the division finite.

    Returns:
        The smoothed mutual information as a NumPy scalar.
    """
    # Joint distribution the two variables would have if they were independent (A x B).
    independent = np.outer(p_1, p_2) + eps
    # Smoothed joint, so zero cells do not produce log(0).
    joint = p_12 + eps

    log_ratio = np.log(joint / independent)
    return np.sum(joint * log_ratio)

def get_clust_score(feat, centroids, beta=1.0, normalize_centroids=False):
    """Soft-assign every feature vector to the centroids.

    Each row of ``feat`` is L2-normalised, its squared Euclidean distance to
    every centroid is computed, and a softmax over ``-beta * distance`` turns
    the distances into membership probabilities.

    Args:
        feat: array of shape (N, D).
        centroids: array of shape (K, D).
        beta: inverse temperature of the softmax; larger values give harder
            assignments.
        normalize_centroids: when True the centroids are L2-normalised as well,
            so both sides of the distance live on the unit sphere. Defaults to
            False, which keeps the centroids exactly as given.

    Returns:
        Array of shape (N, K) whose rows sum to 1 (for K > 0).
    """
    feat = np.asarray(feat)
    centroids = np.asarray(centroids)

    n_samples = feat.shape[0]
    n_clusters = centroids.shape[0]
    if n_clusters == 0:
        return np.zeros((n_samples, 0))

    # An all-zero row has norm 0; dividing by it would turn the whole row (and
    # every score derived from it) into NaN, so such rows are left as zeros.
    feat_norms = np.linalg.norm(feat, axis=1, keepdims=True)
    feat_normalized = feat / np.where(feat_norms > 0, feat_norms, 1.0)

    if normalize_centroids:
        centroid_norms = np.linalg.norm(centroids, axis=1, keepdims=True)
        centroids = centroids / np.where(centroid_norms > 0, centroid_norms, 1.0)

    # One centroid at a time keeps the temporary at N x D instead of N x K x D.
    sq_dist = np.empty((n_samples, n_clusters))
    for k in range(n_clusters):
        diff = feat_normalized - centroids[k]
        sq_dist[:, k] = np.sum(diff * diff, axis=1)

    # Softmax over -beta * distance. Subtracting the row maximum first leaves
    # the result unchanged mathematically but prevents every exponential from
    # underflowing to 0 (which would give 0/0) when beta * distance is large.
    logits = -beta * sq_dist
    logits = logits - np.max(logits, axis=1, keepdims=True)
    score = np.exp(logits)
    score /= np.sum(score, axis=1, keepdims=True)
    return score


In [7]:
class Linear(nn.Module):
    """Fully connected layer with optional sparse input, dropout, bias and ReLU.

    Computes ``relu(dropout(x) @ weight + bias)``; the dropout, bias and ReLU
    stages are each optional. In featureless mode the input is ignored and the
    weight matrix itself plays the role of ``x @ weight``.

    Args:
        input_dim: number of input features (rows of the weight matrix).
        output_dim: number of output features (columns of the weight matrix).
        num_features_nonzero: forwarded to ``sparse_dropout`` for sparse inputs.
        dropout: dropout rate applied to the input while training.
        is_sparse_inputs: True when ``x`` is a sparse tensor.
        bias: add a learnable bias (initialised to zeros) when True.
        activation: apply ReLU to the output when True.
        featureless: ignore ``x`` and use the weight matrix directly when True.
    """

    def __init__(self, input_dim, output_dim, num_features_nonzero,
                 dropout=0.,
                 is_sparse_inputs=False,
                 bias=False,
                 activation=True,
                 featureless=False):
        super(Linear, self).__init__()
        self.dropout = dropout

        self.activation = activation
        self.is_sparse_inputs = is_sparse_inputs
        self.featureless = featureless
        self.num_features_nonzero = num_features_nonzero

        self.weight = nn.Parameter(torch.randn(input_dim, output_dim))
        # ``self.bias`` is either a learnable vector or None, never the bool flag.
        self.bias = nn.Parameter(torch.zeros(output_dim)) if bias else None

    def forward(self, x):
        """Apply the layer to ``x`` (ignored when the layer is featureless)."""
        if self.featureless:
            # No input features: the weight matrix is the projection itself, so
            # dropout on the unused ``x`` is skipped (``x`` may even be None).
            xw = self.weight
        else:
            if self.training:
                if self.is_sparse_inputs:
                    x = sparse_dropout(x, self.dropout, self.num_features_nonzero)
                else:
                    x = F.dropout(x, self.dropout)

            if self.is_sparse_inputs:
                xw = torch.sparse.mm(x, self.weight)
            else:
                xw = torch.mm(x, self.weight)

        out = xw
        if self.bias is not None:
            # Out-of-place add: in featureless mode ``out`` is the weight
            # parameter itself, and an in-place ``+=`` on a leaf tensor that
            # requires grad raises a RuntimeError.
            out = out + self.bias

        if self.activation:
            out = F.relu(out)

        return out
    
class GraphConvolution(nn.Module):
    """Graph convolution layer: ``relu(support @ (dropout(x) @ weight) + bias)``.

    The dropout, bias and ReLU stages are each optional. In featureless mode
    the input is ignored and the weight matrix itself is propagated through
    ``support``.

    Args:
        input_dim: number of input features (rows of the weight matrix).
        output_dim: number of output features (columns of the weight matrix).
        num_features_nonzero: forwarded to ``sparse_dropout`` for sparse inputs.
        dropout: dropout rate applied to the input while training.
        is_sparse_inputs: True when ``x`` is a sparse tensor.
        bias: add a learnable bias (initialised to zeros) when True.
        activation: apply ReLU to the output when True.
        featureless: ignore ``x`` and use the weight matrix directly when True.
    """

    def __init__(self, input_dim, output_dim, num_features_nonzero,
                 dropout=0.,
                 is_sparse_inputs=False,
                 bias=False,
                 activation=True,
                 featureless=False):
        super(GraphConvolution, self).__init__()
        self.dropout = dropout

        self.activation = activation
        self.is_sparse_inputs = is_sparse_inputs
        self.featureless = featureless
        self.num_features_nonzero = num_features_nonzero

        self.weight = nn.Parameter(torch.randn(input_dim, output_dim))
        # ``self.bias`` is either a learnable vector or None, never the bool flag.
        self.bias = nn.Parameter(torch.zeros(output_dim)) if bias else None

    def forward(self, x, support):
        """Propagate ``x`` (ignored when featureless) through sparse ``support``."""
        if self.featureless:
            # No input features: the weight matrix is the projection itself, so
            # dropout on the unused ``x`` is skipped (``x`` may even be None).
            xw = self.weight
        else:
            if self.training:
                if self.is_sparse_inputs:
                    x = sparse_dropout(x, self.dropout, self.num_features_nonzero)
                else:
                    x = F.dropout(x, self.dropout)

            if self.is_sparse_inputs:
                xw = torch.sparse.mm(x, self.weight)
            else:
                xw = torch.mm(x, self.weight)

        # Neighbourhood aggregation: multiply by the sparse support matrix.
        out = torch.sparse.mm(support, xw)
        if self.bias is not None:
            out = out + self.bias

        if self.activation:
            out = F.relu(out)

        return out
    
class GCN_base(nn.Module):
    """Two stacked GraphConvolution layers: input -> ``args.hidden`` -> output.

    Args:
        input_dim: number of input features per node.
        output_dim: number of outputs per node.
        num_features_nonzero: passed through to both GraphConvolution layers.
        args: namespace providing ``hidden`` (hidden width) and ``dropout``.
    """

    def __init__(self, input_dim, output_dim, num_features_nonzero, args):
        super(GCN_base, self).__init__()

        self.input_dim = input_dim
        self.output_dim = output_dim

        print('input dim:', input_dim)
        print('output dim:', output_dim)
        print('num_features_nonzero:', num_features_nonzero)

        # Settings shared by both layers; only the sparsity of the input differs.
        shared = dict(activation=True, dropout=args.dropout)

        # First layer consumes the sparse feature matrix.
        self.gcn_1 = GraphConvolution(self.input_dim, args.hidden, num_features_nonzero,
                                      is_sparse_inputs=True, **shared)
        # Second layer consumes the dense hidden representation.
        self.gcn_2 = GraphConvolution(args.hidden, output_dim, num_features_nonzero,
                                      is_sparse_inputs=False, **shared)

    def forward(self, x, support):
        """Run both graph convolutions in sequence and return the final output."""
        hidden = self.gcn_1(x, support)
        return self.gcn_2(hidden, support)

    def l2_loss(self):
        """Sum of squared entries over all parameters of both layers.

        Returns None only if neither layer has any parameters.
        """
        total = None
        for layer in (self.gcn_1, self.gcn_2):
            for param in layer.parameters():
                penalty = param.pow(2).sum()
                total = penalty if total is None else total + penalty
        return total



In [20]:
# Fetch the adjacency matrix, node features, per-split label matrices and masks,
# plus the labels of every node, for the configured dataset.
(
    adj, features,
    y_train, y_val, y_test,
    train_mask, val_mask, test_mask,
    y_all,
) = load_data_all(args.dataset)

# Echo the shapes as a quick sanity check.
print('adj:', adj.shape)
print('features:', features.shape)
print('y:', *(labels.shape for labels in (y_train, y_val, y_test)))
print('mask:', *(mask.shape for mask in (train_mask, val_mask, test_mask)))

# Feature normalisation, D^-1 @ X. Per the original note the result is a triple
# sized [49216, 2], [49216], [2708, 1433] on this dataset.
features = preprocess_features(features)
supports = preprocess_adj(adj)


adj: (2708, 2708)
features: (2708, 1433)
y: (2708, 7) (2708, 7) (2708, 7)
mask: (2708,) (2708,) (2708,)


In [22]:
device = torch.device('cpu')

# Each label matrix has one column per class; remember the class count, then
# collapse every row to its class index with argmax.
train_label = torch.from_numpy(y_train).long().to(device)
num_classes = train_label.shape[1]
train_label = train_label.argmax(dim=1)
train_mask = torch.from_numpy(train_mask.astype(np.int64)).to(device)
val_label = torch.from_numpy(y_val).long().to(device)
val_label = val_label.argmax(dim=1)
val_mask = torch.from_numpy(val_mask.astype(np.int64)).to(device)
test_label = torch.from_numpy(y_test).long().to(device)
test_label = test_label.argmax(dim=1)
test_mask = torch.from_numpy(test_mask.astype(np.int64)).to(device)

# ``features`` and ``supports`` are (coordinates, values, shape) triples.
# torch.sparse.FloatTensor(indices, values, size) is a deprecated legacy
# constructor; torch.sparse_coo_tensor is its supported replacement, and
# dtype=torch.float32 keeps the FloatTensor element type.
i = torch.from_numpy(features[0]).long().to(device)
v = torch.from_numpy(features[1]).to(device)
feature = torch.sparse_coo_tensor(
    i.t(), v, tuple(int(d) for d in features[2]), dtype=torch.float32
).to(device)

i = torch.from_numpy(supports[0]).long().to(device)
v = torch.from_numpy(supports[1]).to(device)
support = torch.sparse_coo_tensor(
    i.t(), v, tuple(int(d) for d in supports[2]), dtype=torch.float32
).to(device)

In [23]:
# Display the sparse feature tensor (indices, values, size, nnz) as a sanity check.
feature

tensor(indices=tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
                       [1274, 1247, 1194,  ...,  329,  186,   19]]),
       values=tensor([0.1111, 0.1111, 0.1111,  ..., 0.0769, 0.0769, 0.0769]),
       size=(2708, 1433), nnz=49216, layout=torch.sparse_coo)

In [29]:

# load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, y_all = load_data_all(args.dataset)
print('adj:', adj.shape)
print('features:', features.shape)
print('y:', y_train.shape, y_val.shape, y_test.shape)
print('mask:', train_mask.shape, val_mask.shape, test_mask.shape)

# D^-1@X
features = preprocess_features(features) # [49216, 2], [49216], [2708, 1433]
supports = preprocess_adj(adj)

# Prefer the GPU, but fall back to the CPU so the cell also runs without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Each label matrix has one column per class; remember the class count, then
# collapse every row to its class index with argmax.
train_label = torch.from_numpy(y_train).long().to(device)
num_classes = train_label.shape[1]
train_label = train_label.argmax(dim=1)
train_mask = torch.from_numpy(train_mask.astype(np.int64)).to(device)
val_label = torch.from_numpy(y_val).long().to(device)
val_label = val_label.argmax(dim=1)
val_mask = torch.from_numpy(val_mask.astype(np.int64)).to(device)
test_label = torch.from_numpy(y_test).long().to(device)
test_label = test_label.argmax(dim=1)
test_mask = torch.from_numpy(test_mask.astype(np.int64)).to(device)

# torch.sparse.FloatTensor(indices, values, size) is a deprecated legacy
# constructor; torch.sparse_coo_tensor is its supported replacement, and
# dtype=torch.float32 keeps the FloatTensor element type.
i = torch.from_numpy(features[0]).long().to(device)
v = torch.from_numpy(features[1]).to(device)
feature = torch.sparse_coo_tensor(
    i.t(), v, tuple(int(d) for d in features[2]), dtype=torch.float32
).to(device)

i = torch.from_numpy(supports[0]).long().to(device)
v = torch.from_numpy(supports[1]).to(device)
support = torch.sparse_coo_tensor(
    i.t(), v, tuple(int(d) for d in supports[2]), dtype=torch.float32
).to(device)

print('x :', feature)
print('sp:', support)
num_features_nonzero = feature._nnz()
feat_dim = feature.shape[1]

net = GCN_base(feat_dim, num_classes, num_features_nonzero, args)
net.to(device)
optimizer = optim.Adam(net.parameters(), lr=args.learning_rate)

best_acc = 0

train_loss_hist = []
test_loss_hist = []
acc_hist = []
# Holds the same values as acc_hist; both names are kept for later cells.
acc_test_history = []

for epoch in range(args.epochs):
    # ---- optimisation step on the training nodes ----
    net.train()
    out = net(feature, support)

    loss = masked_loss(out, train_label, train_mask)
    # The optimizer is created without weight_decay, so the L2 penalty on the
    # layer weights is added to the loss explicitly here as regularisation.
    loss_all = loss + args.weight_decay * net.l2_loss()
    optimizer.zero_grad()
    loss_all.backward()
    optimizer.step()

    # ---- evaluation; no_grad avoids building an autograd graph per epoch ----
    net.eval()
    with torch.no_grad():
        out = net(feature, support)
        test_loss = masked_loss(out, test_label, test_mask)
        acc_train = masked_acc(out, train_label, train_mask)
        acc_test = masked_acc(out, test_label, test_mask)

    if (epoch+1) % 10 == 0:
        print('Epoch:', epoch+1, 'train loss:', '%.6f' % loss.item(), 'test loss:', '%.6f' % test_loss.item(), 'train acc:', '%.4f' % acc_train.item(), 'test acc:', '%.4f' % acc_test.item())

    current_acc = acc_test.item()
    train_loss_hist.append(loss.item())
    test_loss_hist.append(test_loss.item())
    acc_hist.append(current_acc)
    acc_test_history.append(current_acc)

    # NOTE(review): the best checkpoint is chosen by *test* accuracy, so
    # best_acc is an optimistic estimate; consider selecting on val_mask.
    # Comparing against the running best equals the max over the history
    # without rescanning the whole list every epoch.
    if current_acc > best_acc:
        best_acc = current_acc
        np.save('gcn_out_'+args.dataset+'.npy', out.cpu().numpy())

net.eval()
with torch.no_grad():
    out = net(feature, support)
    acc = masked_acc(out, test_label, test_mask)
acc_test_all = np.array(acc_test_history)
print('best test acc:', best_acc)




adj: (2708, 2708)
features: (2708, 1433)
y: (2708, 7) (2708, 7) (2708, 7)
mask: (2708,) (2708,) (2708,)
x : tensor(indices=tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
                       [1274, 1247, 1194,  ...,  329,  186,   19]]),
       values=tensor([0.1111, 0.1111, 0.1111,  ..., 0.0769, 0.0769, 0.0769]),
       device='cuda:0', size=(2708, 1433), nnz=49216, layout=torch.sparse_coo)
sp: tensor(indices=tensor([[   0,  633, 1862,  ..., 1473, 2706, 2707],
                       [   0,    0,    0,  ..., 2707, 2707, 2707]]),
       values=tensor([0.2500, 0.2500, 0.2236,  ..., 0.2000, 0.2000, 0.2000]),
       device='cuda:0', size=(2708, 2708), nnz=13264, layout=torch.sparse_coo)
input dim: 1433
output dim: 7
num_features_nonzero: 49216
Epoch: 10 train loss: 1.691896 test loss: 1.864222 train acc: 0.5071 test acc: 0.2650
Epoch: 20 train loss: 1.334045 test loss: 1.701272 train acc: 0.6500 test acc: 0.4210
Epoch: 30 train loss: 1.014300 test loss: 1.466798 train acc: 0.7643 tes

In [32]:
# Representation under study: the network's final output for every node.
# no_grad keeps autograd from tracking this inference-only forward pass.
net.eval()
with torch.no_grad():
    inter_feat = net(feature, support).cpu().numpy()
print('feat:', inter_feat.shape)

input_feat = feature.to_dense().cpu().numpy()
print('input feat:', input_feat.shape)

y_all_1d = y_all.argmax(axis=1)

# Label-aligned k-means centroids of the representation and of the raw input.
inter_centroids = run_kmeans(inter_feat, y_all_1d, num_classes)
input_centroids = run_kmeans(input_feat, y_all_1d, num_classes)

# Soft cluster memberships, computed once; inter_score / input_score are kept
# as aliases because both pairs of names existed before.
clust_score_input = input_score = get_clust_score(input_feat, input_centroids)
clust_score_output = inter_score = get_clust_score(inter_feat, inter_centroids)
one_hot_target = y_all

# Unnormalised marginals: total membership mass per cluster / per class.
p_in = np.sum(clust_score_input, axis=0)
p_out = np.sum(clust_score_output, axis=0)
p_label = np.sum(one_hot_target, axis=0)

# Unnormalised joints: sum_n a[n, i] * b[n, j] is exactly (A^T B)[i, j], which
# avoids materialising an N x K x K stack of per-node outer products.
p_in_out = clust_score_input.T @ clust_score_output
p_out_label = clust_score_output.T @ one_hot_target

# Normalise everything into probability distributions.
p_in = p_in / np.sum(p_in)
p_out = p_out / np.sum(p_out)
p_label = p_label / np.sum(p_label)
p_in_out = p_in_out / np.sum(p_in_out)
p_out_label = p_out_label / np.sum(p_out_label)

MI_in_out = get_mutual_information(p_in, p_out, p_in_out)
MI_out_label = get_mutual_information(p_out, p_label, p_out_label)
# Information-bottleneck objective: I(input; repr) - I(repr; label).
information_bottleneck = MI_in_out - MI_out_label
print('The MI_in_out is: ', MI_in_out.item())
print('The MI_out_label is: ', MI_out_label.item())
print('The information bottleneck is: ', information_bottleneck)


feat: (2708, 7)
input feat: (2708, 1433)
performing kmeans clustering
performing kmeans clustering
The MI_in_out is:  2.243115284471518e-05
The MI_out_label is:  0.4326593000668484
The information bottleneck is:  -0.43263686891400366


In [33]:
# Representation under study: the output of the first graph-convolution layer.
# no_grad keeps autograd from tracking this inference-only forward pass.
net.eval()
with torch.no_grad():
    inter_feat = net.gcn_1(feature, support).cpu().numpy()
print('feat:', inter_feat.shape)

input_feat = feature.to_dense().cpu().numpy()
print('input feat:', input_feat.shape)

y_all_1d = y_all.argmax(axis=1)

# Label-aligned k-means centroids of the representation and of the raw input.
inter_centroids = run_kmeans(inter_feat, y_all_1d, num_classes)
input_centroids = run_kmeans(input_feat, y_all_1d, num_classes)

# Soft cluster memberships, computed once; inter_score / input_score are kept
# as aliases because both pairs of names existed before.
clust_score_input = input_score = get_clust_score(input_feat, input_centroids)
clust_score_output = inter_score = get_clust_score(inter_feat, inter_centroids)
one_hot_target = y_all

# Unnormalised marginals: total membership mass per cluster / per class.
p_in = np.sum(clust_score_input, axis=0)
p_out = np.sum(clust_score_output, axis=0)
p_label = np.sum(one_hot_target, axis=0)

# Unnormalised joints: sum_n a[n, i] * b[n, j] is exactly (A^T B)[i, j], which
# avoids materialising an N x K x K stack of per-node outer products.
p_in_out = clust_score_input.T @ clust_score_output
p_out_label = clust_score_output.T @ one_hot_target

# Normalise everything into probability distributions.
p_in = p_in / np.sum(p_in)
p_out = p_out / np.sum(p_out)
p_label = p_label / np.sum(p_label)
p_in_out = p_in_out / np.sum(p_in_out)
p_out_label = p_out_label / np.sum(p_out_label)

MI_in_out = get_mutual_information(p_in, p_out, p_in_out)
MI_out_label = get_mutual_information(p_out, p_label, p_out_label)
# Information-bottleneck objective: I(input; repr) - I(repr; label).
information_bottleneck = MI_in_out - MI_out_label
print('The MI_in_out is: ', MI_in_out.item())
print('The MI_out_label is: ', MI_out_label.item())
print('The information bottleneck is: ', information_bottleneck)


feat: (2708, 32)
input feat: (2708, 1433)
performing kmeans clustering
performing kmeans clustering
The MI_in_out is:  8.699037547742988e-07
The MI_out_label is:  0.009720624638259708
The information bottleneck is:  -0.009719754734504933
