In [1]:
import warnings

import torch

# Report the installed PyTorch build and the CUDA version it reports.
print(torch.__version__)
print(torch.version.cuda)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Silence every warning for the rest of the session (global filter).
warnings.filterwarnings('ignore')

2.1.2+cu121
12.1


In [2]:
import torch
import matplotlib.pyplot as plt
import numpy as np

import torch_geometric
from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import to_dense_adj
import torch.nn.functional as F
import pandas as pd

import os
from sklearn import decomposition
from sklearn.manifold import TSNE
import pandas as pd

from torch_geometric.nn import GCNConv
from torch.nn import Linear
from torch_geometric.nn import GATConv
from sklearn.decomposition import PCA
import numpy as np

import random
import networkx as nx
from torch_geometric.utils import from_scipy_sparse_matrix, coalesce,remove_self_loops, to_networkx, from_networkx
import scipy.sparse as sp

In [3]:
class GCN(torch.nn.Module):
    """Three stacked GCNConv layers followed by a linear classification head.

    Layer widths are ``input_feature -> 1024 -> 512 -> 512 -> nclasses``.
    A ``tanh`` non-linearity is applied after every graph convolution.
    """

    def __init__(self, input_feature, nclasses):
        """Create the layers.

        Args:
            input_feature: Number of input features per node.
            nclasses: Number of output classes produced by the classifier.
        """
        super().__init__()

        self.input_feature = input_feature
        self.nclasses = nclasses

        # Layers are instantiated in the order conv1, conv2, conv3, classifier.
        hidden_wide, hidden_narrow = 1024, 512
        self.conv1 = GCNConv(in_channels=self.input_feature, out_channels=hidden_wide)
        self.conv2 = GCNConv(in_channels=hidden_wide, out_channels=hidden_narrow)
        self.conv3 = GCNConv(in_channels=hidden_narrow, out_channels=hidden_narrow)
        self.classifier = Linear(hidden_narrow, self.nclasses)

    def forward(self, x, edge_idx):
        """Run the network.

        Args:
            x: Node feature tensor passed to the first convolution.
            edge_idx: Edge index tensor passed to every convolution.

        Returns:
            Tuple ``(out, a)``: the classifier output and the activation of
            the last convolution (after ``tanh``) that was fed to it.
        """
        hidden = x
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = torch.tanh(conv(hidden, edge_idx))

        logits = self.classifier(hidden)
        return logits, hidden

In [4]:
def mask_to_index(index, size):
    """Select entries of ``0..size-1`` using ``index``.

    Args:
        index: Anything NumPy accepts as an index into a 1-D array; with a
            boolean mask of length ``size`` the result is the positions
            where the mask is true.
        size: Length of the index range to select from.

    Returns:
        NumPy array ``np.arange(size)[index]``.
    """
    return np.arange(size)[index]

def index_to_mask(index, size):
    """Build a boolean mask of length ``size`` that is true at ``index``.

    Args:
        index: Positions (anything usable to index a 1-D tensor) to mark.
        size: Length of the returned mask.

    Returns:
        1-D ``torch.bool`` tensor, false everywhere except at ``index``.
    """
    flags = torch.zeros((size,), dtype=torch.bool)
    flags[index] = True
    return flags

In [5]:
class arguments:
    """Small settings holder; it stores only the random seed."""

    def __init__(self, seed = 5):
        # Seed value the caller later feeds to the random number generators.
        self.seed = seed


In [12]:
def randompoints(data, count):
    """Randomly pick up to ``count`` training nodes from every class.

    Uses NumPy's global random state (``np.random.permutation``), so seed it
    beforehand for reproducible selections. The picks for each class are
    printed as they are made.

    Args:
        data: Object exposing ``train_mask``, ``y`` and ``num_classes``.
        count: Maximum number of training nodes to keep per class.

    Returns:
        Tuple ``(selected_idx, selected_dict, labels_out)``:
            selected_idx: flat list of the chosen node indices, class by class.
            selected_dict: ``{class_id: array of chosen indices}``.
            labels_out: class label of every entry in ``selected_idx``
                (always the same length as ``selected_idx``).
    """
    idx_train = mask_to_index(data.train_mask, len(data.train_mask))
    label_train = data.y[data.train_mask]

    selected_idx = []
    selected_dict = {}
    labels_out = []
    for class_id in range(data.num_classes):
        # Training-set node indices that carry this class label.
        class_idx = idx_train[label_train == class_id]

        chosen = np.random.permutation(class_idx)[:count]
        print("class_id: ", class_id, " selected: ", chosen)

        selected_dict[class_id] = chosen
        selected_idx.extend(chosen)
        # One label per node actually chosen: a class with fewer than `count`
        # training nodes must not produce more labels than indices.
        labels_out.extend([class_id] * len(chosen))

    return selected_idx, selected_dict, labels_out
    

In [7]:
def create_idx(labels_out):
    """Connect every pair of positions that share a label.

    Each element of ``labels_out`` is treated as the label of the node at
    that position. For every label, all ordered pairs ``(i, j)`` with
    ``i != j`` among its positions become edges, so each connection appears
    in both directions.

    Args:
        labels_out: Sequence of hashable labels, one per node position.

    Returns:
        ``torch.long`` tensor of shape ``[2, num_edges]``. When no two
        positions share a label (including empty input) the result has
        shape ``[2, 0]``.
    """
    # Group node positions by label, keeping first-seen label order.
    positions_by_label = {}
    for position, label in enumerate(labels_out):
        positions_by_label.setdefault(label, []).append(position)

    # Positions are unique and each belongs to exactly one label, so no
    # pair can be generated twice and no de-duplication is needed.
    edges = [
        (src, dst)
        for nodes in positions_by_label.values()
        for src in nodes
        for dst in nodes
        if src != dst
    ]

    if not edges:
        # torch.tensor([]) would be a float tensor of shape [0]; return a
        # correctly shaped and typed empty edge list instead.
        return torch.empty((2, 0), dtype=torch.long)

    return torch.tensor(edges, dtype=torch.long).t().contiguous()

In [34]:
class train_d():
    """Train a node classifier on one graph and score it on another.

    The model is expected to be callable as ``model(x, edge_index)`` and to
    return ``(logits, hidden)``. Training uses ``data.train_mask`` of the
    training graph; evaluation uses ``test_data.test_mask``.

    ``best_accuracy`` and the checkpoint persist across repeated calls to
    ``train_model`` on the same instance, so a later call only overwrites the
    checkpoint when it beats every earlier call.

    NOTE(review): the checkpoint is selected by accuracy on the *test* mask,
    so the reported best accuracy is optimistically biased. A validation
    split would be the sound choice; behaviour is left as in the original.
    """

    def __init__(self, model, data, test_data, lr=0.00012, checkpoint_path='Best_model.pt'):
        """Set up the loss, the optimizer and the bookkeeping.

        Args:
            model: Module returning ``(logits, hidden)`` from ``(x, edge_index)``.
            data: Graph used for training (needs ``x``, ``edge_index``, ``y``,
                ``train_mask``).
            test_data: Graph used for evaluation (needs ``x``, ``edge_index``,
                ``y``, ``test_mask``).
            lr: Adam learning rate.
            checkpoint_path: File the best ``state_dict`` is written to.
        """
        self.model = model
        self.data = data
        self.test_data = test_data
        self.checkpoint_path = checkpoint_path
        self.criterion = torch.nn.CrossEntropyLoss()  # Define loss criterion.
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)  # Define optimizer.

        self.embed = 0  # never updated by this class; kept for existing readers
        self.best_accuracy = 0
        # True once this instance has written a checkpoint, so train_model
        # never reloads a missing file or one left behind by another run.
        self._checkpoint_saved = False
        # Accuracies (in percent) recorded by the most recent train_model call.
        self.train_accuracy_history = []
        self.test_accuracy_history = []

    @staticmethod
    def _accuracy(out, labels, mask):
        """Percentage of masked nodes whose arg-max prediction equals the label."""
        target = labels[mask]
        predicted = torch.argmax(out[mask], dim=1)
        return ((predicted == target).sum() / len(target)).item() * 100

    def train(self, data):
        """Run one optimisation step on ``data``.

        Returns:
            Tuple ``(loss, h, out)``: the training loss, the hidden
            activations and the logits from the forward pass made before
            the parameter update.
        """
        self.model.train()
        self.optimizer.zero_grad()  # Clear gradients.
        # Call the module itself (not .forward) so registered hooks still run.
        out, h = self.model(data.x, data.edge_index)  # Perform a single forward pass.

        loss = self.criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.

        loss.backward()  # Derive gradients.
        self.optimizer.step()  # Update parameters based on gradients.
        return loss, h, out

    def train_model(self, epo = 400):
        """Train for ``epo`` epochs, evaluating and checkpointing every 5th.

        Whenever the test accuracy exceeds ``self.best_accuracy`` the model
        weights are saved to ``self.checkpoint_path``. After training, the
        best checkpoint written by this instance (if any) is loaded back
        into the model.

        Args:
            epo: Number of training epochs to run.
        """
        data = self.data
        print('Training Started...')
        train_accuracy = []
        test_accuracy = []
        # Inclusive upper bound: exactly `epo` epochs, and the last one is
        # eligible for evaluation.
        for epoch in range(1, epo + 1):
            loss, h, out = self.train(data)

            if epoch % 5 != 0:
                continue

            with torch.no_grad():
                train_accuracy.append(self._accuracy(out, data.y, data.train_mask))

            test_acc = self.test()   # test with the original test data
            test_accuracy.append(test_acc)
            if test_acc > self.best_accuracy:
                self.best_accuracy = test_acc
                print("Saving Model at acc: ", self.best_accuracy)
                torch.save(self.model.state_dict(), self.checkpoint_path)
                self._checkpoint_saved = True

            print(f'Epoch : {epoch}, Training Accuracy: {train_accuracy[-1]:.2f}, Testing Accuracy: {test_accuracy[-1]:.2f}')

        self.train_accuracy_history = train_accuracy
        self.test_accuracy_history = test_accuracy

        print('Training Finished!')
        print("Best accuracy: ", self.best_accuracy )
        if self._checkpoint_saved:
            self.model.load_state_dict(torch.load(self.checkpoint_path))

    def test(self):
        """Return the accuracy (in percent) on ``test_data.test_mask``."""
        data = self.test_data
        self.model.eval()
        with torch.no_grad():
            out, h = self.model(data.x, data.edge_index)
            return self._accuracy(out, data.y, data.test_mask)
                    
            
        

In [44]:
# set the seed for every random number generator used below
args = arguments(105)

random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

# select the data
dataname = "cora"
data = Planetoid(root='data', name= dataname)

# number of training nodes sampled per class
count = 10

# select random points from dataset
selected_idx, selected_dict, labels_out = randompoints(data, count)  # labels_out holds the label of each selected node

# select the features of the selected points
newx = data.x[selected_idx]

# create edges amongst the same class of nodes
new_edges = create_idx(labels_out)

# build two data instances - with and without Adjacency information
labels_new = torch.tensor(labels_out, dtype=torch.long)
empty_adj = torch.empty((2, 0), dtype=torch.int64) # need this for the first pass
# Every sampled node is a training node. The mask is a bool tensor (not a
# Python list) so it is a real tensor attribute of the Data object and is
# moved/indexed like the other tensors.
data_mix = Data(x=newx, edge_index=new_edges, y=labels_new,
                train_mask=torch.ones(len(labels_new), dtype=torch.bool)) #new_edge_2
data_mix_no_adj = Data(x=newx, edge_index=empty_adj, y=labels_new,
                       train_mask=torch.ones(len(labels_new), dtype=torch.bool))


class_id:  0  selected:  [10 65 80 35 60 74 61 79 11 98]
class_id:  1  selected:  [ 18 126 102 133 138  36 134 136 135 121]
class_id:  2  selected:  [101  90  58  46  53  16 111  73 105  71]
class_id:  3  selected:  [25 13 30 19 27 57 14 17  0  4]
class_id:  4  selected:  [55 12  2  1 22 91 38 75 82 33]
class_id:  5  selected:  [ 67 114 119  47  37  70 115 130  20 120]
class_id:  6  selected:  [ 87  93  96  42  56  26  99 127  92 108]


In [45]:
# Display the summary repr of data_mix (the sampled graph with same-class edges).
data_mix

Data(x=[70, 1433], edge_index=[2, 630], y=[70], train_mask=[70])

In [46]:
# Build a GCN sized from the dataset's feature and class counts.
model = GCN(data.num_features,data.num_classes)
# First pass: train on the sampled nodes without any edges; accuracy is
# measured on data[0] using its test mask.
exp2 = train_d(model,data_mix_no_adj,data[0])
exp2.train_model()
# Second pass, with adj: the same trainer (same model and optimizer) continues
# on the sampled nodes, now connected by same-class edges.
exp2.data = data_mix
exp2.train_model()

Training Started...
Saving Model at acc:  41.999998688697815
Epoch : 5.00, Training Accuracy: 91.43, Testing Accuracy: 42.00
Saving Model at acc:  66.29999876022339
Epoch : 10.00, Training Accuracy: 98.57, Testing Accuracy: 66.30
Saving Model at acc:  73.90000224113464
Epoch : 15.00, Training Accuracy: 98.57, Testing Accuracy: 73.90
Saving Model at acc:  75.3000020980835
Epoch : 20.00, Training Accuracy: 98.57, Testing Accuracy: 75.30
Epoch : 25.00, Training Accuracy: 100.00, Testing Accuracy: 74.90
Epoch : 30.00, Training Accuracy: 100.00, Testing Accuracy: 75.20
Epoch : 35.00, Training Accuracy: 100.00, Testing Accuracy: 74.70
Epoch : 40.00, Training Accuracy: 100.00, Testing Accuracy: 74.80
Epoch : 45.00, Training Accuracy: 100.00, Testing Accuracy: 74.60
Epoch : 50.00, Training Accuracy: 100.00, Testing Accuracy: 74.40
Epoch : 55.00, Training Accuracy: 100.00, Testing Accuracy: 73.70
Epoch : 60.00, Training Accuracy: 100.00, Testing Accuracy: 72.80
Epoch : 65.00, Training Accuracy:

Epoch : 210.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 215.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 220.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 225.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 230.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 235.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 240.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 245.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 250.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 255.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 260.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 265.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 270.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 275.00, Training Accuracy: 100.00, Testing Accuracy: 73.10
Epoch : 280.00, Training Accuracy: 100.00, Testing Accuracy: 7

Training Started...
Saving Model at acc:  71.29999995231628
Epoch : 5.00, Training Accuracy: 100.00, Testing Accuracy: 71.30
Saving Model at acc:  71.60000205039978
Epoch : 10.00, Training Accuracy: 100.00, Testing Accuracy: 71.60
Epoch : 15.00, Training Accuracy: 100.00, Testing Accuracy: 71.50
Epoch : 20.00, Training Accuracy: 100.00, Testing Accuracy: 71.60
Epoch : 25.00, Training Accuracy: 100.00, Testing Accuracy: 71.50
Epoch : 30.00, Training Accuracy: 100.00, Testing Accuracy: 71.30
Epoch : 35.00, Training Accuracy: 100.00, Testing Accuracy: 71.60
Saving Model at acc:  71.70000076293945
Epoch : 40.00, Training Accuracy: 100.00, Testing Accuracy: 71.70
Epoch : 45.00, Training Accuracy: 100.00, Testing Accuracy: 71.70
Epoch : 50.00, Training Accuracy: 100.00, Testing Accuracy: 71.50
Epoch : 55.00, Training Accuracy: 100.00, Testing Accuracy: 71.50
Epoch : 60.00, Training Accuracy: 100.00, Testing Accuracy: 71.40
Epoch : 65.00, Training Accuracy: 100.00, Testing Accuracy: 71.40
Epo