In [2]:
import networkx as nx
import numpy as np
import torch
from datetime import datetime
import os
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import torch_geometric.nn as geom_nn
import matplotlib.pyplot as plt
from icecream import ic
import sys

ic.configureOutput("debug | -> ")

import pandas as pd
import torch_geometric.transforms as T

sys.path.append("Models/")
from models import GNN

sys.path.append("FastCover/")

from utils import *


In [30]:
PATH_TO_TRAIN = "BRKGA/instances/Erdos/train/"
#PATH_TO_TRAIN = "BRKGA/instances/scalefree/train/"
epochs = 30
seed = 22

#v.g. python TrainModels.py -pi "../BRKGA/instances/Erdos/train/" -ps "runs/Erdos/" -MDH 0 -s 13 -e 31 -pv ""

PATH_SAVE_TRAINS = 'Models/runs/Erdos/'
#PATH_TO_TRAIN = "../BRKGA/instances/Erdos/train/"

num_features = 4 # Change if needed
    
num_classes = 2

threshold = 0.5

optimizer_name = "Adam"
lr = 5e-4

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

layers = ["GCN", "GAT","GraphConv", "SAGE"]
#layers = ["SAGE"]
    
Models = [GNN(num_features, num_classes, name_layer = layer_name) for 
         layer_name in layers]




In [31]:
Instances = [graph for graph in os.listdir(PATH_TO_TRAIN + 'txt')]

graphs = []
for er in Instances:
    graph = igraph.Graph.Read_Edgelist(PATH_TO_TRAIN+"txt/"+er, directed = False)
    graphs.append(graph.to_networkx())    

OptInstances = [graph for graph in os.listdir(PATH_TO_TRAIN+'optimal')]
Solutions = []
for er in OptInstances:
    opt = []
    with open(PATH_TO_TRAIN+'optimal/'+er) as f:
        for line in f.readlines():
            opt.append(int(line.replace("\n", "")))
    Solutions.append(opt)   


print("\nCargando Features...\n")
graphFeatures = [feat for feat in os.listdir(PATH_TO_TRAIN+'feats')]
Features = []
for er in graphFeatures:
    temp = []
    try:
        with open(PATH_TO_TRAIN+'feats/'+er) as f:
            c = 0

            for line in f.readlines()[1:]:
                c+=1
                feats = np.array(line.split(","), dtype = float)
                temp.append(feats)
        temp = np.array(temp)
        #temp = np.delete(temp, 2, 1)
        Features.append(temp)
    except:
        print(er)
        print(line)
        print(c)
    
Graphs_Train = Convert2DataSet(graphs, Solutions, feats = Features)

num_features = Graphs_Train[0].num_features
num_classes = Graphs_Train[0].num_classes


Cargando Features...



In [32]:
import torch.nn as nn
import torch_geometric.nn as geom_nn
import torch.nn.functional as F
import torch
from torch_geometric.nn import Linear

class GNN(torch.nn.Module):
    def __init__(self, num_node_features, num_classes, name_layer = "SAGE"):
        super().__init__()
        self.name = name_layer
        layer = None
        hidden_feats = 64
        
        if name_layer == "SAGE":
            layer = geom_nn.SAGEConv
            
            self.conv1 = layer(num_node_features, hidden_feats)
            self.conv3 = Linear(hidden_feats, num_classes)
            
        elif name_layer == "GAT":
            layer = geom_nn.GATConv
            
            self.conv1 = layer(num_node_features, hidden_feats)
            self.conv3 = Linear(hidden_feats, num_classes)
            
        elif name_layer == "GCN":
            layer = geom_nn.GCNConv
            
            self.conv1 = layer(num_node_features, num_classes)
            #self.conv1 = layer(num_node_features, hidden_feats)
            #self.conv3 = Linear(hidden_feats, num_classes)
            
        elif name_layer == "GraphConv":
            layer = geom_nn.GraphConv
            
            self.conv1 = layer(num_node_features, num_classes)
            
        elif name_layer == "SGConv":
            layer = geom_nn.SGConv
            
            self.conv1 = layer(num_node_features, hidden_feats)
            self.conv3 = Linear(hidden_feats, num_classes)
        
        
        else:
            print("Nanais")
    
        
        #self.conv2 = layer(hidden_feats, hidden_feats)
        #self.conv3 = Linear(hidden_feats, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        #x = F.dropout(x, training=self.training)
        #x = self.conv2(x, edge_index)

        #return F.log_softmax(self.conv3(x), dim=1)
        if self.name in ['GraphConv', 'GCN']:
            return F.log_softmax(x, dim=1)
        else:
            return F.log_softmax(self.conv3(x), dim=1)
        

In [33]:
layers = ["GCN", "GAT","GraphConv", "SAGE", "SGConv"]

torch.manual_seed(seed)
Models = [GNN(num_features, num_classes, name_layer = layer_name) for 
         layer_name in layers]

In [34]:
torch.manual_seed(seed)
dt_string = datetime.now().strftime("%m-%d_%H-%M")

weights = [1, 3]
class_weights = torch.FloatTensor(weights).cpu()

for i in range(len(layers)):
    print(f"Probando {layers[i]}")
    
        

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    Models[i] = Models[i].to(device)
    
    minloss = np.inf
    bestacc = None
    bestaccnorm = None
    
    for epoch in range(100):
        EpochAcc = []
        EpochNorm = []
        EpochNaive = []
        EpochLoss = []
        EpochMDH = []
        for ig, data in enumerate(Graphs_Train):


            data = data.to(device)
            optimizer = torch.optim.Adam(Models[i].parameters(), lr=0.01, weight_decay= 0)

            y = data.y.detach().numpy().copy()

            Models[i].train()
            optimizer.zero_grad()
            out = Models[i](data)
            loss = F.nll_loss(out, data.y, weight = class_weights)
            loss.backward()
            optimizer.step()
            
            
            y_pred = torch.exp(Models[i](data)).T[1]
            y_pred_new = np.sort(torch.topk(y_pred, int(sum(y)))[1].detach().numpy())

            val = len(np.intersect1d(np.where(y == 1)[0], y_pred_new)) / len(y)
            
            y_mdh = torch.topk( torch.tensor(np.array(list(dict(nx.degree(graphs[ig])).values()))) , int(sum(y)))[1].detach().numpy()
            mdh = len(np.intersect1d(np.where(y == 1)[0], y_mdh)) / len(y)
            
            EpochAcc.append(val)
            EpochLoss.append(float(loss))
            EpochMDH.append(mdh)
            
            y_pred_ = torch.clone(y_pred)
            y_pred_[y_pred_ > 0.5] = 1
            y_pred_[y_pred_ <= 0.5] = 0
            EpochNorm.append(np.sum(y_pred_.detach().numpy() == y) / len(y))
            EpochNaive.append(np.sum(y==0) / len(y))
            
        if np.mean(EpochLoss) < minloss:
            #print(f"\nloss improved from {minloss :.3f} to {np.mean(EpochLoss):.3f} Saving...")
            torch.save(Models[i].state_dict(), 
                   f=f"{PATH_SAVE_TRAINS}{layers[i]}_seed_{seed}_thr_{int(threshold*10)}_date_{dt_string}.pt")
            minloss = np.mean(EpochLoss)
            bestacc = np.mean(EpochAcc)
            bestaccnorm = np.mean(EpochNorm)
            
            

        if epoch%20 == 0:
            
            print(f"Epoch {epoch} - Loss: {np.mean(EpochLoss):.2f} - Mean Acc {np.mean(EpochAcc):.2f} - MDH {np.mean(EpochMDH):.2f} - Acc Norm {np.mean(EpochNorm):.3f} - AccNaive {np.mean(EpochNaive):.3f}")
    print(f"\nFinal Epoch - Best Loss: {minloss:.2f} - Best Acc {bestacc:.2f} - MDH {np.mean(EpochMDH):.2f} - Best Acc Norm {bestaccnorm:.3f} - AccNaive {np.mean(EpochNaive):.3f}\n")

Probando GCN
Epoch 0 - Loss: 0.71 - Mean Acc 0.14 - MDH 0.16 - Acc Norm 0.522 - AccNaive 0.817
Epoch 20 - Loss: 0.48 - Mean Acc 0.14 - MDH 0.16 - Acc Norm 0.875 - AccNaive 0.817
Epoch 40 - Loss: 0.43 - Mean Acc 0.14 - MDH 0.16 - Acc Norm 0.860 - AccNaive 0.817
Epoch 60 - Loss: 0.41 - Mean Acc 0.14 - MDH 0.16 - Acc Norm 0.852 - AccNaive 0.817
Epoch 80 - Loss: 0.39 - Mean Acc 0.14 - MDH 0.16 - Acc Norm 0.853 - AccNaive 0.817

Final Epoch - Best Loss: 0.38 - Best Acc 0.14 - MDH 0.16 - Best Acc Norm 0.856 - AccNaive 0.817

Probando GAT
Epoch 0 - Loss: 0.67 - Mean Acc 0.05 - MDH 0.16 - Acc Norm 0.817 - AccNaive 0.817
Epoch 20 - Loss: 0.67 - Mean Acc 0.06 - MDH 0.16 - Acc Norm 0.668 - AccNaive 0.817
Epoch 40 - Loss: 0.72 - Mean Acc 0.06 - MDH 0.16 - Acc Norm 0.686 - AccNaive 0.817
Epoch 60 - Loss: 0.72 - Mean Acc 0.06 - MDH 0.16 - Acc Norm 0.705 - AccNaive 0.817
Epoch 80 - Loss: 0.67 - Mean Acc 0.10 - MDH 0.16 - Acc Norm 0.753 - AccNaive 0.817

Final Epoch - Best Loss: 0.66 - Best Acc 0.06 -

## Adding new feature

In [5]:
#!/usr/bin/env python
# coding: utf-8

# In[152]:

import numpy as np
import networkx as nx
import os
import time
import argparse


#PATH = "./BRKGA/instances/Erdos/test/txt/"
#PATH_save = "./BRKGA/instances/Erdos/test/feats/"

PATH = "./BRKGA/instances/socialnetworks/txt/"
PATH_save = "./BRKGA/instances/socialnetworks/feats/"


#PATH = './BRKGA/instances/Erdos/test/txt/'
#PATH_save = './BRKGA/instances/Erdos/test/feats/'
#python savefeats.py -p "./BRKGA/instances/Erdos/test/txt/" -ps "./BRKGA/instances/Erdos/test/feats/"
#python savefeats.py -p "./BRKGA/instances/Erdos/test/txt/" -ps "./BRKGA/instances/Erdos/test/feats/"


def getFeatures(G):
    
    #BC = np.array(list(nx.betweenness_centrality(G, k = 500).values()))
    #CC = np.array(list(nx.closeness_centrality(G).values()))
    #LC = np.array(list(nx.load_centrality(G).values()))
    #DG = np.array(list(nx.degree(G))).T[1]
    #PR = np.array(list(nx.pagerank(G).values()))
    
    EC = np.array(list(nx.eigenvector_centrality(G, max_iter = 200).values()))

    #features = [BC, PR, DG, CC]#, LC]
    features = [EC]#, LC]
    names = ["EC"]#, "LC"]
    return np.array(features).T, names


# In[227]:


def writeFeatures(PATH, ins, features, elapsed):
    subfij = '_feat'
    with open(PATH + ins.split("/")[-1].replace(".txt","") + subfij + "_EC.npy", "wb") as f:
        np.save(f, features, allow_pickle=True)
    """
    
    file2 = open(, 'w')
    c = 0
    
    file2.write(f"time: {elapsed}, n: {features.shape[0]}")
    file2.write('\n')
    
    for f in features:
        st = ",".join(str(x) for x in f)
        file2.write(st)
        file2.write('\n')
        c += 1
    file2.close()
    print(f"para {ins} se escribieron {c} lines")
    """


# In[231]:

graphs = [graph for graph in os.listdir(PATH)]
graphs.sort(reverse = False)
graphs = graphs[:2]
Graphs = []

for ins in graphs:
    file1 = open(PATH+ins, 'r')
    
    Lines = file1.readlines()
    VectorList = []
    for line in Lines:
        VectorList.append(line.replace("\n",""))

    file1.close()
    G2 = nx.parse_edgelist(VectorList, nodetype=int)

    H = nx.Graph()
    H.add_nodes_from(sorted(G2.nodes(data=True)))
    H.add_edges_from(G2.edges(data=True))
    """
    G = igraph.Graph.Read_Edgelist(PATH+ins, directed = False)
    G = G.to_networkx()
    """
    Graphs.append(H)

# In[229]:

c = 0
for G, ins in zip(Graphs, graphs):
    c+=1
    print(f"\n------------ {c} out of {len(Graphs)} ------------\n")
    print(f"\nNext graph: {ins}")
    
    s = time.time()
    features, _ = getFeatures(G)
    elapsed = time.time() - s
    print(f"\nTime elapsed: {elapsed:.3f}")
    
    writeFeatures(PATH_save, ins, features, elapsed)
    


# In[204]:



------------ 1 out of 2 ------------


Next graph: Amazon0302.txt

Time elapsed: 137.735

------------ 2 out of 2 ------------


Next graph: Amazon0312.txt

Time elapsed: 375.686


In [39]:
with open(PATH_save + "ER_50000_20_0_feat_EC.npy", "rb") as f:
        f = np.load(f, allow_pickle=True)

In [11]:
c = 0
for G, ins in zip(Graphs, graphs):
    c+=1
    print(f"\n------------ {c} out of {len(Graphs)} ------------\n")
    print(f"\nNext graph: {ins}")
    
    s = time.time()
    features, _ = getFeatures(G)
    elapsed = time.time() - s
    print(f"\nTime elapsed: {elapsed:.3f}")
    
    writeFeatures(PATH_save, ins, features, elapsed)


------------ 1 out of 25 ------------


Next graph: socfb-nips-ego.txt

Time elapsed: 0.699

------------ 2 out of 25 ------------


Next graph: socfb-Mich67.txt

Time elapsed: 1.115

------------ 3 out of 25 ------------


Next graph: socfb-Brandeis99.txt

Time elapsed: 1.630

------------ 4 out of 25 ------------


Next graph: soc-gplus.txt

Time elapsed: 3.713

------------ 5 out of 25 ------------


Next graph: musae_git.txt

Time elapsed: 5.992

------------ 6 out of 25 ------------


Next graph: loc-gowalla_edges.txt

Time elapsed: 22.054

------------ 7 out of 25 ------------


Next graph: graph_ncstrlwg2.txt

Time elapsed: 2.349

------------ 8 out of 25 ------------


Next graph: graph_karate.txt

Time elapsed: 0.015

------------ 9 out of 25 ------------


Next graph: graph_jazz.txt

Time elapsed: 0.031

------------ 10 out of 25 ------------


Next graph: graph_football.txt

Time elapsed: 0.016

------------ 11 out of 25 ------------


Next graph: graph_dolphins.txt

Time e

In [10]:
def getFeatures(G):
    
    #BC = np.array(list(nx.betweenness_centrality(G, k = 500).values()))
    #CC = np.array(list(nx.closeness_centrality(G).values()))
    #LC = np.array(list(nx.load_centrality(G).values()))
    #DG = np.array(list(nx.degree(G))).T[1]
    #PR = np.array(list(nx.pagerank(G).values()))
    
    EC = np.array(list(nx.eigenvector_centrality(G, max_iter = 200).values()))

    #features = [BC, PR, DG, CC]#, LC]
    features = [EC]#, LC]
    names = ["EC"]#, "LC"]
    return np.array(features).T, names


In [3]:
PATH = "./BRKGA/instances/socialnetworks/txt/"
graphs = [graph for graph in os.listdir(PATH)]
graphs.sort(reverse = False)
graphs = graphs[:2]

In [4]:
graphs

['Amazon0302.txt', 'Amazon0312.txt']