In [7]:
import networkx as nx
import numpy as np
import torch
from datetime import datetime
import os
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import matplotlib.pyplot as plt

import pandas as pd
import torch_geometric.transforms as T

#from create_dataset import CreateDataset

from models import GNNModel

import sys
sys.path.append("../FastCover/")
from utils import *

In [5]:
PATH_SAVE_TRAINS_CHECKPOINTS = 'runs/checkpoints/'
PATH_SAVE_TRAINS = 'runs/'
PATH_TRAIN = '../FastCover/data/ER_graphs/train/'

num_features = 1
num_classes  = 2

threshold = 0.5

optimizer_name = "Adam"
lr = 1e-3
epochs = 20

SEED = 13

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

layers = ["GCN", "GAT","GraphConv"]

Models = [GNNModel(c_in = 1, c_hidden = 100, c_out = 2, num_layers = 2, layer_name = layer_name, dp_rate=0.1) for 
         layer_name in layers]

In [9]:
ERInstances = [graph for graph in os.listdir(PATH_TRAIN+'pkl')]
graphs = []
for er in ERInstances:
    graph = igraph.Graph().Read_Pickle(PATH_TRAIN + 'pkl/'+er)
    graphs.append(graph.to_networkx())    

OptInstances = [graph for graph in os.listdir(PATH_TRAIN+'optimal')]
Solutions = []
for er in OptInstances:
    opt = []
    with open(PATH_TRAIN+'optimal/'+er) as f:
        for line in f.readlines():
            opt.append(int(line.replace("\n", "")))
    Solutions.append(opt)

In [73]:
## Falta por incluir estax

In [74]:
from torch_geometric.nn import GCNConv, GINConv
import torch
import torch.nn.functional as F
from torch.nn import Linear, Sequential, BatchNorm1d, ReLU, Dropout
from torch_geometric.nn import GCNConv, GINConv
from torch_geometric.nn import global_mean_pool, global_add_pool

class GIN(torch.nn.Module):
    """GIN"""
    def __init__(self, dim_h):
        super(GIN, self).__init__()
        self.conv1 = GINConv(
            Sequential(Linear(1, dim_h),
                       BatchNorm1d(dim_h), ReLU(),
                       Linear(dim_h, dim_h), ReLU()))
        self.conv2 = GINConv(
            Sequential(Linear(dim_h, dim_h), BatchNorm1d(dim_h), ReLU(),
                       Linear(dim_h, dim_h), ReLU()))
        self.conv3 = GINConv(
            Sequential(Linear(dim_h, dim_h), BatchNorm1d(dim_h), ReLU(),
                       Linear(dim_h, dim_h), ReLU()))
        self.lin1 = Linear(dim_h*3, dim_h*3)
        self.lin2 = Linear(dim_h*3, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        # Node embeddings 
        h1 = self.conv1(x, edge_index)
        h2 = self.conv2(h1, edge_index)
        h3 = self.conv3(h2, edge_index)

        # Graph-level readout
        h1 = global_add_pool(h1, batch)
        h2 = global_add_pool(h2, batch)
        h3 = global_add_pool(h3, batch)

        # Concatenate graph embeddings
        h = torch.cat((h1, h2, h3), dim=1)

        # Classifier
        h = self.lin1(h)
        h = h.relu()
        h = F.dropout(h, p=0.5, training=self.training)
        h = self.lin2(h)
        
        return h, F.log_softmax(h, dim=1)

In [75]:
gin = GIN(dim_h=1)
Graphs_Train[0]

Data(edge_index=[2, 9982], num_nodes=1000, x=[1000, 1], y=[1000], num_classes=2)

In [59]:
Models.append(gin)
layers.append('GIN')

## Train

In [10]:
Graphs_Train = Convert2DataSet(graphs, Solutions)

In [11]:
def train(model, optimizer, data):
        model.train()
        optimizer.zero_grad()

        F.nll_loss(model(data.x, data.edge_index), data.y).backward()
        optimizer.step()
        return model, optimizer
      
    
@torch.no_grad()
def test(data, model):
  model.eval()
  logits = model(data.x, data.edge_index)
  pred = logits.max(1)[1]
  acc = pred.eq(data.y).sum().item() / data.num_nodes
  return acc

In [12]:
torch.manual_seed(SEED)
for i in range(len(Models)):
    print()
    print(f" ----- Model:{layers[i]} -----")
    optimizer = getattr(torch.optim, optimizer_name)(Models[i].parameters(), lr = lr)

    for epoch in range(1, epochs):
        
        for data in Graphs_Train:
            train(Models[i], optimizer, data)
        
        if epoch%5 == 0:
            torch.save(Models[i].state_dict(), f=f"{PATH_SAVE_TRAINS_CHECKPOINTS}Checkpoint-model-{layers[i]}-epoch-{epoch}.pt")
            print(f"Epoch {epoch} saved for {layers[i]}.\n")
        
            Acc = []

            for data in Graphs_Train:
                Acc.append(test(data, Models[i]))
            print(f"Mean Acc: {np.mean(Acc)}")
            print()
        
    dt_string = datetime.now().strftime("%m-%d_%H-%M")
    torch.save(Models[i].state_dict(), f=f"{PATH_SAVE_TRAINS}{layers[i]}_seed_{SEED}_thr_{int(threshold*10)}_date_{dt_string}.pt")
    
    print(f"{layers[i]} saved in {PATH_SAVE_TRAINS}\n")


 ----- Model:GCN -----
Epoch 5 saved for GCN.

Mean Acc: 0.8551333333333333

Epoch 10 saved for GCN.

Mean Acc: 0.8551333333333333

Epoch 15 saved for GCN.

Mean Acc: 0.8551333333333333

GCN saved in runs/


 ----- Model:GAT -----
Epoch 5 saved for GAT.

Mean Acc: 0.8551333333333333

Epoch 10 saved for GAT.

Mean Acc: 0.8551333333333333

Epoch 15 saved for GAT.

Mean Acc: 0.8551333333333333

GAT saved in runs/


 ----- Model:GraphConv -----
Epoch 5 saved for GraphConv.

Mean Acc: 0.8551333333333333

Epoch 10 saved for GraphConv.

Mean Acc: 0.8551333333333333

Epoch 15 saved for GraphConv.

Mean Acc: 0.8551333333333333

GraphConv saved in runs/



## Test

In [131]:
import igraph
import dgl
import torch
import time
import os
import networkx as nx
from datetime import datetime
import pandas as pd
from utils import *
from GRAT import GRAT3
import warnings
warnings.filterwarnings('ignore')
import argparse

"""
parser = argparse.ArgumentParser()

parser.add_argument("-th", "--Threshold", help = "Infection Threshold", type = float)
parser.add_argument("-type", "--Type", help = "short, large or full", type = str)
args = parser.parse_args()
"""
# Example: python EvaluateFastCover.py -th 0.5 -type "short"

Graphs_short = [
 'ego-facebook.txt',
 'gemsec_facebook_artist.txt',
 'graph_actors_dat.txt',
 'graph_CA-AstroPh.txt',
 'graph_CA-CondMat.txt',
 'graph_CA-GrQc.txt',
 'graph_CA-HepPh.txt',
 'graph_CA-HepTh.txt',
 'graph_dolphins.txt',
 'graph_Email-Enron.txt',
 'graph_football.txt',
 'graph_jazz.txt',
 'graph_karate.txt',
 'graph_ncstrlwg2.txt',
 'soc-gplus.txt',
 'socfb-Brandeis99.txt',
 'socfb-Mich67.txt',
 'socfb-nips-ego.txt']

Graphs_large = ['Amazon0302.txt',
 'Amazon0312.txt',
 'Amazon0505.txt',
 'Amazon0601.txt',
 'com-youtube.ungraph.txt',
 'com-dblp.ungraph.txt',
 'loc-gowalla_edges.txt',
 'deezer_HR.txt',
 'musae_git.txt']

PATH_TO_TEST = "../BRKGA/instances/txt/"
"""
if args.Type == "short":
    Graphs = Graphs_short
elif args.Type == "large":
    Graphs = Graphs_large
elif args.Type == "full":
    Graphs = [graph for graph in os.listdir(PATH_TO_TEST)]
else:
    raise NameError("Only: 'short', 'large' or 'full")
"""
Graphs = [Graphs_short[11]]
PATH_SAVED_TRAINS = "runs/"
PATH_SAVE_RESULTS = 'results/'

NAME_SAVE_RESULTS = 'Models' #Change this

FEATURE_TYPE = "1"
HIDDEN_FEATS = [32]*6
input_dim = 32
use_cuda = False
directed_test = False

#threshold = args.Threshold
threshold = 0.5
dt_string = datetime.now().strftime("%m-%d_%H-%M")

RUNS_LIST = [run for run in os.listdir(PATH_SAVED_TRAINS) if ".pt" in run]

SEEDS = []
MODELS = []
for run_name in RUNS_LIST:
    SEEDS.append(run_name.split("_")[2])
    MODELS.append(run_name.split("_")[0])


In [132]:
MODELS

['GAT', 'GCN', 'GraphConv']

In [133]:
records = []

Total = len(Graphs)
    
for run_name, model, seed in zip(RUNS_LIST, MODELS, SEEDS):
    print()
    print(f"Evaluation of model: {model}, seed: {seed} in {run_name}")
    print()
    
    net = GNNModel(c_in = 1, c_hidden = 100, c_out = 2, num_layers = 2, layer_name = model, dp_rate=0.1)
    net.load_state_dict(torch.load(PATH_SAVE_TRAINS+run_name))
    
    if use_cuda:
        net.cuda()

    c = 1
    for file in Graphs:
            print(f"Loading {PATH_TO_TEST+file} ...")
            name = file.split(".")[0].replace("graph_", "")

            graph = igraph.Graph().Read_Edgelist(PATH_TO_TEST + file)
            data = Convert2DataSet([graph.to_networkx()], [[]])[0]

            #dglgraph = get_rev_dgl(graph, FEATURE_TYPE, input_dim, directed_test, use_cuda)
            
            print("\nStarting infection\n")

            start_time = time.time()

            out = net(data.x, data.edge_index).max(1)[0]

            G = graph.to_networkx().to_undirected()

            n = len(G.nodes())

            _ , minTargetGRAT = FindMinimumTarget(G, out, threshold)

            final_time = (time.time() - start_time)
            print()
            print(f"{c}/{Total} Graph: {name}")
            print(f"Best Target Set length: {minTargetGRAT} out of {n}")
            print(f"Ratio Solution / Graph lentgh: {minTargetGRAT/n:.3f}")
            print(f"Time: {final_time:.2f}s")
            print()
            records.append({
            "graph": name,
            "model": model,
            "seed": seed,
            "threshold": threshold,
            "n_covered": minTargetGRAT,
            "n": n,
            "coverage": minTargetGRAT/n,
            "t_mean": final_time
            })

            pd.DataFrame(records).to_csv(PATH_SAVE_RESULTS + NAME_SAVE_RESULTS +"_" + dt_string + ".csv")

            c+=1
print(f"Evaluation has finnished successfully. \nData saved in {PATH_SAVE_RESULTS}")



Evaluation of model: GAT, seed: 13 in GAT_seed_13_thr_5_date_10-29_21-22.pt

Loading ../BRKGA/instances/txt/graph_jazz.txt ...

Starting infection

0.01 Infected
0.01 Infected
0.02 Infected
0.03 Infected
0.03 Infected
0.04 Infected
1/1 Graph: jazz
Best Target Set length: 54 out of 198
Ratio Solution / Graph lentgh: 0.273
Time: 3.67s


Evaluation of model: GCN, seed: 13 in GCN_seed_13_thr_5_date_10-29_21-22.pt

Loading ../BRKGA/instances/txt/graph_jazz.txt ...

Starting infection

0.01 Infected
0.02 Infected
0.02 Infected
0.03 Infected
0.03 Infected
0.04 Infected
1/1 Graph: jazz
Best Target Set length: 76 out of 198
Ratio Solution / Graph lentgh: 0.384
Time: 7.67s


Evaluation of model: GraphConv, seed: 13 in GraphConv_seed_13_thr_5_date_10-29_21-22.pt

Loading ../BRKGA/instances/txt/graph_jazz.txt ...

Starting infection

0.01 Infected
0.01 Infected
0.02 Infected
0.02 Infected
0.03 Infected
1/1 Graph: jazz
Best Target Set length: 59 out of 198
Ratio Solution / Graph lentgh: 0.298
Time

In [None]:
Models = [GNNModel(c_in = 1, c_hidden = 100, c_out = 2, num_layers = 2, layer_name = layer_name, dp_rate=0.1) for 
         layer_name in layers]

In [88]:
PATH_SAVE_TRAINS+run_name

'runs/GraphConv_seed_13_thr_5_date_10-29_21-22.pt'

In [94]:
#Convert2DataSet(Graphs, Optimals)
D = Convert2DataSet([graph.to_networkx()], [[]])[0]

In [115]:
#graph = igraph.Graph().Read_Edgelist(PATH_TO_TEST + file)
data = Convert2DataSet([graph.to_networkx()], [[]])[0]

#dglgraph = get_rev_dgl(graph, FEATURE_TYPE, input_dim, directed_test, use_cuda)

print("\nStarting infection\n")

start_time = time.time()

out = net(data.x, data.edge_index).max(1)[0]

G = graph.to_networkx().to_undirected()

n = len(G.nodes())

_ , minTargetGRAT = FindMinimumTarget(G, out, threshold)


Starting infection

0.00 Infected
0.00 Infected
0.00 Infected
0.00 Infected
0.01 Infected
0.01 Infected


In [118]:
Graphs

'ego-facebook.txt'