In [1]:
import sys
sys.path.append("/home/gui/Documents/Experimentos_Pesquisa/experimentos_pesquisa")

import warnings
warnings.filterwarnings("ignore")

from Auxiliares.requirements import *
from Auxiliares.auxiliar_functions import *
from Auxiliares.NN_models import *

from Algoritmos.LP_PUL import LP_PUL
from Algoritmos.AE_PUL import autoencoder_PUL_model
from Algoritmos.CCRNE import CCRNE
from Algoritmos.MCLS import MCLS
from Algoritmos.PU_LP import PU_LP
from Algoritmos.RCSVM_RN import RCSVM_RN
from Algoritmos.GAE_PUL import graphautoencoder_PUL_model

# Criando o dataset sintético

In [2]:
from sklearn.datasets import make_gaussian_quantiles

In [3]:
# Build the benchmark graph with networkx's LFR generator.
# min_community == max_community == 200 with n=1000; the later cells hard-code
# 5 communities of 200 nodes, so they presumably rely on that outcome.
# NOTE(review): no `seed` is passed, so every run generates a different graph.
G = nx.LFR_benchmark_graph(
    n=1000,
    tau1=2,
    tau2=1.1,
    mu=0.5,
    min_degree=4,
    max_degree=12,
    min_community=200,
    max_community=200,
)
# Each node stores its community under the "community" attribute; collecting
# them as frozensets in a set removes the duplicates.
communities = set(frozenset(G.nodes[node]["community"]) for node in G)

In [4]:
# Synthetic node features: 1000 samples, 32 features, 5 classes.
# NOTE(review): neither sklearn nor torch is seeded here, so the data changes
# on every run.
_X, _y = make_gaussian_quantiles(n_samples=1000, n_features=32, n_classes=5)
_y = torch.tensor(_y)
# Halve the features, then perturb them in place with uniform noise.
_X = torch.tensor(_X).mul(0.5)
noise = torch.rand_like(_X)
_X.add_(noise)

In [5]:
# One empty node list per community, keyed by the community's frozenset.
community_lists = {community: [] for community in communities}

# Put every node into the list of the community stored on it.
for node, attributes in G.nodes(data=True):
    community_lists[frozenset(attributes["community"])].append(node)

# Flatten the mapping into a list of node lists (one entry per community).
list_of_lists = list(community_lists.values())

In [6]:
# Re-index the synthetic samples so that all nodes of graph community `label`
# receive the features/labels of samples from class `label`.
X = torch.zeros_like(_X)
y = torch.zeros_like(_y)
for label in range(5):
    community_nodes = list_of_lists[label]
    # Positions of the samples whose class equals `label`.
    sample_ids = torch.nonzero(_y == label).squeeze().tolist()
    # 200 is hard-coded: every community and every class is expected to hold
    # exactly 200 elements.
    for slot in range(200):
        node = community_nodes[slot]
        source = sample_ids[slot]
        y[node] = _y[source]
        X[node] = _X[source]

In [7]:
# Binarise the labels: classes listed in `positive_label` become 1, all
# other classes become 0.
positive_label = [3]

# `y` is rebound below from the multi-class tensor to a plain 0/1 list.
# Re-running this cell on the already-binarised list would silently turn every
# label into 0 (no value equals 3 any more), so the multi-class tensor is kept
# under its own name and the binarisation is always derived from it.
if torch.is_tensor(y):
    y_multiclass = y
y = [1 if label in positive_label else 0 for label in y_multiclass.tolist()]

# Node indices of the positive and of the negative class.
all_positives = [index for index, label in enumerate(y) if label == 1]
all_negatives = [index for index, label in enumerate(y) if label == 0]

## Testando o parâmetro num_neg

In [None]:
# Experiment: sweep `num_neg` (the argument given to negative_inference) for
# every PUL variant, over 5 independent random draws of the labelled positives.
# Each result list receives one (num_neg, accuracy, f1) tuple per run and value.
results_AE = list()
results_GAE1 = list()
results_GAE2 = list()
results_GAE3 = list()
results_GAE4 = list()

# Algorithm name -> result list, replacing an if/elif chain in the inner loop.
results_by_algorithm = {
    'AE_PUL': results_AE,
    'GAE_PUL1': results_GAE1,
    'GAE_PUL2': results_GAE2,
    'GAE_PUL3': results_GAE3,
    'GAE_PUL4': results_GAE4,
}

num_neg_list = list(range(1, 250, 1))

for i in range(5):
    # 20% of the true positives are labelled; every other node is unlabeled.
    positives = random.sample(all_positives, int(0.2 * len(all_positives)))
    unlabeled = list(set(range(len(G.nodes()))) - set(positives))

    # A fresh model and optimizer per algorithm and per run.
    model_AE = AutoEncoder(input_size=X.shape[1], hidden_size1=16, hidden_size2=4)
    model_GAE1 = GraphAutoEncoder(input_dim=X.shape[1], hidden_dim1=16, hidden_dim2=4)
    model_GAE2 = GraphAutoEncoder(input_dim=X.shape[1], hidden_dim1=16, hidden_dim2=4)
    model_GAE3 = GraphAutoEncoder(input_dim=X.shape[1], hidden_dim1=16, hidden_dim2=4)
    model_GAE4 = GraphAutoEncoder(input_dim=X.shape[1], hidden_dim1=16, hidden_dim2=4)

    optimizer_AE = optim.Adam(model_AE.parameters(), lr=0.001)
    optimizer_GAE1 = optim.Adam(model_GAE1.parameters(), lr=0.001)
    optimizer_GAE2 = optim.Adam(model_GAE2.parameters(), lr=0.001)
    optimizer_GAE3 = optim.Adam(model_GAE3.parameters(), lr=0.001)
    optimizer_GAE4 = optim.Adam(model_GAE4.parameters(), lr=0.001)

    # Edges of the original graph (GAE_PUL1, GAE_PUL2).
    edge_index = torch.tensor(list(G.edges)).t().contiguous()

    # Modified topology (GAE_PUL3, GAE_PUL4).
    # NOTE(review): connect_nearest_nodes is defined outside this notebook;
    # confirm that it returns a new graph instead of mutating G.
    G1 = connect_nearest_nodes(G, positives, 5)
    edge_index1 = torch.tensor(list(G1.edges)).t().contiguous()

    edge_weight1 = strong_connect_positives(positives, edge_index1, 3) # GAE_PUL4
    edge_weight2 = dijkstra_n_weight(G, edge_index, positives, 5, 3) # GAE_PUL2

    # Arguments shared by every algorithm in this experiment.
    shared = dict(epochs = 5, data = X, positives = positives, unlabeled = unlabeled)

    # Every algorithm gets its OWN model/optimizer pair. Previously GAE_PUL3
    # reused model_GAE2/optimizer_GAE2 (so it continued training the network
    # GAE_PUL2 had just trained) and GAE_PUL4 used model_GAE3, which left
    # model_GAE4 unused.
    algorithms = {
        'AE_PUL' : autoencoder_PUL_model(model = model_AE, optimizer = optimizer_AE, **shared),
        'GAE_PUL1' : graphautoencoder_PUL_model(model = model_GAE1, optimizer = optimizer_GAE1, edge_index = edge_index, edge_weight = None, **shared),
        'GAE_PUL2' : graphautoencoder_PUL_model(model = model_GAE2, optimizer = optimizer_GAE2, edge_index = edge_index, edge_weight = edge_weight2, **shared),
        'GAE_PUL3' : graphautoencoder_PUL_model(model = model_GAE3, optimizer = optimizer_GAE3, edge_index = edge_index1, edge_weight = None, **shared),
        'GAE_PUL4' : graphautoencoder_PUL_model(model = model_GAE4, optimizer = optimizer_GAE4, edge_index = edge_index1, edge_weight = edge_weight1, **shared),
    }

    for algorithm, pul_model in algorithms.items():
        # Train once, then evaluate the same trained model for every num_neg.
        pul_model.train()
        for num_neg in num_neg_list:
            RN = pul_model.negative_inference(num_neg)
            acc = round(compute_accuracy(y, RN), 4)
            f1 = round(compute_f1_score(y, RN), 4)
            results_by_algorithm[algorithm].append((num_neg, acc, f1))

# One row per (run, num_neg) pair; rows of the 5 runs are stacked in run order.
num_neg_columns = {'num_neg': [row[0] for row in results_AE]}
for suffix, rows in (
    ('AE', results_AE),
    ('GAE1', results_GAE1),
    ('GAE2', results_GAE2),
    ('GAE3', results_GAE3),
    ('GAE4', results_GAE4),
):
    num_neg_columns[f'acc_{suffix}'] = [row[1] for row in rows]
    num_neg_columns[f'f1_{suffix}'] = [row[2] for row in rows]

df_num_neg = pd.DataFrame(num_neg_columns)

df_num_neg.to_csv('resultados_experimento_num_neg.csv', index = False)


## Testando o parâmetro epoch

In [9]:
# Experiment: sweep the number of training epochs for every PUL variant, over
# 5 repetitions. Each result list receives one (epoch, accuracy, f1) tuple per
# repetition and epoch value.
epochs = list(range(1,40,3))
results_AE = list()
results_GAE1 = list()
results_GAE2 = list()
results_GAE3 = list()
results_GAE4 = list()

# Algorithm name -> result list, replacing an if/elif chain in the inner loop.
results_by_algorithm = {
    'AE_PUL': results_AE,
    'GAE_PUL1': results_GAE1,
    'GAE_PUL2': results_GAE2,
    'GAE_PUL3': results_GAE3,
    'GAE_PUL4': results_GAE4,
}

for i in range(5):
    for epoch in epochs:
        # 10% of the true positives are labelled (re-drawn for every epoch
        # value); every other node is unlabeled.
        positives = random.sample(all_positives, int(0.1 * len(all_positives)))
        unlabeled = list(set(range(len(G.nodes()))) - set(positives))

        # A fresh model and optimizer per algorithm and per configuration.
        model_AE = AutoEncoder(input_size=X.shape[1], hidden_size1=16, hidden_size2=4)
        model_GAE1 = GraphAutoEncoder(input_dim=X.shape[1], hidden_dim1=16, hidden_dim2=4)
        model_GAE2 = GraphAutoEncoder(input_dim=X.shape[1], hidden_dim1=16, hidden_dim2=4)
        model_GAE3 = GraphAutoEncoder(input_dim=X.shape[1], hidden_dim1=16, hidden_dim2=4)
        model_GAE4 = GraphAutoEncoder(input_dim=X.shape[1], hidden_dim1=16, hidden_dim2=4)

        optimizer_AE = optim.Adam(model_AE.parameters(), lr=0.001)
        optimizer_GAE1 = optim.Adam(model_GAE1.parameters(), lr=0.001)
        optimizer_GAE2 = optim.Adam(model_GAE2.parameters(), lr=0.001)
        optimizer_GAE3 = optim.Adam(model_GAE3.parameters(), lr=0.001)
        optimizer_GAE4 = optim.Adam(model_GAE4.parameters(), lr=0.001)

        # Edges of the original graph (GAE_PUL1, GAE_PUL2).
        edge_index = torch.tensor(list(G.edges)).t().contiguous()

        # Modified topology (GAE_PUL3, GAE_PUL4).
        # NOTE(review): connect_nearest_nodes is defined outside this notebook;
        # confirm that it returns a new graph instead of mutating G.
        G1 = connect_nearest_nodes(G, positives, 5)
        edge_index1 = torch.tensor(list(G1.edges)).t().contiguous()

        edge_weight1 = strong_connect_positives(positives, edge_index1, 3) # GAE_PUL4
        edge_weight2 = dijkstra_n_weight(G, edge_index, positives, 5, 3) # GAE_PUL2

        # Arguments shared by every algorithm. `epochs = epoch` now also
        # applies to AE_PUL: it was previously trained with a fixed
        # `epochs = 5` while its results were still recorded under the swept
        # epoch value.
        shared = dict(epochs = epoch, data = X, positives = positives, unlabeled = unlabeled)

        # Every algorithm gets its OWN model/optimizer pair. Previously
        # GAE_PUL3 reused model_GAE2/optimizer_GAE2 (so it continued training
        # the network GAE_PUL2 had just trained) and GAE_PUL4 used model_GAE3,
        # which left model_GAE4 unused.
        algorithms = {
            'AE_PUL' : autoencoder_PUL_model(model = model_AE, optimizer = optimizer_AE, **shared),
            'GAE_PUL1' : graphautoencoder_PUL_model(model = model_GAE1, optimizer = optimizer_GAE1, edge_index = edge_index, edge_weight = None, **shared),
            'GAE_PUL2' : graphautoencoder_PUL_model(model = model_GAE2, optimizer = optimizer_GAE2, edge_index = edge_index, edge_weight = edge_weight2, **shared),
            'GAE_PUL3' : graphautoencoder_PUL_model(model = model_GAE3, optimizer = optimizer_GAE3, edge_index = edge_index1, edge_weight = None, **shared),
            'GAE_PUL4' : graphautoencoder_PUL_model(model = model_GAE4, optimizer = optimizer_GAE4, edge_index = edge_index1, edge_weight = edge_weight1, **shared),
        }

        for algorithm, pul_model in algorithms.items():
            pul_model.train()
            # num_neg is fixed at 100 throughout this experiment.
            RN = pul_model.negative_inference(100)
            acc = round(compute_accuracy(y, RN), 4)
            f1 = round(compute_f1_score(y, RN), 4)
            results_by_algorithm[algorithm].append((epoch, acc, f1))

# One row per (repetition, epoch) pair, stacked in execution order.
epoch_columns = {'epoch': [row[0] for row in results_AE]}
for suffix, rows in (
    ('AE', results_AE),
    ('GAE1', results_GAE1),
    ('GAE2', results_GAE2),
    ('GAE3', results_GAE3),
    ('GAE4', results_GAE4),
):
    epoch_columns[f'acc_{suffix}'] = [row[1] for row in rows]
    epoch_columns[f'f1_{suffix}'] = [row[2] for row in rows]

df_epoch = pd.DataFrame(epoch_columns)

df_epoch.to_csv('resultados_experimento_epoch.csv', index = False)


## Testando os Hiperparâmetros da regularização

Aqui serão testados os parâmetros de:
* quantidade de vizinhos para a ligação direta alterando a topologia do grafo - GAE_PUL3
* quantidade de vizinhos para computar o shortest path, aumentando as arestas definidas de interesse - GAE_PUL2
* valor real do aumento das arestas para a mudança de topologia - GAE_PUL4

In [1]:
# GAE_PUL2
# NOTE(review): the recorded output of this cell is a SyntaxError
# ("incomplete input"), so the cell appears to be unfinished.

# Accumulator for the GAE_PUL2 results of this experiment.
results_GAE2 = list()

# Presumably the value passed to negative_inference, as in the earlier
# experiments; its use is not visible in this cell - TODO confirm.
num_neg = 100

# Five repetitions, each starting from a freshly created model and optimizer.
for i in range(5):
    model_GAE2 = GraphAutoEncoder(input_dim=X.shape[1], hidden_dim1=16, hidden_dim2=4)
    optimizer_GAE2 = optim.Adam(model_GAE2.parameters(), lr=0.001)

SyntaxError: incomplete input (390095165.py, line 7)