In [36]:
from pathlib import Path

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [37]:
# Pick the compute device once: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [38]:
class NeuralNetwork(nn.Module):
    """Fully connected binary classifier: features -> 32 -> 32 -> 1.

    The two hidden layers use ReLU; the single output unit goes through a
    sigmoid, so ``forward`` returns values in [0, 1] (not raw logits).
    """

    def __init__(self, features):
        """Build the layers.

        Args:
            features: Number of input features per sample.
        """
        super().__init__()
        hidden_units = 32

        # First hidden layer.
        self.layer1 = nn.Linear(features, hidden_units)
        self.fun1 = nn.ReLU()
        # Second hidden layer.
        self.layer2 = nn.Linear(hidden_units, hidden_units)
        self.fun2 = nn.ReLU()
        # Output layer squashed to a probability.
        self.layer3 = nn.Linear(hidden_units, 1)
        self.fun3 = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the three layers and return the sigmoid output."""
        hidden = self.fun1(self.layer1(x))
        hidden = self.fun2(self.layer2(hidden))
        probability = self.fun3(self.layer3(hidden))
        return probability

In [39]:
def prep_data(path, percentage_in_training, to_drop, random_state=None):
    """Load a link-prediction CSV, min-max scale it and split it into tensors.

    The CSV must contain a ``link_exists`` column (used as the label) and
    every column listed in ``to_normalize`` below. Scaling is applied before
    ``to_drop`` is removed, so a column may be both normalised and dropped.

    NOTE(review): the min/max statistics are computed over the whole dataset
    before the train/test split, so test rows influence the scaling of the
    training features.

    Args:
        path: Path of the CSV file, passed to ``pd.read_csv``.
        percentage_in_training: Fraction of rows to put in the training set;
            the test fraction is ``1 - percentage_in_training``.
        to_drop: Column labels removed from the features after scaling.
        random_state: Forwarded to ``train_test_split``. The default ``None``
            keeps the previous (unseeded) behaviour; pass an int for a
            reproducible split.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` as ``torch.float32`` tensors.
    """
    to_normalize = ['neighbours_1', 'neighbours_2', 'common_neigbhours', 'total_neigbhours',
                     'prefferential_attachment', 'friends_measure', 'shortest_path']

    dataset = pd.read_csv(path)
    label = dataset['link_exists']
    features = dataset.drop('link_exists', axis='columns')

    for col in to_normalize:
        col_min = features[col].min()
        col_range = features[col].max() - col_min
        if col_range == 0:
            # A constant column would give 0/0 = NaN for every row and poison
            # the loss; it carries no information, so map it to 0 instead.
            features[col] = 0.0
        else:
            features[col] = (features[col] - col_min) / col_range

    # Non-mutating drop: leaves no half-modified frame behind on re-runs.
    features = features.drop(to_drop, axis=1)

    x_train, x_test, y_train, y_test = train_test_split(
        features,
        label,
        test_size=1 - percentage_in_training,
        random_state=random_state,
    )

    # Convert every split to a float32 tensor, in the order callers unpack.
    x_train, y_train, x_test, y_test = (
        torch.tensor(part.to_numpy(), dtype=torch.float32)
        for part in (x_train, y_train, x_test, y_test)
    )

    return x_train, y_train, x_test, y_test

In [40]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal their labels.

    Both tensors are flattened before comparison so that a column vector of
    predictions ``(N, 1)`` compared with labels ``(N,)`` is matched element by
    element instead of being broadcast to an ``(N, N)`` grid (which could
    report an "accuracy" greater than 1).

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels holding the same number of elements.

    Returns:
        A Python float in [0, 1]; ``0.0`` when ``y_true`` is empty.
    """
    y_true = y_true.reshape(-1)
    y_pred = y_pred.reshape(-1)

    total = y_true.numel()
    if total == 0:
        # Nothing to score; avoid ZeroDivisionError.
        return 0.0

    correct = torch.eq(y_true, y_pred).sum().item()
    return correct / total

In [41]:
# Seed PyTorch's global random number generator with a fixed value so that
# torch-driven randomness is repeatable from this point on.
# NOTE(review): this only seeds torch. train_test_split in prep_data is called
# without an explicit seed, so the data split is presumably not controlled by
# this call - confirm whether that is intended.
torch.manual_seed(42)

def model_train(model, x_train, y_train, x_test, y_test, epochs=50, lr=0.001):
    """Train ``model`` full-batch with Adam + BCE and report final accuracies.

    The data tensors are moved to the module-level ``device``; the model itself
    is not moved here, so callers are expected to have placed it on ``device``
    already. The model output is treated as a probability (``nn.BCELoss`` is
    used), and predictions are obtained by rounding it.

    Args:
        model: Module whose output for a batch is one probability per sample.
        x_train: Training features.
        y_train: Training labels, one per row of ``x_train``.
        x_test: Test features.
        y_test: Test labels, one per row of ``x_test``.
        epochs: Number of full-batch optimisation steps (default 50).
        lr: Adam learning rate (default 0.001).

    Returns:
        ``(accuracy_train, test_accuracy)``. The train accuracy comes from the
        forward pass of the last epoch (i.e. before the final optimiser step);
        the test accuracy is measured after training, with the model in eval
        mode. The model is left in eval mode.

    Raises:
        ValueError: If ``epochs`` is smaller than 1.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    x_train, y_train = x_train.to(device), y_train.to(device)
    x_test, y_test = x_test.to(device), y_test.to(device)

    loss_fn = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.train()
    for _ in range(epochs):
        # squeeze(-1) drops only the trailing output dimension. A bare
        # squeeze() would also drop the batch dimension for a single-sample
        # batch, and BCELoss would then reject the size mismatch.
        train_probs = model(x_train).squeeze(-1)
        loss = loss_fn(train_probs, y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Only the last epoch's training predictions are reported.
    accuracy_train = accuracy(y_train, torch.round(train_probs.detach()))

    # Evaluate once after training: earlier per-epoch evaluations were never
    # used and did not influence the optimisation.
    model.eval()
    with torch.inference_mode():
        test_probs = model(x_test).squeeze(-1)
        test_accuracy = accuracy(y_test, torch.round(test_probs))

    return accuracy_train, test_accuracy

In [42]:
# Experiment grid: every dataset is trained once per training-set fraction.
datasets = ['data/GenRel.csv', 'data/CondMat.csv', 'data/ErdosRenyi.csv', 'data/BarabasiAlbert.csv']
percentages = [0.1, 0.3, 0.5, 0.7, 0.9]

# Columns removed from the features before training. Alternative feature sets
# are kept below; the model input size follows whichever list is active.
to_drop = ['node_1', 'node_2'] # 12 features
# to_drop = ['node_1', 'node_2', 'shortest_path'] # 11 features
# to_drop = ['node_1', 'node_2', 'shortest_path', 'density_ego_with_node_1', 'density_ego_with_node_2',
                # 'density_ego_without_node_1', 'density_ego_without_node_2'] # 7 features
# to_drop = ['node_1', 'node_2', 'density_ego_with_node_1', 'density_ego_with_node_2', 
                # 'density_ego_without_node_1', 'density_ego_without_node_2'] # 8 features

performance_columns = ['Dataset' ,'Model', 'Percentage of data in training', 'Train Accuracy','Test Accuracy']
# Collect one row per run and build the frame once at the end instead of
# growing a DataFrame row by row.
performance_rows = []

for dataset in datasets:
    # File name without directory or extension, e.g. 'data/GenRel.csv' -> 'GenRel'.
    dataset_name = Path(dataset).stem

    for percentage in percentages:
        x_train, y_train, x_test, y_test = prep_data(dataset, percentage, to_drop)

        # Size the input layer from the data so switching `to_drop` cannot
        # cause a shape mismatch with a hard-coded feature count.
        model = NeuralNetwork(x_train.shape[1]).to(device)
        train_acc, test_acc = model_train(model, x_train, y_train, x_test, y_test)
        performance_rows.append([dataset_name, 'Neural Network', percentage, train_acc, test_acc])

        save_path = f'neuralnetworks/{dataset_name}/{dataset_name}_{percentage}.pt'
        # torch.save does not create missing directories.
        Path(save_path).parent.mkdir(parents=True, exist_ok=True)
        torch.save(model.state_dict(), save_path)

model_performance_data = pd.DataFrame(performance_rows, columns=performance_columns)

performance_path = Path('performance/neural_network_performance_all.csv')
performance_path.parent.mkdir(parents=True, exist_ok=True)
model_performance_data.to_csv(performance_path, index=False)