In [4]:
import os
import pathlib
import torch
from torch_geometric.data import InMemoryDataset, download_url
import networkx as nx
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx
import numpy as np
from torch.autograd import Variable
from sklearn.model_selection import train_test_split

In [5]:
crisi = ('2014_1', '2014_2', '2014_3','2016_11', '2016_12', '2017_1', '2019_8', '2019_9', '2019_10', '2021_1', '2021_2', '2021_3')

In [6]:
leg = ['xvii', 'xviii']
filenames = []

for l in leg:
    dataset_folder = pathlib.Path(os.path.dirname(os.getcwd())+'/'+l+'-months')
    file_list = list(dataset_folder.iterdir())
    for i, path in enumerate(file_list):
        if path.suffix == ".txt":
            filenames.append(file_list[i])


In [7]:
import torch
from torch_geometric.data import Dataset, Data
from torch_geometric.utils import from_networkx
import networkx as nx

class MyOwnDataset(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super().__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        # Return the list of raw edge list files in the directory
        return filenames

    @property
    def processed_file_names(self):
        # Define the processed file name
        return ['data.pt']

    def download(self):
        # Download logic if required
        pass

    def process(self):
        data_list = []
        
        for raw_path in self.raw_file_names:
            # Read the edge list from the file and create a networkx graph
            G = nx.read_edgelist(raw_path)

            if str(raw_path)[-11:-4].replace('-','') in crisi:
                label = 1
            else:
                label = 0

            # Convert the networkx graph to a PyG Data object
            pyg_graph = from_networkx(G)

            # Create a Data object and set node features and label
            x = self.extract_node_features(G)  # Set node features if available
            #x = None
            data = Data(x=x, edge_index=pyg_graph.edge_index, y=label)
            data_list.append(data)

        # Save the processed data list
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

    def read_edge_list(self, path):
        # Read the edge list from the file and return as a list of tuples
        edge_list = []
        with open(path, 'r') as file:
            for line in file:
                node1, node2 = line.strip().split()
                edge_list.append((int(node1), int(node2)))
        return edge_list


    def extract_node_features(self, graph):
        # Extract node features from the graph, including degree as a feature
        features = []

        cc = nx.closeness_centrality(graph)
        pr = nx.pagerank(graph)
        bt = nx.betweenness_centrality(graph)
        ei = nx.eigenvector_centrality(graph)

        for node in graph.nodes:
            degree = graph.degree[node]
            # Add the degree as a feature to the feature list
            features.append([degree, bt[node], cc[node], pr[node], ei[node]])

        return torch.tensor(features, dtype=torch.float)


In [8]:
dataset = MyOwnDataset(root=os.getcwd())

Processing...
Done!


In [9]:
val_split = 0.3
train_idx, test_idx = train_test_split(list(range(len(dataset))), test_size = val_split, stratify= dataset.y, random_state= 1)
train_idx, val_idx  = train_test_split(list(range(len(dataset[train_idx]))), test_size=0.2, random_state=1)

In [10]:
train_dataset = dataset[train_idx]
test_dataset = dataset[test_idx]
val_dataset = dataset[val_idx]

In [11]:
from torch_geometric.loader import DataLoader

train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=20, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=20, shuffle=False)


In [12]:
from torch.nn import Linear, Softmax, Sigmoid
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, 2)
        self.sig = Sigmoid()
        self.soft = Softmax(1)

        

    def forward(self, x, edge_index, batch):
        # 1. Obtain node embeddings 
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        x = self.sig(x)
        x = F.softmax(x, dim = 1)
        
        return torch.argmax(x, dim = 1, keepdim= True)

model = GCN(hidden_channels=64)
print(model)

GCN(
  (conv1): GCNConv(5, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
  (sig): Sigmoid()
  (soft): Softmax(dim=1)
)


In [14]:
!pip install tensorflow

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting tensorflow
  Downloading tensorflow-2.13.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (524.1 MB)
[K     |████████████████████████████████| 524.1 MB 1.1 MB/s eta 0:00:0132
Collecting astunparse>=1.6.0
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting tensorflow-estimator<2.14,>=2.13.0
  Downloading tensorflow_estimator-2.13.0-py2.py3-none-any.whl (440 kB)
[K     |████████████████████████████████| 440 kB 14.1 MB/s eta 0:00:01
Collecting opt-einsum>=2.3.2
  Downloading opt_einsum-3.3.0-py3-none-any.whl (65 kB)
[K     |████████████████████████████████| 65 kB 27.0 MB/s eta 0:00:01
Collecting flatbuffers>=23.1.21
  Downloading flatbuffers-23.5.26-py2.py3-none-any.whl (26 kB)
Collecting libclang>=13.0.0
  Downloading libclang-16.0.0-py2.py3-none-manylinux2010_x86_64.whl (22.9 MB)
[K     |████████████████████████████████| 22.9 MB 33.1 MB/s eta 0:00:01
Collecting grpcio<2.0

In [15]:
import tensorflow as tf

2023-07-08 10:11:22.007414: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
best_loss = np.inf
c = 0
model = GCN(hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.BCELoss()

PATH = 'saved_model.pt'

def train(best_loss, c):

    model.train()

    for data in train_loader:
        out = model(data.x, data.edge_index, data.batch)
        loss_train = criterion(out.float(), data.y.view(-1,1).float())        
        loss = Variable(loss_train, requires_grad = True)
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        optimizer.zero_grad()  # Clear gradients.
    


def test(loader):
     model.eval()

     correct = 0
     for data in loader:  # Iterate in batches over the training/test dataset.
         out = model(data.x, data.edge_index, data.batch)  
         #pred = out.argmax(dim=1)  # Use the class with highest probability.
         correct += int((out.T == data.y).sum())  # Check against ground-truth labels.
     return correct / len(loader.dataset)  # Derive ratio of correct predictions.


for epoch in range(100):

    train(best_loss, c)

    #extract unique batch for validation, model needs batch
    for val in val_loader:
        out_val = model(val.x, val_dataset.edge_index, val.batch)
        loss_val = criterion(out_val.float(), val_dataset.y.view(-1,1).float())
    #compare with the validation set
    if loss_val.item() < best_loss:
        best_loss = loss_val.item()
        torch.save(model.state_dict(), PATH)
        c = 0
    else:
        c+=1
    
    if c >= 15:
        break


In [17]:
# GET BEST MODEL
model.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [18]:
test(test_loader)

0.25

In [19]:
model.eval()
for data in test_loader:  # Iterate in batches over the training/test dataset.
    out = model(data.x, data.edge_index, data.batch)  
    pred = out.argmax(dim=1)  # Use the class with highest probability.
    print(pred)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [20]:
print(test_dataset.y)

tensor([0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0])
