<a href="https://colab.research.google.com/github/LeoVogiatzis/GNN_based_NILM/blob/main/notebooks/Nilm_graph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !pip install pandas
# !pip install networkx
# !pip install torch-scatter -f https://data.pyg.org/whl/torch-1.9.0+cu111.html
# !pip install torch-sparse -f https://data.pyg.org/whl/torch-1.9.0+cu111.html
# !pip install torch-geometric
# !pip install torch
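# Install required packages. NOTE: the wheel index used below is built for torch-1.10.0+cu113; keep it in sync with the torch/CUDA build that torch.__version__ reports in this runtime.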
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git


[K     |████████████████████████████████| 7.9 MB 6.9 MB/s 
[K     |████████████████████████████████| 3.5 MB 5.4 MB/s 
[K     |████████████████████████████████| 407 kB 5.1 MB/s 
[K     |████████████████████████████████| 45 kB 3.0 MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [None]:
# !pip install torch-scatter     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
# !pip install torch-sparse      -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
# !pip install torch-cluster     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
# !pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
# !pip install torch-geometric
import networkx as nx
import pandas as pd
import torch
import torch_geometric
from torch_geometric.data import Dataset, Data
import numpy as np
import os
from tqdm import tqdm

In [None]:
# Report the library versions and CUDA availability of the current runtime.
print(f"Torch version: {torch.__version__}")
print(f"Cuda available: {torch.cuda.is_available()}")
print(f"Torch geometric version: {torch_geometric.__version__}")


Torch version: 1.10.0+cu111
Cuda available: True
Torch geometric version: 2.0.2


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch_geometric.transforms import RandomLinkSplit, RandomNodeSplit


In [None]:
class NilmDataset(Dataset):
    """Single-graph dataset built from one GraphML file.

    The raw file is read with networkx and converted into one ``Data``
    object: node features come from the ``drift`` node attribute, labels
    from the ``state`` node attribute, and every edge is stored in both
    directions. The object is written to ``processed_dir`` and is also
    available as ``self.data``.
    """

    def __init__(self, root, filename, test=False, transform=None, pre_transform=None):
        """
        root = Where the dataset should be stored. This folder is split
        into raw_dir (downloaded dataset) and processed_dir (processed data).
        filename = Name of the GraphML file expected in raw_dir.
        test = If True, the processed file is 'data_test_0.pt' instead of
        'data_0.pt'.
        """
        self.test = test
        self.filename = filename
        # Filled in by process(); stays None when processing is skipped.
        self.data = None
        super().__init__(root, transform, pre_transform)
        if self.data is None:
            # The processed file already existed, so process() did not run:
            # load the stored graph so that `self.data` is always usable.
            self.data = self.get(0)

    @property
    def raw_file_names(self):
        """ If this file exists in raw_dir, the download is not triggered.
            (The download func. is not implemented here)
        """
        return self.filename

    @property
    def processed_file_names(self):
        """ If these files are found in processed_dir, processing is skipped"""
        # process() writes exactly one file, so exactly one name is listed.
        return [self._processed_name(0)]

    def _processed_name(self, idx):
        """Return the processed file name used for graph number `idx`."""
        if self.test:
            return f'data_test_{idx}.pt'
        return f'data_{idx}.pt'

    def download(self):
        pass

    def process(self):
        """Read the GraphML file, build the Data object and save it."""
        self.G = nx.read_graphml(self.raw_paths[0])
        print(len(self.G.nodes), len(self.G.edges))

        # Get node features
        node_feats = self._get_node_features(self.G)
        # Get adjacency info
        edge_index = self._get_adjacency_info(self.G)
        # Get labels info
        labels = self._get_labels(nx.get_node_attributes(self.G, 'state'))

        # Create data object. Edge features are not attached here; use
        # _get_edge_features(self.G) as `edge_attr` if they are needed.
        self.data = Data(x=node_feats, edge_index=edge_index, y=labels)

        torch.save(self.data, os.path.join(self.processed_dir, self._processed_name(0)))

    def _get_node_features(self, graph):
        """
        This will return a matrix / 2d array of the shape
        [Number of Nodes, Node Feature size]

        The single feature per node is the value of its 'drift' attribute.
        We could also use torch_geometric.from_networkx to create a Data object
        with both adjacency and features, but instead we do it manually here
        """
        all_node_feats = list(nx.get_node_attributes(graph, 'drift').values())

        all_node_feats = np.asarray(all_node_feats)
        all_node_feats = all_node_feats.reshape((-1, 1))
        return torch.tensor(all_node_feats, dtype=torch.float)

    def _get_edge_features(self, graph):
        """
        This will return a matrix of shape [2 * Number of Edges, 1] holding
        the 'gaussian_kernel' attribute of every edge. Each value appears
        twice because _get_adjacency_info stores every edge in both
        directions.
        """
        all_edge_feats = []
        for _, _, attrs in graph.edges(data=True):
            kernel = attrs['gaussian_kernel']
            all_edge_feats += [[kernel], [kernel]]

        # reshape keeps the [N, 1] layout even when the graph has no edges.
        return torch.tensor(all_edge_feats, dtype=torch.float).reshape(-1, 1)

    def _get_adjacency_info(self, graph):
        """
        Return the edge index as a long tensor of shape [2, 2 * Number of Edges].

        We could also use torch_geometric.from_networkx to create a Data object
        with both adjacency and features, but instead we do it manually here
        """
        # Map node identifiers to consecutive integer positions.
        nodes = {n: i for i, n in enumerate(graph.nodes())}

        edge_indices = []
        for edge in graph.edges:
            i = nodes[edge[0]]  # get source
            j = nodes[edge[1]]  # get destination
            edge_indices += [[i, j], [j, i]]  # undirected graph

        # reshape(-1, 2) also handles an empty edge list, giving shape [2, 0].
        edge_indices = torch.tensor(edge_indices, dtype=torch.long).reshape(-1, 2)
        return edge_indices.t().contiguous()

    def _get_labels(self, labels):
        """Convert a {node: label} mapping into an int64 label tensor."""
        labels = list(labels.values())
        labels = np.asarray(labels)
        return torch.tensor(labels, dtype=torch.int64)

    def len(self):
        """Number of graphs in the dataset (one processed file per graph)."""
        return len(self.processed_file_names)

    def get(self, idx):
        """ - Equivalent to __getitem__ in pytorch
            - Is not needed for PyG's InMemoryDataset
        """
        return torch.load(os.path.join(self.processed_dir, self._processed_name(idx)))

In [None]:
# Build the dataset from the GraphML file (looked up in the dataset's raw_dir
# under root='data') and print the node label tensor of the resulting graph.
data = NilmDataset(root='data', filename='dishwaser_20.graphml')
print(data.data.y)
# Alternative kept for reference: load the Planetoid/Cora benchmark instead.
# from torch_geometric.datasets import Planetoid
# from torch_geometric.transforms import NormalizeFeatures
# dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())
# data = dataset[0]
# print(data.y)

Processing...


410 41820
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0

Done!


In [None]:
# Alternative kept for reference: a node-level split of the single graph.
# transform = RandomNodeSplit()
# dataset = transform(data.data)
# print(dataset)

# Split the graph with RandomLinkSplit into train/val/test Data objects.
# NOTE(review): per the printed result, x and y are the same size in all three
# objects; only edge_index / edge_label / edge_label_index differ.
transform = RandomLinkSplit(is_undirected=True)
train_data, val_data, test_data = transform(data.data)
# train_data, val_data, test_data = transform(data)
print(train_data, val_data, test_data)

Data(x=[410, 1], edge_index=[2, 58548], y=[410], edge_label=[58548], edge_label_index=[2, 58548]) Data(x=[410, 1], edge_index=[2, 58548], y=[410], edge_label=[8364], edge_label_index=[2, 8364]) Data(x=[410, 1], edge_index=[2, 66912], y=[410], edge_label=[16728], edge_label_index=[2, 16728])


In [None]:
import torch
from torch.nn import Linear
from torch_geometric.nn import GCNConv
import torch.nn.functional as F

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network made of ``GCNConv`` layers."""

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # Seed the global RNG before the layers are constructed
        # (presumably to make the initial weights reproducible).
        torch.manual_seed(42)

        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return the output of the second convolution for every node.

        Pipeline: conv1 -> ReLU -> dropout (p=0.5, active only in training
        mode) -> conv2.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.conv2(hidden, edge_index)


In [None]:
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

# Input and hidden widths both equal the node-feature width; the output width
# is the number of distinct label values found in train_data.y.
model = GCN(
    train_data.x.shape[1],
    train_data.x.shape[1],
    len(np.unique(train_data.y)),
)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
    """Run one full-graph optimisation step on ``train_data``.

    Returns the loss tensor computed over all nodes of ``train_data``.
    """
    model.train()
    optimizer.zero_grad()  # start from clean gradients

    # Forward pass over the whole training graph.
    logits = model(train_data.x, train_data.edge_index)
    loss = criterion(logits, train_data.y)

    loss.backward()   # back-propagate
    optimizer.step()  # apply the parameter update
    return loss

def test():
    """Evaluate the model on ``test_data`` and return the node accuracy.

    The previous version read a global ``dataset`` and a ``test_mask`` that
    are never created in this notebook (``RandomLinkSplit`` yields
    ``test_data`` without node masks), so it raised ``NameError``.

    NOTE(review): the link split does not hold out nodes, so the accuracy is
    computed over every node of ``test_data`` — the same nodes ``train()``
    fits on. Use a node-level split for an unbiased estimate.

    Returns:
        Fraction of nodes whose predicted class equals the label, as a float
        (0.0 when there are no nodes).
    """
    model.eval()
    with torch.no_grad():  # no gradients are needed for evaluation
        out = model(test_data.x, test_data.edge_index)
    pred = out.argmax(dim=1)  # Use the class with highest score.
    test_correct = pred == test_data.y  # Check against ground-truth labels.
    total = int(test_data.y.numel())
    return int(test_correct.sum()) / total if total else 0.0


# Call train() once per epoch for epochs 1..100 and print the returned loss.
for epoch in range(1, 101):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

<IPython.core.display.Javascript object>

Epoch: 001, Loss: 0.5026
Epoch: 002, Loss: 0.5288
Epoch: 003, Loss: 0.4858
Epoch: 004, Loss: 0.4932
Epoch: 005, Loss: 0.4908
Epoch: 006, Loss: 0.4862
Epoch: 007, Loss: 0.4765
Epoch: 008, Loss: 0.4766
Epoch: 009, Loss: 0.4557
Epoch: 010, Loss: 0.4705
Epoch: 011, Loss: 0.4408
Epoch: 012, Loss: 0.4445
Epoch: 013, Loss: 0.4115
Epoch: 014, Loss: 0.4136
Epoch: 015, Loss: 0.3968
Epoch: 016, Loss: 0.4142
Epoch: 017, Loss: 0.3931
Epoch: 018, Loss: 0.4049
Epoch: 019, Loss: 0.4051
Epoch: 020, Loss: 0.3546
Epoch: 021, Loss: 0.3941
Epoch: 022, Loss: 0.3363
Epoch: 023, Loss: 0.3570
Epoch: 024, Loss: 0.3485
Epoch: 025, Loss: 0.3577
Epoch: 026, Loss: 0.3413
Epoch: 027, Loss: 0.3279
Epoch: 028, Loss: 0.3389
Epoch: 029, Loss: 0.3063
Epoch: 030, Loss: 0.3141
Epoch: 031, Loss: 0.3127
Epoch: 032, Loss: 0.2905
Epoch: 033, Loss: 0.2850
Epoch: 034, Loss: 0.3074
Epoch: 035, Loss: 0.2792
Epoch: 036, Loss: 0.2566
Epoch: 037, Loss: 0.2667
Epoch: 038, Loss: 0.2681
Epoch: 039, Loss: 0.2432
Epoch: 040, Loss: 0.2530
