<a href="https://colab.research.google.com/github/LeoVogiatzis/GNN_based_NILM/blob/main/REDD_graph_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git


In [14]:
import networkx as nx
import pandas as pd
import torch
import torch_geometric
from torch_geometric.data import Dataset, Data
import numpy as np
import os
from tqdm import tqdm

In [15]:
print(f"Torch version: {torch.__version__}")
print(f"Cuda available: {torch.cuda.is_available()}")
print(f"Torch geometric version: {torch_geometric.__version__}")

Torch version: 1.10.0+cu111
Cuda available: True
Torch geometric version: 2.0.2


In [16]:
import io
from google.colab import files
uploaded = files.upload()

Saving mains_1.graphml to mains_1.graphml


In [25]:
class NilmDataset(Dataset):
    def __init__(self, root, filename, test=False, transform=None, pre_transform=None):
        """2
        root = Where the dataset should be stored. This folder is split 
        into raw_dir (downloaded dataset) and processed_dir (processed data).
        """
        self.test = test
        self.filename = filename
        super(NilmDataset, self).__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        """ If this file exists in raw_dir, the download is not triggered.
            (The download func. is not implemented here)
        """
        return self.filename

    @property
    def processed_file_names(self):
        """ If these files are found in raw_dir, processing is skipped"""
        self.data = pd.read_csv(self.raw_paths[0]).reset_index()

        if self.test:
            return [f'data_test_{i}.pt' for i in list(self.data.index)]
        else:
            return [f'data_{i}.pt' for i in list(self.data.index)]

    def download(self):
        pass

    def process(self):
        self.G = nx.read_graphml(self.raw_paths[0])
        print(len(self.G.nodes), len(self.G.edges))
        # TODO: read graphs below
            # Get node features
        node_feats = self._get_node_features(self.G)
            # Get edge features
        edge_feats = self._get_edge_features(self.G)
            # Get adjacency info
        edge_index = self._get_adjacency_info(self.G)
            # Get labels info
        labels = self._get_labels(nx.get_node_attributes(self.G, 'state'))  # pass label here. E.g. if it is a column for this graph it could be graph_csv['label']

            # Create data object
        # self.data = Data(x=node_feats, edge_index=edge_index, y=labels)
        self.data = Data(x=node_feats, edge_index=edge_index, edge_attr=edge_feats, y=labels)
        if self.test:
          torch.save(self.data, os.path.join(self.processed_dir, 'data_test_0.pt'))
        else:
          torch.save(self.data, os.path.join(self.processed_dir, 'data_0.pt'))

    def _get_node_features(self, graph):
        """
        This will return a matrix / 2d array of the shape
        [Number of Nodes, Node Feature size]

        We could also use torch_geometric.from_networkx to create a Data object
        with both adjacency and features, but instead we do it manually here
        """
        all_node_feats = list(nx.get_node_attributes(graph, 'drift').values())

        all_node_feats = np.asarray(all_node_feats)
        all_node_feats = all_node_feats.reshape((-1, 1))
        return torch.tensor(all_node_feats, dtype=torch.float)

    def _get_edge_features(self, graph):
      """
      """
      all_edge_feats = []
      for e in graph.edges(data=True):
        all_edge_feats += [[e[2]['gaussian_kernel']], [e[2]['gaussian_kernel']]]

      return torch.tensor(all_edge_feats, dtype=torch.float)

    def _get_adjacency_info(self, graph):
        """
        We could also use torch_geometric.from_networkx to create a Data object
        with both adjacency and features, but instead we do it manually here
        """

        edge_indices = []
        for edge in graph.edges:
            i = int(edge[0])  # get source
            j = int(edge[1])  # get destination
            edge_indices += [[i, j], [j, i]]  # undirected graph

        edge_indices = torch.tensor(edge_indices)
        edge_indices = edge_indices.t().to(torch.long).view(2, -1)
        return edge_indices

    def _get_labels(self, labels):
        labels = list(labels.values())
        labels = np.asarray(labels)
        return torch.tensor(labels, dtype=torch.int64)

    def len(self):
        return self.data.shape[0]

    def get(self, idx):
        """ - Equivalent to __getitem__ in pytorch
            - Is not needed for PyG's InMemoryDataset
        """
        if self.test:
            data = torch.load(os.path.join(self.processed_dir, f'data_test_{idx}.pt'))
        else:
            data = torch.load(os.path.join(self.processed_dir, f'data_{idx}.pt'))
        return data

In [26]:
data = NilmDataset(root='data', filename='mains_1.graphml')
print(data.data)

159 2370
Data(x=[159, 1], edge_index=[2, 4740], edge_attr=[4740, 1], y=[159])


Processing...
Done!


In [19]:
# missing_values = ['?', '--', ' ', 'NA', 'N/A', '-'] #Sometimes Missing Values are't in form of NaN
# data = nx.read_graphml(io.BytesIO(uploaded['mains_1.csv'])) 
# print('There are Total {} datapoints in the dataset with {} Features listed as {}:'.format(df.shape[0], df.shape[1], df.columns.values))

In [20]:
from torch_geometric.utils.convert import from_networkx
g = from_networkx(G, group_node_attrs=['Timestamp','drift'])

NameError: ignored

In [None]:
g.num_node_features


In [None]:
data = g
print(data)
print('==============================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
# print(f'Number of training nodes: {data.train_mask.sum()}')
# print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

In [None]:
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

edge_index = g.edge_index
print(edge_index.t())

In [None]:
import torch
from torch.nn import Linear
from torch_geometric.nn import GCNConv


class GCN(torch.nn.Module):
    def __init__(self):
        super(GCN, self).__init__()
        torch.manual_seed(1234)
        self.conv1 = GCNConv(data.num_features, 4)
        self.conv2 = GCNConv(4, 4)
        # self.conv3 = GCNConv(4, 2)
        self.classifier = Linear(2, 2)

    def forward(self, x, edge_index):
        h = self.conv1(x, edge_index)
        h = h.tanh()
        h = self.conv2(h, edge_index)
        h = h.tanh()
        
        # Apply a final (linear) classifier.
        out = self.classifier(h)

        return out, h

model = GCN()
print(model)

In [None]:
data.num_node_features
G.nodes(data=True)