# Torch Geometric 

Data Processing

In [None]:
import pandas as pd
import torch
from torch_geometric.data import Data, Batch
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
from torch_geometric.data import DataLoader



# Feature value used for a node that has no column in the dataset. It lies
# outside MinMaxScaler's default [0, 1] output range, and the per-node mask
# additionally flags such nodes as "no data".
MISSING_NODE_VALUE = 2.0

# Load the sensor readings, drop incomplete rows and scale every column.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so test rows influence the scaling range - confirm that this
# is acceptable for the evaluation.
df = pd.read_csv('SensorData.csv').dropna()
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)


# Node i of every graph corresponds to nodes_order[i].
nodes_order = [
    'Sensor1', 'Sensor2', 'Sensor3', 'Sensor4',
    'Sensor5', 'Sensor6', 'Sensor7', 'Sensor8'
]

# Directed connectivity between the nodes, as indices into nodes_order.
edges_directed = torch.tensor([
    [0, 1, 2, 2, 3, 3, 6, 2],  # source nodes
    [1, 2, 3, 4, 5, 6, 2, 7]   # target nodes
], dtype=torch.long)


# Swap the source/target rows to obtain the reverse direction of every edge.
edges_reversed = edges_directed[[1, 0], :]

# Original + reversed edges make the graph effectively undirected.
edges = torch.cat([edges_directed, edges_reversed], dim=1)

# NOTE: edge_index is stored transposed, i.e. with shape [num_edges, 2],
# because GNNModel.forward transposes it again before the convolutions.
# NOTE(review): this differs from the usual [2, num_edges] layout and is only
# exercised with DataLoader(batch_size=1) in this notebook - verify before
# batching several graphs together.
edge_index = edges.t().contiguous()

# Whether each node has a data column; this is the same for every row, so it
# is computed once instead of once per observation.
node_has_data = [node in df_scaled.columns for node in nodes_order]
node_data_mask = [1 if has_data else 0 for has_data in node_has_data]

graphs = []

# Build one graph per observation (row). Each node carries exactly one
# feature: its scaled reading, or MISSING_NODE_VALUE when the node has no
# column. Every entry is a one-element list so the feature tensor always has
# shape [num_nodes, 1]; appending a bare scalar for missing nodes would make
# the list ragged and torch.tensor would fail.
for _, row in df_scaled.iterrows():
    node_features = [
        [row[node]] if has_data else [MISSING_NODE_VALUE]
        for node, has_data in zip(nodes_order, node_has_data)
    ]

    node_features_tensor = torch.tensor(node_features, dtype=torch.float)
    # mask: 1 = data present for the node, 0 = no data
    node_data_mask_tensor = torch.tensor(node_data_mask, dtype=torch.float)

    # Create a Data object for this row/graph
    graph_data = Data(x=node_features_tensor, edge_index=edge_index, mask=node_data_mask_tensor)
    graphs.append(graph_data)


#### splitting the data into train, test observation
# df_scaled has a fresh 0..n-1 index, so its labels are positions in `graphs`.
observation_indices = df_scaled.index.tolist()
train_indices, test_indices = train_test_split(observation_indices, test_size=0.05, random_state=42)

# Create training and testing graphs
train_graphs = [graphs[i] for i in train_indices]
test_graphs = [graphs[i] for i in test_indices]

Convert back to NetworkX for visualisation

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# Rebuild the sensor graph in NetworkX, labelling nodes by sensor name.
# `edges` holds each connection in both directions; an undirected nx.Graph
# stores every such pair as a single edge.
G = nx.Graph()
source_ids, target_ids = edges.tolist()
for src, dst in zip(source_ids, target_ids):
    G.add_edge(nodes_order[src], nodes_order[dst])

# Draw the graph with a force-directed (spring) layout.
plt.figure(figsize=(10, 8))
pos = nx.spring_layout(G)
draw_options = {
    'with_labels': True,
    'node_color': 'lightblue',
    'edge_color': 'gray',
    'node_size': 2000,
    'font_weight': 'bold',
}
nx.draw(G, pos, **draw_options)
plt.title('Graph Visualization')
plt.show()

Model Definition

In [None]:
from torch_geometric.nn import GATConv
import torch.nn.functional as F
import torch.nn as nn

class GNNModel(nn.Module):
    """Two GATConv layers followed by a linear head giving one value per node.

    The model can be asked to hide one node's features (they are replaced by
    zeros) so that the value at that node has to be predicted from the
    features of the other nodes.
    """

    def __init__(self, num_node_features):
        """Build the layers.

        Args:
            num_node_features: Number of input features per node.
        """
        super().__init__()
        self.conv1 = GATConv(num_node_features, 16)
        self.conv2 = GATConv(16, 8)
        self.fc = nn.Linear(8, 1)  # Outputting a single value per node

    def forward(self, data, target_node_idx=None):
        """Compute one output value per node.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.
                ``edge_index`` may be given either as ``[num_edges, 2]``
                (the layout this notebook stores) or as ``[2, num_edges]``.
            target_node_idx: Optional index of the node whose features are
                zeroed before message passing. ``None`` leaves all features
                untouched.

        Returns:
            Tensor with one row per node and a single column.
        """
        x, edge_index = data.x, data.edge_index

        # The convolutions need edge_index as [2, num_edges]. The graphs built
        # in this notebook store it as [num_edges, 2], so that layout is
        # transposed. A 2x2 tensor is ambiguous and is treated as
        # [num_edges, 2], which matches the previous always-transpose behavior.
        if edge_index.size(0) != 2 or edge_index.size(1) == 2:
            edge_index = edge_index.t()

        # Mask the target node's feature with a value of zero!
        # Aim is to predict this value from the features of the neighbours.
        # Work on a copy so the caller's data.x still holds the true target.
        if target_node_idx is not None:
            x = x.clone()
            x[target_node_idx] = 0

        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.1, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=0.1, training=self.training)
        x = self.fc(x)

        return x


In [None]:

model = GNNModel(num_node_features=1)
# Number of graphs whose losses are summed before one optimizer step
# (gradient accumulation; the loader itself yields one graph at a time).
batch_size = 8
pretrained = True

# Load the state dictionary
if pretrained:
    model_path = "GNNmodel_state_dict_Batched_new2.pth"
    # SECURITY NOTE: torch.load unpickles the file - only load checkpoints
    # from a trusted source. map_location="cpu" lets a checkpoint saved on a
    # GPU machine load here; the model is never moved off the CPU.
    model.load_state_dict(torch.load(model_path, map_location="cpu"))


optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-6)

# Mean squared error between the masked node's prediction and its true value.
# torch.nn.HuberLoss(delta=1.0) was tried as an alternative criterion.
criterion = torch.nn.MSELoss()

num_epochs = 200  # Or any other number of epochs
train_loader = DataLoader(train_graphs, batch_size=1, shuffle=True)

model.train()

# Not filled during training; the testing cell re-creates and fills them.
actual = []
pred = []

for epoch in range(num_epochs):
    accumulated_loss = 0
    optimizer.zero_grad()
    loss = 0  # Initialize loss for accumulation
    for batch_idx, data in enumerate(train_loader):
        mask = data.mask
        # NOTE(review): node 0 and node 4 are never used as training targets;
        # the reason is not visible in this notebook - confirm it is intended.
        for i in range(1, data.num_nodes):
            if mask[i] == 1 and i != 4:  # Only train on nodes with data
                output = model(data, i)  # Get predictions with the target node masked
                target = data.x[i]  # true value of the masked node, shape [1]
                prediction = output[i].view(1)  # reshape to match the target
                loss += criterion(prediction, target)

        # Update parameters after every `batch_size` graphs and at the end of
        # the epoch.
        window_done = (batch_idx + 1) % batch_size == 0
        last_graph = (batch_idx + 1) == len(train_loader)
        if window_done or last_graph:
            # `loss` is still the int 0 when no node in this window qualified
            # as a target; there is nothing to backpropagate in that case.
            if torch.is_tensor(loss):
                loss.backward()  # Backpropagate accumulated loss
                optimizer.step()
                accumulated_loss += loss.item()
            optimizer.zero_grad()
            loss = 0

    # Summed node losses divided by the number of graphs in the loader; the
    # max() guards against an empty training set.
    average_loss = accumulated_loss / max(len(train_loader), 1)
    print(f'Epoch {epoch+1}, Average Loss: {average_loss}')

In [None]:
# Persist the trained weights (state dict only, not the whole model object)
# under the same file name the training cell loads when `pretrained` is set.
model_path = "GNNmodel_state_dict_Batched_new2.pth"
trained_weights = model.state_dict()
torch.save(trained_weights, model_path)


Testing

In [None]:
# One graph per batch; the order does not matter for evaluation, so the loader
# is not shuffled and results come out in a reproducible order.
test_loader = DataLoader(test_graphs, batch_size=1, shuffle=False)

# Evaluation mode: disables the dropout applied in GNNModel.forward.
model.eval()

# True values and predictions of the masked nodes, one 1-element tensor each.
actual = []
pred = []

# No gradients are needed for evaluation. Without no_grad() every stored
# prediction would keep its whole autograd graph alive.
with torch.no_grad():
    for data in test_loader:
        mask = data.mask
        # NOTE(review): only nodes 1-3 are evaluated, while training targets
        # more nodes - confirm this restriction is intended.
        for i in range(1, 4):
            if mask[i] == 1:
                # Hide node i's feature and predict it from the other nodes.
                output = model(data, i)
                prediction = output[i].view(1)
                target = data.x[i]

                actual.append(target)
                pred.append(prediction)


In [None]:
import plotly.graph_objects as go
from plotly.offline import iplot

# Convert the collected 1-element tensors into plain Python floats.
actual_values_float = [tensor.item() for tensor in actual]
pred_values_float = [tensor.item() for tensor in pred]


# Hollow circles: transparent fill with a dark red outline.
scatter_marker = {
    'size': 10,
    'opacity': 0.5,
    'color': 'rgba(255,255,255,0)',
    'line': {
        'width': 2,
        'color': 'rgba(152, 0, 0, .8)',
    },
}
scatter_trace = go.Scatter(
    x=actual_values_float,
    y=pred_values_float,
    mode='markers',
    marker=scatter_marker,
    name='Actual vs Predicted'
)

# Diagonal y = x across the range of actual values: a perfect prediction
# would place every point on this line.
lowest_actual = min(actual_values_float)
highest_actual = max(actual_values_float)
line_trace = go.Scatter(
    x=[lowest_actual, highest_actual],
    y=[lowest_actual, highest_actual],
    mode='lines',
    marker={'color': 'blue'},
    name='Perfect Prediction'
)

data = [scatter_trace, line_trace]

layout = {
    'title': 'Actual vs Predicted Values',
    'xaxis': {'title': 'Actual Values'},
    'yaxis': {'title': 'Predicted Values'},
    'autosize': False,
    'width': 800,
    'height': 600,
}

fig = {'data': data, 'layout': layout}

iplot(fig)
