In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
cmap = matplotlib.colormaps.get('tab10').colors
import torch
import torch_geometric as tg
from gen_autoencoder_dataset import AutoencoderDataset

In [92]:
n = 30
dataset = AutoencoderDataset(root='autoencoder_graphs', n_graphs_per_type=n)
#dataset.process()
train_loader = tg.loader.DataLoader(dataset[:n*12], batch_size=1, shuffle=True)
test_loader = tg.loader.DataLoader(dataset[n*12:], batch_size=1, shuffle=True)
train_loader.dataset[2]


Data(x=[250, 3], edge_index=[2, 3310], y=[250, 1], pos=[250, 3])

In [3]:
class GCNEncoder(torch.nn.Module):
    def __init__(self, in_channels):
        super(GCNEncoder, self).__init__()
        self.conv1 = tg.nn.GCNConv(in_channels, 2) 
        self.conv2 = tg.nn.GCNConv(2, 1) 

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)
  

In [4]:
# parameters
num_features = dataset.num_features
epochs = 10

# model
model = tg.nn.GAE(GCNEncoder(num_features))

# move to GPU (if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [5]:
def train(loader):
    model.train()
    avg_loss = 0
    for data in loader:
        optimizer.zero_grad()
        z = model.encode(data.x, data.edge_index)
        loss = model.recon_loss(z, data.edge_index)
        avg_loss += loss.item()
        loss.backward()
        optimizer.step()
    return avg_loss / len(loader)

for epoch in range(1, epochs+1):
    loss = train(train_loader)
    print('Epoch: {:03d}, Loss: {:.4f}'.format(epoch, loss))
    

Epoch: 001, Loss: 1.5988
Epoch: 002, Loss: 1.3423
Epoch: 003, Loss: 1.3295
Epoch: 004, Loss: 1.3245
Epoch: 005, Loss: 1.3232
Epoch: 006, Loss: 1.3174
Epoch: 007, Loss: 1.3020
Epoch: 008, Loss: 1.3034
Epoch: 009, Loss: 1.2958
Epoch: 010, Loss: 1.2883


In [3]:
data = dataset[0]
z = model.encode(data.x, data.edge_index)
print(model.decode(z, data.edge_index))

NameError: name 'model' is not defined

In [4]:
import pygod
import torch_sparse

In [61]:
model = pygod.detector.DOMINANT(gpu=-1, contamination=1/5**3, verbose=1, epoch=10)
for epoch in range(50):
    for i in range(10):
        data = dataset[i]
        model = model.fit(data)



Epoch 0000: Loss 95.6572 | 
Epoch 0001: Loss 44.7497 | 
Epoch 0002: Loss 19.6220 | 
Epoch 0003: Loss 9.4738 | 
Epoch 0004: Loss 7.0847 | 
Epoch 0005: Loss 7.9077 | 
Epoch 0006: Loss 9.1893 | 
Epoch 0007: Loss 9.5983 | 
Epoch 0008: Loss 9.0204 | 
Epoch 0009: Loss 7.8943 | 
Epoch 0000: Loss 54.4753 | 
Epoch 0001: Loss 22.4479 | 
Epoch 0002: Loss 9.2137 | 
Epoch 0003: Loss 5.4009 | 
Epoch 0004: Loss 5.0982 | 
Epoch 0005: Loss 5.5107 | 
Epoch 0006: Loss 5.6200 | 
Epoch 0007: Loss 5.2904 | 
Epoch 0008: Loss 4.7511 | 
Epoch 0009: Loss 4.2312 | 
Epoch 0000: Loss 345.5786 | 
Epoch 0001: Loss 64.3878 | 
Epoch 0002: Loss 40.4716 | 
Epoch 0003: Loss 39.8365 | 
Epoch 0004: Loss 30.7774 | 
Epoch 0005: Loss 21.9629 | 
Epoch 0006: Loss 15.8649 | 
Epoch 0007: Loss 11.9904 | 
Epoch 0008: Loss 9.5632 | 
Epoch 0009: Loss 7.9850 | 
Epoch 0000: Loss 270.1629 | 
Epoch 0001: Loss 124.8473 | 
Epoch 0002: Loss 59.9586 | 
Epoch 0003: Loss 37.3742 | 
Epoch 0004: Loss 31.4227 | 
Epoch 0005: Loss 30.5062 | 
Epoch 

In [64]:
data = dataset[2]
outliers = model.predict(data)
print('Predicted bad node at position:', torch.argmax(outliers))
print('Real bad node at position:', torch.argmax(data.y))

Test: Loss 182.4610 | 
Predicted bad node at position: tensor(0)
Real bad node at position: tensor(117)


In [45]:
outliers

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1])

## New idea: Do on my own

In [107]:
num_features = dataset.num_features
alpha = 0.5 # weight of the attribute loss


class dominant(torch.nn.Module):
    def __init__(self):
        super(dominant, self).__init__()
        self.encoder = torch.nn.ModuleList()
        self.encoder.append(tg.nn.GCNConv(num_features, 2))
        self.encoder.append(tg.nn.GCNConv(2, 1))
        
        self.struct_decoder = tg.nn.InnerProductDecoder()
        self.attr_decoder =torch.nn.ModuleList()
        self.attr_decoder.append(tg.nn.GCNConv(1, 2))
        self.attr_decoder.append(tg.nn.GCNConv(2, num_features))
    def forward(self, x, edge_index):
        # encoder
        print(x.shape, edge_index.shape)
        for layer in self.encoder:
            z = layer(x, edge_index).relu()
        # decoder
        adj = self.struct_decoder(z, edge_index) # structure decoder
        for layer in self.attr_decoder:
            x_ = layer(z, edge_index).relu() # attribute decoder
        return x_, adj
    
        
        
model = dominant()
print(model)

def loss_func(x, x_, edge_index, adj):
    print(f"x shape: {x.shape}, x_ shape: {x_.shape}")
    print(f"edge_index shape: {edge_index.shape}, adj shape: {adj.shape}")
    edge_index = edge_index.type(torch.float)
    adj = adj.type(torch.float)
    return alpha * torch.nn.functional.mse_loss(x, x_) + (1-alpha) * torch.nn.functional.mse_loss(adj, edge_index)

optimizer = torch.optim.Adam(model.parameters())

dominant(
  (encoder): ModuleList(
    (0): GCNConv(3, 2)
    (1): GCNConv(2, 1)
  )
  (struct_decoder): InnerProductDecoder()
  (attr_decoder): ModuleList(
    (0): GCNConv(1, 2)
    (1): GCNConv(2, 3)
  )
)


In [108]:
# training:
epochs = 10
def train(loader):
    model.train()
    avg_loss = 0
    for data in loader:
        x_, adj = model.forward(data.x, data.edge_index)
        loss = loss_func(data.x, x_, data.edge_index, adj)
        avg_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return avg_loss / len(loader)
for epoch in range(1, epochs+1):
    loss = train(train_loader)
    print('Epoch: {:03d}, Loss: {:.4f}'.format(epoch, loss))

torch.Size([250, 3]) torch.Size([2, 3384])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (250x3 and 2x1)