In [1]:
import os
os.chdir("/home/tim/Development/OCPPM/")
import pickle
import numpy as np
import random
import pandas as pd
import torch
from torch_geometric.nn import HGTConv, GATConv, Linear, to_hetero
import torch_geometric.transforms as T
import torch.nn.functional as F
import torch.optim as O

# from ....models.geometric_models import AGNN, CGNN

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
ofg_in_file = "data/BPI17/feature_encodings/OFG/ofg/raw/BPI17_OFG.pkl"
with open(ofg_in_file, "rb") as het_data_pkl:
    het_data = pickle.load(het_data_pkl)

# het_data = T.AddSelfLoops()(het_data)
# het_data = T.NormalizeFeatures()(het_data)

In [3]:
het_data

HeteroData(
  [1mapplication[0m={
    y=[31509],
    x=[31509, 18]
  },
  [1moffer[0m={
    y=[42995],
    x=[42995, 8]
  },
  [1m(application, interacts, application)[0m={ edge_index=[2, 0] },
  [1m(application, interacts, offer)[0m={ edge_index=[2, 42995] },
  [1m(offer, interacts, offer)[0m={ edge_index=[2, 6027] }
)

In [4]:
class GAT(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GATConv((-1, -1), hidden_channels, add_self_loops=False)
        self.lin1 = Linear(-1, hidden_channels)
        self.conv2 = GATConv((-1, -1), out_channels, add_self_loops=False)
        self.lin2 = Linear(-1, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index) + self.lin1(x)
        x = x.relu()
        x = self.conv2(x, edge_index) + self.lin2(x)
        return x

class HGT(torch.nn.Module):
    def __init__(self,data, hidden_channels, out_channels, num_heads, num_layers):
        super().__init__()

        self.lin_dict = torch.nn.ModuleDict()
        for node_type in data.node_types:
            self.lin_dict[node_type] = Linear(-1, hidden_channels)

        self.convs = torch.nn.ModuleList()
        for _ in range(num_layers):
            conv = HGTConv(hidden_channels, hidden_channels, data.metadata(),
                           num_heads, group='sum')
            self.convs.append(conv)

        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x_dict, edge_index_dict):
        for node_type, x in x_dict.items():
            x_dict[node_type] = self.lin_dict[node_type](x).relu_()

        for conv in self.convs:
            x_dict = conv(x_dict, edge_index_dict)

        return self.lin(x_dict['offer'])


In [5]:
df = pd.DataFrame(
    het_data['offer'].x,
    columns=[
        "event_NumberOfTerms",
        "event_Accepted",
        "event_Selected",
        "event_OfferedAmount",
        "event_CreditScore",
        "event_FirstWithdrawalAmount",
        "event_MonthlyCost",
    ],
)
y = df['event_NumberOfTerms'].values
df = df.drop('event_NumberOfTerms', axis=1)
x = df.values
het_data['offer'].x = torch.tensor(x)
het_data['offer'].y = torch.tensor(y)

ValueError: Shape of passed values is (42995, 8), indices imply (42995, 7)

In [6]:
# model = HGT(het_data, hidden_channels=64, out_channels=1, num_heads=2, num_layers=2)
model = GAT(hidden_channels=32, out_channels=1)
model = to_hetero(model, het_data.metadata(), aggr="sum")
model.double()

# with torch.no_grad():  # Initialize lazy modules.
#      out = model(het_data.x_dict, het_data.edge_index_dict)


GraphModule(
  (conv1): ModuleDict(
    (application__interacts__application): GATConv((-1, -1), 32, heads=1)
    (application__interacts__offer): GATConv((-1, -1), 32, heads=1)
    (offer__interacts__offer): GATConv((-1, -1), 32, heads=1)
  )
  (lin1): ModuleDict(
    (application): Linear(-1, 32, bias=True)
    (offer): Linear(-1, 32, bias=True)
  )
  (conv2): ModuleDict(
    (application__interacts__application): GATConv((-1, -1), 1, heads=1)
    (application__interacts__offer): GATConv((-1, -1), 1, heads=1)
    (offer__interacts__offer): GATConv((-1, -1), 1, heads=1)
  )
  (lin2): ModuleDict(
    (application): Linear(-1, 1, bias=True)
    (offer): Linear(-1, 1, bias=True)
  )
)

In [7]:
def train(model, data, optimizer, loss_fn):
    model.train()
    optimizer.zero_grad()
    out = model(data.x_dict, data.edge_index_dict)
    mask = data["offer"].train_mask
    loss = loss_fn(out["offer"][mask], data["offer"].y[mask])
    loss.backward()
    optimizer.step()
    return float(loss)

In [8]:
het_data

HeteroData(
  [1mapplication[0m={
    y=[31509],
    x=[31509, 18]
  },
  [1moffer[0m={
    y=[42995],
    x=[42995, 8]
  },
  [1m(application, interacts, application)[0m={ edge_index=[2, 0] },
  [1m(application, interacts, offer)[0m={ edge_index=[2, 42995] },
  [1m(offer, interacts, offer)[0m={ edge_index=[2, 6027] }
)

In [9]:
# from torch_geometric.sampler import HeteroSamplerOutput, HGTSampler
# from torch_geometric.loader import HGTLoader

# het_data = T.RandomNodeSplit()(het_data)
# loader = HGTLoader(het_data, 
#                    num_samples={key: [10_000] for key in het_data.node_types},
#                    input_nodes=('offer', het_data['offer'].train_mask))


In [10]:
het_data = T.RandomNodeSplit()(het_data)

optimizer = O.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
loss = train(model,het_data,optimizer, F.l1_loss)
loss

  loss = loss_fn(out["offer"][mask], data["offer"].y[mask])


: 

: 

In [None]:
loss
# 1.3763950517127197

10588784.253657103

In [None]:
net.

AssertionError: 