In [1]:
# Install required packages.
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

import pandas as pd

# Helper function for visualization.
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

import torch_geometric.transforms as T
from torch_geometric.datasets import OGB_MAG
from torch_geometric.nn import SAGEConv, to_hetero
# impoort HeteroData
from torch_geometric.data import HeteroData



  from .autonotebook import tqdm as notebook_tqdm


1.13.1


In [2]:
# from torch_geometric.datasets import OGB_MAG
# import torch_geometric.transforms as T

# dataset = OGB_MAG(root='./data', preprocess='metapath2vec')
# data = dataset[0]

# print(data.metadata())
# data['institution'].x.shape
# data.x

In [17]:
# create a tensor of 84 nodes that are empty
x = torch.empty(84, 0)
x[0][0]

IndexError: index 0 is out of bounds for dimension 0 with size 0

In [45]:
def node_df_to_torch(df: pd.DataFrame):
    return torch.tensor(df.values, dtype=torch.float)

def edge_df_to_torch(df: pd.DataFrame):
    # assert edge_type in ["VarVal","ValOp", "OpVal"]
        return torch.tensor(df.index, dtype=torch.long).t().contiguous()

def problem_dfs(problem_path):
    """
    Returns the dataframes for the variables, values, operators, and their respective edges
    """
    variables_df = pd.read_csv(os.path.join(problem_path, 'variables.csv'), index_col=0)
    variables_df = variables_df.drop(columns=["is_goal"])
    
    values_df = pd.read_csv(os.path.join(problem_path, 'values.csv'), index_col=0)

    operators_df = pd.read_csv(os.path.join(problem_path, 'operators.csv'), index_col=0)

    val_var_df = pd.read_csv(os.path.join(problem_path, 'ValVar_edges.csv'), index_col=[0,1])
    val_op_df = pd.read_csv(os.path.join(problem_path, 'ValOp_edges.csv'), index_col=[0,1])
    val_op_df = val_op_df.drop(columns=["label"])
    op_val_df = pd.read_csv(os.path.join(problem_path, 'OpVal_edges.csv'), index_col=[0,1])
    op_val_df = op_val_df.drop(columns=["label"])

    return variables_df, values_df, operators_df, val_var_df, val_op_df, op_val_df

def build_hetero(
    variables_df,
    values_df,
    operators_df,
    val_var_df,
    val_op_df,
    op_val_df,
):
    hetero_data = HeteroData()
    hetero_data['variable'].x = node_df_to_torch(variables_df)
    hetero_data['value'].x = node_df_to_torch(values_df)
    hetero_data['operator'].x = x = torch.empty(len(operators_df), 0)
    hetero_data['operator'].y = node_df_to_torch(operators_df)

    hetero_data['variable', 'has_value', 'value'].edge_index = edge_df_to_torch(val_var_df)
    hetero_data['value', 'precondition', 'operator'].edge_index = edge_df_to_torch(val_op_df)
    hetero_data['operator', 'effect', 'value'].edge_index = edge_df_to_torch(op_val_df)

    VarVal = edge_df_to_torch(val_var_df)
    ValOp = edge_df_to_torch(val_op_df)
    OpVal = edge_df_to_torch(op_val_df)


    # return hetero_data
    return T.ToUndirected()(hetero_data)


dfs = problem_dfs("p1_2_1_2_5_1")
var_df, val_df, op_df, val_var_df, val_op_df, op_val_df = dfs

variables = node_df_to_torch(var_df)
values = node_df_to_torch(val_df)
operators = node_df_to_torch(op_df)

VarVal = edge_df_to_torch(val_var_df)
ValOp = edge_df_to_torch(val_op_df)
OpVal = edge_df_to_torch(op_val_df)

our_data = build_hetero(*dfs)



In [46]:
# mask = torch.randint(0, 2, (len(our_data["operator"].x), ), dtype=torch.bool)


# our_data["operator"].x[mask]

In [47]:


class GNN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index).sigmoid()
        return x


model = GNN(hidden_channels=64, out_channels=1)
model = to_hetero(model, our_data.metadata(), aggr='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [48]:
print(out['operator'][0:10])
print(our_data['operator'].y[0:10])

tensor([[-0.0464],
        [-0.0464],
        [-0.0464],
        [-0.0464],
        [-0.0464],
        [-0.0464],
        [-0.0464],
        [-0.0464],
        [-0.0464],
        [-0.0464]], grad_fn=<SliceBackward0>)
tensor([[1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]])


In [52]:
import torch.nn.functional as F

def train():
    for i in range(1, 100):
        if i % 10 == 0:
            print(f'Epoch: {i:03d}, Loss: {loss:.4f}')
        model.train()
        optimizer.zero_grad()
        out = model(our_data.x_dict, our_data.edge_index_dict)

        pred = out['operator']
        true_label = our_data['operator'].y

        print(pred[0])
        # print(true_label)
        loss = F.binary_cross_entropy_with_logits(pred, true_label)
        loss.backward()
        optimizer.step()
        # return float(loss)

train()

tensor([6.1497e-06], grad_fn=<SelectBackward0>)
tensor([6.1442e-06], grad_fn=<SelectBackward0>)
tensor([6.1387e-06], grad_fn=<SelectBackward0>)
tensor([6.1332e-06], grad_fn=<SelectBackward0>)
tensor([6.1277e-06], grad_fn=<SelectBackward0>)
tensor([6.1222e-06], grad_fn=<SelectBackward0>)
tensor([6.1167e-06], grad_fn=<SelectBackward0>)
tensor([6.1112e-06], grad_fn=<SelectBackward0>)
tensor([6.1056e-06], grad_fn=<SelectBackward0>)
Epoch: 010, Loss: 0.6931
tensor([6.1001e-06], grad_fn=<SelectBackward0>)
tensor([6.0946e-06], grad_fn=<SelectBackward0>)
tensor([6.0890e-06], grad_fn=<SelectBackward0>)
tensor([6.0835e-06], grad_fn=<SelectBackward0>)
tensor([6.0780e-06], grad_fn=<SelectBackward0>)
tensor([6.0724e-06], grad_fn=<SelectBackward0>)
tensor([6.0668e-06], grad_fn=<SelectBackward0>)
tensor([6.0613e-06], grad_fn=<SelectBackward0>)
tensor([6.0557e-06], grad_fn=<SelectBackward0>)
tensor([6.0501e-06], grad_fn=<SelectBackward0>)
Epoch: 020, Loss: 0.6931
tensor([6.0446e-06], grad_fn=<SelectBa

In [36]:
# def train():
#     model.train()

#     total_examples = total_loss = 0
#     for batch in train_loader:
#         optimizer.zero_grad()
#         batch = batch.to('cuda:0')
#         batch_size = batch['paper'].batch_size
#         out = model(batch.x_dict, batch.edge_index_dict)
#         loss = F.cross_entropy(out['paper'][:batch_size],
#                                batch['paper'].y[:batch_size])
#         loss.backward()
#         optimizer.step()

#         total_examples += batch_size
#         total_loss += float(loss) * batch_size

#     return total_loss / total_examples

In [37]:
model.eval()

out = model(our_data.x_dict, our_data.edge_index_dict)
out["variable"]

tensor([[0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0.4159],
        [0

In [38]:
a = torch.randint(10, (1,100))
len(a[0])

100