Init commit

JiaxuanYou · Oct 10, 2019 · cb94314 · cb94314
1 parent d860521
commit cb94314
Show file tree

Hide file tree

Showing 47 changed files with 751,945 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+.idea/
+result_stats/
+model/
+logging/
+graphs/
+formulas/
+fig/
+
+*.py~
diff --git a/README.md b/README.md
@@ -0,0 +1,65 @@
+# G2SAT: Learning to Generate SAT Formulas
+This repository is the official PyTorch implementation of "G2SAT: Learning to Generate SAT Formulas".
+
+[Jiaxuan You*](https://cs.stanford.edu/~jiaxuan/), [Haoze Wu*](https://anwu1219.github.io/), [Clark Barrett](https://theory.stanford.edu/~barrett/), [Raghuram Ramanujan](https://www.davidson.edu/people/raghu-ramanujan), [Jure Leskovec](https://cs.stanford.edu/people/jure/index.html), [Position-aware Graph Neural Networks](http://proceedings.mlr.press/v97/you19b/you19b.pdf), NeurIPS 2019.
+
+## Installation
+
+- Install PyTorch (tested on 1.0.0), please refer to the offical website for further details
+```bash
+conda install pytorch torchvision cudatoolkit=9.0 -c pytorch
+```
+- Install PyTorch Geometric (tested on 1.1.2), please refer to the offical website for further details
+```bash
+pip install --verbose --no-cache-dir torch-scatter
+pip install --verbose --no-cache-dir torch-sparse
+pip install --verbose --no-cache-dir torch-cluster
+pip install --verbose --no-cache-dir torch-spline-conv (optional)
+pip install torch-geometric
+```
+- Install networkx (tested on 2.3), make sure you are not using networkx 1.x version!
+```bash
+pip install networkx
+```
+- Install tensorboardx
+```bash
+pip install tensorboardX
+```
+
+
+## Run
+
+
+1. Preprocess data
+```bash
+python 
+```
+
+2. Train G2SAT
+```bash
+python main_train.py --model GCN --num_layers 3
+```
+
+3. Use G2SAT to generate Formulas
+```bash
+python main_test.py --model GCN --num_layers 3
+```
+
+4. Analyze results
+```bash
+python
+```
+
+
+You are highly encouraged to tune all kinds of hyper-parameters to get better performance. We only did very limited hyper-parameter tuning.
+
+We recommend using tensorboard to monitor the training process. To do this, you may run
+```bash
+tensorboard --logdir runs
+```
+
+## Citation
+If you find this work useful, please cite our paper:
+```latex
+
+```
diff --git a/args.py b/args.py
@@ -0,0 +1,68 @@
+from argparse import ArgumentParser
+def make_args():
+    parser = ArgumentParser()
+    # general
+    parser.add_argument('--comment', dest='comment', default='0', type=str,
+                        help='comment')
+    parser.add_argument('--task', dest='task', default='link', type=str,
+                        help='link; node')
+    parser.add_argument('--model', dest='model', default='gcn', type=str,
+                        help='model class name')
+    parser.add_argument('--dataset', dest='dataset', default='grid', type=str,
+                        help='grid; caveman; barabasi, cora, citeseer, pubmed')
+    parser.add_argument('--loss', dest='loss', default='l2', type=str,
+                        help='l2; cross_entropy')
+    parser.add_argument('--gpu', dest='gpu', action='store_true',
+                        help='whether use gpu')
+    parser.add_argument('--cache_no', dest='cache', action='store_false',
+                        help='whether use cache')
+    parser.add_argument('--cpu', dest='gpu', action='store_false',
+                        help='whether use cpu')
+    parser.add_argument('--cuda', dest='cuda', default='0', type=str)
+
+    # dataset
+    parser.add_argument('--graph_test_ratio', dest='graph_test_ratio', default=0.2, type=float)
+    parser.add_argument('--feature_pre', dest='feature_pre', action='store_true',
+                        help='whether pre transform feature')
+    parser.add_argument('--feature_pre_no', dest='feature_pre', action='store_false',
+                        help='whether pre transform feature')
+    parser.add_argument('--dropout', dest='dropout', action='store_true',
+                        help='whether dropout, default 0.5')
+    parser.add_argument('--dropout_no', dest='dropout', action='store_false',
+                        help='whether dropout, default 0.5')
+    parser.add_argument('--speedup', dest='speedup', action='store_true',
+                        help='whether speedup')
+    parser.add_argument('--speedup_no', dest='speedup', action='store_false',
+                        help='whether speedup')
+    parser.add_argument('--recompute_template', dest='recompute_template', action='store_true',
+                        help='whether save_template')
+    parser.add_argument('--load_model', dest='load_model', action='store_true',
+                        help='whether load_model')
+
+
+    parser.add_argument('--batch_size', dest='batch_size', default=64, type=int) # implemented via accumulating gradient
+    parser.add_argument('--layer_num', dest='layer_num', default=3, type=int)
+    parser.add_argument('--feature_dim', dest='feature_dim', default=32, type=int)
+    parser.add_argument('--hidden_dim', dest='hidden_dim', default=32, type=int)
+    parser.add_argument('--output_dim', dest='output_dim', default=32, type=int)
+    parser.add_argument('--worker_num', dest='worker_num', default=6, type=int)
+
+    parser.add_argument('--lr', dest='lr', default=1e-3, type=float)
+    parser.add_argument('--yield_prob', dest='yield_prob', default=1, type=float)
+    parser.add_argument('--clause_ratio', dest='clause_ratio', default=1.1, type=float)
+    parser.add_argument('--epoch_num', dest='epoch_num', default=2000, type=int)
+    parser.add_argument('--epoch_log', dest='epoch_log', default=50, type=int) # test every
+    parser.add_argument('--epoch_test', dest='epoch_test', default=2001, type=int) # test start from when
+    parser.add_argument('--epoch_save', dest='epoch_save', default=50, type=int) # save every
+    parser.add_argument('--epoch_load', dest='epoch_load', default=1950, type=int) # test start from when
+    parser.add_argument('--gen_graph_num', dest='gen_graph_num', default=1, type=int) # graph num per template
+    parser.add_argument('--sample_size', dest='sample_size', default=20000, type=int) # number of action samples
+    parser.add_argument('--repeat', dest='repeat', default=0, type=int)
+    parser.add_argument('--sat_id', dest='sat_id', default=0, type=int)
+
+
+    parser.set_defaults(gpu=True, task='link', model='GCN', dataset='Cora', cache=True,
+                        feature_pre=True, dropout=False, recompute_template=False, load_model=False,
+                        speedup=False)
+    args = parser.parse_args()
+    return args
diff --git a/batch.py b/batch.py
@@ -0,0 +1,200 @@
+import torch
+from torch_geometric.data import Data
+import torch.utils.data
+import pdb
+import re
+import numpy as np
+
+
+class Dataset_mine(torch.utils.data.Dataset):
+    def __init__(self, data_list):
+        super(Dataset_mine, self).__init__()
+        self.data = data_list
+        # self.num_features = self.data[0].x.shape[1]
+        # self.num_classes = len(np.unique(self.data[0].y))
+
+    def __getitem__(self, index):
+        return self.data[index]
+
+    def __len__(self):
+        return len(self.data)
+
+    @property
+    def num_features(self):
+        return self.data[0].x.shape[1]
+    @property
+    def num_classes(self):
+        return len(np.unique(self.data[0].y))
+
+
+
+
+def __cat_dim__(key, value):
+    r"""Returns the dimension for which :obj:`value` of attribute
+    :obj:`key` will get concatenated when creating batches.
+
+    .. note::
+
+        This method is for internal use only, and should only be overridden
+        if the batch concatenation process is corrupted for a specific data
+        attribute.
+    """
+    # `*index*` and `*face*` should be concatenated in the last dimension,
+    # everything else in the first dimension.
+    return -1 if bool(re.search('(index|face|mask_link)', key)) else 0
+
+def __cumsum__(key, value):
+    r"""If :obj:`True`, :obj:`value` of attribute :obj:`key` is
+    cumulatively summed up when creating batches.
+
+    .. note::
+
+        This method is for internal use only, and should only be overridden
+        if the batch concatenation process is corrupted for a specific data
+        attribute.
+    """
+    return bool(re.search('(index|face|mask_link)', key))
+
+class Batch(Data):
+    r"""A plain old python object modeling a batch of graphs as one big
+    (dicconnected) graph. With :class:`torch_geometric.data.Data` being the
+    base class, all its methods can also be used here.
+    In addition, single graphs can be reconstructed via the assignment vector
+    :obj:`batch`, which maps each node to its respective graph identifier.
+    """
+
+    def __init__(self, batch=None, **kwargs):
+        super(Batch, self).__init__(**kwargs)
+        self.batch = batch
+
+    @staticmethod
+    def from_data_list(data_list):
+        r"""Constructs a batch object from a python list holding
+        :class:`torch_geometric.data.Data` objects.
+        The assignment vector :obj:`batch` is created on the fly."""
+        keys = [set(data.keys) for data in data_list]
+        keys = list(set.union(*keys))
+        # don't take "dists"
+        keys = [key for key in keys if key!='dists']
+        assert 'batch' not in keys
+
+        batch = Batch()
+
+        for key in keys:
+            batch[key] = []
+        batch.batch = []
+
+        cumsum = 0
+        for i, data in enumerate(data_list):
+            num_nodes = data.num_nodes
+            batch.batch.append(torch.full((num_nodes, ), i, dtype=torch.long))
+            for key in keys:
+                item = data[key]
+                item = item + cumsum if __cumsum__(key, item) else item
+                batch[key].append(item)
+            cumsum += num_nodes
+        for key in keys:
+            item = batch[key][0]
+            if torch.is_tensor(item):
+                batch[key] = torch.cat(
+                    batch[key], dim=__cat_dim__(key, item))
+            elif isinstance(item, int) or isinstance(item, float):
+                batch[key] = torch.tensor(batch[key])
+            else:
+                raise ValueError('Unsupported attribute type.')
+        batch.batch = torch.cat(batch.batch, dim=-1)
+
+        return batch.contiguous()
+
+    @staticmethod
+    def from_data_list_batch(data_list):
+        # load one batch at a time
+        r"""Constructs a batch object from a python list holding
+        :class:`torch_geometric.data.Data` objects.
+        The assignment vector :obj:`batch` is created on the fly."""
+        flatten = lambda l: [item for sublist in l for item in sublist]
+        data_list = flatten(data_list)
+        keys = [set(data.keys) for data in data_list]
+        keys = list(set.union(*keys))
+        # don't take "dists"
+        keys = [key for key in keys if key != 'dists']
+        assert 'batch' not in keys
+
+        batch = Batch()
+
+        for key in keys:
+            batch[key] = []
+        batch.batch = []
+
+        cumsum = 0
+        for i, data in enumerate(data_list):
+            num_nodes = data.num_nodes
+            batch.batch.append(torch.full((num_nodes,), i, dtype=torch.long))
+            for key in keys:
+                item = data[key]
+                item = item + cumsum if __cumsum__(key, item) else item
+                batch[key].append(item)
+            cumsum += num_nodes
+        for key in keys:
+            item = batch[key][0]
+            if torch.is_tensor(item):
+                batch[key] = torch.cat(
+                    batch[key], dim=__cat_dim__(key, item))
+            elif isinstance(item, int) or isinstance(item, float):
+                batch[key] = torch.tensor(batch[key])
+            else:
+                raise ValueError('Unsupported attribute type.')
+        batch.batch = torch.cat(batch.batch, dim=-1)
+
+        return batch.contiguous()
+
+    @property
+    def num_graphs(self):
+        """Returns the number of graphs in the batch."""
+        return self.batch[-1].item() + 1
+
+
+
+
+
+
+class DataLoader(torch.utils.data.DataLoader):
+    r"""Data loader which merges data objects from a
+    :class:`torch_geometric.data.dataset` to a mini-batch.
+
+    Args:
+        dataset (Dataset): The dataset from which to load the data.
+        batch_size (int, optional): How may samples per batch to load.
+            (default: :obj:`1`)
+        shuffle (bool, optional): If set to :obj:`True`, the data will be
+            reshuffled at every epoch (default: :obj:`True`)
+    """
+
+    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
+        super(DataLoader, self).__init__(
+            dataset,
+            batch_size,
+            shuffle,
+            collate_fn=lambda data_list: Batch.from_data_list(data_list),
+            **kwargs)
+
+class DataLoader_batch(torch.utils.data.DataLoader):
+    # each item is a batch of data
+    r"""Data loader which merges data objects from a
+    :class:`torch_geometric.data.dataset` to a mini-batch.
+
+    Args:
+        dataset (Dataset): The dataset from which to load the data.
+        batch_size (int, optional): How may samples per batch to load.
+            (default: :obj:`1`)
+        shuffle (bool, optional): If set to :obj:`True`, the data will be
+            reshuffled at every epoch (default: :obj:`True`)
+    """
+
+    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
+        super(DataLoader, self).__init__(
+            dataset,
+            batch_size,
+            shuffle,
+            collate_fn=lambda data_list: Batch.from_data_list_batch(data_list),
+            **kwargs)