Skip to content

Commit

Permalink
Init commit
Browse files Browse the repository at this point in the history
  • Loading branch information
JiaxuanYou committed Oct 10, 2019
1 parent d860521 commit cb94314
Show file tree
Hide file tree
Showing 47 changed files with 751,945 additions and 0 deletions.
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
.idea/
result_stats/
model/
logging/
graphs/
formulas/
fig/

*.py~
65 changes: 65 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# G2SAT: Learning to Generate SAT Formulas
This repository is the official PyTorch implementation of "G2SAT: Learning to Generate SAT Formulas".

[Jiaxuan You*](https://cs.stanford.edu/~jiaxuan/), [Haoze Wu*](https://anwu1219.github.io/), [Clark Barrett](https://theory.stanford.edu/~barrett/), [Raghuram Ramanujan](https://www.davidson.edu/people/raghu-ramanujan), [Jure Leskovec](https://cs.stanford.edu/people/jure/index.html), [Position-aware Graph Neural Networks](http://proceedings.mlr.press/v97/you19b/you19b.pdf), NeurIPS 2019.

## Installation

- Install PyTorch (tested on 1.0.0), please refer to the offical website for further details
```bash
conda install pytorch torchvision cudatoolkit=9.0 -c pytorch
```
- Install PyTorch Geometric (tested on 1.1.2), please refer to the offical website for further details
```bash
pip install --verbose --no-cache-dir torch-scatter
pip install --verbose --no-cache-dir torch-sparse
pip install --verbose --no-cache-dir torch-cluster
pip install --verbose --no-cache-dir torch-spline-conv (optional)
pip install torch-geometric
```
- Install networkx (tested on 2.3), make sure you are not using networkx 1.x version!
```bash
pip install networkx
```
- Install tensorboardx
```bash
pip install tensorboardX
```


## Run


1. Preprocess data
```bash
python
```

2. Train G2SAT
```bash
python main_train.py --model GCN --num_layers 3
```

3. Use G2SAT to generate Formulas
```bash
python main_test.py --model GCN --num_layers 3
```

4. Analyze results
```bash
python
```


You are highly encouraged to tune all kinds of hyper-parameters to get better performance. We only did very limited hyper-parameter tuning.

We recommend using tensorboard to monitor the training process. To do this, you may run
```bash
tensorboard --logdir runs
```

## Citation
If you find this work useful, please cite our paper:
```latex
```
68 changes: 68 additions & 0 deletions args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from argparse import ArgumentParser
def make_args():
parser = ArgumentParser()
# general
parser.add_argument('--comment', dest='comment', default='0', type=str,
help='comment')
parser.add_argument('--task', dest='task', default='link', type=str,
help='link; node')
parser.add_argument('--model', dest='model', default='gcn', type=str,
help='model class name')
parser.add_argument('--dataset', dest='dataset', default='grid', type=str,
help='grid; caveman; barabasi, cora, citeseer, pubmed')
parser.add_argument('--loss', dest='loss', default='l2', type=str,
help='l2; cross_entropy')
parser.add_argument('--gpu', dest='gpu', action='store_true',
help='whether use gpu')
parser.add_argument('--cache_no', dest='cache', action='store_false',
help='whether use cache')
parser.add_argument('--cpu', dest='gpu', action='store_false',
help='whether use cpu')
parser.add_argument('--cuda', dest='cuda', default='0', type=str)

# dataset
parser.add_argument('--graph_test_ratio', dest='graph_test_ratio', default=0.2, type=float)
parser.add_argument('--feature_pre', dest='feature_pre', action='store_true',
help='whether pre transform feature')
parser.add_argument('--feature_pre_no', dest='feature_pre', action='store_false',
help='whether pre transform feature')
parser.add_argument('--dropout', dest='dropout', action='store_true',
help='whether dropout, default 0.5')
parser.add_argument('--dropout_no', dest='dropout', action='store_false',
help='whether dropout, default 0.5')
parser.add_argument('--speedup', dest='speedup', action='store_true',
help='whether speedup')
parser.add_argument('--speedup_no', dest='speedup', action='store_false',
help='whether speedup')
parser.add_argument('--recompute_template', dest='recompute_template', action='store_true',
help='whether save_template')
parser.add_argument('--load_model', dest='load_model', action='store_true',
help='whether load_model')


parser.add_argument('--batch_size', dest='batch_size', default=64, type=int) # implemented via accumulating gradient
parser.add_argument('--layer_num', dest='layer_num', default=3, type=int)
parser.add_argument('--feature_dim', dest='feature_dim', default=32, type=int)
parser.add_argument('--hidden_dim', dest='hidden_dim', default=32, type=int)
parser.add_argument('--output_dim', dest='output_dim', default=32, type=int)
parser.add_argument('--worker_num', dest='worker_num', default=6, type=int)

parser.add_argument('--lr', dest='lr', default=1e-3, type=float)
parser.add_argument('--yield_prob', dest='yield_prob', default=1, type=float)
parser.add_argument('--clause_ratio', dest='clause_ratio', default=1.1, type=float)
parser.add_argument('--epoch_num', dest='epoch_num', default=2000, type=int)
parser.add_argument('--epoch_log', dest='epoch_log', default=50, type=int) # test every
parser.add_argument('--epoch_test', dest='epoch_test', default=2001, type=int) # test start from when
parser.add_argument('--epoch_save', dest='epoch_save', default=50, type=int) # save every
parser.add_argument('--epoch_load', dest='epoch_load', default=1950, type=int) # test start from when
parser.add_argument('--gen_graph_num', dest='gen_graph_num', default=1, type=int) # graph num per template
parser.add_argument('--sample_size', dest='sample_size', default=20000, type=int) # number of action samples
parser.add_argument('--repeat', dest='repeat', default=0, type=int)
parser.add_argument('--sat_id', dest='sat_id', default=0, type=int)


parser.set_defaults(gpu=True, task='link', model='GCN', dataset='Cora', cache=True,
feature_pre=True, dropout=False, recompute_template=False, load_model=False,
speedup=False)
args = parser.parse_args()
return args
200 changes: 200 additions & 0 deletions batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
import torch
from torch_geometric.data import Data
import torch.utils.data
import pdb
import re
import numpy as np


class Dataset_mine(torch.utils.data.Dataset):
def __init__(self, data_list):
super(Dataset_mine, self).__init__()
self.data = data_list
# self.num_features = self.data[0].x.shape[1]
# self.num_classes = len(np.unique(self.data[0].y))

def __getitem__(self, index):
return self.data[index]

def __len__(self):
return len(self.data)

@property
def num_features(self):
return self.data[0].x.shape[1]
@property
def num_classes(self):
return len(np.unique(self.data[0].y))




def __cat_dim__(key, value):
r"""Returns the dimension for which :obj:`value` of attribute
:obj:`key` will get concatenated when creating batches.
.. note::
This method is for internal use only, and should only be overridden
if the batch concatenation process is corrupted for a specific data
attribute.
"""
# `*index*` and `*face*` should be concatenated in the last dimension,
# everything else in the first dimension.
return -1 if bool(re.search('(index|face|mask_link)', key)) else 0

def __cumsum__(key, value):
r"""If :obj:`True`, :obj:`value` of attribute :obj:`key` is
cumulatively summed up when creating batches.
.. note::
This method is for internal use only, and should only be overridden
if the batch concatenation process is corrupted for a specific data
attribute.
"""
return bool(re.search('(index|face|mask_link)', key))

class Batch(Data):
r"""A plain old python object modeling a batch of graphs as one big
(dicconnected) graph. With :class:`torch_geometric.data.Data` being the
base class, all its methods can also be used here.
In addition, single graphs can be reconstructed via the assignment vector
:obj:`batch`, which maps each node to its respective graph identifier.
"""

def __init__(self, batch=None, **kwargs):
super(Batch, self).__init__(**kwargs)
self.batch = batch

@staticmethod
def from_data_list(data_list):
r"""Constructs a batch object from a python list holding
:class:`torch_geometric.data.Data` objects.
The assignment vector :obj:`batch` is created on the fly."""
keys = [set(data.keys) for data in data_list]
keys = list(set.union(*keys))
# don't take "dists"
keys = [key for key in keys if key!='dists']
assert 'batch' not in keys

batch = Batch()

for key in keys:
batch[key] = []
batch.batch = []

cumsum = 0
for i, data in enumerate(data_list):
num_nodes = data.num_nodes
batch.batch.append(torch.full((num_nodes, ), i, dtype=torch.long))
for key in keys:
item = data[key]
item = item + cumsum if __cumsum__(key, item) else item
batch[key].append(item)
cumsum += num_nodes
for key in keys:
item = batch[key][0]
if torch.is_tensor(item):
batch[key] = torch.cat(
batch[key], dim=__cat_dim__(key, item))
elif isinstance(item, int) or isinstance(item, float):
batch[key] = torch.tensor(batch[key])
else:
raise ValueError('Unsupported attribute type.')
batch.batch = torch.cat(batch.batch, dim=-1)

return batch.contiguous()

@staticmethod
def from_data_list_batch(data_list):
# load one batch at a time
r"""Constructs a batch object from a python list holding
:class:`torch_geometric.data.Data` objects.
The assignment vector :obj:`batch` is created on the fly."""
flatten = lambda l: [item for sublist in l for item in sublist]
data_list = flatten(data_list)
keys = [set(data.keys) for data in data_list]
keys = list(set.union(*keys))
# don't take "dists"
keys = [key for key in keys if key != 'dists']
assert 'batch' not in keys

batch = Batch()

for key in keys:
batch[key] = []
batch.batch = []

cumsum = 0
for i, data in enumerate(data_list):
num_nodes = data.num_nodes
batch.batch.append(torch.full((num_nodes,), i, dtype=torch.long))
for key in keys:
item = data[key]
item = item + cumsum if __cumsum__(key, item) else item
batch[key].append(item)
cumsum += num_nodes
for key in keys:
item = batch[key][0]
if torch.is_tensor(item):
batch[key] = torch.cat(
batch[key], dim=__cat_dim__(key, item))
elif isinstance(item, int) or isinstance(item, float):
batch[key] = torch.tensor(batch[key])
else:
raise ValueError('Unsupported attribute type.')
batch.batch = torch.cat(batch.batch, dim=-1)

return batch.contiguous()

@property
def num_graphs(self):
"""Returns the number of graphs in the batch."""
return self.batch[-1].item() + 1






class DataLoader(torch.utils.data.DataLoader):
r"""Data loader which merges data objects from a
:class:`torch_geometric.data.dataset` to a mini-batch.
Args:
dataset (Dataset): The dataset from which to load the data.
batch_size (int, optional): How may samples per batch to load.
(default: :obj:`1`)
shuffle (bool, optional): If set to :obj:`True`, the data will be
reshuffled at every epoch (default: :obj:`True`)
"""

def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
super(DataLoader, self).__init__(
dataset,
batch_size,
shuffle,
collate_fn=lambda data_list: Batch.from_data_list(data_list),
**kwargs)

class DataLoader_batch(torch.utils.data.DataLoader):
# each item is a batch of data
r"""Data loader which merges data objects from a
:class:`torch_geometric.data.dataset` to a mini-batch.
Args:
dataset (Dataset): The dataset from which to load the data.
batch_size (int, optional): How may samples per batch to load.
(default: :obj:`1`)
shuffle (bool, optional): If set to :obj:`True`, the data will be
reshuffled at every epoch (default: :obj:`True`)
"""

def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
super(DataLoader, self).__init__(
dataset,
batch_size,
shuffle,
collate_fn=lambda data_list: Batch.from_data_list_batch(data_list),
**kwargs)
Loading

0 comments on commit cb94314

Please sign in to comment.