# Example: gowalla
## Prepare the Dataset Instance

This example uses a user-item graph. The gowalla dataset used here can be found 
in our XGCN repository under ``data/raw_gowalla/``; it is copied from LightGCN's official code repository: 
https://github.com/gusye1234/LightGCN-PyTorch.

We recommend arranging the data in a clear directory structure. 
To get started, you may manually set up an ``XGCN_data`` directory as follows: 
(It's recommended to put your ``XGCN_data`` somewhere outside this repository.)

```
XGCN_data
└── dataset
    └── raw_gowalla
        ├── train.txt
        └── test.txt
```

In [1]:
import XGCN
from XGCN.data import io, csr
from XGCN.utils.utils import ensure_dir, set_random_seed
import os.path as osp

In [2]:
# Set your own all_data_root: the directory that contains ``dataset/raw_gowalla``.
# The dataset instance and the model outputs created later in this notebook
# are written under this directory as well.
all_data_root = '/home/sxr/code/XGCN_and_data/XGCN_data'

### Load the graph and the evaluation set

In [3]:
dataset = 'gowalla'
raw_data_root = osp.join(all_data_root, f'dataset/raw_{dataset}')

# Load the training adjacency file as two edge arrays
# (sources and destinations, judging by the names).
train_file = osp.join(raw_data_root, 'train.txt')
E_src, E_dst = io.load_txt_adj_as_edges(train_file)
print(E_src, E_dst, sep='\n')

100%|██████████| 29858/29858 [00:00<00:00, 61243.08it/s]


[    0     0     0 ... 29857 29857 29857]
[   0    1    2 ... 1853  691  674]


In [4]:
# Convert the edge arrays into CSR form (indptr / indices) and collect a
# summary dict describing the resulting graph.
graph_type = 'user-item'
info, indptr, indices = csr.from_edges_to_csr_with_info(E_src, E_dst, graph_type=graph_type)
print(info)

# from_edges_to_csr ...
# remove_repeated_edges ...
## 0 edges are removed
{'graph_type': 'user-item', 'num_users': 29858, 'num_items': 40981, 'num_nodes': 70839, 'num_edges': 810128}


In [5]:
# Build the evaluation set from test.txt.
test_file = osp.join(raw_data_root, 'test.txt')
test_set = io.from_txt_adj_to_adj_eval_set(test_file)



  x = np.loadtxt(f, max_rows=1, dtype=np.int32, ndmin=1)


### Save the Dataset Instance

In [8]:
# All files of the processed dataset instance live in one directory.
data_root = osp.join(all_data_root, f'dataset/instance_{dataset}')
ensure_dir(data_root)

# The graph summary goes to YAML; the arrays and the evaluation set are pickled.
io.save_yaml(osp.join(data_root, 'info.yaml'), info)
for pkl_name, pkl_obj in (
    ('indptr.pkl', indptr),
    ('indices.pkl', indices),
    ('test_set.pkl', test_set),
):
    io.save_pickle(osp.join(data_root, pkl_name), pkl_obj)

## Run LightGCN

In [9]:
# Set your own config_file: this is a machine-specific absolute path to the
# LightGCN full-graph config template (LightGCN-full_graph-config.yaml).
# NOTE(review): it points to a different home directory than all_data_root — adjust before running.
config_file = '/home/wuyao/songxiran/code/XGCN_coda_and_data/XGCN_library/config/LightGCN-full_graph-config.yaml'
config = io.load_yaml(config_file)
config  # shown as the cell output; the empty fields are filled in by a later cell

{'data_root': '',
 'results_root': '',
 'epochs': 200,
 'val_freq': 1,
 'key_score_metric': 'r100',
 'convergence_threshold': 20,
 'Dataset_type': 'NodeListDataset',
 'num_workers': 0,
 'NodeListDataset_type': 'LinkDataset',
 'pos_sampler': 'ObservedEdges_Sampler',
 'neg_sampler': 'RandomNeg_Sampler',
 'num_neg': 1,
 'BatchSampleIndicesGenerator_type': 'SampleIndicesWithReplacement',
 'train_batch_size': 1024,
 'epoch_sample_ratio': 0.1,
 'val_evaluator': '',
 'val_batch_size': 256,
 'file_val_set': '',
 'test_evaluator': '',
 'test_batch_size': 256,
 'file_test_set': '',
 'model': 'LightGCN',
 'seed': 1999,
 'graph_device': 'cuda:0',
 'emb_table_device': 'cuda:0',
 'gnn_device': 'cuda:0',
 'out_emb_table_device': 'cuda:0',
 'forward_mode': 'full_graph',
 'from_pretrained': 0,
 'file_pretrained_emb': '',
 'freeze_emb': 0,
 'use_sparse': 0,
 'emb_dim': 64,
 'emb_init_std': 0.1,
 'emb_lr': 0.005,
 'num_gcn_layers': 2,
 'stack_layers': 1,
 'loss_type': 'bpr',
 'L2_reg_weight': 0.0}

In [11]:
# Where to read the dataset instance from and where to write the model outputs.
results_root = osp.join(all_data_root, 'model_output', dataset, 'LightGCN')
ensure_dir(results_root)
config.update({
    'data_root': data_root,
    'results_root': results_root,
})

# Validation and testing use the same evaluator and the same file (test_set.pkl).
eval_set_file = osp.join(data_root, 'test_set.pkl')
config.update({
    'val_evaluator': 'WholeGraph_MultiPos_Evaluator',
    'file_val_set': eval_set_file,
    'test_evaluator': 'WholeGraph_MultiPos_Evaluator',
    'file_test_set': eval_set_file,
})

# Model hyper-parameters that differ from the template defaults.
config.update({
    'num_gcn_layers': 3,
    'emb_lr': 0.001,
    'L2_reg_weight': 1e-4,
})

# Only a few epochs, for demonstration.
config.update({'epochs': 5})

In [12]:
# Keep a copy of the final configuration next to the model outputs.
config_copy_file = osp.join(results_root, 'config.yaml')
io.save_yaml(config_copy_file, config)

In [13]:
# Use the configured seed, falling back to 1999 when the config has no 'seed' entry.
seed = config.get('seed', 1999)
set_random_seed(seed)

In [14]:
data = {}  # containing some global data objects

# `data` is handed to every builder below (presumably so they can share objects
# through it — TODO confirm), so keep the build order as written.
model = XGCN.build_Model(config, data)

train_dl = XGCN.build_DataLoader(config, data)

# Separate evaluators for validation and testing, both built around the same model.
val_evaluator = XGCN.build_val_Evaluator(config, data, model)
test_evaluator = XGCN.build_test_Evaluator(config, data, model)

# The trainer receives the model, the training data loader and both evaluators.
trainer = XGCN.build_Trainer(config, data, model, train_dl,
                                val_evaluator, test_evaluator)

In [None]:
# Run training and testing with the trainer built in the previous cell.
trainer.train_and_test()

## Run xGCN