Skip to content

Commit

Permalink
Merge pull request #42 from ComplexData-MILA/dtdg
Browse files Browse the repository at this point in the history
polishing tutorials & setup
  • Loading branch information
shenyangHuang committed Feb 20, 2024
2 parents 0fb4264 + ff8c073 commit cc70112
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 354 deletions.
74 changes: 6 additions & 68 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<!-- # TGX -->
![TGX logo](imgs/2023_TGX_logo.png)
![TGX logo](docs/2023_TGX_logo.png)

# Temporal Graph Analysis with TGX
<h4>
Expand All @@ -19,8 +19,8 @@ TGX implementation works with `python >= 3.9` and can be installed as follows.

1. Set up virtual environment (conda should work as well).
```
python -m venv ~/tgx_env/
source ~/tgx_env/bin/activate
python -m venv tgx_env/
source tgx_env/bin/activate
```

2. Install external packages
Expand All @@ -29,21 +29,17 @@ TGX implementation works with `python >= 3.9` and can be installed as follows.
```

3. Install local dependencies under root directory `/TGX`.
<!-- ```
pip install -e py-tgx
``` -->
```
pip install -e .
```



4. [alternatively] Install from `test-pypi`.
4. [Alternative] Install TGX from [`PyPI`](https://pypi.org/project/py-tgx/):

```
pip install -i https://test.pypi.org/simple/ py-tgx
pip install py-tgx
```
You can pin a specific version with `==` (e.g. `pip install py-tgx==<version>`). Note that the PyPI release might not always be the most up-to-date version.


5. [optional] Install `mkdocs` dependencies to serve the documentation locally.
Expand All @@ -52,65 +48,7 @@ TGX implementation works with `python >= 3.9` and can be installed as follows.
```



## Data Loading
For a detailed tutorial on how to load datasets as a `tgx.Graph`, see [`docs/tutorials/data_loader.ipynb`](https://github.com/ComplexData-MILA/TGX/blob/master/docs/tutorials/data_loader.ipynb).
Here are some simple examples of loading different datasets.

1. Load TGB datasets.
```
import tgx
dataset = tgx.tgb_data("tgbl-wiki")
ctdg = tgx.Graph(dataset)
```

2. Load built-in datasets.
```
dataset = tgx.builtin.uci()
ctdg = tgx.Graph(dataset)
```

3. Load custom datasets from `.csv`.
```
from tgx.io.read import read_csv
toy_fname = "docs/tutorials/toy_data.csv"
edgelist = read_csv(toy_fname, header=True,index=False, t_col=0,)
tgx.Graph(edgelist=edgelist)
```

## Visualization and Statistics
For a detailed tutorial on how to generate visualizations and compute statistics for temporal graphs, see [`docs/tutorials/data_viz_stats.ipynb`](https://github.com/ComplexData-MILA/TGX/blob/master/docs/tutorials/data_viz_stats.ipynb).
Here are examples showing some of TGX's functionality.

1. **Discretize** the network (required for some visualization).
```
dataset = tgx.builtin.uci()
ctdg = tgx.Graph(dataset)
time_scale = "weekly"
dtdg, ts_list = ctdg.discretize(time_scale=time_scale, store_unix=True)
```

2. Plot the **node degree over time**.

```
tgx.degree_over_time(dtdg, network_name="uci")
```

3. Compute **novelty** index.
```
tgx.get_novelty(dtdg)
```


<!--
### Creating new branch
first create the branch on github
```
git fetch origin
git checkout -b test origin/test
``` -->
For tutorials on how to use TGX to generate visualizations and compute statistics for temporal graphs, see [`docs/tutorials/data_viz_stats.ipynb`](https://github.com/ComplexData-MILA/TGX/blob/master/docs/tutorials/data_viz_stats.ipynb).


### Citation
Expand Down
377 changes: 135 additions & 242 deletions docs/tutorials/data_viz_stats.ipynb

Large diffs are not rendered by default.

33 changes: 18 additions & 15 deletions examples/data_viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,28 @@
from tgx.utils.graph_utils import subsampling

"""
master example to show all visualization in tgx
A master example to show all visualization in TGX
"""

# === load built in datasets ===
dataset = tgx.builtin.uci()

#* load built in datasets
dataset = tgx.builtin.uci() #built in datasets

#* load the tgb datasets
# === load the tgb datasets ===
# data_name = "tgbl-wiki" #"tgbl-review"
# dataset = tgx.tgb_data(data_name) #tgb datasets


# initialize a Graph object from the loaded dataset
# & discretize its timestamps...
ctdg = tgx.Graph(dataset)
time_scale = "weekly" #"daily"
time_scale = "weekly" # other choices: "daily", "hourly", ...
dtdg = ctdg.discretize(time_scale=time_scale)[0]

#* example for subsampling
# === example for subsampling
sub_edges = subsampling(ctdg, selection_strategy="random", N=1000)
subgraph = tgx.Graph(edgelist=sub_edges)



#* plotting the statistics
# === plot the statistics
tgx.degree_over_time(dtdg, network_name=dataset.name)
tgx.nodes_over_time(dtdg, network_name=dataset.name)
tgx.edges_over_time(dtdg, network_name=dataset.name)
Expand All @@ -39,15 +39,18 @@
tgx.TET(dtdg,
network_name=dataset.name)

#tgx.TET(dtdg, network_name=dataset.name, figsize = (9, 5), axis_title_font_size = 24, ticks_font_size = 24)
# tgx.TET(dtdg,
# network_name=dataset.name,
# figsize = (9, 5),
# axis_title_font_size = 24,
# ticks_font_size = 24)


tgx.TEA(dtdg,
network_name=dataset.name)
# tgx.TEA(dtdg,
# network_name=dataset.name)



#* compute statistics
# === compute statistics
test_ratio = 0.15
tgx.get_reoccurrence(ctdg, test_ratio=test_ratio)
tgx.get_surprise(ctdg, test_ratio=test_ratio)
Expand Down
22 changes: 12 additions & 10 deletions examples/starting_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import sys

def get_args():
parser = argparse.ArgumentParser('*** discretizing time steps from datasets ***')
parser.add_argument('-d', '--data', type=str, help='Dataset name', default='tgbl-wiki')
parser = argparse.ArgumentParser('*** discretizing time steps of a TG dataset ***')
parser.add_argument('-d', '--data', type=str, help='dataset name', default='tgbl-wiki')
parser.add_argument('-t', '--time', type=str, help='time granularity', default='daily')

try:
Expand All @@ -18,24 +18,26 @@ def get_args():
args, _ = get_args()


#! load the datasets from tgb or builtin
# === load the datasets from tgb or built-in ===

# load a built-in dataset
# dataset = tgx.builtin.uci()

data_name = args.data #"tgbl-coin" #"tgbl-review" #"tgbl-wiki"
# load a TGB dataset
data_name = args.data # args.data can be any supported TGB dataset, such as: "tgbl-coin", "tgbl-review", "tgbl-wiki"
dataset = tgx.tgb_data(data_name)



# make a Graph object from loaded dataset
ctdg = tgx.Graph(dataset)
# ctdg.save2csv("ctdg") #! save the graph to csv files

time_scale = args.time #"minutely" #"monthly" #"weekly" #"daily" #"hourly"
time_scale = args.time #choices are: "minutely", "monthly", "weekly", "daily", "hourly"
dtdg = ctdg.discretize(time_scale=time_scale)[0]
print ("discretize to ", time_scale)
print(f"INFO: Discretize {data_name} to `{time_scale}`")



#* plotting the statistics
# === plotting the statistics ===
tgx.degree_over_time(dtdg, network_name=dataset.name)
tgx.nodes_over_time(dtdg, network_name=dataset.name)
tgx.edges_over_time(dtdg, network_name=dataset.name)
Expand All @@ -53,7 +55,7 @@ def get_args():



#* compute statistics
# === compute statistics ===
test_ratio = 0.15
tgx.get_reoccurrence(ctdg, test_ratio=test_ratio)
tgx.get_surprise(ctdg, test_ratio=test_ratio)
Expand Down
19 changes: 0 additions & 19 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,4 @@ def readme():
keywords="Temporal Graph Visualization",
license="MIT",
packages=find_packages(),
install_requires=[
'appdirs',
'networkx',
'numpy',
'pandas',
'py-tgb',
'requests',
'scikit_learn',
'scipy',
'seaborn',
'sklearn',
'torch',
'torch-geometric-temporal',
'torch_geometric',
'torch_scatter',
'torch_sparse',
'tqdm',
'wandb',
],
)

0 comments on commit cc70112

Please sign in to comment.