# Node Classification Task

Note: Models are cached after the first run; on subsequent runs only training/testing is performed. Delete the model files in the `out/.../model` directories to force a full re-run.

In [1]:
# Imports
from impl.run import run
from impl.main import Options
from impl.model.node2vec import Node2Vec, MethodOpts
from impl.model.node2vec_jumps import Node2VecJumps
from impl.model.node2vec_hubs import Node2VecHubs
from impl.utils import set_debug

# Enabling verbose logging
set_debug(False, False)

# Create necessary directories
!mkdir -p out/default/model
!mkdir -p out/jumps/model
!mkdir -p out/hubs/model

## node2vec Reproduction

In [2]:
# Paper: node2vec on BlogCatalog with p = q = 0.25.
# The model file is stored at the `output` path; per the note at the top of
# the notebook, delete it to force the model to be rebuilt.
print("node2vec", run(
    Options(
        input='data/datasets/blogcatalog/bc_adjlist.txt',
        graph_format='adjlist',
        weighted=False,
        # plain string literal: the path has no placeholders, so no f-prefix
        output='out/default/model/blogcatalog-p0.25-q0.25.model',
        label_file='data/datasets/blogcatalog/bc_labels.txt',
        # 50% training/test split
        training_ratio=0.5,
        # average score over 10 iterations
        clf_iterations=10
    ),
    Node2Vec(
        MethodOpts(
            dim=128,
            walk_length=80,
            num_walks=10,
            window=10
        ),
        p=0.25,
        q=0.25
    ),
    draw=False
))

100%|██████████| 10312/10312 [00:03<00:00, 3147.09it/s]
100%|██████████| 1599/1599 [23:46<00:00,  1.12it/s] 
Walk 1/10: 100%|██████████| 10312/10312 [01:31<00:00, 113.10it/s]
Walk 2/10: 100%|██████████| 10312/10312 [01:29<00:00, 115.70it/s]
Walk 3/10: 100%|██████████| 10312/10312 [01:27<00:00, 117.48it/s]
Walk 4/10: 100%|██████████| 10312/10312 [01:26<00:00, 119.42it/s]
Walk 5/10: 100%|██████████| 10312/10312 [01:27<00:00, 118.05it/s]
Walk 6/10: 100%|██████████| 10312/10312 [01:27<00:00, 117.61it/s]
Walk 7/10: 100%|██████████| 10312/10312 [01:26<00:00, 119.28it/s]
Walk 8/10: 100%|██████████| 10312/10312 [01:28<00:00, 116.76it/s]
Walk 9/10: 100%|██████████| 10312/10312 [01:28<00:00, 117.03it/s]
Walk 10/10: 100%|██████████| 10312/10312 [01:28<00:00, 115.94it/s]
100%|██████████| 103120/103120 [00:02<00:00, 43163.57it/s]
100%|██████████| 10/10 [00:28<00:00,  2.83s/it]


node2vec {'micro': {'mean': 39.931672856907454, 'std': 0.48235237604735176}, 'macro': {'mean': 26.71829166025585, 'std': 0.7208215511459021}, 'samples': {'mean': 40.219986003781585, 'std': 0.5139329329732457}, 'weighted': {'mean': 37.44890197749279, 'std': 0.41153820792880275}}


In [3]:
# DeepWalk: the same Node2Vec model and options as the cell above, but with
# p = q = 1; the result is printed under the label "DeepWalk".
print("DeepWalk", run(
    Options(
        input='data/datasets/blogcatalog/bc_adjlist.txt',
        graph_format='adjlist',
        weighted=False,
        # plain string literal: the path has no placeholders, so no f-prefix
        output='out/default/model/blogcatalog-p1-q1.model',
        label_file='data/datasets/blogcatalog/bc_labels.txt',
        # 50% training/test split
        training_ratio=0.5,
        # average score over 10 iterations
        clf_iterations=10
    ),
    Node2Vec(
        MethodOpts(
            dim=128,
            walk_length=80,
            num_walks=10,
            window=10
        ),
        p=1,
        q=1
    ),
    draw=False
))

100%|██████████| 10312/10312 [00:02<00:00, 3739.51it/s]
100%|██████████| 1599/1599 [22:45<00:00,  1.17it/s]
Walk 1/10: 100%|██████████| 10312/10312 [01:46<00:00, 97.19it/s] 
Walk 2/10: 100%|██████████| 10312/10312 [01:43<00:00, 99.83it/s] 
Walk 3/10: 100%|██████████| 10312/10312 [01:43<00:00, 99.28it/s] 
Walk 4/10: 100%|██████████| 10312/10312 [01:41<00:00, 101.23it/s]
Walk 5/10: 100%|██████████| 10312/10312 [01:43<00:00, 99.26it/s] 
Walk 6/10: 100%|██████████| 10312/10312 [01:42<00:00, 100.41it/s]
Walk 7/10: 100%|██████████| 10312/10312 [01:41<00:00, 101.32it/s]
Walk 8/10: 100%|██████████| 10312/10312 [01:44<00:00, 98.34it/s] 
Walk 9/10: 100%|██████████| 10312/10312 [01:43<00:00, 99.32it/s] 
Walk 10/10: 100%|██████████| 10312/10312 [01:42<00:00, 100.38it/s]
100%|██████████| 103120/103120 [00:02<00:00, 43042.82it/s]
100%|██████████| 10/10 [00:31<00:00,  3.12s/it]


DeepWalk {'micro': {'mean': 39.81677529262775, 'std': 0.31829090312943686}, 'macro': {'mean': 25.854255277356902, 'std': 0.5878904931145543}, 'samples': {'mean': 40.253212399401306, 'std': 0.2812475162314243}, 'weighted': {'mean': 37.14310620691002, 'std': 0.35817577527732636}}


## Jump Probability

In [4]:
def run_jump(prob):
    """Run the BlogCatalog node classification experiment with Node2VecJumps.

    The experiment reads the BlogCatalog adjacency list and label files,
    uses a 50% training ratio and 10 classifier iterations, and stores the
    model under ``out/jumps/model`` with ``prob`` embedded in the file name.

    Args:
        prob: Value forwarded to ``Node2VecJumps`` as ``jump_prob``. It is
            also interpolated verbatim into the model file name, so ``1``
            and ``1.0`` map to different files.

    Returns:
        Whatever ``run`` returns for this configuration (the recorded cell
        outputs show a dict of mean/std scores per averaging mode).
    """
    experiment_options = Options(
        input='data/datasets/blogcatalog/bc_adjlist.txt',
        graph_format='adjlist',
        weighted=False,
        output=f'out/jumps/model/blogcatalog-p{prob}.model',
        label_file='data/datasets/blogcatalog/bc_labels.txt',
        # 50% training/test split
        training_ratio=0.5,
        # average score over 10 iterations
        clf_iterations=10,
    )
    # Same MethodOpts values as the other experiments in this notebook.
    walk_options = MethodOpts(
        dim=128,
        walk_length=80,
        num_walks=10,
        window=10,
    )
    jump_model = Node2VecJumps(walk_options, jump_prob=prob)
    # No trailing semicolon: inside a function it suppresses nothing.
    return run(experiment_options, jump_model, draw=False)

In [5]:
# Jump probability 1, reported under the label "100%".
print("100%", run_jump(prob=1))

100%|██████████| 10312/10312 [00:03<00:00, 3361.66it/s]
100%|██████████| 1599/1599 [23:16<00:00,  1.14it/s]
Walk 1/10: 100%|██████████| 10312/10312 [02:26<00:00, 70.27it/s]
Walk 2/10: 100%|██████████| 10312/10312 [02:22<00:00, 72.37it/s]
Walk 3/10: 100%|██████████| 10312/10312 [02:21<00:00, 72.79it/s]
Walk 4/10: 100%|██████████| 10312/10312 [02:26<00:00, 70.31it/s]
Walk 5/10: 100%|██████████| 10312/10312 [02:25<00:00, 70.92it/s]
Walk 6/10: 100%|██████████| 10312/10312 [02:26<00:00, 70.52it/s]
Walk 7/10: 100%|██████████| 10312/10312 [02:26<00:00, 70.41it/s]
Walk 8/10: 100%|██████████| 10312/10312 [02:24<00:00, 71.41it/s]
Walk 9/10: 100%|██████████| 10312/10312 [02:24<00:00, 71.30it/s]
Walk 10/10: 100%|██████████| 10312/10312 [02:23<00:00, 71.92it/s]
100%|██████████| 103120/103120 [00:01<00:00, 82021.26it/s]
100%|██████████| 10/10 [00:21<00:00,  2.11s/it]


100% {'micro': {'mean': 14.029164457232202, 'std': 0.2833465421423977}, 'macro': {'mean': 3.6372694646458035, 'std': 0.14208599007204317}, 'samples': {'mean': 12.132061429433424, 'std': 0.318429680042795}, 'weighted': {'mean': 8.8956095693521, 'std': 0.16742099375806044}}


In [6]:
# Jump probability 0.25, reported under the label "25%".
print("25%", run_jump(prob=0.25))

100%|██████████| 10312/10312 [00:03<00:00, 3195.77it/s]
100%|██████████| 1599/1599 [22:46<00:00,  1.17it/s]
Walk 1/10: 100%|██████████| 10312/10312 [02:04<00:00, 82.93it/s]
Walk 2/10: 100%|██████████| 10312/10312 [02:03<00:00, 83.23it/s]
Walk 3/10: 100%|██████████| 10312/10312 [02:04<00:00, 82.66it/s]
Walk 4/10: 100%|██████████| 10312/10312 [02:06<00:00, 81.52it/s]
Walk 5/10: 100%|██████████| 10312/10312 [02:07<00:00, 81.04it/s]
Walk 6/10: 100%|██████████| 10312/10312 [02:05<00:00, 82.39it/s]
Walk 7/10: 100%|██████████| 10312/10312 [02:07<00:00, 80.62it/s]
Walk 8/10: 100%|██████████| 10312/10312 [02:06<00:00, 81.21it/s]
Walk 9/10: 100%|██████████| 10312/10312 [02:05<00:00, 81.85it/s]
Walk 10/10: 100%|██████████| 10312/10312 [02:04<00:00, 82.82it/s]
100%|██████████| 103120/103120 [00:02<00:00, 45467.27it/s]
100%|██████████| 10/10 [00:27<00:00,  2.74s/it]


25% {'micro': {'mean': 38.8560760702678, 'std': 0.3685043331901167}, 'macro': {'mean': 25.270405722526892, 'std': 0.6807124092403685}, 'samples': {'mean': 39.12787928604453, 'std': 0.3997904776008616}, 'weighted': {'mean': 36.12155375875807, 'std': 0.33320896774680475}}


In [7]:
# Jump probability 0.1, reported under the label "10%".
print("10%", run_jump(prob=0.1))

100%|██████████| 10312/10312 [00:03<00:00, 2933.00it/s]
100%|██████████| 1599/1599 [23:35<00:00,  1.13it/s]
Walk 1/10: 100%|██████████| 10312/10312 [01:55<00:00, 88.99it/s]
Walk 2/10: 100%|██████████| 10312/10312 [01:57<00:00, 87.75it/s] 
Walk 3/10: 100%|██████████| 10312/10312 [01:56<00:00, 88.33it/s]
Walk 4/10: 100%|██████████| 10312/10312 [01:56<00:00, 88.24it/s]
Walk 5/10: 100%|██████████| 10312/10312 [01:57<00:00, 88.07it/s]
Walk 6/10: 100%|██████████| 10312/10312 [01:55<00:00, 89.54it/s] 
Walk 7/10: 100%|██████████| 10312/10312 [01:55<00:00, 89.55it/s]
Walk 8/10: 100%|██████████| 10312/10312 [01:57<00:00, 87.56it/s] 
Walk 9/10: 100%|██████████| 10312/10312 [01:56<00:00, 88.56it/s]
Walk 10/10: 100%|██████████| 10312/10312 [01:59<00:00, 86.35it/s]
100%|██████████| 103120/103120 [00:02<00:00, 43604.80it/s]
100%|██████████| 10/10 [00:30<00:00,  3.01s/it]


10% {'micro': {'mean': 39.0712906233048, 'std': 0.1892082759162989}, 'macro': {'mean': 25.759881337367155, 'std': 0.5509774229965495}, 'samples': {'mean': 39.552068325390664, 'std': 0.25760288073733206}, 'weighted': {'mean': 36.515559554915406, 'std': 0.27786419696709297}}


## Hub Attention

In [8]:
def run_hubs(h):
    """Run the BlogCatalog node classification experiment with Node2VecHubs.

    The experiment reads the BlogCatalog adjacency list and label files,
    uses a 50% training ratio and 10 classifier iterations, and stores the
    model under ``out/hubs/model`` with ``h`` embedded in the file name.

    Args:
        h: Value forwarded to ``Node2VecHubs`` as ``h``. It is also
            interpolated verbatim into the model file name.

    Returns:
        Whatever ``run`` returns for this configuration (the recorded cell
        outputs show a dict of mean/std scores per averaging mode).
    """
    experiment_options = Options(
        input='data/datasets/blogcatalog/bc_adjlist.txt',
        graph_format='adjlist',
        weighted=False,
        output=f'out/hubs/model/blogcatalog-h{h}.model',
        label_file='data/datasets/blogcatalog/bc_labels.txt',
        # 50% training/test split
        training_ratio=0.5,
        # average score over 10 iterations
        clf_iterations=10,
    )
    # Same MethodOpts values as the other experiments in this notebook.
    walk_options = MethodOpts(
        dim=128,
        walk_length=80,
        num_walks=10,
        window=10,
    )
    hubs_model = Node2VecHubs(walk_options, h=h)
    # No trailing semicolon: inside a function it suppresses nothing.
    return run(experiment_options, hubs_model, draw=False)

In [9]:
# Hub experiment with h = 0.5.
print("0.5", run_hubs(h=0.5))

100%|██████████| 10312/10312 [00:03<00:00, 3270.19it/s]
100%|██████████| 1599/1599 [24:12<00:00,  1.10it/s] 
Walk 1/10: 100%|██████████| 10312/10312 [02:01<00:00, 84.85it/s]
Walk 2/10: 100%|██████████| 10312/10312 [02:01<00:00, 84.92it/s]
Walk 3/10: 100%|██████████| 10312/10312 [01:59<00:00, 85.94it/s]
Walk 4/10: 100%|██████████| 10312/10312 [02:02<00:00, 84.48it/s]
Walk 5/10: 100%|██████████| 10312/10312 [02:00<00:00, 85.68it/s]
Walk 6/10: 100%|██████████| 10312/10312 [02:02<00:00, 84.41it/s]
Walk 7/10: 100%|██████████| 10312/10312 [02:02<00:00, 84.42it/s] 
Walk 8/10: 100%|██████████| 10312/10312 [02:00<00:00, 85.91it/s]
Walk 9/10: 100%|██████████| 10312/10312 [01:59<00:00, 86.18it/s]
Walk 10/10: 100%|██████████| 10312/10312 [02:00<00:00, 85.34it/s]
100%|██████████| 103120/103120 [00:02<00:00, 48564.84it/s]
100%|██████████| 10/10 [00:30<00:00,  3.10s/it]


0.5 {'micro': {'mean': 37.606309963141626, 'std': 0.42885424605935235}, 'macro': {'mean': 23.37031759298582, 'std': 0.5857561957478299}, 'samples': {'mean': 37.8155863186895, 'std': 0.4956146076182944}, 'weighted': {'mean': 34.69136037745291, 'std': 0.4034720230292449}}


In [10]:
# Hub experiment with h = 0.75.
print("0.75", run_hubs(h=0.75))

100%|██████████| 10312/10312 [00:03<00:00, 2949.06it/s]
100%|██████████| 1599/1599 [23:59<00:00,  1.11it/s] 
Walk 1/10: 100%|██████████| 10312/10312 [01:54<00:00, 89.81it/s] 
Walk 2/10: 100%|██████████| 10312/10312 [01:54<00:00, 90.33it/s] 
Walk 3/10: 100%|██████████| 10312/10312 [01:56<00:00, 88.77it/s] 
Walk 4/10: 100%|██████████| 10312/10312 [01:54<00:00, 90.05it/s] 
Walk 5/10: 100%|██████████| 10312/10312 [01:55<00:00, 89.61it/s] 
Walk 6/10: 100%|██████████| 10312/10312 [01:53<00:00, 91.08it/s] 
Walk 7/10: 100%|██████████| 10312/10312 [01:53<00:00, 90.90it/s]
Walk 8/10: 100%|██████████| 10312/10312 [01:53<00:00, 90.94it/s] 
Walk 9/10: 100%|██████████| 10312/10312 [01:53<00:00, 90.51it/s] 
Walk 10/10: 100%|██████████| 10312/10312 [01:52<00:00, 91.60it/s] 
100%|██████████| 103120/103120 [00:02<00:00, 43650.33it/s]
100%|██████████| 10/10 [00:31<00:00,  3.12s/it]


0.75 {'micro': {'mean': 38.87139830414522, 'std': 0.42201462727447403}, 'macro': {'mean': 25.214078187040442, 'std': 0.6374166428889497}, 'samples': {'mean': 39.281497088808955, 'std': 0.4647289383088701}, 'weighted': {'mean': 36.17185214274774, 'std': 0.4110250674967426}}


In [11]:
# Hub experiment with h = 4.
print("4", run_hubs(h=4))

100%|██████████| 10312/10312 [00:03<00:00, 3295.31it/s]
100%|██████████| 1599/1599 [23:57<00:00,  1.11it/s] 
Walk 1/10: 100%|██████████| 10312/10312 [01:12<00:00, 143.09it/s]
Walk 2/10: 100%|██████████| 10312/10312 [01:08<00:00, 151.28it/s]
Walk 3/10: 100%|██████████| 10312/10312 [01:10<00:00, 146.82it/s]
Walk 4/10: 100%|██████████| 10312/10312 [01:10<00:00, 146.21it/s]
Walk 5/10: 100%|██████████| 10312/10312 [01:09<00:00, 149.04it/s]
Walk 6/10: 100%|██████████| 10312/10312 [01:08<00:00, 150.35it/s]
Walk 7/10: 100%|██████████| 10312/10312 [01:09<00:00, 149.39it/s]
Walk 8/10: 100%|██████████| 10312/10312 [01:09<00:00, 148.92it/s]
Walk 9/10: 100%|██████████| 10312/10312 [01:09<00:00, 147.91it/s]
Walk 10/10: 100%|██████████| 10312/10312 [01:10<00:00, 145.99it/s]
100%|██████████| 103120/103120 [00:02<00:00, 43592.74it/s]
100%|██████████| 10/10 [00:31<00:00,  3.14s/it]


4 {'micro': {'mean': 40.132530273229435, 'std': 0.24921085709991725}, 'macro': {'mean': 27.436664395335065, 'std': 0.6401859612035168}, 'samples': {'mean': 40.64996202185884, 'std': 0.24674019060766236}, 'weighted': {'mean': 37.807915497339614, 'std': 0.2394244820882224}}


In [12]:
# Hub experiment with h = 8.
print("8", run_hubs(h=8))

100%|██████████| 10312/10312 [00:03<00:00, 3007.05it/s]
100%|██████████| 1599/1599 [23:55<00:00,  1.11it/s] 
Walk 1/10: 100%|██████████| 10312/10312 [00:54<00:00, 188.47it/s]
Walk 2/10: 100%|██████████| 10312/10312 [00:54<00:00, 189.08it/s]
Walk 3/10: 100%|██████████| 10312/10312 [00:53<00:00, 192.42it/s]
Walk 4/10: 100%|██████████| 10312/10312 [00:54<00:00, 188.98it/s]
Walk 5/10: 100%|██████████| 10312/10312 [00:56<00:00, 182.92it/s]
Walk 6/10: 100%|██████████| 10312/10312 [00:53<00:00, 193.19it/s]
Walk 7/10: 100%|██████████| 10312/10312 [00:54<00:00, 190.09it/s]
Walk 8/10: 100%|██████████| 10312/10312 [00:54<00:00, 188.26it/s]
Walk 9/10: 100%|██████████| 10312/10312 [00:53<00:00, 192.44it/s]
Walk 10/10: 100%|██████████| 10312/10312 [00:54<00:00, 189.59it/s]
100%|██████████| 103120/103120 [00:02<00:00, 45292.01it/s]
100%|██████████| 10/10 [00:30<00:00,  3.06s/it]


8 {'micro': {'mean': 39.7281862974614, 'std': 0.36185431210230906}, 'macro': {'mean': 27.167915040932705, 'std': 0.5207322840708081}, 'samples': {'mean': 40.08238066131782, 'std': 0.30873271266637325}, 'weighted': {'mean': 37.47630973710576, 'std': 0.33074664332482645}}


In [13]:
# Hub experiment with h = 10.
print("10", run_hubs(h=10))

100%|██████████| 10312/10312 [00:02<00:00, 3468.25it/s]
100%|██████████| 1599/1599 [24:04<00:00,  1.11it/s] 
Walk 1/10: 100%|██████████| 10312/10312 [00:54<00:00, 190.75it/s]
Walk 2/10: 100%|██████████| 10312/10312 [00:50<00:00, 202.34it/s]
Walk 3/10: 100%|██████████| 10312/10312 [00:50<00:00, 206.17it/s]
Walk 4/10: 100%|██████████| 10312/10312 [00:51<00:00, 200.53it/s]
Walk 5/10: 100%|██████████| 10312/10312 [00:53<00:00, 194.49it/s]
Walk 6/10: 100%|██████████| 10312/10312 [00:49<00:00, 206.31it/s]
Walk 7/10: 100%|██████████| 10312/10312 [00:49<00:00, 206.97it/s]
Walk 8/10: 100%|██████████| 10312/10312 [00:51<00:00, 199.21it/s]
Walk 9/10: 100%|██████████| 10312/10312 [00:50<00:00, 204.01it/s]
Walk 10/10: 100%|██████████| 10312/10312 [00:51<00:00, 199.57it/s]
100%|██████████| 103120/103120 [00:02<00:00, 46405.04it/s]
100%|██████████| 10/10 [00:30<00:00,  3.07s/it]


10 {'micro': {'mean': 39.54918541828435, 'std': 0.2629806249031938}, 'macro': {'mean': 27.391148328426407, 'std': 0.5532794340473787}, 'samples': {'mean': 40.04806767665572, 'std': 0.2601776321933875}, 'weighted': {'mean': 37.37094240776509, 'std': 0.21900975627170766}}
