**Karate Club is an unsupervised machine learning extension library for NetworkX. It consists of state-of-the-art methods for unsupervised learning on graph-structured data. To put it simply, it is a Swiss Army knife for small-scale graph mining research. First, it provides network embedding techniques at the node and graph level. Second, it includes a variety of overlapping and non-overlapping community detection methods.**

# 0 - Installation & Imports

In [1]:
%%time
!pip install -q torch_geometric
!pip install -q ogb
!pip install -q karateclub

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.5/64.5 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m45.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m83.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m93.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m46.6 MB/s[0m eta [3

In [2]:
import numpy as np
import scipy as sp
# Expose numpy's `errstate` under the name `scipy.errstate`.
# NOTE(review): presumably one of the libraries imported below still looks this
# attribute up on scipy — confirm which one, and whether the patch is still needed.
sp.errstate = np.errstate # Patch scipy to include errstate from numpy
from IPython.display import clear_output
import os
from os import path
from pandas import DataFrame
from torch import from_numpy,save

from ogb.linkproppred.dataset_pyg import PygLinkPropPredDataset

# Allow-list the PyG storage/attribute classes with torch's serialization module.
# NOTE(review): presumably required so the cached OGB dataset can be un-pickled
# under torch's restricted (weights-only) loading — confirm against the installed torch.
from torch import serialization
from torch_geometric.data.storage import GlobalStorage
from torch_geometric.data.data import DataEdgeAttr, DataTensorAttr
serialization.add_safe_globals([GlobalStorage, DataEdgeAttr, DataTensorAttr])

import networkx as nx

# NOTE: `Node2Vec` is imported again from karateclub further down in this cell.
# The later import rebinds the name, so once this cell has run `Node2Vec` refers
# to the karateclub class and the PyG class imported here is no longer reachable
# under that name.
from torch_geometric.nn import Node2Vec
import torch_geometric.transforms as T
from torch_geometric.utils import to_networkx ,to_networkit,to_trimesh,get_embeddings

# Karate Club estimators (this `Node2Vec` is the one used by the cells below).
from karateclub import (
    DeepWalk,Node2Vec,SocioDim,Role2Vec,RandNE,GLEE,Diff2Vec,GraphWave,Walklets,NodeSketch,NetMF,BoostNE,GraRep,NMFADMM,LaplacianEigenmaps)

from karateclub import NNSED,DANMF,GEMSEC,NEU

# 1 - Data Loading and Preparation

In [3]:
# Load the OGB link-property-prediction dataset `ogbl-ddi` through its PyG wrapper.
dataset = PygLinkPropPredDataset(name='ogbl-ddi')
# Drop whatever the loader printed so the cell output stays clean.
clear_output()

# The dataset's first (index 0) entry is the graph object used throughout the notebook.
graph_data = dataset[0]

# Convert to NetworkX first, then drop edge direction: the embedding cells
# below are all fitted on this undirected `graph`.
directed_graph = to_networkx(graph_data)
graph = directed_graph.to_undirected()

In [4]:
# Directory that receives the `.pt` embedding files written by the cells below.
embedding_save_path = 'KarateClub_Embedding/'
# `exist_ok=True` makes this cell idempotent on re-run and removes the
# check-then-create race of a separate `os.path.exists` test.
os.makedirs(embedding_save_path, exist_ok=True)

---
---

# 2 - Graph Embedding using **Karate Club**

## 2.1 - Node2Vec

In [5]:
%%time
# Fit Node2Vec on the undirected graph and extract the node-embedding matrix.
# (The recorded run of this cell took about 1h 22min of wall time.)
N2V_model = Node2Vec(
    dimensions=100,
    walk_number=700,
    walk_length=3,
    epochs=14,
)
N2V_model.fit(graph)
N2V_embedding = N2V_model.get_embedding()
N2V_embedding_tensor = from_numpy(N2V_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   Node2Vec___<name>_<value>__<name>_<value>....pt
N2V_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in N2V_model.get_params().items()
    if name != 'seed'
)
N2V_file = path.join(embedding_save_path, 'Node2Vec___' + N2V_param_tag + '.pt')
save(N2V_embedding_tensor, N2V_file)

# Last expression: display the tensor shape as the cell output.
N2V_embedding_tensor.shape

CPU times: user 1h 32min 6s, sys: 6.19 s, total: 1h 32min 12s
Wall time: 1h 22min 21s


torch.Size([4267, 100])

---
## 2.2 - DeepWalk

In [6]:
%%time
# Fit DeepWalk on the undirected graph and extract the node-embedding matrix.
# (The recorded run of this cell took about 17 minutes of wall time.)
DW_model = DeepWalk(
    dimensions=100,
    walk_number=1400,
    walk_length=3,
    epochs=16,
)
DW_model.fit(graph)
DW_embedding = DW_model.get_embedding()
DW_embedding_tensor = from_numpy(DW_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   DeepWalk___<name>_<value>__<name>_<value>....pt
DW_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in DW_model.get_params().items()
    if name != 'seed'
)
DW_file = path.join(embedding_save_path, 'DeepWalk___' + DW_param_tag + '.pt')
save(DW_embedding_tensor, DW_file)

# Last expression: display the tensor shape as the cell output.
DW_embedding_tensor.shape

CPU times: user 38min 29s, sys: 8.73 s, total: 38min 37s
Wall time: 16min 44s


torch.Size([4267, 100])

---
## 2.3 - SocioDim

In [7]:
%%time
# Fit SocioDim on the undirected graph and extract the node-embedding matrix.
SD_model = SocioDim(dimensions=100)
SD_model.fit(graph)
SD_embedding = SD_model.get_embedding()
SD_embedding_tensor = from_numpy(SD_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   SocioDim___<name>_<value>__<name>_<value>....pt
SD_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in SD_model.get_params().items()
    if name != 'seed'
)
SD_file = path.join(embedding_save_path, 'SocioDim___' + SD_param_tag + '.pt')
save(SD_embedding_tensor, SD_file)

# Last expression: display the tensor shape as the cell output.
SD_embedding_tensor.shape

CPU times: user 9.24 s, sys: 235 ms, total: 9.47 s
Wall time: 7 s


torch.Size([4267, 100])

---
## 2.4 - RandNE

In [8]:
%%time
# Fit RandNE on the undirected graph and extract the node-embedding matrix.
RNE_model = RandNE(dimensions=100)
RNE_model.fit(graph)
RNE_embedding = RNE_model.get_embedding()
RNE_embedding_tensor = from_numpy(RNE_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   RandNE___<name>_<value>__<name>_<value>....pt
RNE_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in RNE_model.get_params().items()
    if name != 'seed'
)
RNE_file = path.join(embedding_save_path, 'RandNE___' + RNE_param_tag + '.pt')
save(RNE_embedding_tensor, RNE_file)

# Last expression: display the tensor shape as the cell output.
RNE_embedding_tensor.shape

CPU times: user 4.2 s, sys: 49 ms, total: 4.25 s
Wall time: 4.22 s


torch.Size([4267, 100])

---
## 2.5 - GLEE

In [9]:
%%time
# Fit GLEE on the undirected graph and extract the node-embedding matrix.
# Note: the recorded output of this cell has 101 columns although
# `dimensions=100` is requested, so downstream code should not assume a width of 100.
GLEE_model = GLEE(dimensions=100)
GLEE_model.fit(graph)
GLEE_embedding = GLEE_model.get_embedding()
GLEE_embedding_tensor = from_numpy(GLEE_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   GLEE___<name>_<value>__<name>_<value>....pt
GLEE_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in GLEE_model.get_params().items()
    if name != 'seed'
)
GLEE_file = path.join(embedding_save_path, 'GLEE___' + GLEE_param_tag + '.pt')
save(GLEE_embedding_tensor, GLEE_file)

# Last expression: display the tensor shape as the cell output.
GLEE_embedding_tensor.shape

CPU times: user 13.9 s, sys: 86 ms, total: 14 s
Wall time: 9.2 s


torch.Size([4267, 101])

---
## 2.6 - Role2Vec

In [10]:
%%time
# Fit Role2Vec on the undirected graph and extract the node-embedding matrix.
# (The recorded run of this cell took about 18 minutes of wall time.)
R2V_model = Role2Vec(
    dimensions=100,
    epochs=24,
    walk_length=3,
    walk_number=1400,
)
R2V_model.fit(graph)
R2V_embedding = R2V_model.get_embedding()
R2V_embedding_tensor = from_numpy(R2V_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   Role2Vec___<name>_<value>__<name>_<value>....pt
R2V_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in R2V_model.get_params().items()
    if name != 'seed'
)
R2V_file = path.join(embedding_save_path, 'Role2Vec___' + R2V_param_tag + '.pt')
save(R2V_embedding_tensor, R2V_file)

# Last expression: display the tensor shape as the cell output.
R2V_embedding_tensor.shape

CPU times: user 43min 40s, sys: 13.8 s, total: 43min 53s
Wall time: 17min 52s


torch.Size([4267, 100])

---
## 2.7 - NetMF

In [11]:
%%time
# Fit NetMF on the undirected graph and extract the node-embedding matrix.
# (The recorded run of this cell took about 14 minutes of wall time.)
# Naming note: the `NMF_` prefix here stands for NetMF, not for the NMFADMM model.
NMF_model = NetMF(
    dimensions=100,
    iteration=700,
    negative_samples=1,
    order=5,
)
NMF_model.fit(graph)
NMF_embedding = NMF_model.get_embedding()
NMF_embedding_tensor = from_numpy(NMF_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   NetMF___<name>_<value>__<name>_<value>....pt
NMF_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in NMF_model.get_params().items()
    if name != 'seed'
)
NMF_file = path.join(embedding_save_path, 'NetMF___' + NMF_param_tag + '.pt')
save(NMF_embedding_tensor, NMF_file)

# Last expression: display the tensor shape as the cell output.
NMF_embedding_tensor.shape

CPU times: user 13min 33s, sys: 1.43 s, total: 13min 35s
Wall time: 13min 35s


torch.Size([4267, 100])

---
## 2.8 - Diff2Vec

In [12]:
%%time
# Fit Diff2Vec on the undirected graph and extract the node-embedding matrix.
# (The recorded run of this cell took about 4 minutes of wall time.)
D2V_model = Diff2Vec(dimensions=100, diffusion_cover=60)
D2V_model.fit(graph)
D2V_embedding = D2V_model.get_embedding()
D2V_embedding_tensor = from_numpy(D2V_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   Diff2Vec___<name>_<value>__<name>_<value>....pt
D2V_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in D2V_model.get_params().items()
    if name != 'seed'
)
D2V_file = path.join(embedding_save_path, 'Diff2Vec___' + D2V_param_tag + '.pt')
save(D2V_embedding_tensor, D2V_file)

# Last expression: display the tensor shape as the cell output.
D2V_embedding_tensor.shape

CPU times: user 4min 20s, sys: 558 ms, total: 4min 20s
Wall time: 3min 53s


torch.Size([4267, 100])

---
## 2.9 - Laplacian Eigenmaps

In [13]:
%%time
# Fit Laplacian Eigenmaps on the undirected graph and extract the node-embedding matrix.
LE_model = LaplacianEigenmaps(
    dimensions=100,
    maximum_number_of_iterations=500000,
)
LE_model.fit(graph)
LE_embedding = LE_model.get_embedding()
LE_embedding_tensor = from_numpy(LE_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   LaplacianEigenmaps___<name>_<value>__<name>_<value>....pt
LE_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in LE_model.get_params().items()
    if name != 'seed'
)
LE_file = path.join(embedding_save_path, 'LaplacianEigenmaps___' + LE_param_tag + '.pt')
save(LE_embedding_tensor, LE_file)

# Last expression: display the tensor shape as the cell output.
LE_embedding_tensor.shape

CPU times: user 21.2 s, sys: 127 ms, total: 21.3 s
Wall time: 12.8 s


torch.Size([4267, 100])

---
## 2.10 - GraRep

In [14]:
%%time
# Fit GraRep on the undirected graph and extract the node-embedding matrix.
# (The recorded run of this cell took about 14 minutes of wall time.)
# Note: the recorded output of this cell has 500 columns although
# `dimensions=100` is requested, so downstream code should not assume a width of 100.
GR_model = GraRep(dimensions=100, iteration=140)
GR_model.fit(graph)
GR_embedding = GR_model.get_embedding()
GR_embedding_tensor = from_numpy(GR_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   GraRep___<name>_<value>__<name>_<value>....pt
GR_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in GR_model.get_params().items()
    if name != 'seed'
)
GR_file = path.join(embedding_save_path, 'GraRep___' + GR_param_tag + '.pt')
save(GR_embedding_tensor, GR_file)

# Last expression: display the tensor shape as the cell output.
GR_embedding_tensor.shape

CPU times: user 13min 50s, sys: 1.76 s, total: 13min 52s
Wall time: 13min 51s


torch.Size([4267, 500])

---
## 2.11 - Walklets

In [15]:
%%time
# Fit Walklets on the undirected graph and extract the node-embedding matrix.
# (The recorded run of this cell took about 42 minutes of wall time.)
# Note: `dimensions=25` is requested here, and the recorded output of this cell
# has 100 columns.
Walklets_model = Walklets(
    dimensions=25,
    epochs=24,
    walk_length=3,
    walk_number=1000,
)
Walklets_model.fit(graph)
Walklets_embedding = Walklets_model.get_embedding()
Walklets_embedding_tensor = from_numpy(Walklets_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   Walklets___<name>_<value>__<name>_<value>....pt
Walklets_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in Walklets_model.get_params().items()
    if name != 'seed'
)
Walklets_file = path.join(embedding_save_path, 'Walklets___' + Walklets_param_tag + '.pt')
save(Walklets_embedding_tensor, Walklets_file)

# Last expression: display the tensor shape as the cell output.
Walklets_embedding_tensor.shape

CPU times: user 1h 18min 25s, sys: 26.9 s, total: 1h 18min 52s
Wall time: 42min 6s


torch.Size([4267, 100])

---
## 2.12 - NMFADMM

In [16]:
%%time
# Fit NMF-ADMM on the undirected graph and extract the node-embedding matrix.
# (The recorded run of this cell took about 6.5 minutes of wall time.)
# Note: the recorded output of this cell has 200 columns although
# `dimensions=100` is requested, so downstream code should not assume a width of 100.
NMFADMM_model = NMFADMM(dimensions=100)
NMFADMM_model.fit(graph)
NMFADMM_embedding = NMFADMM_model.get_embedding()
NMFADMM_embedding_tensor = from_numpy(NMFADMM_embedding)

# The file name encodes every entry returned by get_params() except 'seed' and 'W'.
# NOTE(review): 'W' is presumably fitted state rather than a hyper-parameter,
# which is why it is kept out of the file name — confirm.
NMFADMM_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in NMFADMM_model.get_params().items()
    if name not in ('seed', 'W')
)
NMFADMM_file = path.join(embedding_save_path, 'NMFADMM___' + NMFADMM_param_tag + '.pt')
save(NMFADMM_embedding_tensor, NMFADMM_file)

# Last expression: display the tensor shape as the cell output.
NMFADMM_embedding_tensor.shape

CPU times: user 4min 58s, sys: 2min 8s, total: 7min 6s
Wall time: 6min 26s


torch.Size([4267, 200])

---
## 2.13 - NodeSketch

In [17]:
%%time
# Fit NodeSketch on the undirected graph and extract the node-embedding matrix.
# (The recorded run of this cell took about 36 minutes of wall time.)
NS_model = NodeSketch(dimensions=100)
NS_model.fit(graph)
NS_embedding = NS_model.get_embedding()
NS_embedding_tensor = from_numpy(NS_embedding)

# The file name encodes every entry returned by get_params() except 'seed':
#   NodeSketch___<name>_<value>__<name>_<value>....pt
NS_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in NS_model.get_params().items()
    if name != 'seed'
)
NS_file = path.join(embedding_save_path, 'NodeSketch___' + NS_param_tag + '.pt')
save(NS_embedding_tensor, NS_file)

# Last expression: display the tensor shape as the cell output.
NS_embedding_tensor.shape

CPU times: user 35min 37s, sys: 5.03 s, total: 35min 42s
Wall time: 35min 42s


torch.Size([4267, 100])

---
## 2.14 - DANMF

In [18]:
%%time
# Fit DANMF on the undirected graph and extract the node-embedding matrix.
# (The recorded run of this cell took about 7 minutes of wall time.)
# Note: the width of the result is not set via `dimensions` here; the recorded
# output of this cell has 48 columns, with `layers=[48, 24]`.
DANMF_model = DANMF(
    pre_iterations=200,
    iterations=300,
    layers=[48, 24],
)
DANMF_model.fit(graph)
DANMF_embedding = DANMF_model.get_embedding()
DANMF_embedding_tensor = from_numpy(DANMF_embedding)

# The file name encodes every entry returned by get_params() except 'seed'.
# NOTE(review): if get_params() returns `layers`, the list's str() form
# (brackets, comma and a space) is embedded verbatim in the file name.
DANMF_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in DANMF_model.get_params().items()
    if name != 'seed'
)
DANMF_file = path.join(embedding_save_path, 'DANMF___' + DANMF_param_tag + '.pt')
save(DANMF_embedding_tensor, DANMF_file)

# Last expression: display the tensor shape as the cell output.
DANMF_embedding_tensor.shape



CPU times: user 7min 3s, sys: 12.3 s, total: 7min 15s
Wall time: 6min 58s


torch.Size([4267, 48])

---
## 2.15 - NEU

In [19]:
%%time
# Fit NEU, which is given both the graph and a fresh SocioDim(dimensions=100)
# estimator, then extract the resulting node-embedding matrix.
NEU_model = NEU(T=25)
NEU_base_model = SocioDim(dimensions=100)
NEU_model.fit(graph, NEU_base_model)
NEU_embedding = NEU_model.get_embedding()
NEU_embedding_tensor = from_numpy(NEU_embedding)

# The file name encodes every entry returned by get_params() except 'seed'.
# NOTE(review): get_params() is read after fit(); if NEU keeps the base estimator
# as a public attribute, that object's str() form would end up in the file name —
# inspect the written file name and confirm.
NEU_param_tag = '__'.join(
    f'{name}_{value}'
    for name, value in NEU_model.get_params().items()
    if name != 'seed'
)
NEU_file = path.join(embedding_save_path, 'NEU___' + NEU_param_tag + '.pt')
save(NEU_embedding_tensor, NEU_file)

# Last expression: display the tensor shape as the cell output.
NEU_embedding_tensor.shape

CPU times: user 21.7 s, sys: 357 ms, total: 22.1 s
Wall time: 19.6 s


torch.Size([4267, 100])

---
## 2.16 - GraphWave
*Not executed: fitting GraphWave on this graph takes too long, so the cell below is left commented out.*

In [20]:
# Intentionally disabled: the section heading above marks GraphWave as too long
# to execute, so no GraphWave embedding file is written by this notebook.
# To run it, uncomment every line below (keeping `%%time` as the first line of the cell).
# %%time
# GW_model = GraphWave(sample_number=32)
# GW_model.fit(graph)
# GW_embedding = GW_model.get_embedding()

# GW_embedding_tensor = from_numpy(GW_embedding)
# save(GW_embedding_tensor, path.join(embedding_save_path,'GraphWave___'+'__'.join([k+'_'+str(v) for k,v in GW_model.get_params().items() if k!='seed'])+'.pt'))
# GW_embedding_tensor.shape