## Import Libraries

In [1]:
from karateclub import GraphReader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Read the Graph

In [2]:
reader = GraphReader("facebook")

graph = reader.get_graph()
target = reader.get_target()

In [3]:
graph.number_of_nodes()

22470

## DeepWalk Graph Embedding and ML

In [4]:
from karateclub import DeepWalk
deepwalk = DeepWalk(walk_length=100, dimensions=256)
deepwalk.fit(graph)

In [5]:
from sklearn.model_selection import train_test_split
X = deepwalk.get_embedding()
y = target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Logistic Regression

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, classification_report

lr = LogisticRegression()
lr.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [7]:
y_pred = lr.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print('Accuracy: {:.4f}'.format(acc))
print(classification_report(y_test, y_pred))

Accuracy: 0.7219
              precision    recall  f1-score   support

           0       0.74      0.77      0.76      1340
           1       0.64      0.51      0.57       671
           2       0.70      0.71      0.70      1125
           3       0.75      0.79      0.77      1358

    accuracy                           0.72      4494
   macro avg       0.71      0.70      0.70      4494
weighted avg       0.72      0.72      0.72      4494



### Random Forest

In [8]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [9]:
y_pred = rf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print('Accuracy: {:.4f}'.format(acc))
print(classification_report(y_test, y_pred))

Accuracy: 0.6313
              precision    recall  f1-score   support

           0       0.60      0.78      0.68      1340
           1       0.83      0.10      0.17       671
           2       0.66      0.56      0.60      1125
           3       0.64      0.81      0.71      1358

    accuracy                           0.63      4494
   macro avg       0.68      0.56      0.54      4494
weighted avg       0.66      0.63      0.59      4494



### Shallow Neural Network

In [10]:
from sklearn.neural_network import MLPClassifier
neural_net = MLPClassifier(hidden_layer_sizes=(100, 100, 100))
neural_net.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100, 100, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [11]:
y_pred = neural_net.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print('Accuracy: {:.4f}'.format(acc))
print(classification_report(y_test, y_pred))

Accuracy: 0.7971
              precision    recall  f1-score   support

           0       0.80      0.80      0.80      1340
           1       0.69      0.69      0.69       671
           2       0.81      0.80      0.81      1125
           3       0.83      0.84      0.84      1358

    accuracy                           0.80      4494
   macro avg       0.78      0.78      0.78      4494
weighted avg       0.80      0.80      0.80      4494



### Deeper Neural Network with Keras

In [12]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from keras.callbacks import *

Using TensorFlow backend.


In [13]:
model = Sequential()
model.add(Dense(128))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(4, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor="val_loss",
    patience=2,
    verbose=2,
    mode="min")
model.add(Dense(4, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor="val_loss",
    patience=2,
    verbose=2,
    mode="min")

In [14]:
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)

model.fit(X_train, y_train_cat, 
          validation_data=(X_test, y_test_cat),
          batch_size=32, epochs=10,
         callbacks=[early_stopping])

Train on 17976 samples, validate on 4494 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7fe1946bb850>

In [15]:
evaluation = model.evaluate(X_test, y_test_cat)
print('Accuracy: {:.4f}'.format(evaluation[1]))

Accuracy: 0.8055


## Walklets Embedding and ML

In [16]:
from karateclub import Walklets
walklet_embedder = Walklets(dimensions=128)
walklet_embedder.fit(graph)

In [17]:
X = walklet_embedder.get_embedding()
y = target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Logistic Regression

In [18]:
lr = LogisticRegression()
lr.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [19]:
y_pred = lr.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print('Accuracy: {:.4f}'.format(acc))
print(classification_report(y_test, y_pred))

Accuracy: 0.9123
              precision    recall  f1-score   support

           0       0.91      0.92      0.92      1340
           1       0.88      0.85      0.87       671
           2       0.92      0.93      0.93      1125
           3       0.92      0.92      0.92      1358

    accuracy                           0.91      4494
   macro avg       0.91      0.91      0.91      4494
weighted avg       0.91      0.91      0.91      4494



### Shallow Neural Network

In [20]:
from sklearn.neural_network import MLPClassifier
neural_net = MLPClassifier(hidden_layer_sizes=(100, 100, 100))
neural_net.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100, 100, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [21]:
y_pred = neural_net.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print('Accuracy: {:.4f}'.format(acc))
print(classification_report(y_test, y_pred))

Accuracy: 0.9357
              precision    recall  f1-score   support

           0       0.93      0.94      0.94      1340
           1       0.92      0.90      0.91       671
           2       0.95      0.94      0.95      1125
           3       0.93      0.94      0.94      1358

    accuracy                           0.94      4494
   macro avg       0.93      0.93      0.93      4494
weighted avg       0.94      0.94      0.94      4494



### Deep Neural Network with Keras

In [22]:
model = Sequential()
model.add(Dense(128))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(4, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor="val_loss",
    patience=2,
    verbose=2,
    mode="min")

In [23]:
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)

model.fit(X_train, y_train_cat, 
          validation_data=(X_test, y_test_cat),
          batch_size=32, epochs=10,
         callbacks=[early_stopping])

Train on 17976 samples, validate on 4494 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 00008: early stopping


<keras.callbacks.callbacks.History at 0x7fe143f69610>

In [24]:
evaluation = model.evaluate(X_test, y_test_cat)
print('Accuracy: {:.4f}'.format(evaluation[1]))

Accuracy: 0.9250
