In [1]:
import os
import warnings
warnings.simplefilter('ignore')
import numpy as np
import pandas as pd
from pathlib import Path
from glob import glob
from tqdm.auto import tqdm
import joblib

import re
import cv2
import pickle
from scipy.stats import skew, kurtosis
import networkx as nx
from networkx.generators.ego import ego_graph


from collections import deque, defaultdict


def get_netsimile(path):
    with open(path, "rb") as f:
        link = pickle.load(f)
    features = []
    G = nx.Graph()
    for u, e in enumerate(link):
        for v in e:
            G.add_edge(u, v)

    # degree
    ids = [i for i, p in enumerate(link) if p]
    degrees = {i: nx.degree(G)[i] for i in ids}

    # clustering_coefficient
    clustering = nx.clustering(G)

    # neighbor(one-hop)
    neighbor_degrees = {}
    for u, e in enumerate(link):
        if not e: continue
        d = []
        for v in e:
            d.append(degrees[v])
        neighbor_degrees[u] = sum(d) / len(d)

    # clustering_neighbor(one-hop)
    neighbor_clustering = {}
    for u, e in enumerate(link):
        if not e: continue
        d = []
        for v in e:
            d.append(clustering[v])
        neighbor_clustering[u] = sum(d) / len(d)

    ego_in_degree = []
    ego_out_degree = []
    ego_neighbor_nodes = []

    for u, e in enumerate(link):
        if not e: continue
        ego1 = ego_graph(G, u, radius=1)
        ego2 = ego_graph(G, u, radius=2)

        # edges_in_egonet(one-hop)
        ego_in_degree.append(ego1.number_of_edges())

        # edges_outgoing_from_egonet(one-hop)
        ego_out_degree.append(sum(dict(ego1.degree()).values()) - ego1.number_of_edges())

        # neighbor_of_egonet(one-hop)
        ego_neighbor_nodes.append(ego2.number_of_nodes() - ego1.number_of_nodes())


    output = pd.DataFrame({
        'degree': degrees.values(),
        'clustering_coefficient': clustering.values(),
        'neighbor(one-hop)': neighbor_degrees.values(),
        'clustering_neighbor(one-hop)': neighbor_clustering.values(),
        'edges_in_egonet(one-hop)': ego_in_degree,
        'edges_outgoing_from_egonet(one-hop)': ego_out_degree,
        'neighbor_of_egonet(one-hop)': ego_neighbor_nodes
    })

    for col in output.columns:
        values = output[col].values
        features.append(np.mean(values))
        features.append(np.std(values))
        features.append(skew(values))
        features.append(kurtosis(values))

    return [path, features]

In [2]:
columns = [
    'degree',
    'clustering_coefficient',
    'neighbor(one-hop)',
    'clustering_neighbor(one-hop)',
    'edges_in_egonet(one-hop)',
    'edges_outgoing_from_egonet(one-hop)', 
    'neighbor_of_egonet(one-hop)'
]

statistics = [
    '_average',
    '_standard_deviation', 
    '_skewness', 
    '_kurtosis',
]

BASE_DIR = '../'
NETWORK_DIR = os.path.join(BASE_DIR, 'data/processed/network-non-treated-dataset')
link_paths = glob(os.path.join(NETWORK_DIR, 'node-link/*'))

feature_cols = []
for col in columns:
    for sta in statistics:
        feature_cols.append(col + sta)
        
feature_results = joblib.Parallel(n_jobs=-1)(
    joblib.delayed(get_netsimile)(path) for path in tqdm(link_paths)
)

paths = [l[0].split('/')[-1][:-4] for l in feature_results]
features = [l[1] for l in feature_results]

feature_df = pd.concat([
    pd.DataFrame({'path': paths}),
    pd.DataFrame(features, columns=feature_cols)
], axis=1)
feature_df.to_csv(os.path.join(NETWORK_DIR,  'netsimile_features.csv'), index=False)

  0%|          | 0/1 [00:00<?, ?it/s]

In [3]:
display(feature_df)

Unnamed: 0,path,degree_average,degree_standard_deviation,degree_skewness,degree_kurtosis,clustering_coefficient_average,clustering_coefficient_standard_deviation,clustering_coefficient_skewness,clustering_coefficient_kurtosis,neighbor(one-hop)_average,...,edges_in_egonet(one-hop)_skewness,edges_in_egonet(one-hop)_kurtosis,edges_outgoing_from_egonet(one-hop)_average,edges_outgoing_from_egonet(one-hop)_standard_deviation,edges_outgoing_from_egonet(one-hop)_skewness,edges_outgoing_from_egonet(one-hop)_kurtosis,neighbor_of_egonet(one-hop)_average,neighbor_of_egonet(one-hop)_standard_deviation,neighbor_of_egonet(one-hop)_skewness,neighbor_of_egonet(one-hop)_kurtosis
0,Zelkova_serrata_0,2.508331,0.933899,-0.69121,-0.793665,0.007724,0.051841,7.72542,73.453735,2.889092,...,-0.521707,-0.563398,2.534139,0.968582,-0.521707,-0.563398,4.551672,1.954041,-0.062643,-1.127
