In [15]:
import os

import networkx as nx
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler

import utils

In [16]:
# Clustering parameters. The "default" pair is passed further down to
# utils.enhance_with_clusterings as `eps` / `min_samples`.
default_cluster_eps = 0.021
default_cluster_samples = 10

# Alternative parameter pair with a smaller eps; not referenced by any cell
# visible in this part of the notebook.
lower_cluster_eps = 0.012
lower_min_samples = 10

# Load the two space-separated edge lists; both get the column names 'from' / 'to'.
# NOTE(review): only the bitcoin file is read with header=0, which (together with
# names=...) makes pandas treat the first line as a header and replace it. The
# telenor file is read without header=0. Confirm the bitcoin file really starts
# with a header line; otherwise its first edge is silently dropped.
btc_edgelist = pd.read_csv('../struc2vec/graph/bitcoin-undirected.edgelist', header=0, sep=' ', names=['from', 'to'])
telenor_edgelist = pd.read_csv('../struc2vec/graph/telenor-undirected.edgelist', sep=' ', names=['from', 'to'])

# Build one NetworkX graph per edge list, using 'from' / 'to' as the endpoints.
G_btc = nx.from_pandas_edgelist(btc_edgelist, 'from', 'to')
G_telenor = nx.from_pandas_edgelist(telenor_edgelist, 'from', 'to')

In [17]:
# Graph analysed by the rest of the notebook.
# NOTE(review): the final save cell hard-codes the dataset name 'telenor';
# keep both in sync when switching to G_btc.
graph = G_telenor
# utils.extract_graph_info returns four sequences that are unpacked here;
# presumably they are parallel, with one entry per node — TODO confirm in utils.
ids, ccs, gammas, degrees = utils.extract_graph_info(graph)
# Baseline feature table: one row per id, three feature columns.
graph_df = pd.DataFrame({"Cluster Coefficient": ccs, "Gamma": gammas, "Node Degree": degrees}, index=ids)

100%|██████████| 9514/9514 [00:01<00:00, 5570.60it/s]


In [18]:
def create_projectable(emb):
    """Return a copy of ``emb`` whose "Node Degree" column is log-scaled into [0, 1].

    The degrees are transformed with ``log10`` and then min-max scaled so that
    the smallest log-degree maps to 0 and the largest to 1. All other columns
    and the index are left untouched, and the input frame is not modified.

    Parameters
    ----------
    emb : pandas.DataFrame
        Frame containing a "Node Degree" column. Degrees must be strictly
        positive, because ``log10(0)`` is ``-inf``.

    Returns
    -------
    pandas.DataFrame
        Deep copy of ``emb`` with the rescaled "Node Degree" column.
    """
    projectable = emb.copy(deep=True)

    # MinMaxScaler expects a 2-D (n_samples, n_features) array, hence the reshape.
    log_degrees = np.log10(projectable["Node Degree"].values).reshape(-1, 1)

    # Fit and transform the whole column in one call. Transforming row by row
    # hands the scaler a scalar, which current scikit-learn rejects, and costs
    # one Python call per node.
    scaler = MinMaxScaler(feature_range=(0, 1))
    projectable["Node Degree"] = scaler.fit_transform(log_degrees).ravel()
    return projectable

def save_baselines(baseline_projectable, baseline_2d, metadata, dataset):
    """Write the baseline artefacts for ``dataset`` below ``emb/`` (relative to the cwd).

    Files written:

    * ``emb/3d/baseline_<dataset>_3d_projectable.csv`` - tab-separated, no header, no index
    * ``emb/2d/baseline_<dataset>.csv`` - space-separated, with header and index
    * ``emb/3d/baseline_<dataset>_3d_metadata.csv`` - tab-separated, with header, no index

    Missing target directories are created first, because ``DataFrame.to_csv``
    does not create them and would otherwise fail on a fresh checkout.

    Parameters
    ----------
    baseline_projectable, baseline_2d, metadata : pandas.DataFrame
        Frames to serialise.
    dataset : str
        Name inserted into the three file names.
    """
    projectable_path = 'emb/3d/baseline_{}_3d_projectable.csv'.format(dataset)
    baseline_2d_path = 'emb/2d/baseline_{}.csv'.format(dataset)
    metadata_path = 'emb/3d/baseline_{}_3d_metadata.csv'.format(dataset)

    for target in (projectable_path, baseline_2d_path, metadata_path):
        os.makedirs(os.path.dirname(target), exist_ok=True)

    baseline_projectable.to_csv(projectable_path, header=False, sep='\t', index=False)
    baseline_2d.to_csv(baseline_2d_path, sep=' ')
    metadata.to_csv(metadata_path, sep='\t', index=False)

In [19]:
# Cluster the baseline features and create the projectable embedding.
# NOTE(review): graph_df is handed to utils.enhance_with_clusterings before it is
# passed to create_projectable; this presumably leaves graph_df unmodified —
# TODO confirm in utils, otherwise the projectable frame gains extra columns.
baseline_clusters_df = utils.enhance_with_clusterings(graph_df, eps=default_cluster_eps, min_samples=default_cluster_samples, all_cols=True)
projectable_baseline = create_projectable(graph_df)

In [20]:
# Create metadata: one row per node with its id and its cluster label.
# Both columns are taken as plain arrays (.values), so they are paired by
# position; this assumes baseline_clusters_df keeps the row order of graph_df
# — TODO confirm.
projectable_metadata = pd.DataFrame({'Id': graph_df.index.values, 'Cluster': baseline_clusters_df['Cluster'].values})

In [11]:
# Create 2D baseline
# t-SNE is stochastic: pin the seed so that Restart & Run All reproduces the
# same layout (and the same saved CSV). n_components is spelled out because
# the following cell labels exactly two columns ('X', 'Y').
# NOTE(review): the embedding is fitted on graph_df, i.e. on the raw, unscaled
# "Node Degree" values, not on projectable_baseline. Confirm that is intended.
tsne = TSNE(n_components=2, random_state=42)
tsne_baseline = tsne.fit_transform(graph_df)

In [12]:
# Wrap the t-SNE coordinates in a frame with columns 'X' / 'Y'.
# The index is taken from baseline_clusters_df although the embedding was
# fitted on graph_df; this assumes both frames share the same row order
# — TODO confirm.
tsne_baseline_df = pd.DataFrame(tsne_baseline, columns=['X', 'Y'], index=baseline_clusters_df.index)

In [13]:
# Preview only the first rows of the 2D baseline instead of rendering the
# whole frame (one row per node).
tsne_baseline_df.head(10)

Unnamed: 0,X,Y
6,17.429445,41.266598
5,67.055405,-12.275494
10,-3.626526,68.431259
4,14.626187,45.100266
32,-26.321480,37.569317
7,9.385465,52.422543
35,8.759176,53.362858
114,-1.797700,69.554955
77,39.924179,21.393282
173,21.754457,64.176331


In [14]:
# Persist the projectable embedding, the 2D t-SNE baseline and the metadata.
# NOTE(review): the dataset name is hard-coded; it must match the graph chosen
# in the `graph = ...` cell, otherwise files are written under the wrong name.
save_baselines(projectable_baseline, tsne_baseline_df, projectable_metadata, 'telenor')