## Import Modules

In [46]:
import pandas as pd
import numpy as np
import urllib.request
import os
from progressbar import ProgressBar
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
import PIL.ImageOps    
from PIL import Image
import itertools
import math
import sys

# Base folder that holds the result directories. The trailing '/' is required
# because later paths are built by plain string concatenation.
result_path = './results/'

## Get result directories

In [2]:
# Keep only the sub-directories of the results folder. os.listdir returns bare
# entry names, so each one has to be joined with the folder before testing it;
# checking the bare name would resolve it against the working directory and
# let ordinary files slip through.
files = [f for f in os.listdir('./results')
         if os.path.isdir(os.path.join('./results', f))]

## Test with one result directory

In [3]:
# Folder of the first listed result directory, with a trailing slash so file
# names can be appended directly.
target = f'{result_path}{files[0]}/'

In [4]:
target

'./results/simple_AE_100d_16/'

## Read CSV

In [9]:
# Load the edge table first, then the node table, from the selected folder.
edges, nodes = (pd.read_csv(target + csv_name)
                for csv_name in ('edge.csv', 'node.csv'))

In [27]:
edges.head()

Unnamed: 0,source,target,weight
0,246420,12180,0.129951
1,246420,311560,0.201284
2,246420,319510,0.349951
3,246420,691630,0.1531
4,246420,300380,0.040233


In [28]:
nodes.head()

Unnamed: 0,id,group
0,246420,23
1,12180,56
2,311560,28
3,319510,0
4,691630,1


### Rename columns

In [19]:
# Map the capitalised column labels to the lower-case names used by the rest
# of the notebook; labels that are not present are left untouched by rename.
edge_column_map = {'Source': 'source', 'Target': 'target', 'Weight': 'weight'}
node_column_map = {'Id': 'id', 'modularity_class': 'group'}

edges.rename(columns=edge_column_map, inplace=True)
nodes.rename(columns=node_column_map, inplace=True)

### Reindex nodes

In [108]:
# Index the node table by node id so it can later be joined on that id.
nodes.set_index(keys='id', inplace=True)

In [109]:
len(nodes)

259

### Drop unused columns

In [26]:
# Columns that are not needed by the graph processing below.
unused_edge_columns = ['Type', 'Id', 'Label', 'timeset']
unused_node_columns = ['Label', 'timeset', 'pageranks']

edges.drop(columns=unused_edge_columns, inplace=True)
nodes.drop(columns=unused_node_columns, inplace=True)

## Construct Graph

In [40]:
def constract_graph(source, target, weight):
    """Build an undirected weighted graph from three parallel edge columns.

    Args:
        source: Sequence of source node ids, one entry per edge.
        target: Sequence of target node ids, aligned with ``source``.
        weight: Sequence of edge weights, aligned with ``source``.

    Returns:
        A ``networkx.Graph`` in which every edge stores its weight under the
        ``'weight'`` attribute.

    Raises:
        ValueError: If the three sequences do not have the same length.
    """
    if not len(source) == len(target) == len(weight):
        raise ValueError('Source, target and weight not match.')
    G = nx.Graph()
    # add_weighted_edges_from takes ONE iterable of (u, v, w) triples; its
    # ``weight`` keyword is the attribute name, not the weight value, so the
    # three columns are zipped into triples instead of passed separately.
    G.add_weighted_edges_from(zip(source, target, weight))
    return G

In [43]:
# Turn the three edge columns into plain lists and build the graph from them.
edge_columns = (list(edges[column]) for column in ('source', 'target', 'weight'))
G = constract_graph(*edge_columns)

In [None]:
# Compute the spring layout first, then draw the labelled graph on a large
# figure.
pos = nx.spring_layout(G)
plt.figure(figsize=(17, 15))
nx.draw(G, pos=pos, with_labels=True)

## Read Parameters

In [50]:
# Load the parameter table stored alongside the edge and node files.
constrains = pd.read_csv(f'{target}parameter.csv')

In [59]:
def _read_constrain(table, column, default=0):
    """Return the value of ``column`` in row 0 of ``table``, or ``default``.

    A notice is printed when the value is absent. Only ``KeyError`` (what
    pandas raises for a missing column or a missing row label 0) is treated
    as "absent"; any other error is a real problem and is allowed to surface.
    """
    try:
        return table.loc[0][column]
    except KeyError:
        print('no ' + column + ' constrain')
        return default


# Thresholds default to 0 when the parameter table does not provide them.
w_threhold = _read_constrain(constrains, 'weight')
d_threhold = _read_constrain(constrains, 'degree')

no degree constrain


In [60]:
w_threhold

0.027000000000000003

In [61]:
d_threhold

0

## Remove nodes and edges by constraints

In [62]:
nx.number_of_nodes(G)

259

In [63]:
nx.number_of_edges(G)

31811

In [78]:
# Collect every edge whose weight is strictly greater than the weight
# threshold; the progress bar wraps the edge iteration.
pbar = ProgressBar()
G_weights = nx.get_edge_attributes(G, 'weight')
remove_edges = [
    candidate
    for candidate in pbar(nx.edges(G))
    if G_weights[candidate] > w_threhold
]

100% |########################################################################|


In [80]:
len(remove_edges)

30843

In [81]:
# Delete the collected edges from G in place. Only edges are removed; nodes
# stay in the graph even if they end up without any edge.
G.remove_edges_from(remove_edges)

In [82]:
nx.number_of_nodes(G)

259

In [83]:
nx.number_of_edges(G)

968

## Save the processed data

In [90]:
# Dump the remaining edges as comma-separated "source,target,weight" rows into
# a scratch file in the working directory.
temp_edge_file = 'tempt.csv'
nx.write_weighted_edgelist(G, temp_edge_file, comments='#', delimiter=',')

In [103]:
# Re-load the scratch edge list. The file has no header row, so the three
# positional columns are labelled here; the second one is named 'target' so
# the saved table uses the same column names as the input edge table.
new_edges = pd.read_csv('tempt.csv', header=None,
                        names=['source', 'target', 'weight'])

In [119]:
# Build a table of the graph's node ids, indexed by 'id' and with no other
# columns yet.
node_ids = list(nx.nodes(G))
new_nodes = pd.DataFrame(node_ids)
new_nodes = new_nodes.rename(columns={0: 'id'})
new_nodes = new_nodes.set_index('id')

In [123]:
# Attach the node attributes by matching on the shared 'id' index; every node
# of the graph is kept (left join).
new_nodes = new_nodes.join(nodes, how='left')

In [125]:
# Write the processed tables into the result folder. The edge table is saved
# without its index; the node table keeps its 'id' index as the first column.
edges_out = f'{target}pro_edges.csv'
nodes_out = f'{target}pro_nodes.csv'
new_edges.to_csv(edges_out, index=False)
new_nodes.to_csv(nodes_out, index=True)