Each directory should contain three files:
- `attribute.txt`
- `graph_ic.inf`
- `graph.txt`

The file `attribute.txt` has a format like this:
```
n=500
m=1689
```
where `n` is the number of nodes and `m` is the number of edges.

The file `graph_ic.inf` has the following format:
```
0 1 0.2
0 2 0.16666666666666666
1 0 0.25
...
...
498 493 0.125
499 498 0.25
499 495 0.125
```
where the first and second numbers are the node ids, and the third number is the probability of the directed edge. 

The file `graph.txt` is the same as `graph_ic.inf`, except that the third column is dropped:
```
0 1
0 2
1 0
...
...
498 493
499 498
499 495
```

In [9]:
import os
import pickle
import shutil
import json
from os.path import join as ojoin

In [10]:
data_dir = os.path.abspath(ojoin('..','..','..','code','fairim','networks'))

In [11]:
def get_attribute_values(network):
    """Collect the distinct values of every node attribute in a network.

    Unpickles ``<data_dir>/<network>.pickle`` and iterates the loaded
    object's ``nodes(data=True)`` pairs.

    Returns a dict mapping each attribute name to the set of values that
    appear for it on any node.
    """
    pickle_path = ojoin(data_dir, '%s.pickle'%network)
    with open(pickle_path, 'rb') as fh:
        graph = pickle.load(fh)

    values_by_attribute = {}
    for _, node_attrs in graph.nodes(data=True):
        for name, value in node_attrs.items():
            # Create the set on first sight of an attribute, then record the value.
            values_by_attribute.setdefault(name, set()).add(value)

    return values_by_attribute

In [12]:
# Take network 0 as the reference and check that each of the 24 networks
# exposes exactly the same set of values for every attribute.
attribute_values = get_attribute_values('graph_spa_500_0')
for idx in range(24):
    assert get_attribute_values('graph_spa_500_%d' % idx) == attribute_values

# Convert each value set to a list (the mapping is dumped to JSON further down).
attribute_values = {
    name: list(values) for name, values in attribute_values.items()
}

print(attribute_values)

{'region': ['northwest_palmdale', 'desert_view_highlands', 'northeast_antelope_valley', 'palmdale', 'southeast_antelope_valley', 'littlerock', 'northwest_antelope_valley', 'acton', 'lancaster', 'lake_los_angeles', 'sun_village', 'leona_valley', 'quartz_hill'], 'ethnicity': ['latino', 'black', 'other', 'white', 'asian'], 'age': ['30-39', '50-59', '25-29', '65+', '18-24', '60-64', '40-49'], 'gender': ['male', 'female'], 'status': ['overweight', 'obese', 'normal']}


In [13]:
# Save the attribute -> values mapping as JSON in the current working directory.
with open('attribute_values.json', 'w') as out_file:
    out_file.write(json.dumps(attribute_values))

In [14]:
def mkdir_fresh(dir_path):
    """Create ``dir_path`` as an empty directory.

    If a directory already exists at ``dir_path`` it is deleted together
    with everything inside it before being recreated. Missing parent
    directories are created as needed.
    """
    if os.path.isdir(dir_path):
        shutil.rmtree(dir_path)
    # makedirs rather than mkdir: a missing parent (e.g. a datasets root that
    # has not been created yet) would otherwise raise FileNotFoundError.
    os.makedirs(dir_path)
    
# Build the tree <rootdir>/<network>/<attribute>/<value>/ for all 24 networks.
# Every level is (re)created empty, and each leaf gets an empty `.keep` file.
rootdir = os.path.abspath(ojoin('..', 'datasets'))
for network_id in range(24):
    network_name = 'graph_spa_500_%d' % network_id
    network_dir = ojoin(rootdir, network_name)
    mkdir_fresh(network_dir)

    for attribute, values in attribute_values.items():
        # one sub-directory per attribute
        attribute_dir = ojoin(network_dir, attribute)
        mkdir_fresh(attribute_dir)

        for value in values:
            # one sub-directory per attribute value
            value_dir = ojoin(attribute_dir, value)
            mkdir_fresh(value_dir)

            # touch an empty placeholder file inside the leaf directory
            keep_file = ojoin(value_dir, '.keep')
            with open(keep_file, 'a'):
                pass