In [1]:
# Reformat datasets into adjacency matrices

# Facebook data from: https://snap.stanford.edu/data/ego-Facebook.html

In [2]:
import gzip
import numpy as np
import os
import requests
import tarfile


In [3]:
# Downloads and unpacks the individual Facebook ego networks.
# Both steps are skipped when their result already exists on disk, so the
# cell can be re-run safely without hitting the network again.
link_ind = "https://snap.stanford.edu/data/facebook.tar.gz"

if not os.path.isfile("facebook.tar.gz"):
    # timeout keeps the cell from hanging forever on a stalled connection
    r = requests.get(link_ind, allow_redirects=True, timeout=60)
    # Fail loudly on an HTTP error rather than saving an error page as the archive
    r.raise_for_status()
    # Context manager guarantees the file is flushed and closed before extraction
    with open("facebook.tar.gz", "wb") as archive:
        archive.write(r.content)
    print("File downloaded from", link_ind)

if not os.path.isdir("facebook"):
    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # acceptable for this known dataset, but do not reuse on untrusted archives.
    with tarfile.open("facebook.tar.gz", "r:gz") as tar:
        tar.extractall()
    print("Files extracted for facebook.tar.gz")


In [4]:
# Facebook graph files
# 3980.edges - graph size: (52, 52)
# 698.edges - graph size: (61, 61)
# 414.edges - graph size: (150, 150)
# 686.edges - graph size: (168, 168)
# 348.edges - graph size: (224, 224)
# 0.edges - graph size: (333, 333)
# 3437.edges - graph size: (534, 534)
# 1912.edges - graph size: (747, 747)
# 1684.edges - graph size: (786, 786)
# 107.edges - graph size: (1034, 1034)

# returns adjacency matrix for given graph file
def partial_facebook(file):
    """Build a symmetric 0/1 adjacency matrix from an edge-list file.

    Each non-blank line is split on whitespace and its first two tokens are
    taken as the endpoints of one edge. Node labels are mapped to row/column
    indices in order of first appearance in the file.

    Args:
        file: Path to the edge-list file.

    Returns:
        A square ``numpy`` integer array of shape ``(n, n)``, where ``n`` is
        the number of distinct node labels. Entry ``[a, b]`` and ``[b, a]``
        are 1 for every edge read; all other entries are 0. An empty file
        yields a ``(0, 0)`` array.

    Raises:
        IndexError: If a non-blank line contains fewer than two tokens.
    """
    nodes = dict()
    pairs = []
    with open(file) as f:
        for line in f:
            edge = line.split()
            if not edge:
                # Blank line (e.g. a trailing newline at end of file): nothing to add
                continue
            # Bind both tokens before touching `nodes`, so a malformed line
            # raises without leaving a half-registered node behind.
            src, dst = edge[0], edge[1]
            # len(nodes) is evaluated before insertion, so a new label gets
            # the next free index and a known label keeps its existing one.
            a = nodes.setdefault(src, len(nodes))
            b = nodes.setdefault(dst, len(nodes))
            pairs.append((a, b))

    size = len(nodes)
    matrix = np.zeros((size, size), dtype=int)
    if pairs:
        rows, cols = np.array(pairs).T
        # Set both directions so the matrix is symmetric (undirected graph)
        matrix[rows, cols] = 1
        matrix[cols, rows] = 1

    return matrix

In [5]:
# Facebook data: facebook.tar.gz contains 10 separate networks.
# Prints each edge-list file name followed by the shape of its adjacency matrix.
# sorted() makes the listing order deterministic; os.listdir() order is arbitrary.
files = sorted(os.listdir("facebook/"))
for file in files:
    # Match on the extension itself, not on any name that merely contains ".edges"
    if file.endswith(".edges"):
        print(file)
        print(partial_facebook("facebook/" + file).shape)


0.edges
(333, 333)
107.edges
(1034, 1034)
1684.edges
(786, 786)
1912.edges
(747, 747)
3437.edges
(534, 534)
348.edges
(224, 224)
3980.edges
(52, 52)
414.edges
(150, 150)
686.edges
(168, 168)
698.edges
(61, 61)
