In [18]:
# Reformat datasets into adjacency matrices

# Facebook data from: https://snap.stanford.edu/data/ego-Facebook.html

In [19]:
import gzip
import numpy as np
import os
import requests
import tarfile


In [20]:
# Downloads combined and individual facebook networks
link_comb = "https://snap.stanford.edu/data/facebook_combined.txt.gz"
link_ind = "https://snap.stanford.edu/data/facebook.tar.gz"


def _download(url, dest):
    """Fetch `url` and write the response body to the local file `dest`.

    Raises requests.HTTPError on a non-2xx response so that an error page is
    never saved under the archive's name (the isfile() guards below would
    then skip the download on every later run).
    """
    r = requests.get(url, allow_redirects=True, timeout=60)
    r.raise_for_status()
    # Context manager guarantees the handle is flushed and closed.
    with open(dest, "wb") as out:
        out.write(r.content)
    print("File downloaded from", url)


# Each step is skipped when its result is already on disk, so re-running the
# cell does not hit the network again.
if not os.path.isfile("facebook_combined.txt.gz"):
    _download(link_comb, "facebook_combined.txt.gz")

if not os.path.isfile("facebook.tar.gz"):
    _download(link_ind, "facebook.tar.gz")

if not os.path.isdir("facebook"):
    # `with` closes the archive even if extraction fails part-way.
    with tarfile.open("facebook.tar.gz", "r:gz") as tar:
        tar.extractall()
    print("Files extracted for facebook.tar.gz")


In [21]:
# Combined Facebook data, 4039 nodes
# Builds a symmetric adjacency matrix from the combined edge list. The integer
# node ids in the file are used directly as row/column indices, so every id
# has to fit inside the 4039 x 4039 matrix.
matrix = np.zeros((4039, 4039), dtype=int)
with gzip.open("facebook_combined.txt.gz", "rt") as f:
    # The with-block closes the file on exit; no explicit close() is needed.
    content = f.readlines()

for line in content:
    friend = line.split()
    if not friend:
        # Tolerate blank lines instead of failing with an IndexError.
        continue
    u, v = int(friend[0]), int(friend[1])
    # Mirror each edge so the matrix is symmetric. `+=` keeps a count, so an
    # edge that is listed more than once shows up as a value greater than 1.
    matrix[u, v] += 1
    matrix[v, u] += 1

print(matrix)


[[0 1 1 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [22]:
# Facebook data: facebook.tar.gz, contains 10 separate networks
# prints all edge files of facebook data
# os.listdir returns entries in arbitrary order; sorting makes the listing
# reproducible across runs and machines.
files = sorted(os.listdir("facebook/"))
for file in files:
    # Match on the extension only, so names that merely contain ".edges"
    # somewhere in the middle are not listed.
    if file.endswith(".edges"):
        print(file)


0.edges
107.edges
1684.edges
1912.edges
3437.edges
348.edges
3980.edges
414.edges
686.edges
698.edges


In [24]:
# Creates adjacency matrix representation for
# smallest graph in Facebook data
with open("facebook/3980.edges", "rt") as f:
    content = f.readlines()

# First pass: map every node label to a compact 0-based index, assigned in
# order of first appearance, and remember each edge as an index pair.
nodes = dict()
index_pairs = []
for line in content:
    edge = line.split()
    if not edge:
        # Tolerate blank lines instead of failing with an IndexError.
        continue
    # setdefault only inserts when the label is new; len(nodes) is then the
    # next unused index.
    src = nodes.setdefault(edge[0], len(nodes))
    dst = nodes.setdefault(edge[1], len(nodes))
    index_pairs.append((src, dst))

# Second pass: size the matrix from the number of distinct nodes actually
# seen, rather than a hard-coded dimension that must match the file.
matrix2 = np.zeros((len(nodes), len(nodes)), dtype=int)
for src, dst in index_pairs:
    # Mirror each edge so the matrix is symmetric; repeated edges stay at 1.
    matrix2[src, dst] = 1
    matrix2[dst, src] = 1

print(matrix2)


[[0 1 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 0 0]]
