# Parse dataset into dict

In [90]:
def parse_txt(filename, oriented=True):
    '''
    Parse a whitespace-separated edge list into an adjacency dict (via sets).

    Every data line must hold exactly two integers: "parent child".
    Lines whose first field starts with '#' and blank lines are skipped.

    filename: path of the text file to read
    oriented: if True only the link parent -> child is stored;
              if False the reversed link child -> parent is stored as well

    Returns a dict: vertex -> {'linked': set of neighbours,
                               'degree': number of distinct neighbours,
                               'marked': False}
    The 'linked' sets are not JSON serializable.
    In oriented mode a vertex that only appears as a child gets no entry.

    Raises ValueError for a data line that is not two integers.
    '''
    data = {}

    def add_link(src, dst):
        # Rows in the data file can be duplicated, so the degree is only
        # bumped when the neighbour is really new.
        node = data.get(src)
        if node is None:
            data[src] = {
                'linked': {dst},
                'degree': 1,
                'marked': False
            }
        elif dst not in node['linked']:
            node['linked'].add(dst)
            node['degree'] += 1

    with open(filename) as file:
        for line in file:
            fields = line.split()

            # skip blank lines and comments
            if not fields or fields[0].startswith('#'):
                continue

            parent, child = fields
            parent = int(parent)
            child = int(child)

            add_link(parent, child)

            # add reversed link for a not oriented graph
            if not oriented:
                add_link(child, parent)

    return data


def parse(filename, oriented=True):
    '''
    Pick a parser from the file extension.

    Only the 'txt' extension is handled (delegated to parse_txt);
    for any other file name the result is None.
    '''
    extension = filename.rpartition('.')[2]
    if extension != 'txt':
        return None
    return parse_txt(filename, oriented)

In [31]:
def parse_txt(filename, oriented=True):
    '''
    Parse a whitespace-separated edge list into an adjacency dict (via lists).

    Every data line must hold exactly two integers: "parent child".
    Lines whose first field starts with '#' and blank lines are skipped.

    filename: path of the text file to read
    oriented: if True only the link parent -> child is stored;
              if False the reversed link child -> parent is stored as well

    Returns a dict: vertex -> {'linked': list of distinct neighbours in
                                         order of first appearance,
                               'distances': {} (empty, filled in later),
                               'degree': len of 'linked',
                               'marked': False}
    In oriented mode a vertex that only appears as a child gets no entry.

    Raises ValueError for a data line that is not two integers.
    '''
    data = {}
    # Directed links already stored. Rows in the data file can be duplicated,
    # and a set lookup avoids scanning the neighbour list on every row.
    seen = set()

    with open(filename) as file:
        for line in file:
            fields = line.split()

            # skip blank lines and comments
            if not fields or fields[0].startswith('#'):
                continue

            parent, child = fields
            parent = int(parent)
            child = int(child)

            links = [(parent, child)]
            if not oriented:
                links.append((child, parent))

            for src, dst in links:
                if (src, dst) in seen:
                    continue
                seen.add((src, dst))

                node = data.get(src)
                if node is None:
                    data[src] = {
                        'linked': [dst],
                        'distances': {},
                        'degree': 1,
                        'marked': False
                    }
                else:
                    node['linked'].append(dst)
                    node['degree'] += 1

    return data
def parse(filename, oriented=True):
    '''
    Choose the parser by file extension.

    Files ending in the 'txt' extension go to parse_txt;
    every other file name gives None.
    '''
    _, _, suffix = filename.rpartition('.')
    return parse_txt(filename, oriented) if suffix == 'txt' else None

In [42]:
# Load the edge list as a not oriented graph: with oriented=False the parser
# stores every link in both directions.
data = parse('test.txt', oriented=False)

In [45]:
# Rank the vertices by degree, highest first.
data_sorted = sorted(data.items(), key=lambda item: item[1]['degree'], reverse=True)
graph_size = len(data_sorted)

# Share of the highest-degree vertices that become landmarks, in percent.
percent_of_landmarks = 10
# Multiply before dividing and floor-divide: int(size * (percent / 100)) can
# come out one too low because of float rounding
# (e.g. 100 * (29 / 100) == 28.999999999999996).
number_of_landmarks = int(graph_size * percent_of_landmarks // 100)

landmarks = [vertex for vertex, _ in data_sorted[:number_of_landmarks]]

print(graph_size)
print(number_of_landmarks)
print(landmarks)

438
43
[106274, 94138, 84424, 127393, 60471, 89308, 63225, 32432, 61571, 73543, 104802, 21937, 49934, 64124, 89994, 109016, 113138, 10976, 23986, 28516, 36907, 38921, 46847, 47856, 64054, 69582, 94235, 39902, 57772, 110210, 121399, 26713, 1086, 5866, 5959, 6358, 6540, 7026, 8381, 8833, 12203, 13219, 14651]


2

In [17]:
# Inspect the neighbour lists of two vertices.
print(data[50641]['linked'])
print(data[26325]['linked'])

[57507, 26325, 49676]
[57507, 49676, 50641]


In [24]:
from collections import deque 

def count_distance(vertice, data):
    '''
    Counts distances from the given vertice to all others in the connectivity
    component of that vertice, based on BFS.

    For every reached vertex v, data[v]['distances'][vertice] is set to the
    number of edges on a shortest path from vertice to v (0 for vertice
    itself). Vertices that are not reached are left untouched.

    vertice: index of source vertice
    data: dict with information about graph; every visited entry needs the
          keys 'linked', 'distances' and 'marked'

    The 'marked' flags serve as the visited set. They are reset to False for
    the whole graph before returning, also when the traversal raises
    (e.g. KeyError for a neighbour that has no entry in data), so a failed
    call cannot leave stale marks behind for the next one.
    '''
    try:
        data[vertice]['marked'] = True

        current_distance = 0
        current_level = [vertice]   # vertices at current_distance
        next_level = []             # vertices at current_distance + 1

        while current_level:
            v = current_level.pop()
            data[v]['distances'][vertice] = current_distance

            for neighbour in data[v]['linked']:
                entry = data[neighbour]
                if not entry['marked']:
                    entry['marked'] = True
                    next_level.append(neighbour)

            # the whole level is processed: move one step further away
            if not current_level:
                current_level, next_level = next_level, []
                current_distance += 1
    finally:
        for info in data.values():
            info['marked'] = False

    
def count_distances(data):
    '''
    Run count_distance once from every vertex of the graph, so that the
    distances are recorded for each source vertex in data.
    '''
    for vertex in data:
        count_distance(vertex, data)
        
def bfs(source, stock, data):
    '''
    Count distance from source to stock.

    source: index of the start vertex
    stock: index of the target vertex
    data: dict with information about graph; every visited entry needs the
          keys 'linked' and 'marked'

    Returns the number of edges on a shortest path from source to stock
    (0 when they are the same vertex), or -1 when stock is not reachable.

    The 'marked' flags serve as the visited set. They are reset to False for
    the whole graph before returning, also when the search raises
    (e.g. KeyError for a neighbour that has no entry in data), so a failed
    call cannot leave stale marks behind for the next one.
    '''
    try:
        data[source]['marked'] = True

        current_distance = 0
        current_level = [source]    # vertices at current_distance
        next_level = []             # vertices at current_distance + 1

        while current_level:
            v = current_level.pop()

            if v == stock:
                return current_distance

            for neighbour in data[v]['linked']:
                entry = data[neighbour]
                if not entry['marked']:
                    entry['marked'] = True
                    next_level.append(neighbour)

            # the whole level is processed: move one step further away
            if not current_level:
                current_level, next_level = next_level, []
                current_distance += 1

        # the component of source is exhausted without meeting stock
        return -1
    finally:
        for info in data.values():
            info['marked'] = False

In [29]:
# Small hand-made graph for trying out bfs: every vertex gets its neighbour
# list, an empty distance table and a cleared 'marked' flag.
d = {
    vertex: {
        'linked': neighbours,
        'distances': {},
        'marked': False
    }
    for vertex, neighbours in {
        1: [2, 3, 4, 10],
        2: [1, 5, 6, 7],
        3: [1, 8],
        4: [1, 9, 10],
        5: [2],
        6: [2],
        7: [2],
        8: [3],
        9: [4],
        10: [4, 1],
    }.items()
}

print(bfs(1, 3, d))

0
