In [2]:
import csv

def parse_txt(filename, oriented=True):
    """
    Parse a whitespace-separated edge list from a txt file into a dict.

    Every data line holds two integer vertex ids, ``parent child``.
    Blank lines and lines whose first token starts with '#' are skipped.
    Rows in the file may be duplicated; each edge is stored only once.

    Args:
        filename: path of the edge-list file to read.
        oriented: if True, an edge is appended only to the parent's
            'linked' list (the child is still registered as a vertex);
            if False, the edge is appended to both endpoints.

    Returns:
        dict mapping vertex id (int) to a record with the keys 'linked',
        'distances', 'degree', 'centrality', 'marked' and 'active'.
        Keys appear in order of first occurrence in the file.
        JSON serializable.

    Raises:
        ValueError: if a data line does not consist of exactly two
            integers.
    """
    data = {}

    def new_vertex(linked):
        # 'degree' starts at 1 even when 'linked' is empty (a vertex first
        # seen only as a child in oriented mode), and later incoming edges
        # do not raise it. Kept exactly as before.
        # NOTE(review): confirm the intended meaning of 'degree' here.
        return {
            'linked': linked,
            'distances': {},
            'degree': 1,
            'centrality': 0,
            'marked': False,
            'active': True
        }

    def link(source, target):
        # Record target as a neighbour of source, ignoring duplicated rows.
        vertex = data.get(source)
        if vertex is None:
            data[source] = new_vertex([target])
        elif target not in vertex['linked']:
            vertex['linked'].append(target)
            vertex['degree'] += 1

    with open(filename) as file:
        for line in file:
            fields = line.split()

            # skip blank lines and comments; unpacking an empty split
            # would otherwise raise ValueError
            if not fields or fields[0].startswith('#'):
                continue

            parent, child = fields
            parent = int(parent)
            child = int(child)

            link(parent, child)

            if not oriented:
                link(child, parent)
            elif child not in data:
                # make sure the child exists as a vertex of its own
                data[child] = new_vertex([])

    return data

def parse_csv(filename, oriented=True):
    """
    Parse an edge list from a csv file into a dict.

    The first row is treated as a header and skipped. In every following
    row the first two columns are read as integer vertex ids,
    ``parent, child``; any further columns are ignored. Blank rows are
    skipped and duplicated rows are stored only once.

    Args:
        filename: path of the csv file to read.
        oriented: if True, an edge is appended only to the parent's
            'linked' list (the child is still registered as a vertex);
            if False, the edge is appended to both endpoints.

    Returns:
        dict mapping vertex id (int) to a record with the keys 'linked',
        'distances', 'degree', 'centrality', 'marked' and 'active'.
        Keys appear in order of first occurrence in the file. An empty
        file yields an empty dict.

    Raises:
        ValueError: if one of the first two columns is not an integer.
        IndexError: if a non-blank row has fewer than two columns.
    """
    data = {}

    def new_vertex(linked):
        # 'degree' starts at 1 even when 'linked' is empty (a vertex first
        # seen only as a child in oriented mode), and later incoming edges
        # do not raise it. Kept exactly as before.
        # NOTE(review): confirm the intended meaning of 'degree' here.
        return {
            'linked': linked,
            'distances': {},
            'degree': 1,
            'centrality': 0,
            'marked': False,
            'active': True
        }

    def link(source, target):
        # Record target as a neighbour of source, ignoring duplicated rows.
        vertex = data.get(source)
        if vertex is None:
            data[source] = new_vertex([target])
        elif target not in vertex['linked']:
            vertex['linked'].append(target)
            vertex['degree'] += 1

    # newline='' leaves line-ending handling to the csv module
    with open(filename, newline='') as file:
        reader = csv.reader(file)

        # skip the header; the default keeps an empty file from raising
        # StopIteration
        next(reader, None)

        for row in reader:
            # csv.reader yields an empty list for a blank line
            if not row:
                continue

            parent = int(row[0])
            child = int(row[1])

            link(parent, child)

            if not oriented:
                link(child, parent)
            elif child not in data:
                # make sure the child exists as a vertex of its own
                data[child] = new_vertex([])

    return data

def parse(filename, oriented=True):
    """
    Parse a graph file, choosing the parser from the file extension.

    Args:
        filename: path of the file to read. The text after the last '.'
            selects the parser, case-insensitively: 'txt' -> parse_txt,
            'csv' -> parse_csv.
        oriented: forwarded unchanged to the selected parser.

    Returns:
        The dict produced by the selected parser.

    Raises:
        ValueError: if the extension is neither 'txt' nor 'csv'.
    """
    # str() lets path-like objects through the extension check
    extension = str(filename).split('.')[-1].lower()

    if extension == 'txt':
        return parse_txt(filename, oriented)
    if extension == 'csv':
        return parse_csv(filename, oriented)

    # Fail here rather than return None and let downstream code break
    # on it later.
    raise ValueError(
        "unsupported file type for {!r}: expected a .txt or .csv file".format(filename)
    )

In [3]:
# Edge-list file to load; parse() picks the parser from its extension.
FILENAME = 'test-vk.csv'
# Passed to parse() as `oriented`; False stores every edge on both endpoints.
ORIENTED = False
# NOTE(review): not used in the visible cells — presumably the share (in %)
# of vertices chosen as landmarks; confirm against the cells that use it.
LANDMARKS_PERCENT = 10
# NOTE(review): not used in the visible cells — presumably the share (in %)
# of vertices sampled for testing; confirm against the cells that use it.
TEST_VERTICES_PERCENT = 10

In [4]:
# Build the vertex dict for the configured file.
data = parse(FILENAME, ORIENTED)
# Prints the whole dict, one record per vertex, so the output grows with the graph.
print(data)

{3248374: {'linked': [11431799], 'distances': {}, 'degree': 1, 'centrality': 0, 'marked': False, 'active': True}, 11431799: {'linked': [3248374], 'distances': {}, 'degree': 1, 'centrality': 0, 'marked': False, 'active': True}, 3429130: {'linked': [4795235], 'distances': {}, 'degree': 1, 'centrality': 0, 'marked': False, 'active': True}, 4795235: {'linked': [3429130], 'distances': {}, 'degree': 1, 'centrality': 0, 'marked': False, 'active': True}, 105512: {'linked': [8910840], 'distances': {}, 'degree': 1, 'centrality': 0, 'marked': False, 'active': True}, 8910840: {'linked': [105512], 'distances': {}, 'degree': 1, 'centrality': 0, 'marked': False, 'active': True}, 7900466: {'linked': [9739979], 'distances': {}, 'degree': 1, 'centrality': 0, 'marked': False, 'active': True}, 9739979: {'linked': [7900466], 'distances': {}, 'degree': 1, 'centrality': 0, 'marked': False, 'active': True}, 7296485: {'linked': [9838285], 'distances': {}, 'degree': 1, 'centrality': 0, 'marked': False, 'active'