In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from graphlib.structures import Graph
import graphlib.algorithms as alg
import graphlib.tools as tls
from time import time
import pandas as pd

# Загрузка данных: soc-wiki-Vote и Middlebury

In [19]:
G1 = Graph('soc-wiki-Vote')
with open('soc-wiki-Vote.txt', mode='r') as f:
    for line in f:
        u, v = line.split()
        G1.add_edge(u, v)

G2 = Graph('Middlebury')
with open('socfb-Middlebury45.txt', mode='r') as f:
    for line in f:
        u, v = line.split()
        G2.add_edge(u, v)

print(G1)        
print(G2)

Граф <soc-wiki-Vote> с 890 вершинами and 2914 ребрами
Граф <Middlebury> с 3076 вершинами and 124610 ребрами


# Генерация пар для сравнения и вычисления точности

In [20]:
data1 = tls.make_data_pairs(G1, number_of_pairs=500)
data2 = tls.make_data_pairs(G2, number_of_pairs=500)

# Отбор ландмарков
## soc-wiki-Vote

In [31]:
dictionary_with_selection_times = dict()

In [32]:
#  время для сэмплирования по наибольшой степени

start_time = time()
landmarks = alg.select_landmarks(graph=G1, method='degree', number_of_landmarks=100)
finish_time=time()

dictionary_with_selection_times['degree'] = finish_time-start_time

In [33]:
#  время для сэмплирования по closest centrality

start_time = time()
landmarks = alg.select_landmarks(graph=G1, method='centrality', number_of_landmarks=100)
finish_time=time()

dictionary_with_selection_times['centrality'] = finish_time-start_time

In [34]:
#  время для сэмплирования по наилучшему покрытию при разном числе пар M

dictionary_with_selection_times['coverage'] = dict()
for M in [100, 200, 300]:
    start_time = time()
    landmarks = alg.select_landmarks(graph=G1, method='coverage', number_of_landmarks=100, number_of_pairs=M)
    finish_time=time()

    dictionary_with_selection_times['coverage'][M] = finish_time-start_time

In [35]:
print('Для датасета soc-wiki-Vote: ')
for key in dictionary_with_selection_times:
    print(f'Стратегия отбора <{key}> требует времени: {dictionary_with_selection_times[key]}')

Для датасета soc-wiki-Vote: 
Стратегия отбора <degree> требует времени: 0.0009949207305908203
Стратегия отбора <centrality> требует времени: 1.0647609233856201
Стратегия отбора <coverage> требует времени: {100: 0.0724797248840332, 200: 0.08717155456542969, 300: 0.15799188613891602}


## Middlebury

In [48]:
dictionary_with_selection_times = dict()

In [49]:
#  время для сэмплирования по наибольшой степени

start_time = time()
landmarks = alg.select_landmarks(graph=G2, method='degree', number_of_landmarks=100)
finish_time=time()

dictionary_with_selection_times['degree'] = finish_time-start_time

In [50]:
#  время для сэмплирования по closest centrality

start_time = time()
landmarks = alg.select_landmarks(graph=G2, method='centrality', number_of_landmarks=100)
finish_time=time()

dictionary_with_selection_times['centrality'] = finish_time-start_time

In [52]:
#  время для сэмплирования по наилучшему покрытию при разном числе пар M

dictionary_with_selection_times['coverage'] = dict()
for M in [100, 200, 300]:
    start_time = time()
    landmarks = alg.select_landmarks(graph=G2, method='coverage', number_of_landmarks=100, number_of_pairs=M)
    finish_time=time()

    dictionary_with_selection_times['coverage'][M] = finish_time-start_time

In [53]:
print('Для датасета Middlebury: ')
for key in dictionary_with_selection_times:
    print(f'Стратегия отбора <{key}> требует времени: {dictionary_with_selection_times[key]}')

Для датасета Middlebury: 
Стратегия отбора <degree> требует времени: 0.002082347869873047
Стратегия отбора <centrality> требует времени: 102.06013298034668
Стратегия отбора <coverage> требует времени: {100: 0.4838860034942627, 200: 1.0908312797546387, 300: 1.4257149696350098}


# Оценка расстояний
## soc-wiki-Vote

In [38]:
results = {'Algorithm':[],
           'Selection strategy': [],
           'Number of Landmarks': [],
           'Approximation Error': [],
           't, estimation': [],
           't, building': []}

algorithms = ['basic', 'LCA']
strategies = ['random', 'degree', 'coverage']
number_of_landmarks = [40, 60, 100]
number_of_pairs_for_coverage=250

In [39]:
for algorithm in algorithms:
    for strategy in strategies:
        for n in number_of_landmarks:
            results['Algorithm'].append(algorithm)
            results['Selection strategy'].append(strategy)
            results['Number of Landmarks'].append(n) 
            approx_error, dict_with_times = tls.experiment(G1, data, algorithm, strategy, n, number_of_pairs_for_coverage, with_time=True)
            results['Approximation Error'].append(approx_error)
            results['t, building'].append(dict_with_times['building'])
            results['t, estimation'].append(dict_with_times['estimation'])

In [40]:
results = pd.DataFrame.from_dict(results)
results

Unnamed: 0,Algorithm,Selection strategy,Number of Landmarks,Approximation Error,"t, estimation","t, building"
0,basic,random,40,0.296155,0.005945,0.05165
1,basic,random,60,0.246324,0.015671,0.058134
2,basic,random,100,0.143152,0.015752,0.117907
3,basic,degree,40,0.0523,0.0,0.046862
4,basic,degree,60,0.0396,0.0,0.078119
5,basic,degree,100,0.025233,0.0,0.124984
6,basic,coverage,40,0.0411,0.0,0.046866
7,basic,coverage,60,0.048367,0.0,0.071623
8,basic,coverage,100,0.0294,0.009966,0.166111
9,LCA,random,40,0.011471,0.047644,0.250581


In [41]:
#  построение эмбеддингов для BASIC; построение деревьев для LCA
results.groupby('Algorithm').aggregate({'t, building': 'mean'})

Unnamed: 0_level_0,"t, building"
Algorithm,Unnamed: 1_level_1
LCA,0.240482
basic,0.084695


In [55]:
#  время оценивания для BASIC; время оценивания для LCA
results.groupby('Algorithm').aggregate({'t, estimation': 'mean'})

Unnamed: 0_level_0,"t, estimation"
Algorithm,Unnamed: 1_level_1
LCA,0.085892
basic,0.013705


## Middlebury

In [43]:
results = {'Algorithm':[],
           'Selection strategy': [],
           'Number of Landmarks': [],
           'Approximation Error': [],
           't, estimation': [],
           't, building': []}

algorithms = ['basic', 'LCA']
strategies = ['random', 'degree', 'coverage']
number_of_landmarks = [40, 60, 100]
number_of_pairs_for_coverage=250

In [44]:
for algorithm in algorithms:
    for strategy in strategies:
        for n in number_of_landmarks:
            results['Algorithm'].append(algorithm)
            results['Selection strategy'].append(strategy)
            results['Number of Landmarks'].append(n) 
            approx_error, dict_with_times = tls.experiment(G2, data, algorithm, strategy, n, number_of_pairs_for_coverage, with_time=True)
            results['Approximation Error'].append(approx_error)
            results['t, building'].append(dict_with_times['building'])
            results['t, estimation'].append(dict_with_times['estimation'])

In [45]:
results = pd.DataFrame.from_dict(results)
results

Unnamed: 0,Algorithm,Selection strategy,Number of Landmarks,Approximation Error,"t, estimation","t, building"
0,basic,random,40,6190.417383,0.00797,1.324379
1,basic,random,60,6190.36595,0.018556,2.096325
2,basic,random,100,6190.346855,0.01562,3.373344
3,basic,degree,40,6190.293317,0.0,1.332683
4,basic,degree,60,6190.270912,0.018502,2.024051
5,basic,degree,100,6190.247179,0.03125,3.321882
6,basic,coverage,40,6190.317488,0.015822,1.316197
7,basic,coverage,60,6190.29904,0.0,1.975692
8,basic,coverage,100,6190.275145,0.015623,3.315771
9,LCA,random,40,6190.291448,0.04687,1.716297


In [46]:
#  построение эмбеддингов для BASIC; построение деревьев для LCA
results.groupby('Algorithm').aggregate({'t, building': 'mean'})

Unnamed: 0_level_0,"t, building"
Algorithm,Unnamed: 1_level_1
LCA,2.678601
basic,2.231147


In [47]:
#  время оценивания для BASIC; время оценивания для LCA
results.groupby('Algorithm').aggregate({'t, estimation': 'mean'})

Unnamed: 0_level_0,"t, estimation"
Algorithm,Unnamed: 1_level_1
LCA,0.085892
basic,0.013705
