In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import tensorflow as tf
import tensorflow.distributions
from tensorflow.distributions import Dirichlet, Multinomial
from scipy.stats import entropy
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d, Axes3D
np.set_printoptions(suppress=True)

import data_handler as dh
import sinkhorn as skh
import oracles as oracle
import model as md
import time
import pickle

In [2]:
# Show the NumPy version this notebook was executed with.
np.version.version

'1.18.2'

In [3]:
# Input files (.tntp): the network link table and the origin-destination trips.
net_name = 'data/EMA_net.tntp'
trips_name = 'data/EMA_trips.tntp'

# Factor applied to every link capacity read from the network file.
CAPACITY_SCALE = 2.5

handler = dh.DataHandler()
graph_data = handler.GetGraphData(net_name, columns_order = np.array([0, 1, 2, 3, 4]))
graph_data['graph_table']['Capacity'] = CAPACITY_SCALE * graph_data['graph_table']['Capacity']

graph_correspondences, total_od_flow = handler.GetGraphCorrespondences(trips_name)

# Largest label found in the 'Init node' column; used below as the node count.
# `.values` replaces `Series.as_matrix()`, which was removed in pandas 1.0.
# NOTE(review): a node that appears only as a 'Term node' would not be counted
# here - confirm this cannot happen for the input network.
n = np.max(graph_data['graph_table']['Init node'].values)

In [4]:
# Keep the unmodified free-flow times in their own column: the multistage loop
# further down overwrites 'Free Flow Time' and compares against this backup.
graph_table = graph_data['graph_table']
graph_table['Orig Free Flow Time'] = graph_table['Free Flow Time']

In [5]:
# Preview the first rows of the link table, including 'Orig Free Flow Time'.
graph_data['graph_table'].head()

Unnamed: 0,Init node,Term node,Capacity,length,Free Flow Time,Orig Free Flow Time
0,1,3,12345.154297,16.106817,0.238965,0.238965
1,3,1,13135.321289,16.057131,0.240297,0.240297
2,1,7,18274.5625,16.058741,0.222813,0.222813
3,7,1,18471.566406,15.865263,0.218902,0.218902
4,1,9,2910.937256,17.4554,0.402046,0.402046


In [6]:
# `df` is an alias (not a copy) of the link table stored in graph_data.
df = graph_data['graph_table']

# Show the links whose initial node is node 1.
starts_at_node_1 = df['Init node'] == 1
df.loc[starts_at_node_1].head()

Unnamed: 0,Init node,Term node,Capacity,length,Free Flow Time,Orig Free Flow Time
0,1,3,12345.154297,16.106817,0.238965,0.238965
2,1,7,18274.5625,16.058741,0.222813,0.222813
4,1,9,2910.937256,17.4554,0.402046,0.402046


In [7]:
# Dense correspondence matrix and the matrix built from the
# 'Free Flow Time' column; the latter is also dumped to disk.
correspondence_matrix = handler.from_dict_to_cor_matr(graph_correspondences, n)
T = handler.create_C(df, n, column_name='Free Flow Time')
np.savetxt('data/T.csv', T, delimiter=',')

# NaN-aware row and column totals of the correspondence matrix.
row_totals = np.nansum(correspondence_matrix, axis=1)
col_totals = np.nansum(correspondence_matrix, axis=0)

# Total of all row sums, taken before any redistribution or normalisation.
people_num = np.nansum(row_totals)

# Pass both marginals through the handler, then rescale each to sum to one.
L, W = (handler.distributor_L_W(totals) for totals in (row_totals, col_totals))
L, W = L / np.nansum(L), W / np.nansum(W)

print('people_num: ', people_num)

people_num:  65576.37543100001


In [8]:
# Spot check: row 0, column 6 of T should equal the free-flow time of the
# link from node 1 to node 7 shown in the table above.
T[0, 6]

0.22281299531459808

##### Подумаем над T
В данных по Москве время задано в минутах, а в этих данных — в часах ('Time: hours, distance: miles'). Поэтому альфа получается таким большим: оно компенсирует разницу в масштабе времени. Также в T много нулей, поэтому там, где стоят nan, поставим по 100 часов: затраты получаются большими, и поэтому так никто не ездит (сделано там, где инициализируется cost_matrix).

##### Запускаем Синхорна-Универсальный

In [16]:
# List the eps_abs values the solver loop will iterate over.
# With num=1, np.logspace(1, 3, 1) yields the single value 10.0.
eps_abs_grid = np.logspace(1, 3, 1)
for i, eps_abs in enumerate(eps_abs_grid):
    print(i, eps_abs)

0 10.0


In [None]:
# Multistage procedure. Each stage:
#   1. scans a grid of alpha values, reconstructs the correspondence matrix
#      with Sinkhorn for each one and keeps the reconstruction closest
#      (Frobenius norm) to the observed correspondence matrix;
#   2. solves for the equilibrium with the best reconstruction;
#   3. writes the resulting link times into 'Free Flow Time' and rebuilds T.
# The loop runs while the distance between the current link times and the
# original free-flow times exceeds eps.
#
# NOTE(review): the stop criterion compares against 'Orig Free Flow Time'
# rather than against the previous stage's times, so it only reaches eps if
# the equilibrium times coincide with the original ones. If convergence of
# successive stages is what is intended, this needs changing - confirm.
multistage_er = np.inf
eps = 10**(-3)

# Stage-independent settings, hoisted out of the loop.
num_iter = 1 #2500 # for sink
max_iter = 1 #10000 # for univ

alpha, beta = 1.0, 1.0
alpha_range = np.arange(0.0, 7500.0, 500.0)

# Observed correspondences with missing entries treated as zero flow;
# this target does not change between stages.
target_matrix = np.nan_to_num(correspondence_matrix, nan=0.0)

while multistage_er > eps:

    best_matrix = None
    min_reconstruction_error = np.inf
    best_alpha, best_beta = np.nan, np.nan
    er_list = []

    s = skh.Sinkhorn(n, L, W, people_num, num_iter, eps)

    for alpha_idx, alpha in enumerate(alpha_range):
        # NaN entries of T (after scaling) get a fixed cost of 100.
        cost_matrix = np.nan_to_num(T ** beta * alpha, nan=100)

        rec = s.iterate(cost_matrix)
        er = np.linalg.norm(rec - target_matrix)
        er_list.append(er)

        if er < min_reconstruction_error:
            min_reconstruction_error = er
            best_alpha = alpha
            best_matrix = rec

    if best_matrix is None:
        # Every error was NaN (or the grid was empty): fail with a clear message
        # instead of passing None on to the handler.
        raise RuntimeError('Sinkhorn produced no finite reconstruction error '
                           'for any alpha in alpha_range')

    best_correcpondences_dict = handler.from_cor_matrix_to_dict(best_matrix)
    print('Sinkhorn end, univ start \n')
    print('best alpha: ', best_alpha)

    model = md.Model(graph_data, best_correcpondences_dict,
                     total_od_flow, mu = 0.25, rho = 0.15) # B, for stable D mu=0.0

    # Start the timer once, before the solver runs, so the reported elapsed time
    # covers every eps_abs value and not only the last one.
    tic = time.time()
    for eps_abs in np.logspace(1, 3, 1):

        print('eps_abs =', eps_abs)
        solver_kwargs = {'eps_abs': eps_abs,
                         'max_iter': max_iter}
        result = model.find_equilibrium(solver_name = 'ustf',
                                        solver_kwargs = solver_kwargs,
                                        verbose = False)
    toc = time.time()

    print('Elapsed time: {:.0f} sec'.format(toc - tic))
    # Both ratios use the table as it was when the model was built,
    # i.e. before 'Free Flow Time' is overwritten below.
    print('Time ratio =',
          np.max(result['times'] / graph_data['graph_table']['Free Flow Time']))
    print('Flow excess =',
          np.max(result['flows'] / graph_data['graph_table']['Capacity']) - 1,
          end = '\n\n')

    # Feed the equilibrium times back as the next stage's link times.
    # `df` is the same object as graph_data['graph_table'], so it sees the update.
    graph_data['graph_table']['Free Flow Time'] = result['times']
    T = handler.create_C(df, n, column_name='Free Flow Time')

    # `.values` replaces `Series.as_matrix()`, which was removed in pandas 1.0.
    T_orig = df['Orig Free Flow Time'].values
    T_pred = df['Free Flow Time'].values

    multistage_er = np.linalg.norm(T_orig - T_pred)
    print('C-error: ' , multistage_er)

Sinkhorn end, univ start 

best alpha:  7000.0
eps_abs = 10.0
start!

Elapsed time: 3 sec
Time ratio = 1.0006910081020643
Flow excess = 3.827145369461123

