## Get data

In [1]:
import pandas as pd
df = pd.read_csv ('../data/historical_data.csv')
print(df)

              Date          High           Low          Open         Close  \
0       2022-05-01  38627.859375  37585.789062  37713.265625  38469.093750   
1       2022-05-02  39074.972656  38156.562500  38472.187500  38529.328125   
2       2022-05-03  38629.996094  37585.621094  38528.109375  37750.453125   
3       2022-05-04  39902.949219  37732.058594  37748.011719  39698.371094   
4       2022-05-05  39789.281250  35856.515625  39695.746094  36575.140625   
...            ...           ...           ...           ...           ...   
128323  2022-05-09      0.011089      0.006107      0.008104      0.006159   
128324  2022-05-10      0.007430      0.006019      0.006157      0.006023   
128325  2022-05-11      0.006550      0.004862      0.006023      0.004865   
128326  2022-05-12      0.004941      0.004618      0.004865      0.004663   
128327  2022-05-13      0.009424      0.004643      0.004666      0.006798   

              Volume     Adj Close ticker  
0       2.700276e+1

In [2]:
df.Date.unique()

array(['2022-05-01', '2022-05-02', '2022-05-03', '2022-05-04',
       '2022-05-05', '2022-05-06', '2022-05-07', '2022-05-08',
       '2022-05-09', '2022-05-10', '2022-05-11', '2022-05-13',
       '2022-05-12'], dtype=object)

In [6]:
df_date = df[df['Date']=='2022-05-12']
print(df_date.shape)
df_date.head()

(9726, 8)


Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close,ticker
1193,2022-05-12,1.275409,1.053248,1.226878,1.134983,69722642.0,1.134983,GLMR
1242,2022-05-12,2.200926,1.712324,2.138495,1.842947,20852633.0,1.842947,SCRT
1338,2022-05-12,0.286789,0.206049,0.264309,0.275917,3879599.0,0.275917,CHSB
1351,2022-05-12,1.193851,0.979395,1.150548,1.036172,75239860.0,1.036172,SRM
1411,2022-05-12,0.028293,0.021622,0.026671,0.025651,37706601.0,0.025651,IOTX


In [9]:
df_random = df_date.sample(n = 100)
print(df_random.shape)
df_random.head()

(100, 8)


Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close,ticker
111690,2022-05-12,0.001502,0.001318,0.001447,0.001452,0.0,0.001452,AXIOM
97802,2022-05-12,3e-06,3e-06,3e-06,3e-06,288.0,3e-06,BMBO
78276,2022-05-12,1.670596,1.277353,1.575393,1.513997,6840.0,1.513997,ELONBANK
82103,2022-05-12,1.173182,1.055553,1.171831,1.110221,4025.0,1.110221,CE
108404,2022-05-12,0.00129,0.001177,0.001289,0.001178,37.0,0.001178,OUR


In [15]:
price='Open'
data = df_random[["ticker", price]]
data.columns = ['Símbolo', 'Precio']
data.head()

Unnamed: 0,Símbolo,Precio
111690,AXIOM,0.001447
97802,BMBO,3e-06
78276,ELONBANK,1.575393
82103,CE,1.171831
108404,OUR,0.001289


In [13]:
from datetime import date
today = date.today().strftime("%Y-%m-%d")
today

'2022-05-14'

In [None]:
#NOTA: también se podría modificar la función para pasar una lista de monedas
c = pd.DataFrame(df[df.symbol.isin(['LTC','NEO','XMR','EOS','BCH','DASH','ETC'])].groupby('symbol').price_usd.mean()).reset_index()
c


In [124]:
from datetime import date
import pandas as pd

#NOTA: también se podría modificar la función para pasar una lista de monedas
def get_data(data_dir, date=date.today().strftime("%Y-%m-%d"), tam_data=100, price='Open'):
    """
    Obtiene datos de un directorio de una fecha dada
    param:
    data_dir: directorio de datos
    date: fecha a analizar, por default toma la fecha actual si no se especifica
    tam_data: analiza cierto tamaño del dataset de forma aleatoria, por default son 100 monedas a analizar
    price: selecciona con que precio se hará el ejercicio(High,Low,Open,Close), por default es Open
    return:
    dataframe: con datos especificados
    """
    df = pd.read_csv(data_dir)
    df_date = df[df['Date']==date]
    df_date = df_date[df_date[price] > 0]
    
    if(tam_data == None):
        df_random = df_date
    else:
        df_random = df_date.sample(n = tam_data)
    
    
    df_random = df_random.reset_index()
    data = df_random[["ticker", price]]
    data.columns = ['Símbolo', 'Precio']
    
    return data 

In [171]:
#Visualización de datos
df = get_data('../data/historical_data.csv', '2022-05-12')
df.head(15)

Unnamed: 0,index,Símbolo,Precio
0,113256,SGC,0.009959
1,103574,RYIU,0.012832
2,97906,EAGON,0.000459
3,65642,SHL,7e-06
4,82933,CMP,1e-06
5,12021,KMON,0.025789
6,7565,RARI,2.976159
7,96217,EXRT,0.00131
8,60835,PSWAP,0.002131
9,66367,FOOTIE,0.000691


## Transform data

### Exchange Rate Matrix Representation

In [172]:
import random
import numpy as np 

n = df.shape[0]

max_spread_pct = 0.05 # maximum bid-ask spread in pct of bid, 0.05 for 5%

c1 = df[['Precio']]
aux = c1.copy()
random.seed(10)
for i in range(n):
    c1[i] = aux/c1[['Precio']].values[i]*(1+random.uniform(0,max_spread_pct))
c1.drop(columns=['Precio'],inplace=True)
for i in range(len(c1.index)):
    for j in range(len(c1.columns)):
        if i==j:
            c1.loc[i,j] = 1
c1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  c1[i] = aux/c1[['Precio']].values[i]*(1+random.uniform(0,max_spread_pct))


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,1.000000,0.792750,22.324315,1437.375234,10363.993513,0.402075,3.455594e-03,7.663196,4.795058,14.648646,...,0.533697,0.010277,0.015947,10168.355978,0.226480,0.013937,850.245283,590.679902,0.746518,1.867195
1,1.325295,1.000000,28.764495,1852.033195,13353.826866,0.518066,4.452473e-03,9.873896,6.178349,18.874528,...,0.687660,0.013241,0.020547,13101.751277,0.291816,0.017958,1095.526381,761.080864,0.961875,2.405848
2,0.047406,0.036537,1.000000,66.247136,477.665727,0.018531,1.592647e-04,0.353189,0.220999,0.675141,...,0.024598,0.000474,0.000735,468.648995,0.010438,0.000642,39.186924,27.223825,0.034406,0.086057
3,0.000723,0.000557,0.015691,1.000000,7.284663,0.000283,2.428874e-06,0.005386,0.003370,0.010296,...,0.000375,0.000007,0.000011,7.147152,0.000159,0.000010,0.597622,0.415178,0.000525,0.001312
4,0.000103,0.000080,0.002242,0.144329,1.000000,0.000040,3.469820e-07,0.000769,0.000481,0.001471,...,0.000054,0.000001,0.000002,1.021022,0.000023,0.000001,0.085375,0.059311,0.000075,0.000187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,75.787719,58.411865,1644.913531,105909.540898,763645.962784,29.625913,2.546171e-01,564.644186,353.312302,1079.350279,...,39.324199,0.757219,1.175016,749230.881089,16.687652,1.000000,62648.281006,43522.829451,55.005362,137.579760
96,0.001239,0.000955,0.026899,1.731951,12.487993,0.000484,4.163784e-06,0.009234,0.005778,0.017651,...,0.000643,0.000012,0.000019,12.252262,0.000273,0.000017,1.000000,0.711734,0.000900,0.002250
97,0.001756,0.001353,0.038108,2.453598,17.691323,0.000686,5.898694e-06,0.013081,0.008185,0.025005,...,0.000911,0.000018,0.000027,17.357370,0.000387,0.000024,1.451368,1.000000,0.001274,0.003187
98,1.408229,1.085364,30.564518,1967.929642,14189.482022,0.550486,4.731100e-03,10.491784,6.564977,20.055657,...,0.730692,0.014070,0.021833,13921.632059,0.310077,0.019082,1164.082180,808.707747,1.000000,2.556401


### Log-Transformed Representations

In [129]:
df_ln = np.round(-np.log(c1),2)
df_ln

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,-0.00,1.22,-1.45,0.31,0.14,-8.44,2.36,4.31,-1.49,-1.03,...,-1.21,3.62,2.06,-0.43,-1.85,-0.83,0.92,-1.35,1.80,11.14
1,-1.27,-0.00,-2.69,-0.94,-1.11,-9.69,1.11,3.07,-2.74,-2.27,...,-2.45,2.37,0.81,-1.68,-3.09,-2.07,-0.33,-2.60,0.55,9.90
2,1.39,2.64,-0.00,1.73,1.56,-7.02,3.78,5.73,-0.07,0.39,...,0.21,5.04,3.48,0.99,-0.43,0.59,2.34,0.07,3.22,12.56
3,-0.34,0.91,-1.76,-0.00,-0.18,-8.76,2.04,4.00,-1.81,-1.34,...,-1.52,3.30,1.74,-0.74,-2.16,-1.14,0.60,-1.67,1.48,10.83
4,-0.21,1.05,-1.63,0.13,-0.00,-8.62,2.18,4.14,-1.67,-1.20,...,-1.39,3.44,1.88,-0.61,-2.02,-1.01,0.74,-1.53,1.62,10.96
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.77,2.03,-0.65,1.11,0.94,-7.64,3.16,5.12,-0.69,-0.22,...,-0.41,4.42,2.86,0.37,-1.04,-0.00,1.72,-0.55,2.60,11.94
96,-0.97,0.28,-2.39,-0.64,-0.80,-9.38,1.42,3.37,-2.44,-1.97,...,-2.15,2.67,1.11,-1.37,-2.79,-1.77,-0.00,-2.30,0.86,10.20
97,1.32,2.57,-0.10,1.65,1.49,-7.09,3.71,5.66,-0.15,0.32,...,0.14,4.96,3.40,0.92,-0.50,0.52,2.26,-0.00,3.15,12.49
98,-1.85,-0.60,-3.27,-1.52,-1.68,-10.26,0.54,2.49,-3.32,-2.85,...,-3.03,1.80,0.24,-2.25,-3.67,-2.65,-0.90,-3.18,-0.00,9.32


In [130]:
a = []

In [131]:
# Covert to formatto use un graph
for i in range(n):
    for j in range(n):
        if (i != j):# or abs(df_ln.loc[i][j]) != 0.0):
            a.append([str(i), str(j), df_ln.loc[i][j]])
            
a

[['0', '1', 1.22],
 ['0', '2', -1.45],
 ['0', '3', 0.31],
 ['0', '4', 0.14],
 ['0', '5', -8.44],
 ['0', '6', 2.36],
 ['0', '7', 4.31],
 ['0', '8', -1.49],
 ['0', '9', -1.03],
 ['0', '10', -1.72],
 ['0', '11', -1.48],
 ['0', '12', 7.19],
 ['0', '13', 2.74],
 ['0', '14', 2.6],
 ['0', '15', 3.3],
 ['0', '16', 3.37],
 ['0', '17', 3.1],
 ['0', '18', -7.74],
 ['0', '19', 3.49],
 ['0', '20', -3.81],
 ['0', '21', 8.09],
 ['0', '22', 0.21],
 ['0', '23', 1.63],
 ['0', '24', 2.07],
 ['0', '25', 0.8],
 ['0', '26', 0.25],
 ['0', '27', -1.66],
 ['0', '28', -2.13],
 ['0', '29', 1.78],
 ['0', '30', 5.58],
 ['0', '31', -2.39],
 ['0', '32', 2.13],
 ['0', '33', -1.75],
 ['0', '34', -1.83],
 ['0', '35', -1.63],
 ['0', '36', -2.21],
 ['0', '37', -7.02],
 ['0', '38', 6.11],
 ['0', '39', 2.53],
 ['0', '40', -1.13],
 ['0', '41', 0.84],
 ['0', '42', 1.88],
 ['0', '43', -3.59],
 ['0', '44', -2.53],
 ['0', '45', 3.9],
 ['0', '46', 0.49],
 ['0', '47', -1.8],
 ['0', '48', -2.37],
 ['0', '49', -1.3],
 ['0', '50', 2

In [132]:
G = nx.DiGraph()        
G.add_weighted_edges_from(a)

In [133]:
import random
import numpy as np 

def exchange_rate_matrix(data):
    """
    Exchange Rate Matrix Representation
    param:
        dataframe
    return:
        dataframe
    """
    n = data.shape[0]

    max_spread_pct = 0.05 # maximum bid-ask spread in pct of bid, 0.05 for 5%

    c1 = data[['Precio']]
    aux = c1.copy()
    random.seed(10)
    for i in range(n):
        c1[i] = aux/c1[['Precio']].values[i]*(1+random.uniform(0,max_spread_pct))
    c1.drop(columns=['Precio'],inplace=True)
    for i in range(len(c1.index)):
        for j in range(len(c1.columns)):
            if i==j:
                c1.loc[i,j] = 1
    
    return c1

def log_transformed_rep(data):
    """
    Log-Transformed Representations
    param:
        dataframe
    return:
        dataframe
    """
    
    df_ln = np.round(-np.log(data),2)
    return df_ln

def create_grap(data):
    """
    Crea grafo a partir de los datos de cripomonedas y precio
    param:
        dataframe
    return:
        grafo
    """
    df = exchange_rate_matrix(data)
    df_ln = log_transformed_rep(df)
    
    n = df_ln.shape[0]
    
    edge = []
    # Covert to formatto use un graph
    for i in range(n):
        for j in range(n):
            if (i != j):
                edge.append([str(i), str(j), df_ln.loc[i][j]])

    G = nx.DiGraph()        
    G.add_weighted_edges_from(edge)
    
    return G

In [50]:
exchange_rate_matrix(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  c1[i] = aux/c1[['Precio']].values[i]*(1+random.uniform(0,max_spread_pct))


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,1.00,0.00,0.00,inf,0.00,0.00,0.00,inf,0.05,0.02,...,0.03,0.08,0.15,0.00,0.19,0.01,2.61,0.02,0.00,7.21
1,940.77,1.00,2.25,inf,1.55,3.76,1.44,inf,43.69,17.90,...,23.32,74.59,139.35,0.04,172.80,8.59,2390.55,16.30,0.58,6596.48
2,429.83,0.47,1.00,inf,0.71,1.72,0.66,inf,19.96,8.18,...,10.66,34.08,63.67,0.02,78.95,3.92,1092.22,7.45,0.27,3013.86
3,0.00,0.00,0.00,1.0,0.00,0.00,0.00,,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
4,632.19,0.69,1.51,inf,1.00,2.52,0.97,inf,29.36,12.03,...,15.67,50.12,93.64,0.02,116.12,5.77,1606.44,10.95,0.39,4432.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,112.48,0.12,0.27,inf,0.19,0.45,0.17,inf,5.22,2.14,...,2.79,8.92,16.66,0.00,20.66,1.00,285.83,1.95,0.07,788.73
96,0.40,0.00,0.00,inf,0.00,0.00,0.00,inf,0.02,0.01,...,0.01,0.03,0.06,0.00,0.07,0.00,1.00,0.01,0.00,2.83
97,58.21,0.06,0.14,inf,0.10,0.23,0.09,inf,2.70,1.11,...,1.44,4.62,8.62,0.00,10.69,0.53,147.91,1.00,0.04,408.15
98,1647.15,1.79,3.94,inf,2.71,6.58,2.53,inf,76.49,31.34,...,40.83,130.59,243.99,0.06,302.55,15.04,4185.53,28.53,1.00,11549.54


In [51]:
log_transformed_rep(exchange_rate_matrix(df))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  c1[i] = aux/c1[['Precio']].values[i]*(1+random.uniform(0,max_spread_pct))
  result = func(self.values, **kwargs)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,-0.000000,-0.270027,-inf,-3.324676,2.659260,-9.620256,1.427116,-5.169063,-inf,2.525729,...,inf,-3.498022,inf,-4.927906,-4.290596,0.186330,inf,-2.653946,1.078810,-5.500074
1,0.223144,-0.000000,-inf,-3.078694,2.813411,-9.374150,1.660731,-4.922969,-inf,2.813411,...,inf,-3.251924,inf,-4.681761,-4.044454,0.430783,inf,-2.407846,1.347074,-5.253947
2,inf,inf,-0.0,inf,inf,inf,inf,inf,,inf,...,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf
3,3.218876,2.995732,-inf,-0.000000,inf,-6.305655,4.605170,-1.854734,-inf,inf,...,inf,-0.182322,inf,-1.613430,-0.974560,3.506558,inf,0.653926,4.605170,-2.184927
4,-2.698673,-2.938103,-inf,-5.995631,-0.000000,-12.291037,-1.261298,-7.839821,-inf,-0.086178,...,inf,-6.168690,3.218876,-7.598660,-6.961391,-2.487404,3.506558,-5.324521,-1.585145,-8.170845
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,-0.239017,-0.476234,-inf,-3.534854,2.407946,-9.830225,1.203973,-5.379021,-inf,2.407946,...,inf,-3.707947,inf,-5.137855,-4.500587,-0.000000,inf,-2.863914,0.867501,-5.710030
96,-6.293234,-6.532393,-inf,-9.589919,-3.634159,-15.885322,-4.855073,-11.434107,-inf,-3.679839,...,2.120264,-9.762984,-0.398776,-11.192943,-10.555675,-6.081671,-0.000000,-8.918826,-5.179478,-11.765131
97,2.659260,2.407946,-inf,-0.678034,4.605170,-6.974750,3.912023,-2.523326,-inf,4.605170,...,inf,-0.854415,inf,-2.282382,-1.644805,2.813411,inf,-0.000000,3.912023,-2.854745
98,-1.134623,-1.373716,-inf,-4.432244,1.514128,-10.727652,0.301105,-6.276437,-inf,1.469676,...,inf,-4.605270,4.605170,-6.035266,-5.398027,-0.924259,4.605170,-3.761200,-0.000000,-6.607461


In [134]:
G = create_grap(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  c1[i] = aux/c1[['Precio']].values[i]*(1+random.uniform(0,max_spread_pct))


# Iniciando perfilamiento

**Función trabajada hasta el momento**

In [135]:
import numpy as np 
import networkx as nx

def bf_negative_cycle(graph, node_ini=None, distance_ini=np.inf):
    
    assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    
    if node_ini is None:
        n_nodes = len(graph.nodes())
    else:
        assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
        n_nodes = node_ini
            
    n = len(graph.nodes()) + 1
    # Remove nan borders inside graph
    edges = []
    for edge in graph.edges().data():
        if ~np.isnan(edge[2]['weight']):
            edges.append(edge)

    # Add a start node and add zero weighted edges to all other nodes
    for i in range(n-1):
        edges.append((n-1, i, {'weight': 0}))

    # Initialize distances of nodes and predecessors
    distance= np.ones(n) * distance_ini # Starting distances with infinite values
    distance[n_nodes] = 0  # Starting node has zero distance
    predecessors = np.ones(n) * -1  # Starting predecessors with -1 values
    
    list_val=[]

    for i in range(n):  
        x = -1
        for edge in edges:
            if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:                
                a = [distance[int(edge[0])] + edge[2]['weight'], distance[int(edge[1])],predecessors[int(edge[1])],int(edge[1])]
                list_val.append(a)
                distance[int(edge[1])] = distance[int(edge[0])] + edge[2]['weight']
                predecessors[int(edge[1])] = int(edge[0])
                x = int(edge[1])
        if x == -1:  # If relaxation is not possible, there is no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = predecessors[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = predecessors[int(v)]
    
    cycle.reverse() # reverse list
    #return cycle, list_val, predecessors
    return cycle

## %TIME

In [136]:
%time bf_negative_cycle(G,0)

CPU times: user 6.02 s, sys: 226 ms, total: 6.25 s
Wall time: 6.28 s


[99, 98, 99]

**Propuestas de modificaciones al código**

In [137]:
def bf_negative_cycle2(graph, node_ini=None, distance_ini=np.inf):
    
    assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    
    if node_ini is None:
        n_nodes = len(graph.nodes())
    else:
        assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
        n_nodes = node_ini
            
    n = len(graph.nodes()) + 1
    # Remove nan borders inside graph --> CAMBIO 1
    #edges = []
    #for edge in graph.edges().data():
    #    if ~np.isnan(edge[2]['weight']):
    #        edges.append(edge)
    
    n = len(graph.nodes()) + 1
    edges = [edge for edge in graph.edges().data() if ~np.isnan(edge[2]['weight'])]
    

    # Add a start node and add zero weighted edges to all other nodes
    for i in range(n-1):
        edges.append((n-1, i, {'weight': 0}))

    # Initialize distances of nodes and predecessors
    distance= np.ones(n) * distance_ini # Starting distances with infinite values
    distance[n_nodes] = 0  # Starting node has zero distance
    predecessors = np.ones(n) * -1  # Starting predecessors with -1 values
    
    list_val=[]

    for i in range(n):  
        x = -1
        for edge in edges:
            if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:                
                a = [distance[int(edge[0])] + edge[2]['weight'], distance[int(edge[1])],predecessors[int(edge[1])],int(edge[1])]
                list_val.append(a)
                distance[int(edge[1])] = distance[int(edge[0])] + edge[2]['weight']
                predecessors[int(edge[1])] = int(edge[0])
                x = int(edge[1])
        if x == -1:  # If relaxation is not possible, there is no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = predecessors[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = predecessors[int(v)]
    
    cycle.reverse() # reverse list
    return cycle#, list_val, predecessors

In [138]:
%time bf_negative_cycle2(G)

CPU times: user 6.34 s, sys: 211 ms, total: 6.56 s
Wall time: 6.6 s


[99, 98, 99]

In [139]:
def bf_negative_cycle3(graph, node_ini=None, distance_ini=np.inf):
    
    assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    
    if node_ini is None:
        n_nodes = len(graph.nodes())
    else:
        assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
        n_nodes = node_ini
            
    n = len(graph.nodes()) + 1
    # Remove nan borders inside graph --> CAMBIO 1
    #edges = []
    #for edge in graph.edges().data():
    #    if ~np.isnan(edge[2]['weight']):
    #        edges.append(edge)
    
    n = len(graph.nodes()) + 1
    edges = [edge for edge in graph.edges().data() if ~np.isnan(edge[2]['weight'])]
    

    # Add a start node and add zero weighted edges to all other nodes--> cambio 2
    #for i in range(n-1):
    #    edges.append((n-1, i, {'weight': 0}))
    start_node_edges = [(n-1, i, {'weight': 0}) for i in range(n-1)]
    edges = edges + start_node_edges
    
    

    # Initialize distances of nodes and predecessors
    distance= np.ones(n) * distance_ini # Starting distances with infinite values
    distance[n_nodes] = 0  # Starting node has zero distance
    predecessors = np.ones(n) * -1  # Starting predecessors with -1 values
    
    list_val=[]

    for i in range(n):  
        x = -1
        for edge in edges:
            if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:                
                a = [distance[int(edge[0])] + edge[2]['weight'], distance[int(edge[1])],predecessors[int(edge[1])],int(edge[1])]
                list_val.append(a)
                distance[int(edge[1])] = distance[int(edge[0])] + edge[2]['weight']
                predecessors[int(edge[1])] = int(edge[0])
                x = int(edge[1])
        if x == -1:  # If relaxation is not possible, there is no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = predecessors[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = predecessors[int(v)]
    
    cycle.reverse() # reverse list
    return cycle#, list_val, predecessors

In [140]:
%time bf_negative_cycle3(G)

CPU times: user 6.4 s, sys: 162 ms, total: 6.56 s
Wall time: 6.59 s


[99, 98, 99]

In [70]:
# The Bellman-Ford function used in this notebook
def bf_negative_cycle_orig(G):

    # Remove nan edges
    n = len(G.nodes()) + 1
    edges = [edge for edge in G.edges().data() if ~np.isnan(edge[2]['weight'])]

    # Add a starting node and add edges with zero weight to all other nodes
    start_node_edges = [(n-1, i, {'weight': 0}) for i in range(n-1)]
    edges = edges + start_node_edges

    # Initialize node distances and predecessors
    d = np.ones(n) * np.inf
    d[n - 1] = 0  # Starting node has zero distance
    p = np.ones(n) * -1

    # Relax n times
    for i in range(n):  
        x = -1
        for e in edges:
            if d[int(e[0])] + e[2]['weight'] < d[int(e[1])]:
                d[int(e[1])] = d[int(e[0])] + e[2]['weight']
                p[int(e[1])] = int(e[0])
                x = int(e[1])
        if x == -1:  # If no relaxation possible, no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = p[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = p[int(v)]
    return list(reversed(cycle))

In [150]:
%time bf_negative_cycle(G)

CPU times: user 6 s, sys: 142 ms, total: 6.14 s
Wall time: 6.16 s


[99, 98, 99]

In [141]:
%timeit bf_negative_cycle(G)

5.54 s ± 219 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [142]:
%timeit bf_negative_cycle2(G)

5.41 s ± 209 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [143]:
%timeit bf_negative_cycle3(G)

5.46 s ± 71.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## cProfile

In [74]:
! pip install cProfile

Defaulting to user installation because normal site-packages is not writeable
[31mERROR: Could not find a version that satisfies the requirement cProfile (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for cProfile[0m[31m
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m[33m
[0m

In [76]:
import cProfile

In [151]:
cprof = cProfile.Profile()
cprof.enable()
res = bf_negative_cycle(G,0)
cprof.disable()
cprof.print_stats(sort='cumtime')

         945931 function calls (945929 primitive calls) in 6.905 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    6.905    3.453 interactiveshell.py:3416(run_code)
        2    0.000    0.000    6.905    3.453 {built-in method builtins.exec}
        1    0.152    0.152    6.905    6.905 3341308294.py:3(<module>)
        1    6.592    6.592    6.753    6.753 4255061534.py:4(bf_negative_cycle)
   925950    0.144    0.000    0.144    0.000 {method 'append' of 'list' objects}
     9901    0.011    0.000    0.017    0.000 reportviews.py:726(<genexpr>)
     9900    0.006    0.000    0.006    0.000 reportviews.py:712(<lambda>)
        2    0.000    0.000    0.000    0.000 codeop.py:142(__call__)
        2    0.000    0.000    0.000    0.000 {built-in method builtins.compile}
        2    0.000    0.000    0.000    0.000 numeric.py:149(ones)
      101    0.000    0.000    0.000    0.000 {method 'item

## pstats

In [78]:
! pip install pstats

Defaulting to user installation because normal site-packages is not writeable
[31mERROR: Could not find a version that satisfies the requirement pstats (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for pstats[0m[31m
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m[33m
[0m

In [79]:
cprof.dump_stats("BF_stats")
import pstats

In [152]:
p_bf_stats = pstats.Stats("BF_stats")
print(p_bf_stats.sort_stats("cumulative").print_stats(10))

Sat May 14 12:31:50 2022    BF_stats

         495652 function calls (495650 primitive calls) in 4.066 seconds

   Ordered by: cumulative time
   List reduced from 67 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    4.066    2.033 /home/myuser/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3416(run_code)
        2    0.000    0.000    4.066    2.033 {built-in method builtins.exec}
        1    0.080    0.080    4.066    4.066 /tmp/ipykernel_78/3341308294.py:3(<module>)
        1    3.897    3.897    3.986    3.986 /tmp/ipykernel_78/4255061534.py:4(bf_negative_cycle)
   475639    0.071    0.000    0.071    0.000 {method 'append' of 'list' objects}
     9901    0.012    0.000    0.018    0.000 /home/myuser/.local/lib/python3.8/site-packages/networkx/classes/reportviews.py:726(<genexpr>)
     9900    0.006    0.000    0.006    0.000 /home/myuser/.local/lib/python3.8/site-packages/netw

In [153]:
print(p_bf_stats.sort_stats("cumulative").print_stats("lambda|listcomp|math"))

Sat May 14 12:31:50 2022    BF_stats

         495652 function calls (495650 primitive calls) in 4.066 seconds

   Ordered by: cumulative time
   List reduced from 67 to 2 due to restriction <'lambda|listcomp|math'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     9900    0.006    0.000    0.006    0.000 /home/myuser/.local/lib/python3.8/site-packages/networkx/classes/reportviews.py:712(<lambda>)
        2    0.000    0.000    0.000    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}


<pstats.Stats object at 0x7f942291bf10>


In [154]:
print(p_bf_stats.strip_dirs().sort_stats("cumulative").print_stats("lambda|listcomp|math"))

Sat May 14 12:31:50 2022    BF_stats

         495652 function calls (495650 primitive calls) in 4.066 seconds

   Ordered by: cumulative time
   List reduced from 67 to 2 due to restriction <'lambda|listcomp|math'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     9900    0.006    0.000    0.006    0.000 reportviews.py:712(<lambda>)
        2    0.000    0.000    0.000    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}


<pstats.Stats object at 0x7f942291bf10>


In [155]:
print(p_bf_stats.prim_calls)

495650


In [156]:
p_bf_stats.strip_dirs().sort_stats("cumulative").print_callers()

   Ordered by: cumulative time

Function                                                                 was called by...
                                                                             ncalls  tottime  cumtime
interactiveshell.py:3416(run_code)                                       <- 
{built-in method builtins.exec}                                          <-       2    0.000    4.066  interactiveshell.py:3416(run_code)
3341308294.py:3(<module>)                                                <-       1    0.080    4.066  {built-in method builtins.exec}
4255061534.py:4(bf_negative_cycle)                                       <-       1    3.897    3.986  3341308294.py:3(<module>)
{method 'append' of 'list' objects}                                      <-  475639    0.071    0.071  4255061534.py:4(bf_negative_cycle)
reportviews.py:726(<genexpr>)                                            <-    9901    0.012    0.018  4255061534.py:4(bf_negative_cycle)
reportviews.py:712(<l

<pstats.Stats at 0x7f942291bf10>

In [157]:
p_bf_stats.strip_dirs().sort_stats("cumulative").print_callees()

   Ordered by: cumulative time

Function                                                                 called...
                                                                             ncalls  tottime  cumtime
interactiveshell.py:3416(run_code)                                       ->       2    0.000    0.000  hooks.py:103(__call__)
                                                                                  2    0.000    0.000  interactiveshell.py:1286(user_global_ns)
                                                                                  2    0.000    0.000  ipstruct.py:125(__getattr__)
                                                                                  2    0.000    4.066  {built-in method builtins.exec}
{built-in method builtins.exec}                                          ->       1    0.080    4.066  3341308294.py:3(<module>)
                                                                                  1    0.000    0.000  3341308294.py:

<pstats.Stats at 0x7f942291bf10>

In [158]:
p_bf_stats.strip_dirs().sort_stats("cumulative").print_callers(10)

   Ordered by: cumulative time
   List reduced from 67 to 10 due to restriction <10>

Function                             was called by...
                                         ncalls  tottime  cumtime
interactiveshell.py:3416(run_code)   <- 
{built-in method builtins.exec}      <-       2    0.000    4.066  interactiveshell.py:3416(run_code)
3341308294.py:3(<module>)            <-       1    0.080    4.066  {built-in method builtins.exec}
4255061534.py:4(bf_negative_cycle)   <-       1    3.897    3.986  3341308294.py:3(<module>)
{method 'append' of 'list' objects}  <-  475639    0.071    0.071  4255061534.py:4(bf_negative_cycle)
reportviews.py:726(<genexpr>)        <-    9901    0.012    0.018  4255061534.py:4(bf_negative_cycle)
reportviews.py:712(<lambda>)         <-    9900    0.006    0.006  reportviews.py:726(<genexpr>)
codeop.py:142(__call__)              <- 




<pstats.Stats at 0x7f942291bf10>

In [159]:
p_bf_stats.strip_dirs().sort_stats("cumulative").print_callees("BF|lambda")

   Ordered by: cumulative time
   List reduced from 67 to 1 due to restriction <'BF|lambda'>

Function                      called...
                                  ncalls  tottime  cumtime
reportviews.py:712(<lambda>)  -> 




<pstats.Stats at 0x7f942291bf10>

## line_profiler

In [92]:
! pip install line_profiler

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m[33m
[0m

In [93]:
import line_profiler

In [160]:
line_prof = line_profiler.LineProfiler()
print(line_prof(bf_negative_cycle)(G))

[99, 98, 99]


In [161]:
print(line_prof.print_stats())

Timer unit: 1e-06 s

Total time: 16.0331 s
File: /tmp/ipykernel_78/4255061534.py
Function: bf_negative_cycle at line 4

Line #      Hits         Time  Per Hit   % Time  Line Contents
     4                                           def bf_negative_cycle(graph, node_ini=None, distance_ini=np.inf):
     5                                               
     6         1          4.0      4.0      0.0      assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
     7                                               
     8         1          1.0      1.0      0.0      if node_ini is None:
     9         1         21.0     21.0      0.0          n_nodes = len(graph.nodes())
    10                                               else:
    11                                                   assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
    12              

In [96]:
line_prof = line_profiler.LineProfiler()
print(line_prof(bf_negative_cycle2)(G))
print(line_prof.print_stats())

  if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:


[95, 98, 95]
Timer unit: 1e-06 s

Total time: 9.70621 s
File: /tmp/ipykernel_78/3046895781.py
Function: bf_negative_cycle2 at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def bf_negative_cycle2(graph, node_ini=None, distance_ini=np.inf):
     2                                               
     3         1          4.0      4.0      0.0      assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
     4                                               
     5         1          2.0      2.0      0.0      if node_ini is None:
     6         1         22.0     22.0      0.0          n_nodes = len(graph.nodes())
     7                                               else:
     8                                                   assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
     

In [162]:
line_prof = line_profiler.LineProfiler()
print(line_prof(bf_negative_cycle3)(G))
print(line_prof.print_stats())

[99, 98, 99]
Timer unit: 1e-06 s

Total time: 19.9554 s
File: /tmp/ipykernel_78/3000221302.py
Function: bf_negative_cycle3 at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def bf_negative_cycle3(graph, node_ini=None, distance_ini=np.inf):
     2                                               
     3         1          4.0      4.0      0.0      assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
     4                                               
     5         1          3.0      3.0      0.0      if node_ini is None:
     6         1         21.0     21.0      0.0          n_nodes = len(graph.nodes())
     7                                               else:
     8                                                   assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
     

## memory_profiler

In [98]:
from memory_profiler import memory_usage

In [163]:
t = (bf_negative_cycle, (G,1,100000))
print(memory_usage(t, max_usage=True))

550.546875


In [164]:
start_mem = memory_usage(max_usage=True)
res = memory_usage(t, max_usage=True, retval=True)
print('start mem', start_mem)
print('max mem', res[0])
print('used mem', res[0]-start_mem)
print('fun output', res[1])

start mem 388.453125
max mem 548.80078125
used mem 160.34765625
fun output [99, 98, 99]


In [165]:
%load_ext memory_profiler

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler


In [166]:
%memit #how much RAM this process is consuming

peak memory: 388.67 MiB, increment: 0.00 MiB


In [167]:
%memit -c bf_negative_cycle(G)

peak memory: 907.56 MiB, increment: 518.89 MiB


In [168]:
%%file BF_memory_profiler.py

import math

from pytest import approx
from scipy.integrate import quad
from memory_profiler import profile
import numpy as np 
import networkx as nx


@profile #decorate the functions you want to profile with memory_profiler 
def bf_negative_cycle(graph, node_ini=None, distance_ini=np.inf):
    
    assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    
    if node_ini is None:
        n_nodes = len(graph.nodes())
    else:
        assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
        n_nodes = node_ini
            
    n = len(graph.nodes()) + 1
    # Remove nan borders inside graph
    edges = []
    for edge in graph.edges().data():
        if ~np.isnan(edge[2]['weight']):
            edges.append(edge)

    # Add a start node and add zero weighted edges to all other nodes
    for i in range(n-1):
        edges.append((n-1, i, {'weight': 0}))

    # Initialize distances of nodes and predecessors
    distance= np.ones(n) * distance_ini # Starting distances with infinite values
    distance[n_nodes] = 0  # Starting node has zero distance
    predecessors = np.ones(n) * -1  # Starting predecessors with -1 values
    
    list_val=[]

    for i in range(n):  
        x = -1
        for edge in edges:
            if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:                
                a = [distance[int(edge[0])] + edge[2]['weight'], distance[int(edge[1])],predecessors[int(edge[1])],int(edge[1])]
                list_val.append(a)
                distance[int(edge[1])] = distance[int(edge[0])] + edge[2]['weight']
                predecessors[int(edge[1])] = int(edge[0])
                x = int(edge[1])
        if x == -1:  # If relaxation is not possible, there is no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = predecessors[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = predecessors[int(v)]
    
    cycle.reverse() # reverse list
    return cycle#, list_val, predecessors

if __name__ == "__main__":
    edges3 = [["0","1", 5],
         ["1","2", 20],
         ["1","5", 30],
         ["1","6", 60],
         ["2","3", 10],
         ["2","4", 75],
         ["3","2", -15],
         ["4","9", 100],
         ["5","4", 25],
         ["5","6", 5],
         ["5","8", 50],
         ["6","7", -50],
         ["7","8", -10]]
        
    G3 = nx.DiGraph()        
    G3.add_weighted_edges_from(edges3)
    res,x,y = bf_negative_cycle(G3)
    print("aproximación: {:0.6e}".format(res))
    print(res == approx(obj))
    

Overwriting BF_memory_profiler.py


In [110]:
import math
edges3 = [["0","1", 5],
         ["1","2", 20],
         ["1","5", 30],
         ["1","6", 60],
         ["2","3", 10],
         ["2","4", 75],
         ["3","2", -15],
         ["4","9", 100],
         ["5","4", 25],
         ["5","6", 5],
         ["5","8", 50],
         ["6","7", -50],
         ["7","8", -10]]
        
G3 = nx.DiGraph()        
G3.add_weighted_edges_from(edges3)

In [117]:
from BF_memory_profiler import bf_negative_cycle

In [122]:
%mprun -f bf_negative_cycle bf_negative_cycle(G)

  if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:


Filename: /datos/maestria_2022p/Opt2/notebooks/BF_memory_profiler.py

Line #    Mem usage    Increment  Occurrences   Line Contents
    11    379.6 MiB    379.6 MiB           1   @profile #decorate the functions you want to profile with memory_profiler 
    12                                         def bf_negative_cycle(graph, node_ini=None, distance_ini=np.inf):
    13                                             
    14    379.6 MiB      0.0 MiB           1       assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    15                                             
    16    379.6 MiB      0.0 MiB           1       if node_ini is None:
    17    379.6 MiB      0.0 MiB           1           n_nodes = len(graph.nodes())
    18                                             else:
    19                                                 assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia d

Filename: /home/myuser/.local/lib/python3.8/site-packages/memory_profiler.py

Line #    Mem usage    Increment  Occurrences   Line Contents
  1183    379.6 MiB    379.6 MiB           1               @wraps(wrapped=func)
  1184                                                     def wrapper(*args, **kwargs):
  1185    379.6 MiB      0.0 MiB           1                   prof = get_prof()
  1186    381.0 MiB      1.4 MiB           1                   val = prof(func)(*args, **kwargs)
  1187    381.0 MiB      0.0 MiB           1                   show_results_bound(prof)
  1188    381.0 MiB      0.0 MiB           1                   return val

In [123]:
%%bash
python3 BF_memory_profiler.py

Filename: BF_memory_profiler.py

Line #    Mem usage    Increment  Occurrences   Line Contents
    11     85.6 MiB     85.6 MiB           1   @profile #decorate the functions you want to profile with memory_profiler 
    12                                         def bf_negative_cycle(graph, node_ini=None, distance_ini=np.inf):
    13                                             
    14     85.6 MiB      0.0 MiB           1       assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    15                                             
    16     85.6 MiB      0.0 MiB           1       if node_ini is None:
    17     85.6 MiB      0.0 MiB           1           n_nodes = len(graph.nodes())
    18                                             else:
    19                                                 assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
  

Traceback (most recent call last):
  File "BF_memory_profiler.py", line 85, in <module>
    print(res == approx(obj))
NameError: name 'obj' is not defined


CalledProcessError: Command 'b'python3 BF_memory_profiler.py\n'' returned non-zero exit status 1.

Heapy

In [None]:
# Para porciones de código