# **Compilación con C**

In [19]:
from IPython.display import display, HTML, Image
from datetime import date
import pandas as pd
import random
import numpy as np 
import networkx as nx
import time

In [4]:
def get_data(data_dir, date=date.today().strftime("%Y-%m-%d"), tam_data=100, price='Open'):
    """
    Obtiene datos de un directorio de una fecha dada
    param:
    data_dir: directorio de datos
    date: fecha a analizar, por default toma la fecha actual si no se especifica
    tam_data: analiza cierto tamaño del dataset de forma aleatoria, por default son 100 monedas a analizar
    price: selecciona con que precio se hará el ejercicio(High,Low,Open,Close), por default es Open
    return:
    dataframe: con datos especificados
    """
    df = pd.read_csv(data_dir)
    df_date = df[df['Date']==date]
    df_date = df_date[df_date[price] > 0]
    
    if(tam_data == None):
        df_random = df_date
    else:
        df_random = df_date.sample(n = tam_data)
    
    
    df_random = df_random.reset_index()
    data = df_random[["ticker", price]]
    data.columns = ['Símbolo', 'Precio']
    
    return data    

def exchange_rate_matrix(data):
    """
    Exchange Rate Matrix Representation
    param:
        dataframe
    return:
        dataframe
    """
    n = data.shape[0]

    max_spread_pct = 0.05 # maximum bid-ask spread in pct of bid, 0.05 for 5%

    c1 = data[['Precio']]
    aux = c1.copy()
    random.seed(10)
    for i in range(n):
        c1[i] = aux/c1[['Precio']].values[i]*(1+random.uniform(0,max_spread_pct))
    c1.drop(columns=['Precio'],inplace=True)
    for i in range(len(c1.index)):
        for j in range(len(c1.columns)):
            if i==j:
                c1.loc[i,j] = 1
    return c1

def log_transformed_rep(data):
    """
    Log-Transformed Representations
    param:
        dataframe
    return:
        dataframe
    """
    
    df_ln = np.round(-np.log(data),2)
    return df_ln

def create_grap(data):
    """
    Crea grafo a partir de los datos de cripomonedas y precio
    param:
        dataframe
    return:
        grafo
    """
    df = exchange_rate_matrix(data)
    df_ln = log_transformed_rep(df)
    
    n = df_ln.shape[0]
    
    edge = []
    # Covert to formatto use un graph
    for i in range(n):
        for j in range(n):
            if (i != j):
                edge.append([str(i), str(j), df_ln.loc[i][j]])

    G = nx.DiGraph()        
    G.add_weighted_edges_from(edge)
    
    return G

### **Función a compilar**

In [5]:
def bf_negative_cycle(graph, node_ini=None, distance_ini=np.inf):
    
    assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    
    if node_ini is None:
        n_nodes = len(graph.nodes())
    else:
        assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
        n_nodes = node_ini
            
    n = len(graph.nodes()) + 1
    # Remove nan borders inside graph
    edges = []
    for edge in graph.edges().data():
        if ~np.isnan(edge[2]['weight']):
            edges.append(edge)

    # Add a start node and add zero weighted edges to all other nodes
    for i in range(n-1):
        edges.append((n-1, i, {'weight': 0}))

    # Initialize distances of nodes and predecessors
    distance= np.ones(n) * distance_ini # Starting distances with infinite values
    distance[n_nodes] = 0  # Starting node has zero distance
    predecessors = np.ones(n) * -1  # Starting predecessors with -1 values
    
    for i in range(n):  
        x = -1
        for edge in edges:
            if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:  
                distance[int(edge[1])] = distance[int(edge[0])] + edge[2]['weight']
                predecessors[int(edge[1])] = int(edge[0])
                x = int(edge[1])
        if x == -1:  # If relaxation is not possible, there is no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = predecessors[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = predecessors[int(v)]
    
    return cycle.reverse()

In [6]:
df = get_data('../data/historical_data.csv', '2022-05-12')
df.head(15)

Unnamed: 0,Símbolo,Precio
0,HOOP,135.345673
1,CFC,0.002306
2,DDOS,0.190501
3,LAVAX,0.025992
4,DACXI,0.001967
5,MOR,0.000259
6,STRX,0.041119
7,PIXEL,0.001359
8,MSA,0.002315
9,CCC,0.006802


In [7]:
G = create_grap(df)

In [8]:
%timeit bf_negative_cycle(G,0)

2.77 s ± 43.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
%%bash
lscpu

Architecture:                    x86_64
CPU op-mode(s):                  32-bit, 64-bit
Byte Order:                      Little Endian
Address sizes:                   36 bits physical, 48 bits virtual
CPU(s):                          4
On-line CPU(s) list:             0-3
Thread(s) per core:              2
Core(s) per socket:              2
Socket(s):                       1
NUMA node(s):                    1
Vendor ID:                       GenuineIntel
CPU family:                      6
Model:                           58
Model name:                      Intel(R) Core(TM) i5-3210M CPU @ 2.50GHz
Stepping:                        9
CPU MHz:                         2694.743
CPU max MHz:                     3100.0000
CPU min MHz:                     1200.0000
BogoMIPS:                        4988.79
Virtualization:                  VT-x
L1d cache:                       64 KiB
L1i cache:                       64 KiB
L2 cache:                        512 KiB
L3 cache:                       

In [19]:
#%%bash
#sudo lshw -C memory

In [20]:
%%bash
uname -ar #r for kernel, a for all

Linux 5a4ea55e41da 5.13.0-41-generic #46~20.04.1-Ubuntu SMP Wed Apr 20 13:16:21 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux


# Cython

In [7]:
%%file bf_cython.pyx
import numpy as np 
def bf_negative_cycle_p(graph, node_ini=None, distance_ini=np.inf):
    
    assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    
    if node_ini is None:
        n_nodes = len(graph.nodes())
    else:
        assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
        n_nodes = node_ini
            
    n = len(graph.nodes()) + 1
    # Remove nan borders inside graph
    edges = []
    for edge in graph.edges().data():
        if ~np.isnan(edge[2]['weight']):
            edges.append(edge)

    # Add a start node and add zero weighted edges to all other nodes
    for i in range(n-1):
        edges.append((n-1, i, {'weight': 0}))

    # Initialize distances of nodes and predecessors
    distance= np.ones(n) * distance_ini # Starting distances with infinite values
    distance[n_nodes] = 0  # Starting node has zero distance
    predecessors = np.ones(n) * -1  # Starting predecessors with -1 values
    
    for i in range(n):  
        x = -1
        for edge in edges:
            if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:                
                distance[int(edge[1])] = distance[int(edge[0])] + edge[2]['weight']
                predecessors[int(edge[1])] = int(edge[0])
                x = int(edge[1])
        if x == -1:  # If relaxation is not possible, there is no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = predecessors[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = predecessors[int(v)]
        
    return cycle.reverse()

Overwriting bf_cython.pyx


In [8]:
%%file setup.py
from distutils.core import setup
from Cython.Build import cythonize

setup(ext_modules = cythonize("bf_cython.pyx", 
                              compiler_directives={'language_level' : 3})
     )

Overwriting setup.py


In [9]:
%%bash
python3 setup.py build_ext --inplace

Compiling bf_cython.pyx because it changed.
[1/1] Cythonizing bf_cython.pyx
running build_ext
building 'bf_cython' extension
x86_64-linux-gnu-gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/include/python3.8 -c bf_cython.c -o build/temp.linux-x86_64-3.8/bf_cython.o
x86_64-linux-gnu-gcc -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fwrapv -O2 -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 build/temp.linux-x86_64-3.8/bf_cython.o -o /datos/maestria_2022p/Opt2/notebooks/bf_cython.cpython-38-x86_64-linux-gnu.so


**Importando**

In [10]:
import bf_cython
start_time = time.time()
res = bf_cython.bf_negative_cycle_p(G)
end_time = time.time()

In [11]:
secs = end_time-start_time
print("Bellman Ford tomó",secs,"segundos" )

Bellman Ford tomó 1.9562475681304932 segundos


In [12]:
%load_ext Cython

In [13]:
%%cython
import numpy as np 
def bf_negative_cycle_cc(graph, node_ini=None, distance_ini=np.inf):
    
    assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    
    if node_ini is None:
        n_nodes = len(graph.nodes())
    else:
        assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
        n_nodes = node_ini
            
    n = len(graph.nodes()) + 1
    # Remove nan borders inside graph
    edges = []
    for edge in graph.edges().data():
        if ~np.isnan(edge[2]['weight']):
            edges.append(edge)

    # Add a start node and add zero weighted edges to all other nodes
    for i in range(n-1):
        edges.append((n-1, i, {'weight': 0}))

    # Initialize distances of nodes and predecessors
    distance= np.ones(n) * distance_ini # Starting distances with infinite values
    distance[n_nodes] = 0  # Starting node has zero distance
    predecessors = np.ones(n) * -1  # Starting predecessors with -1 values

    for i in range(n):  
        x = -1
        for edge in edges:
            if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:                
                distance[int(edge[1])] = distance[int(edge[0])] + edge[2]['weight']
                predecessors[int(edge[1])] = int(edge[0])
                x = int(edge[1])
        if x == -1:  # If relaxation is not possible, there is no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = predecessors[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = predecessors[int(v)]    
    
    return cycle.reverse()

In [15]:
start_time = time.time()
res = bf_negative_cycle_cc(G)
end_time = time.time()
secs = end_time-start_time
print("BF tomó",secs,"segundos" )

BF tomó 2.082916021347046 segundos


In [16]:
%%bash
$HOME/.local/bin/cython --force -3 --annotate bf_cython.pyx

In [18]:
display(HTML("bf_cython.html"))

In [23]:
%%file bf_cython2.pyx
import numpy as np 
def bf_negative_cycle_cc(graph, node_ini=None, distance_ini=np.inf):
    
    assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    
    n_nodes = len(graph.nodes)
    
    if node_ini is not None:
        assert node_ini <= n_nodes, f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {n_nodes}."
        n_nodes = node_ini
                    
    n = n_nodes + 1
    # Remove nan borders inside graph
    edges = [edge for edge in graph.edges().data() if ~np.isnan(edge[2]['weight'])]

    # Add a start node and add zero weighted edges to all other nodes
    for i in range(n-1):
        edges.append((n-1, i, {'weight': 0}))
        
    # Initialize distances of nodes and predecessors
    # https://codingdeekshi.com/initialize-an-array-in-python/
    distance= [distance_ini ]*n
    distance[n_nodes] = 0  
    predecessors = [-1]*n 
    
    for i in range(n):  
        x = -1
        for edge in edges:
            if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:                
                distance[int(edge[1])] = distance[int(edge[0])] + edge[2]['weight']
                predecessors[int(edge[1])] = int(edge[0])
                x = int(edge[1])
        if x == -1:  # If relaxation is not possible, there is no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = predecessors[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = predecessors[int(v)]
    
    return cycle.reverse()

Overwriting bf_cython2.pyx


In [24]:
start_time = time.time()
res = bf_negative_cycle_cc(G)
end_time = time.time()
secs = end_time-start_time
print("BF tomó",secs,"segundos" )

BF tomó 1.96793794631958 segundos


In [25]:
%%bash
$HOME/.local/bin/cython --force -3 --annotate bf_cython2.pyx

In [26]:
display(HTML("bf_cython2.html"))

In [63]:
print(res)

[99, 98, 99]


# Cython y OpenMP

In [27]:
%%file bf_cython_openmp.pyx
from cython.parallel import prange
from libc.math cimport exp as c_exp
import numpy as np 

def bf_negative_cycle_cc(graph, node_ini=None, distance_ini=np.inf):
    
    assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    
    n_nodes = len(graph.nodes)
    
    if node_ini is not None:
        assert node_ini <= n_nodes, f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {n_nodes}."
        n_nodes = node_ini
                    
    n = n_nodes + 1
    # Remove nan borders inside graph
    edges = [edge for edge in graph.edges().data() if ~np.isnan(edge[2]['weight'])]

    # Add a start node and add zero weighted edges to all other nodes
    for i in range(n-1):
        edges.append((n-1, i, {'weight': 0}))
        
    # Initialize distances of nodes and predecessors
    # https://codingdeekshi.com/initialize-an-array-in-python/
    distance= [distance_ini ]*n
    distance[n_nodes] = 0  
    predecessors = [-1]*n 
    
    for i in range(n):  
        x = -1
        for edge in edges:
            if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:                
                distance[int(edge[1])] = distance[int(edge[0])] + edge[2]['weight']
                predecessors[int(edge[1])] = int(edge[0])
                x = int(edge[1])
        if x == -1:  # If relaxation is not possible, there is no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = predecessors[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = predecessors[int(v)]
    
    return cycle.reverse()

Overwriting bf_cython_openmp.pyx


In [28]:
%%bash
$HOME/.local/bin/cython -3 --force bf_cython_openmp.pyx

In [29]:
%%file setup_openmp.py
from setuptools import Extension, setup
from Cython.Build import cythonize

ext_modules = [Extension("bf_cython_openmp",
                         ["bf_cython_openmp.pyx"], 
                         extra_compile_args=["-fopenmp"],
                         extra_link_args=["-fopenmp"],
                        )
              ]

setup(ext_modules = cythonize(ext_modules))

Overwriting setup_openmp.py


In [30]:
%%bash
python3 setup_openmp.py build_ext --inplace

running build_ext
building 'bf_cython_openmp' extension
x86_64-linux-gnu-gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/include/python3.8 -c bf_cython_openmp.c -o build/temp.linux-x86_64-3.8/bf_cython_openmp.o -fopenmp
x86_64-linux-gnu-gcc -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fwrapv -O2 -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 build/temp.linux-x86_64-3.8/bf_cython_openmp.o -o build/lib.linux-x86_64-3.8/bf_cython_openmp.cpython-38-x86_64-linux-gnu.so -fopenmp
copying build/lib.linux-x86_64-3.8/bf_cython_openmp.cpython-38-x86_64-linux-gnu.so -> 


In [31]:
import bf_cython_openmp

In [32]:
start_time = time.time()
res_openmp = bf_cython_openmp.bf_negative_cycle_cc(G)
end_time = time.time()
secs = end_time-start_time
print("BF_openmp tomó",secs,"segundos" )

BF_openmp tomó 1.1095116138458252 segundos


### **Perfilando código de exchange matrix**

In [10]:
df = get_data('../data/historical_data.csv', '2022-05-12',5)
df.head(15)

Unnamed: 0,Símbolo,Precio
0,MTRL,0.008032
1,BGTT,0.002728
2,PSY,0.012801
3,NDN,0.00149
4,FIA,0.000158


In [11]:
def exchange_rate_matrix(data):
    """
    Exchange Rate Matrix Representation
    param:
        dataframe
    return:
        dataframe
    """
    n = data.shape[0]

    max_spread_pct = 0.05 # maximum bid-ask spread in pct of bid, 0.05 for 5%

    c1 = data[['Precio']]
    aux = c1.copy()
    random.seed(10)
    for i in range(n):
        c1[i] = aux/c1[['Precio']].values[i]*(1+random.uniform(0,max_spread_pct))
    c1.drop(columns=['Precio'],inplace=True)
    for i in range(len(c1.index)):
        for j in range(len(c1.columns)):
            if i==j:
                c1.loc[i,j] = 1
    return c1

In [12]:
start_time = time.time()
d2 = exchange_rate_matrix(df)
end_time = time.time()
secs = end_time-start_time
print("exchange_rate_matrix tomó: ",secs,"segundos" )

exchange_rate_matrix tomó:  0.01960301399230957 segundos


In [13]:
def exchange_rate_matrix2(data):
    """
    Exchange Rate Matrix Representation
    param:
        dataframe
    return:
        dataframe
    """
    n = data.shape[0]

    max_spread_pct = 0.05 # maximum bid-ask spread in pct of bid, 0.05 for 5%

    c1 = data[['Precio']]
    aux = c1.copy()
    random.seed(10)
    for i in range(n):
        c1[i] = aux/c1.loc[i]['Precio']*(1+random.uniform(0,max_spread_pct))
        
    c1.drop(columns=['Precio'],inplace=True)
    
    for i in range(len(c1.index)):
        for j in range(len(c1.columns)):
            if i==j:
                c1.loc[i,j] = 1    
    return c1

In [14]:
start_time = time.time()
d2 = exchange_rate_matrix2(df)
end_time = time.time()
secs = end_time-start_time
print("exchange_rate_matrix tomó: ",secs,"segundos" )

exchange_rate_matrix tomó:  0.011817216873168945 segundos


In [15]:
%%file ex_mat.pyx
import random
def exchange_rate_matrix(data):
    """
    Exchange Rate Matrix Representation
    param:
        dataframe
    return:
        dataframe
    """
    n = data.shape[0]

    max_spread_pct = 0.05 # maximum bid-ask spread in pct of bid, 0.05 for 5%

    c1 = data[['Precio']]
    aux = c1.copy()
    random.seed(10)
    for i in range(n):
        c1[i] = aux/c1[['Precio']].values[i]*(1+random.uniform(0,max_spread_pct))
    c1.drop(columns=['Precio'],inplace=True)
    for i in range(len(c1.index)):
        for j in range(len(c1.columns)):
            if i==j:
                c1.loc[i,j] = 1
    return c1

Writing ex_mat.pyx


In [16]:
start_time = time.time()
res = exchange_rate_matrix(df)
end_time = time.time()
secs = end_time-start_time
print("exchange_rate_matrix tomó: ",secs,"segundos" )

exchange_rate_matrix tomó:  0.020116090774536133 segundos


In [17]:
%%bash
$HOME/.local/bin/cython --force -3 --annotate ex_mat.pyx

In [20]:
display(HTML("ex_mat.html"))

In [21]:
%%file ex_mat2.pyx
import random
def exchange_rate_matrix2(data):
    """
    Exchange Rate Matrix Representation
    param:
        dataframe
    return:
        dataframe
    """
    n = data.shape[0]

    max_spread_pct = 0.05 # maximum bid-ask spread in pct of bid, 0.05 for 5%

    c1 = data[['Precio']]
    aux = c1.copy()
    random.seed(10)
    for i in range(n):
        c1[i] = aux/c1.loc[i]['Precio']*(1+random.uniform(0,max_spread_pct))
        
    c1.drop(columns=['Precio'],inplace=True)
    
    for i in range(len(c1.index)):
        for j in range(len(c1.columns)):
            if i==j:
                c1.loc[i,j] = 1    
    return c1

Writing ex_mat2.pyx


In [22]:
%%bash
$HOME/.local/bin/cython --force -3 --annotate ex_mat2.pyx

In [23]:
display(HTML("ex_mat2.html"))

### **Compulando función exchange matrix**

In [28]:
%%file ex_mat_openmp.pyx
from cython.parallel import prange
from libc.math cimport exp as c_exp
import numpy as np 
import random

def exchange_rate_matrix(data):
    """
    Exchange Rate Matrix Representation
    param:
        dataframe
    return:
        dataframe
    """
    n = data.shape[0]

    max_spread_pct = 0.05 # maximum bid-ask spread in pct of bid, 0.05 for 5%

    c1 = data[['Precio']]
    aux = c1.copy()
    random.seed(10)
    for i in range(n):
        c1[i] = aux/c1.loc[i]['Precio']*(1+random.uniform(0,max_spread_pct))
        
    c1.drop(columns=['Precio'],inplace=True)
    
    for i in range(len(c1.index)):
        for j in range(len(c1.columns)):
            if i==j:
                c1.loc[i,j] = 1    
    return c1

Overwriting ex_mat_openmp.pyx


In [29]:
%%bash
$HOME/.local/bin/cython -3 --force ex_mat_openmp.pyx

In [30]:
%%file setup_openmp.py
from setuptools import Extension, setup
from Cython.Build import cythonize

ext_modules = [Extension("ex_mat_openmp",
                         ["ex_mat_openmp.pyx"], 
                         extra_compile_args=["-fopenmp"],
                         extra_link_args=["-fopenmp"],
                        )
              ]

setup(ext_modules = cythonize(ext_modules))

Overwriting setup_openmp.py


In [31]:
%%bash
python3 setup_openmp.py build_ext --inplace

running build_ext
building 'ex_mat_openmp' extension
x86_64-linux-gnu-gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/include/python3.8 -c ex_mat_openmp.c -o build/temp.linux-x86_64-3.8/ex_mat_openmp.o -fopenmp
x86_64-linux-gnu-gcc -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fwrapv -O2 -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 build/temp.linux-x86_64-3.8/ex_mat_openmp.o -o build/lib.linux-x86_64-3.8/ex_mat_openmp.cpython-38-x86_64-linux-gnu.so -fopenmp
copying build/lib.linux-x86_64-3.8/ex_mat_openmp.cpython-38-x86_64-linux-gnu.so -> 


In [32]:
import ex_mat_openmp

In [33]:
start_time = time.time()
d = ex_mat_openmp.exchange_rate_matrix(df)
end_time = time.time()
secs = end_time-start_time
print("exchange_rate_matrix tomó: ",secs,"segundos" )

exchange_rate_matrix tomó:  0.014462947845458984 segundos


In [34]:
%%file ex_mat_openmp2.pyx
from cython.parallel import prange
from libc.math cimport exp as c_exp
import random

def exchange_rate_matrix2(data):
    """
    Exchange Rate Matrix Representation
    param:
        dataframe
    return:
        dataframe
    """
    n = data.shape[0]

    max_spread_pct = 0.05 # maximum bid-ask spread in pct of bid, 0.05 for 5%

    c1 = data[['Precio']]
    aux = c1.copy()
    random.seed(10)
    for i in range(n):
        c1[i] = aux/c1.loc[i]['Precio']*(1+random.uniform(0,max_spread_pct))
        
    c1.drop(columns=['Precio'],inplace=True)
    
    for i in range(len(c1.index)):
        for j in range(len(c1.columns)):
            if i==j:
                c1.loc[i,j] = 1    
    return c1

Writing ex_mat_openmp2.pyx


In [35]:
%%bash
$HOME/.local/bin/cython -3 --force ex_mat_openmp2.pyx

In [36]:
%%file setup_openmp2.py
from setuptools import Extension, setup
from Cython.Build import cythonize

ext_modules = [Extension("ex_mat_openmp2",
                         ["ex_mat_openmp2.pyx"], 
                         extra_compile_args=["-fopenmp"],
                         extra_link_args=["-fopenmp"],
                        )
              ]

setup(ext_modules = cythonize(ext_modules))

Writing setup_openmp2.py


In [37]:
%%bash
python3 setup_openmp2.py build_ext --inplace

running build_ext
building 'ex_mat_openmp2' extension
x86_64-linux-gnu-gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/include/python3.8 -c ex_mat_openmp2.c -o build/temp.linux-x86_64-3.8/ex_mat_openmp2.o -fopenmp
x86_64-linux-gnu-gcc -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fwrapv -O2 -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 build/temp.linux-x86_64-3.8/ex_mat_openmp2.o -o build/lib.linux-x86_64-3.8/ex_mat_openmp2.cpython-38-x86_64-linux-gnu.so -fopenmp
copying build/lib.linux-x86_64-3.8/ex_mat_openmp2.cpython-38-x86_64-linux-gnu.so -> 


In [38]:
import ex_mat_openmp2

In [39]:
start_time = time.time()
d = ex_mat_openmp2.exchange_rate_matrix2(df)
end_time = time.time()
secs = end_time-start_time
print("exchange_rate_matrix tomó: ",secs,"segundos" )

exchange_rate_matrix tomó:  0.01583266258239746 segundos


## **Midiendo el tiempo completo**

### **- Sin perfilamiento**

In [49]:
def get_data(data_dir, date=date.today().strftime("%Y-%m-%d"), tam_data=100, price='Open'):
    """
    Obtiene datos de un directorio de una fecha dada
    param:
    data_dir: directorio de datos
    date: fecha a analizar, por default toma la fecha actual si no se especifica
    tam_data: analiza cierto tamaño del dataset de forma aleatoria, por default son 100 monedas a analizar
    price: selecciona con que precio se hará el ejercicio(High,Low,Open,Close), por default es Open
    return:
    dataframe: con datos especificados
    """
    df = pd.read_csv(data_dir)
    df_date = df[df['Date']==date]
    df_date = df_date[df_date[price] > 0]
    
    if(tam_data == None):
        df_random = df_date
    else:
        df_random = df_date.sample(n = tam_data)
    
    
    df_random = df_random.reset_index()
    data = df_random[["ticker", price]]
    data.columns = ['Símbolo', 'Precio']
    
    return data    

def exchange_rate_matrix(data):
    """
    Exchange Rate Matrix Representation
    param:
        dataframe
    return:
        dataframe
    """
    n = data.shape[0]

    max_spread_pct = 0.05 # maximum bid-ask spread in pct of bid, 0.05 for 5%

    c1 = data[['Precio']]
    aux = c1.copy()
    random.seed(10)
    for i in range(n):
        c1[i] = aux/c1[['Precio']].values[i]*(1+random.uniform(0,max_spread_pct))
    c1.drop(columns=['Precio'],inplace=True)
    for i in range(len(c1.index)):
        for j in range(len(c1.columns)):
            if i==j:
                c1.loc[i,j] = 1
    return c1

def log_transformed_rep(data):
    """
    Log-Transformed Representations
    param:
        dataframe
    return:
        dataframe
    """
    
    df_ln = np.round(-np.log(data),2)
    return df_ln

def create_grap(data):
    """
    Crea grafo a partir de los datos de cripomonedas y precio
    param:
        dataframe
    return:
        grafo
    """
    df = exchange_rate_matrix(data)
    df_ln = log_transformed_rep(df)
    
    n = df_ln.shape[0]
    
    edge = []
    # Covert to formatto use un graph
    for i in range(n):
        for j in range(n):
            if (i != j):
                edge.append([str(i), str(j), df_ln.loc[i][j]])

    G = nx.DiGraph()        
    G.add_weighted_edges_from(edge)
    
    return G

def bf_negative_cycle(graph, node_ini=None, distance_ini=np.inf):
    
    assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    
    if node_ini is None:
        n_nodes = len(graph.nodes())
    else:
        assert node_ini <= len(graph.nodes), f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {len(graph.nodes)}."
        n_nodes = node_ini
            
    n = len(graph.nodes()) + 1
    # Remove nan borders inside graph
    edges = []
    for edge in graph.edges().data():
        if ~np.isnan(edge[2]['weight']):
            edges.append(edge)

    # Add a start node and add zero weighted edges to all other nodes
    for i in range(n-1):
        edges.append((n-1, i, {'weight': 0}))

    # Initialize distances of nodes and predecessors
    distance= np.ones(n) * distance_ini # Starting distances with infinite values
    distance[n_nodes] = 0  # Starting node has zero distance
    predecessors = np.ones(n) * -1  # Starting predecessors with -1 values
    
    for i in range(n):  
        x = -1
        for edge in edges:
            if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:  
                distance[int(edge[1])] = distance[int(edge[0])] + edge[2]['weight']
                predecessors[int(edge[1])] = int(edge[0])
                x = int(edge[1])
        if x == -1:  # If relaxation is not possible, there is no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = predecessors[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = predecessors[int(v)]
    
    return cycle.reverse()

In [50]:
start_time = time.time()
df = get_data('../data/historical_data.csv', '2022-05-12')
G = create_grap(df)
bf_negative_cycle(G)
end_time = time.time()
secs = end_time-start_time
print("Arbitrage Identification Cycle in Crypto Trading tomó: ",secs,"segundos")

Arbitrage Identification Cycle in Crypto Trading tomó:  4.241421222686768 segundos


### **- Con perfilamiento**

In [51]:
def exchange_rate_matrix(data):
    """
    Exchange Rate Matrix Representation
    param:
        dataframe
    return:
        dataframe
    """
    n = data.shape[0]

    max_spread_pct = 0.05 # maximum bid-ask spread in pct of bid, 0.05 for 5%

    c1 = data[['Precio']]
    aux = c1.copy()
    random.seed(10)
    for i in range(n):
        c1[i] = aux/c1.loc[i]['Precio']*(1+random.uniform(0,max_spread_pct))
        
    c1.drop(columns=['Precio'],inplace=True)
    
    for i in range(len(c1.index)):
        for j in range(len(c1.columns)):
            if i==j:
                c1.loc[i,j] = 1    
    return c1

def bf_negative_cycle_cc(graph, node_ini=None, distance_ini=np.inf):
    
    assert distance_ini>=1, f"La distancia inicial debe de ser mayor o igual a 1. El parámetro fue igual a {distance_ini}"
    
    n_nodes = len(graph.nodes)
    
    if node_ini is not None:
        assert node_ini <= n_nodes, f"El nodo definido es mayor a los del grafo. Deberia de ser menor a {n_nodes}."
        n_nodes = node_ini
                    
    n = n_nodes + 1
    # Remove nan borders inside graph
    edges = [edge for edge in graph.edges().data() if ~np.isnan(edge[2]['weight'])]

    # Add a start node and add zero weighted edges to all other nodes
    for i in range(n-1):
        edges.append((n-1, i, {'weight': 0}))
        
    # Initialize distances of nodes and predecessors
    # https://codingdeekshi.com/initialize-an-array-in-python/
    distance= [distance_ini ]*n
    distance[n_nodes] = 0  
    predecessors = [-1]*n 
    
    for i in range(n):  
        x = -1
        for edge in edges:
            if distance[int(edge[0])] + edge[2]['weight'] < distance[int(edge[1])]:                
                distance[int(edge[1])] = distance[int(edge[0])] + edge[2]['weight']
                predecessors[int(edge[1])] = int(edge[0])
                x = int(edge[1])
        if x == -1:  # If relaxation is not possible, there is no negative cycle
            return None
        
    # Identify negative cycle
    for i in range(n):
        x = predecessors[int(x)]
    cycle = []
    v = x
    while True:
        cycle.append(int(v))
        if v == x and len(cycle) > 1:
            break
        v = predecessors[int(v)]
    
    return cycle.reverse()

In [52]:
start_time = time.time()
df = get_data('../data/historical_data.csv', '2022-05-12')
G = create_grap(df)
bf_negative_cycle_cc(G)
end_time = time.time()
secs = end_time-start_time
print("Arbitrage Identification Cycle in Crypto Trading tomó: ",secs,"segundos")

Arbitrage Identification Cycle in Crypto Trading tomó:  3.4711267948150635 segundos
