In [7]:
!pip install python-binance

Collecting python-binance
  Downloading python_binance-1.0.19-py2.py3-none-any.whl.metadata (11 kB)
Collecting dateparser (from python-binance)
  Downloading dateparser-1.2.0-py2.py3-none-any.whl.metadata (28 kB)
Collecting aiohttp (from python-binance)
  Downloading aiohttp-3.9.4-cp310-cp310-win_amd64.whl.metadata (7.7 kB)
Collecting ujson (from python-binance)
  Downloading ujson-5.9.0-cp310-cp310-win_amd64.whl.metadata (8.9 kB)
Collecting websockets (from python-binance)
  Downloading websockets-12.0-cp310-cp310-win_amd64.whl.metadata (6.8 kB)
Collecting pycryptodome (from python-binance)
  Downloading pycryptodome-3.20.0-cp35-abi3-win_amd64.whl.metadata (3.4 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->python-binance)
  Using cached aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)
Collecting attrs>=17.3.0 (from aiohttp->python-binance)
  Using cached attrs-23.2.0-py3-none-any.whl.metadata (9.5 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->python-binance)
  Downloading frozenl

In [51]:
!pip install cupy-cuda12x cupyx-cuda12x



ERROR: Could not find a version that satisfies the requirement cupyx-cuda12x (from versions: none)
ERROR: No matching distribution found for cupyx-cuda12x





In [48]:
!pip install cupyx


ERROR: Could not find a version that satisfies the requirement cupyx (from versions: none)
ERROR: No matching distribution found for cupyx


In [43]:
import numba
import cupyx
numba.__version__

'0.59.1'

In [3]:
from numba import cuda
import numpy as np
import time

x = np.arange(100000000).reshape(10000, 10000)

@jit(nopython=True)
def go_fast(a): # Function is compiled and runs in machine code
    trace = 0.0
    for i in range(a.shape[0]):
        trace += np.tanh(a[i, i])
    return a + trace

# DO NOT REPORT THIS... COMPILATION TIME IS INCLUDED IN THE EXECUTION TIME!
start = time.time()
go_fast(x)
end = time.time()
print("Elapsed (with compilation) = %s" % (end - start))

# NOW THE FUNCTION IS COMPILED, RE-TIME IT EXECUTING FROM CACHE
start = time.time()
go_fast(x)
end = time.time()
print("Elapsed (after compilation) = %s" % (end - start))



x = np.arange(100000000).reshape(10000, 10000)

def go_slow(a): # Function is compiled and runs in machine code
    trace = 0.0
    for i in range(a.shape[0]):
        trace += np.tanh(a[i, i])
    return a + trace

# DO NOT REPORT THIS... COMPILATION TIME IS INCLUDED IN THE EXECUTION TIME!
start = time.time()
go_slow(x)
end = time.time()
print("Elapsed (with compilation) = %s" % (end - start))

# NOW THE FUNCTION IS COMPILED, RE-TIME IT EXECUTING FROM CACHE
start = time.time()
go_slow(x)
end = time.time()
print("Elapsed (after compilation) = %s" % (end - start))

ValueError: 
Kernel launch configuration was not specified. Use the syntax:

kernel_function[blockspergrid, threadsperblock](arg0, arg1, ..., argn)

See https://numba.readthedocs.io/en/stable/cuda/kernels.html#kernel-invocation for help.



In [14]:
import pandas as pd
import numpy as np
from datetime import datetime
import scipy as sc
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from binance import Client
from numba import jit
import time
import cupy as cp

client = Client()

In [None]:

##################################################
# Lista de parámetros.
##################################################
# Parámetros de interpolación.
###################################
smooth_interval = 25
smooth_exp = 3
###################################
# Parámetros de estrategia.
###################################
rango = 50
std_mult = 25
z_aprox = 0.15

##################################################################################################################################
# Descarga y limpieza de datos de X días atrás a hoy.
##################################################################################################################################
recent_data = pd.DataFrame(client.get_historical_klines('BTCUSDT','1m','4 day ago UTC'))
recent_data = recent_data.iloc[:,:6]
recent_data.columns = ['Tiempo','Open','High','Low','Close','Volume']
recent_data['Tiempo'] = pd.to_datetime(recent_data.Tiempo,unit='ms')
recent_data = recent_data.set_index('Tiempo')
recent_data = recent_data.astype(float)
recent_data = recent_data.reset_index()

##################################################
# Interpolación (suavizado) del Precio (OPEN) con SciPy y obtención de derivadas con su respectiva interpolación.
##################################################

@jit(nopython=True)
def calculate_derivatives(open_prices, times):
    interpolacion = sc.signal.savgol_filter(open_prices, smooth_interval, 3)
    pd_interpolacion = np.diff(interpolacion) / times
    sd_interpolacion = np.diff(pd_interpolacion) / times
    return interpolacion, pd_interpolacion, sd_interpolacion

open_prices = recent_data['Open'].values
times = recent_data['Tiempo'].diff()/np.timedelta64(1, 's')
times = times.values

interpolacion, pd_interpolacion, sd_interpolacion = calculate_derivatives(open_prices, times)

recent_data['Interpolacion'] = interpolacion
recent_data['Primer_Derivada'] = pd_interpolacion
recent_data['Segunda_Derivada'] = sd_interpolacion

##################################################
# Encontrar puntos máximos y mínimos.
##################################################

@jit(nopython=True)
def find_extrema(data, z_aprox, std_mult, rango):
    min_max_sd = data.diff() / (data['Tiempo'].diff()/np.timedelta64(1, 's'))
    lista_min_max = min_max_sd[(min_max_sd['Segunda_Derivada'] >= -z_aprox) & (min_max_sd['Segunda_Derivada'] <= z_aprox)]
    indices_min_max = lista_min_max.index.tolist()

    max_points_idx = []
    min_points_idx = []

    for i in range(len(indices_min_max)):
        PMSD = indices_min_max[i]
        if data['Segunda_Derivada'][PMSD] > 0:
            pmax = data['SD_Interpolacion'][PMSD]
            for i in range(rango+1):
                if (data['SD_Interpolacion'][PMSD-i] > pmax) & (abs(data['SD_Interpolacion'][PMSD-i]*100) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmax_idx = PMSD-i
                elif (data['SD_Interpolacion'][PMSD+i] > pmax) & (abs(data['SD_Interpolacion'][PMSD+i]*100) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmax_idx = PMSD+i
            max_points_idx.append(pmax_idx)
        elif data['Segunda_Derivada'][PMSD] < 0:
            pmin = data['SD_Interpolacion'][PMSD]
            for i in range(rango+1):
                if (data['SD_Interpolacion'][PMSD-i] < pmin) & (abs(data['SD_Interpolacion'][PMSD-i]*80) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmin_idx = PMSD-i
                elif (data['SD_Interpolacion'][PMSD+i] < pmin) & (abs(data['SD_Interpolacion'][PMSD+i]*80) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmin_idx = PMSD+i
            min_points_idx.append(pmin_idx)

    return max_points_idx, min_points_idx

max_points_idx, min_points_idx = find_extrema(recent_data, z_aprox, std_mult, rango)

##################################################
# Graficar resultados.
##################################################

mp = recent_data.Open[initial_date_index:final_date_index].mean()

plt.figure(figsize=(15,8))
plt.plot(recent_data.Tiempo[initial_date_index:final_date_index], recent_data.Open[initial_date_index:final_date_index].multiply(1), color='y')
plt.scatter(recent_data.Tiempo.iloc[max_points_idx], recent_data.Open.iloc[max_points_idx], color='g')
plt.scatter(recent_data.Tiempo.iloc[min_points_idx], recent_data.Open.iloc[min_points_idx], color='r')
plt.grid(visible=True)

# Configuración de ejes y etiquetas.
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%y %H:%M'))
plt.gca().xaxis.set_major_locator(mdates.MinuteLocator(interval=60))
plt.gcf().autofmt_xdate()
plt.grid(which='minor')
plt.xticks(rotation=75)
plt.axhline(mp)
plt.show()


In [11]:
import pandas as pd
recent_data = pd.read_csv('../../../../../csvs/historical/by1m/BTCUSDT')
recent_data = recent_data.iloc[:,:6]
recent_data.columns = ['Tiempo','Open','High','Low','Close','Volume']
recent_data['Tiempo'] = recent_data['Tiempo'].astype('datetime64[ns]')# pd.to_datetime(recent_data.Tiempo,unit='ms')
recent_data = recent_data.set_index('Tiempo')
recent_data = recent_data.astype(float)
recent_data = recent_data.reset_index()
recent_data

Unnamed: 0,Tiempo,Open,High,Low,Close,Volume
0,2017-08-17 04:00:00,4261.48,4261.48,4261.48,4261.48,1.775183
1,2017-08-17 04:01:00,4261.48,4261.48,4261.48,4261.48,0.000000
2,2017-08-17 04:02:00,4280.56,4280.56,4280.56,4280.56,0.261074
3,2017-08-17 04:03:00,4261.48,4261.48,4261.48,4261.48,0.012008
4,2017-08-17 04:04:00,4261.48,4261.48,4261.48,4261.48,0.140796
...,...,...,...,...,...,...
3473177,2024-03-31 02:09:00,69992.10,69992.10,69992.09,69992.10,1.172140
3473178,2024-03-31 02:10:00,69992.10,70019.39,69992.09,70019.38,7.438230
3473179,2024-03-31 02:11:00,70019.38,70085.98,70019.38,70082.00,14.504680
3473180,2024-03-31 02:12:00,70081.99,70116.24,70081.99,70113.39,18.548410


In [12]:
recent_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3473182 entries, 0 to 3473181
Data columns (total 6 columns):
 #   Column  Dtype         
---  ------  -----         
 0   Tiempo  datetime64[ns]
 1   Open    float64       
 2   High    float64       
 3   Low     float64       
 4   Close   float64       
 5   Volume  float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 159.0 MB


In [16]:
smooth_interval = 25
smooth_exp = 3
rango = 50
std_mult = 25
z_aprox = 0.15

start = time.time()
recent_data['Interpolacion'] = sc.signal.savgol_filter(recent_data.Open,smooth_interval,smooth_exp)
recent_data['Primer_Derivada'] = recent_data.Interpolacion.diff()/(recent_data.Tiempo.diff()/np.timedelta64(1, 's'))
recent_data['PD_Interpolacion'] = sc.signal.savgol_filter(recent_data.Primer_Derivada,smooth_interval,smooth_exp)
recent_data['Segunda_Derivada'] = recent_data.PD_Interpolacion.diff()/(recent_data.Tiempo.diff()/np.timedelta64(1, 's'))
recent_data['SD_Interpolacion'] = sc.signal.savgol_filter(recent_data.Segunda_Derivada,smooth_interval,smooth_exp)
recent_data['Tercer_Derivada'] = recent_data.SD_Interpolacion.diff()/(recent_data.Tiempo.diff()/np.timedelta64(1, 's'))
recent_data['TD_Interpolacion'] = sc.signal.savgol_filter(recent_data.Tercer_Derivada,smooth_interval,smooth_exp)
end = time.time()
print("DERIVATIVES = %s" % (end - start))

DERIVATIVES = 0.5010151863098145


In [21]:
##################################################
# Lista de parámetros.
##################################################
# Parámetros de interpolación.
###################################
smooth_interval = 25
smooth_exp = 3
###################################
# Parámetros de estrategia.
###################################
rango = 0
std_mult = 25
z_aprox = 0.15

#####################
# CPU
#####################
def savgol_filter(y, window_size, poly_order):
    half_window = window_size // 2
    order_range = np.arange(poly_order+1)
    weight = np.zeros((window_size, poly_order+1))
    
    for i in range(-half_window, half_window+1):
        weight[i+half_window, :] = [i**j for j in order_range]
    
    weights = np.linalg.pinv(weight).T.sum(axis=0)
    
    smoothed = np.convolve(weights, y, mode='valid')
    
    return smoothed #smoothed_padded

def calculate_derivatives(interpolacion, times, derivative):
    d_interpolacion = np.diff(interpolacion) / times[2+derivative:]
    return d_interpolacion

open_prices = recent_data['Open'].values
times = (np.diff(recent_data.Tiempo)/np.timedelta64(1, 's')).astype(float)

start = time.time()
interpolacion = savgol_filter(open_prices, smooth_interval, 3)
primer_derivada = calculate_derivatives(interpolacion, times, 1)
pd_interpolacion = savgol_filter(primer_derivada, smooth_interval, 3)
segunda_derivada = calculate_derivatives(pd_interpolacion, times, 5)
sd_interpolacion = savgol_filter(segunda_derivada, smooth_interval, 3)
tercer_derivada = calculate_derivatives(sd_interpolacion, times, 9)
td_interpolacion = savgol_filter(tercer_derivada, smooth_interval, 3)
end = time.time()
print("CPU ON DERIVATIVES = %s" % (end - start))

CPU ON DERIVATIVES = 0.05028891563415527


In [19]:
#####################
# GPU
#####################
@jit(nopython=False)
def savgol_filter(y, window_size, poly_order):
    half_window = window_size // 2
    order_range = np.arange(poly_order+1)
    weight = np.zeros((window_size, poly_order+1))
    
    for i in range(-half_window, half_window+1):
        weight[i+half_window, :] = [i**j for j in order_range]
    
    weights = np.linalg.pinv(weight).T.sum(axis=0)
    
    smoothed = np.convolve(weights, y, mode='valid')
    
    return smoothed #smoothed_padded

@jit(nopython=False)
def calculate_derivatives(interpolacion, times, derivative):
    d_interpolacion = np.diff(interpolacion) / times[2+derivative:]
    return d_interpolacion

open_prices = recent_data['Open'].values
times = (np.diff(recent_data.Tiempo)/np.timedelta64(1, 's')).astype(float)

start = time.time()
interpolacion = savgol_filter(open_prices, smooth_interval, 3)
primer_derivada = calculate_derivatives(interpolacion, times, 1)
pd_interpolacion = savgol_filter(primer_derivada, smooth_interval, 3)
segunda_derivada = calculate_derivatives(pd_interpolacion, times, 5)
sd_interpolacion = savgol_filter(segunda_derivada, smooth_interval, 3)
tercer_derivada = calculate_derivatives(sd_interpolacion, times, 9)
td_interpolacion = savgol_filter(tercer_derivada, smooth_interval, 3)
end = time.time()
print("OPTIMIZED CPU ON DERIVATIVES (compiling) = %s" % (end - start))

start = time.time()
interpolacion = savgol_filter(open_prices, smooth_interval, 3)
primer_derivada = calculate_derivatives(interpolacion, times, 1)
pd_interpolacion = savgol_filter(primer_derivada, smooth_interval, 3)
segunda_derivada = calculate_derivatives(pd_interpolacion, times, 5)
sd_interpolacion = savgol_filter(segunda_derivada, smooth_interval, 3)
tercer_derivada = calculate_derivatives(sd_interpolacion, times, 9)
td_interpolacion = savgol_filter(tercer_derivada, smooth_interval, 3)
end = time.time()
print("OPTIMIZED CPU ON DERIVATIVES (after compiling) = %s" % (end - start))

OPTIMIZED CPU ON DERIVATIVES (compiling) = 2.2700412273406982
OPTIMIZED CPU ON DERIVATIVES (after compiling) = 1.2489359378814697


In [58]:
np.diff(recent_data.Open)

array([-11.15,   7.3 ,   3.33, ..., -50.85,  -9.96, -31.24])

In [20]:
#####################
# CPU
#####################
def find_extrema(data, z_aprox, std_mult, rango):
    min_max_sd = np.diff(data.Open) / (np.diff(recent_data.Tiempo)/np.timedelta64(1, 's')).astype(float)
    lista_min_max = recent_data[1:].iloc[(min_max_sd >= -z_aprox) & (min_max_sd <= z_aprox)]
    indices_min_max = lista_min_max.index.tolist()

    max_points_idx = []
    min_points_idx = []
    pmin_idx = 0
    pmax_idx = 0

    for i in range(len(indices_min_max)):
        PMSD = indices_min_max[i]
        if data['SD_Interpolacion'][PMSD] > 0:
            pmax = data['SD_Interpolacion'][PMSD]
            for i in range(rango+1):
                if (data['SD_Interpolacion'][PMSD-i] > pmax) & (abs(data['SD_Interpolacion'][PMSD-i]*100) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmax_idx = PMSD-i
                elif (data['SD_Interpolacion'][PMSD+i] > pmax) & (abs(data['SD_Interpolacion'][PMSD+i]*100) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmax_idx = PMSD+i
            max_points_idx.append(pmax_idx)
        elif data['SD_Interpolacion'][PMSD] < 0:
            pmin = data['SD_Interpolacion'][PMSD]
            for i in range(1,rango+1):
                if (data['SD_Interpolacion'][PMSD-i] < pmin) & (abs(data['SD_Interpolacion'][PMSD-i]*80) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmin_idx = PMSD-i
                elif (data['SD_Interpolacion'][PMSD+i] < pmin) & (abs(data['SD_Interpolacion'][PMSD+i]*80) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmin_idx = PMSD+i
            min_points_idx.append(pmin_idx)

    return max_points_idx, min_points_idx

start = time.time()
max_points_idx, min_points_idx = find_extrema(recent_data, z_aprox, std_mult, rango)
end = time.time()
print("CPU ON STRATEGY = %s" % (end - start))

KeyboardInterrupt: 

In [None]:
#####################
# GPU
#####################
@jit(nopython=True)
def find_extrema(data, z_aprox, std_mult, rango):
    min_max_sd = data.diff() / (data['Tiempo'].diff().astype(int) / 1e9)
    lista_min_max = min_max_sd[(min_max_sd['Segunda_Derivada'] >= -z_aprox) & (min_max_sd['Segunda_Derivada'] <= z_aprox)]
    indices_min_max = lista_min_max.index.tolist()

    max_points_idx = []
    min_points_idx = []

    for i in range(len(indices_min_max)):
        PMSD = indices_min_max[i]
        if data['Segunda_Derivada'][PMSD] > 0:
            pmax = data['SD_Interpolacion'][PMSD]
            for i in range(rango+1):
                if (data['SD_Interpolacion'][PMSD-i] > pmax) & (abs(data['SD_Interpolacion'][PMSD-i]*100) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmax_idx = PMSD-i
                elif (data['SD_Interpolacion'][PMSD+i] > pmax) & (abs(data['SD_Interpolacion'][PMSD+i]*100) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmax_idx = PMSD+i
            max_points_idx.append(pmax_idx)
        elif data['Segunda_Derivada'][PMSD] < 0:
            pmin = data['SD_Interpolacion'][PMSD]
            for i in range(rango+1):
                if (data['SD_Interpolacion'][PMSD-i] < pmin) & (abs(data['SD_Interpolacion'][PMSD-i]*80) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmin_idx = PMSD-i
                elif (data['SD_Interpolacion'][PMSD+i] < pmin) & (abs(data['SD_Interpolacion'][PMSD+i]*80) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmin_idx = PMSD+i
            min_points_idx.append(pmin_idx)

    return max_points_idx, min_points_idx

start = time.time()
max_points_idx, min_points_idx = find_extrema(recent_data, z_aprox, std_mult, rango)
end = time.time()
print("OPTIMIZED CPU ON STRATEGY = %s" % (end - start))