In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
import scipy as sc
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from binance import Client
from numba import jit
import time
import cupy as cp

client = Client()

recent_data = pd.read_csv('../../../../csvs/historical/by1m/BTCUSDT')
recent_data = recent_data.iloc[:,:6]
recent_data.columns = ['Tiempo','Open','High','Low','Close','Volume']
recent_data['Tiempo'] = recent_data['Tiempo'].astype('datetime64[ns]')# pd.to_datetime(recent_data.Tiempo,unit='ms')
recent_data = recent_data.set_index('Tiempo')
recent_data = recent_data.astype(float)
recent_data = recent_data.reset_index()

##################################################
# Lista de parámetros.
##################################################
# Parámetros de interpolación.
###################################
smooth_interval = 25
smooth_exp = 3
###################################
# Parámetros de estrategia.
###################################
rango = 0
std_mult = 25
z_aprox = 0.15

In [4]:
#####################
# CPU
#####################
def savgol_filter(y, window_size, poly_order):
    half_window = window_size // 2
    order_range = np.arange(poly_order+1)
    weight = np.zeros((window_size, poly_order+1))
    
    for i in range(-half_window, half_window+1):
        weight[i+half_window, :] = [i**j for j in order_range]
    
    weights = np.linalg.pinv(weight).T.sum(axis=0)
    
    smoothed = np.convolve(weights, y, mode='valid')
    
    return smoothed #smoothed_padded

def calculate_derivatives(interpolacion, times, derivative):
    d_interpolacion = np.diff(interpolacion) / times[2+derivative:]
    return d_interpolacion


open_prices = recent_data['Open'].values
times = (np.diff(recent_data.Tiempo)/np.timedelta64(1, 's')).astype(float)

start = time.time()
interpolacion = savgol_filter(open_prices, smooth_interval, 3)
primer_derivada = calculate_derivatives(interpolacion, times, 1)
pd_interpolacion = savgol_filter(primer_derivada, smooth_interval, 3)
segunda_derivada = calculate_derivatives(pd_interpolacion, times, 5)
sd_interpolacion = savgol_filter(segunda_derivada, smooth_interval, 3)
tercer_derivada = calculate_derivatives(sd_interpolacion, times, 9)
td_interpolacion = savgol_filter(tercer_derivada, smooth_interval, 3)
recent_data['Primer_Derivada'] = np.concatenate([np.zeros(3),interpolacion])
recent_data['Primer_Derivada'] = np.concatenate([np.zeros(4),primer_derivada])
recent_data['PD_Interpolacion'] = np.concatenate([np.zeros(7),pd_interpolacion])
recent_data['Segunda_Derivada'] = np.concatenate([np.zeros(8),segunda_derivada])
recent_data['SD_Interpolacion'] = np.concatenate([np.zeros(11),sd_interpolacion])
recent_data['Tercer_Derivada'] = np.concatenate([np.zeros(12),tercer_derivada])
recent_data['TD_Interpolacion'] = np.concatenate([np.zeros(15),td_interpolacion])
end = time.time()
print("CPU ON DERIVATIVES = %s" % (end - start))

CPU ON DERIVATIVES = 0.361248254776001


In [5]:
td_interpolacion

AttributeError: 'numpy.ndarray' object has no attribute 'index'

In [3]:
start = time.time()
recent_data['Interpolacion'] = sc.signal.savgol_filter(recent_data.Open,smooth_interval,smooth_exp)
recent_data['Primer_Derivada'] = recent_data.Interpolacion.diff()/(recent_data.Tiempo.diff()/np.timedelta64(1, 's'))
recent_data['PD_Interpolacion'] = sc.signal.savgol_filter(recent_data.Primer_Derivada,smooth_interval,smooth_exp)
recent_data['Segunda_Derivada'] = recent_data.PD_Interpolacion.diff()/(recent_data.Tiempo.diff()/np.timedelta64(1, 's'))
recent_data['SD_Interpolacion'] = sc.signal.savgol_filter(recent_data.Segunda_Derivada,smooth_interval,smooth_exp)
recent_data['Tercer_Derivada'] = recent_data.SD_Interpolacion.diff()/(recent_data.Tiempo.diff()/np.timedelta64(1, 's'))
recent_data['TD_Interpolacion'] = sc.signal.savgol_filter(recent_data.Tercer_Derivada,smooth_interval,smooth_exp)
end = time.time()
print("DERIVATIVES = %s" % (end - start))

DERIVATIVES = 0.6396634578704834


In [15]:
recent_data['SD_Interpolacion']

0          0.000000
1          0.000000
2          0.000000
3          0.000000
4          0.000000
             ...   
3473177    0.003414
3473178   -0.001167
3473179    0.007578
3473180    0.009814
3473181   -0.008669
Name: SD_Interpolacion, Length: 3473182, dtype: float64

In [5]:
sdi = data['SD_Interpolacion']

@jit(nopython=True)
def find_extrema(data, z_aprox, std_mult, rango):
    min_max_sd = np.diff(data.Open) / (np.diff(recent_data.Tiempo)/np.timedelta64(1, 's')).astype(float)
    lista_min_max = recent_data[1:].iloc[(min_max_sd >= -z_aprox) & (min_max_sd <= z_aprox)]
    indices_min_max = lista_min_max.index.tolist()

    max_points_idx = []
    min_points_idx = []
    pmin_idx = 0
    pmax_idx = 0

    for i in range(len(indices_min_max)):
        PMSD = indices_min_max[i]
        if sdi[PMSD] > 0:
            pmax = sdi[PMSD]
            for i in range(rango+1):
                if (data['SD_Interpolacion'][PMSD-i] > pmax) & (abs(data['SD_Interpolacion'][PMSD-i]*100) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmax_idx = PMSD-i
                elif (data['SD_Interpolacion'][PMSD+i] > pmax) & (abs(data['SD_Interpolacion'][PMSD+i]*100) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmax_idx = PMSD+i
            max_points_idx.append(pmax_idx)
        elif data['SD_Interpolacion'][PMSD] < 0:
            pmin = data['SD_Interpolacion'][PMSD]
            for i in range(1,rango+1):
                if (data['SD_Interpolacion'][PMSD-i] < pmin) & (abs(data['SD_Interpolacion'][PMSD-i]*80) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmin_idx = PMSD-i
                elif (data['SD_Interpolacion'][PMSD+i] < pmin) & (abs(data['SD_Interpolacion'][PMSD+i]*80) > abs(np.std(data['SD_Interpolacion'])*std_mult)):
                    pmin_idx = PMSD+i
            min_points_idx.append(pmin_idx)

    return max_points_idx, min_points_idx


start = time.time()
max_points_idx, min_points_idx = find_extrema(recent_data, z_aprox, std_mult, rango)
end = time.time()
print("OPTIMIZED CPU ON STRATEGY = %s" % (end - start))

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1mUntyped global name 'recent_data':[0m [1m[1mCannot determine Numba type of <class 'pandas.core.frame.DataFrame'>[0m
[1m
File "..\..\..\..\..\..\AppData\Local\Temp\ipykernel_10568\2094299100.py", line 3:[0m
[1m<source missing, REPL/exec in use?>[0m
[0m 

This error may have been caused by the following argument(s):
- argument 0: [1mCannot determine Numba type of <class 'pandas.core.frame.DataFrame'>[0m
