In [92]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import bisect
from sklearn.preprocessing import *

# dist_measure
# 1 = Euclidean Distance
# 2 = Perpendicular Distance
# 3 = Vertical Distance
def find_pips(data: np.ndarray, n_pips: int, dist_measure: int):
    """Select perceptually important points (PIPs) from a price series.

    Starts with the first and last samples, then repeatedly adds the
    not-yet-selected sample that is farthest (per ``dist_measure``) from the
    segment joining its two neighbouring PIPs.

    Args:
        data: Non-empty 1-D sequence of prices.
        n_pips: Target number of points, endpoints included. Values below 2
            still return the two endpoints.
        dist_measure: 1 = Euclidean, 2 = perpendicular, 3 = vertical; passed
            through to ``distance``.

    Returns:
        ``(pips_x, pips_y)``: the selected indices into ``data`` in ascending
        order, and the values at those indices. Fewer than ``n_pips`` points
        are returned when ``data`` has no unselected samples left.
    """
    last = len(data) - 1
    pips_x = [0, last]            # Index
    pips_y = [data[0], data[-1]]  # Price

    for _ in range(2, n_pips):
        # Distances are non-negative, so starting below zero lets a
        # zero-distance sample (flat or collinear data) still be chosen,
        # while a NaN distance never compares greater and is skipped.
        md = -1.0          # Max distance
        md_i = -1          # Max distance index
        insert_index = -1

        for i in range(1, last):
            left_adj = bisect.bisect_right(pips_x, i) - 1
            # bisect_right lands on i itself when i is already a PIP; scoring
            # it again could re-select it and insert a duplicate point.
            if pips_x[left_adj] == i:
                continue
            right_adj = left_adj + 1
            d = distance(data, pips_x, pips_y, i, left_adj, right_adj, dist_measure)
            if d > md:
                md = d
                md_i = i
                insert_index = right_adj

        # No candidate left: stop instead of inserting the -1 sentinel.
        if md_i < 0:
            break

        pips_x.insert(insert_index, md_i)
        pips_y.insert(insert_index, data[md_i])

    return pips_x, pips_y

# Define a helper function to calculate the distance
def distance(data, pips_x, pips_y, i, left_adj, right_adj, dist_measure):
    """Score how far sample ``i`` is from the segment between two PIPs.

    Args:
        data: Price sequence; ``data[i]`` is the sample being scored.
        pips_x: Indices of the currently selected PIPs.
        pips_y: Prices of the currently selected PIPs.
        i: Index of the sample to score.
        left_adj: Position in ``pips_x``/``pips_y`` of the segment's left end.
        right_adj: Position in ``pips_x``/``pips_y`` of the segment's right end.
        dist_measure: 1 = sum of the straight-line distances to both segment
            ends, 2 = perpendicular distance to the line through the ends,
            3 = vertical distance to that line.

    Returns:
        The distance under the chosen measure.

    Raises:
        KeyError: If ``dist_measure`` is not 1, 2 or 3.
    """
    x_left, y_left = pips_x[left_adj], pips_y[left_adj]
    x_right, y_right = pips_x[right_adj], pips_y[right_adj]

    # The line through both ends is computed for every measure, so a
    # zero-width segment fails the same way regardless of dist_measure.
    slope = (y_right - y_left) / (x_right - x_left)
    intercept = y_left - x_left * slope

    if dist_measure == 1:
        # Euclidean distance: to the left end plus to the right end
        y = data[i]
        to_left = ((x_left - i) ** 2 + (y_left - y) ** 2) ** 0.5
        to_right = ((x_right - i) ** 2 + (y_right - y) ** 2) ** 0.5
        return to_left + to_right

    if dist_measure == 2:
        # Perpendicular distance
        return abs((slope * i + intercept) - data[i]) / (slope ** 2 + 1) ** 0.5

    if dist_measure == 3:
        # Vertical distance
        return abs((slope * i + intercept) - data[i])

    raise KeyError(dist_measure)


# Load the price table and index it by timestamp.
# NOTE(review): the file name suggests daily BTC/USDT bars - confirm.
data = pd.read_csv('BTCUSDT86400.csv')
data['date'] = data['date'].astype('datetime64[s]')
data = data.set_index('date')

n_close_points = 24  # number of 'close' rows in each rolling window
n_perc_points = 5    # number of PIPs requested per window
dist_measure = 1     # 1 = Euclidean (see the legend above find_pips)

# For every row from position n_close_points onward, take the n_close_points
# closes that precede it (the row itself is excluded), extract the PIPs and
# standardise their prices within the window.
pips_y_list = []
for index in range(n_close_points, len(data)):
    window_start = index - n_close_points
    x = data['close'].iloc[window_start:index].to_numpy()
    pips_x, pips_y = find_pips(x, n_perc_points, dist_measure)

    # StandardScaler wants a 2-D column; flatten back to 1-D afterwards.
    pips_column = np.array(pips_y).reshape(-1, 1)
    scaled_pips_y = StandardScaler().fit_transform(pips_column).reshape(-1)
    pips_y_list.append(scaled_pips_y)

    # Debug aid - uncomment to inspect one window and its PIPs:
    # print(scaled_pips_y)
    # pd.Series(x).plot()
    # for i in range(n_perc_points):
    #     plt.plot(pips_x[i], pips_y[i], marker='o', color='red')
    # plt.plot(pips_x, pips_y, color='black')
    # plt.show()

# One row per window, one column per PIP position.
pip_columns = [f'pip_{i}' for i in range(n_perc_points)]
pips_y_df = pd.DataFrame(pips_y_list, columns=pip_columns)
pips_y_df

Unnamed: 0,pip_0,pip_1,pip_2,pip_3,pip_4
0,0.694346,1.639584,-0.795781,-0.851023,-0.687125
1,0.763920,1.596970,-0.793297,-0.847516,-0.720076
2,0.785900,1.574462,-0.845765,-0.900664,-0.613933
3,1.194685,1.232608,-0.906160,-0.954674,-0.566459
4,1.815756,0.347616,-0.750384,-0.808592,-0.604396
...,...,...,...,...,...
1796,-0.108050,1.854137,-0.649028,-1.062093,-0.034966
1797,0.347807,1.787796,-0.629970,-1.028942,-0.476690
1798,0.179280,1.868943,-0.495595,-0.885785,-0.666843
1799,0.345572,1.800409,-0.585031,-0.978670,-0.582281
