In [327]:
import warnings
warnings.filterwarnings('ignore')

import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from random import shuffle
from random import choice

In [328]:
# Candidate symbols for this run. The list is shuffled in place and whichever
# symbol ends up first is the one the rest of the notebook works with.
tickers = ["BTC-USD"]
shuffle(tickers)
ticker, *_ = tickers
print(ticker)

BTC-USD


In [329]:
# Bar interval -> look-back period requested for that interval.
# NOTE(review): the periods are presumably chosen to stay within Yahoo
# Finance's per-interval history limits -- confirm against the yfinance docs.
data_format = {
    "1m": "8d",
    "2m": "60d",
    "5m": "60d",
    "15m": "60d",
    "30m": "60d",
    "1h": "730d",
    "90m": "60d",
    "1d": "5y",
}

# Draw one (interval, period) pair at random for this run. The draw is not
# seeded, so every execution may use a different bar size.
interval, period = choice(list(data_format.items()))

# Download the price history for the selected ticker and preview it.
data = yf.download(ticker, period=period, interval=interval)
data.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-12-26 00:00:00+00:00,99430.890625,99430.890625,99430.890625,99430.890625,99430.890625,0
2024-12-26 00:02:00+00:00,99488.429688,99488.429688,99475.046875,99475.046875,99475.046875,21307392
2024-12-26 00:04:00+00:00,99685.429688,99685.429688,99682.945312,99682.945312,99682.945312,116674560
2024-12-26 00:06:00+00:00,99764.671875,99764.671875,99764.671875,99764.671875,99764.671875,17240064
2024-12-26 00:08:00+00:00,99789.242188,99789.242188,99786.71875,99786.71875,99786.71875,135516160


In [330]:
# Forward-fill gaps with the last known bar, then drop rows that are still
# incomplete (leading rows that have nothing to fill from).
# `DataFrame.ffill()` is the supported spelling; `fillna(method='ffill')` is
# deprecated, which the notebook-wide warning filter would otherwise hide.
data = data.ffill().dropna()

In [331]:
# Search space for this run's hyper-parameters.
# input_length / target_length hold the candidate values; a single value of
# each is drawn in the next cell.
input_length = [5, 10, 15, 20, 30, 50, 100, 150, 200]
target_length = [1, 3, 5, 10, 20, 30]

# Neighbour count for the KNN model, drawn at random right away.
k = choice(
    [1, 3, 5, 10, 15, 20, 30, 50, 100]
)

In [332]:
# Draw the window size and the prediction horizon for this run.
# NOTE: both names are rebound from a list of candidates to a single int, so
# this cell cannot be re-run on its own (choice() on an int raises TypeError);
# re-run the cell that defines the candidate lists first.
input_length = choice(input_length)    # number of rows in one input window
target_length = choice(target_length)  # number of rows ahead to predict

# 'Target' is the close `target_length` rows into the future. The last
# `target_length` rows have no future close, become NaN, and are dropped.
future_close = data['Close'].shift(-target_length)
data['Target'] = future_close
data = data.dropna()

print(data.head())
print(data.shape)

                                   Open          High           Low  \
Datetime                                                              
2024-12-26 00:00:00+00:00  99430.890625  99430.890625  99430.890625   
2024-12-26 00:02:00+00:00  99488.429688  99488.429688  99475.046875   
2024-12-26 00:04:00+00:00  99685.429688  99685.429688  99682.945312   
2024-12-26 00:06:00+00:00  99764.671875  99764.671875  99764.671875   
2024-12-26 00:08:00+00:00  99789.242188  99789.242188  99786.718750   

                                  Close     Adj Close     Volume        Target  
Datetime                                                                        
2024-12-26 00:00:00+00:00  99430.890625  99430.890625          0  99735.929688  
2024-12-26 00:02:00+00:00  99475.046875  99475.046875   21307392  99745.890625  
2024-12-26 00:04:00+00:00  99682.945312  99682.945312  116674560  99713.875000  
2024-12-26 00:06:00+00:00  99764.671875  99764.671875   17240064  99641.726562  
2024-12-26 00:08

In [333]:
def _wilder_running_sum(values, len_param):
    """Return s with s[0] = 0 and s[i] = s[i-1] - s[i-1] / len_param + values[i]."""
    smoothed = np.zeros(len(values), dtype=float)
    for i in range(1, len(values)):
        previous = smoothed[i - 1]
        smoothed[i] = previous - (previous / len_param) + values[i]
    return smoothed


def calculate_adx_di(df, len_param=14):
    """
    Return a copy of ``df`` extended with 'DIPlus', 'DIMinus' and 'ADX' columns.

    The input frame is not modified. All intermediate series (true range,
    directional movements, their smoothed versions and DX) are kept local
    instead of being written to the frame and dropped afterwards.

    Parameters:
    - df: DataFrame with 'High', 'Low' and 'Close' columns, one row per bar in
      chronological order.
    - len_param: int, the period used for the recursive smoothing and for the
      rolling mean that turns DX into ADX.

    Returns:
    - A new DataFrame holding the original columns plus 'DIPlus', 'DIMinus'
      and 'ADX', with every row that contains a NaN removed (this includes the
      warm-up rows for which ADX is not yet defined).

    Raises:
    - ValueError: if ``len_param`` is smaller than 1.
    """
    if len_param < 1:
        raise ValueError("len_param must be a positive integer")

    high = df['High']
    low = df['Low']
    previous_close = df['Close'].shift(1)

    # True range: the largest of the three candidate ranges. The row-wise max
    # skips NaN, so the first row (no previous close) falls back to High - Low.
    true_range = pd.concat(
        [high - low, (high - previous_close).abs(), (low - previous_close).abs()],
        axis=1,
    ).max(axis=1)

    # Directional movement: only the larger of the up/down moves counts, and
    # only if it is positive. Comparisons against NaN are False, so the first
    # row gets 0 for both.
    up_move = high - high.shift(1)
    down_move = low.shift(1) - low
    dm_plus = up_move.clip(lower=0).where(up_move > down_move, 0.0)
    dm_minus = down_move.clip(lower=0).where(down_move > up_move, 0.0)

    # Recursive smoothing, seeded with 0 on the first row.
    index = df.index
    smoothed_tr = pd.Series(
        _wilder_running_sum(true_range.to_numpy(dtype=float), len_param), index=index)
    smoothed_plus = pd.Series(
        _wilder_running_sum(dm_plus.to_numpy(dtype=float), len_param), index=index)
    smoothed_minus = pd.Series(
        _wilder_running_sum(dm_minus.to_numpy(dtype=float), len_param), index=index)

    # Directional indicators, DX and its rolling mean (ADX). 0/0 yields NaN,
    # which is removed by the final dropna().
    di_plus = smoothed_plus / smoothed_tr * 100
    di_minus = smoothed_minus / smoothed_tr * 100
    dx = (di_plus - di_minus).abs() / (di_plus + di_minus) * 100
    adx = dx.rolling(window=len_param).mean()

    result = df.copy()
    result['DIPlus'] = di_plus
    result['DIMinus'] = di_minus
    result['ADX'] = adx

    # Clean NaN values
    return result.dropna()

In [334]:
# Apply the ADX and DI calculation function
data_with_features = calculate_adx_di(data)
print(data_with_features.head())

# Normalize the target: future close divided by the current close, so a value
# above 1 means the price rose over the horizon and below 1 means it fell.
data_with_features["Target"] = data_with_features["Target"] / data_with_features["Close"]

# Columns that are converted to row-over-row percent changes.
columns_to_pct_change = ["Open", "High", "Low", "Close", 'DIPlus', 'DIMinus', 'ADX']
# Multiplied by 100 to express the change as a percentage.
df_pct_change = data_with_features[columns_to_pct_change].pct_change() * 100

# Keep only the percent-change columns plus the normalized target.
data_with_features = pd.concat(
    [df_pct_change.add_suffix("_pct_change"), data_with_features["Target"]],
    axis=1,
)
print(data_with_features.head())

# pct_change() produces +/-inf when the previous value is 0 (possible for the
# DI/ADX columns). dropna() does not remove inf, and the estimator rejects it,
# so convert inf to NaN before dropping incomplete rows.
data_with_features = data_with_features.replace([np.inf, -np.inf], np.nan).dropna()

# Select input features and target. The feature names are derived from the
# column list above so the two cannot drift apart.
features = [f"{column}_pct_change" for column in columns_to_pct_change]
target = 'Target'

                                   Open          High           Low  \
Datetime                                                              
2024-12-26 00:28:00+00:00  99377.109375  99377.109375  99353.914062   
2024-12-26 00:30:00+00:00  99230.523438  99230.523438  99230.523438   
2024-12-26 00:32:00+00:00  99205.500000  99205.500000  99164.351562   
2024-12-26 00:34:00+00:00  99225.101562  99225.101562  99206.570312   
2024-12-26 00:36:00+00:00  99153.359375  99153.359375  99140.312500   

                                  Close     Adj Close    Volume        Target  \
Datetime                                                                        
2024-12-26 00:28:00+00:00  99353.914062  99353.914062  45821952  99044.070312   
2024-12-26 00:30:00+00:00  99230.523438  99230.523438         0  99063.562500   
2024-12-26 00:32:00+00:00  99164.351562  99164.351562  47640576  99077.421875   
2024-12-26 00:34:00+00:00  99206.570312  99206.570312  56594432  99147.750000   
2024-12-26 00:36

In [335]:
# Display the feature table (percent-change columns plus the normalized Target).
data_with_features

Unnamed: 0_level_0,Open_pct_change,High_pct_change,Low_pct_change,Close_pct_change,DIPlus_pct_change,DIMinus_pct_change,ADX_pct_change,Target
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-12-26 00:30:00+00:00,-0.147505,-0.147505,-0.124193,-0.124193,-13.356215,9.737941,-9.021133,0.998317
2024-12-26 00:32:00+00:00,-0.025217,-0.025217,-0.066685,-0.066685,-7.161247,4.122427,-9.173742,0.999123
2024-12-26 00:34:00+00:00,0.019759,0.019759,0.042575,0.042575,0.588154,-6.612063,-10.625442,0.999407
2024-12-26 00:36:00+00:00,-0.072302,-0.072302,-0.066788,-0.066788,-7.206587,4.471090,-10.958695,1.000842
2024-12-26 00:38:00+00:00,-0.065815,-0.065815,-0.097077,-0.097077,-10.131006,5.582868,-4.885004,1.000350
...,...,...,...,...,...,...,...,...
2025-01-25 11:18:00+00:00,-0.010133,-0.010133,0.007667,0.007667,-1.901819,-1.901819,-9.500457,0.999710
2025-01-25 11:20:00+00:00,-0.018900,-0.018900,-0.030874,-0.030874,-7.619882,8.681993,-3.085322,1.000054
2025-01-25 11:22:00+00:00,-0.013518,-0.013518,-0.034064,-0.034064,-8.299687,8.038108,-0.677267,1.000571
2025-01-25 11:24:00+00:00,-0.037798,-0.037798,-0.005276,-0.005276,-1.365112,1.122159,-1.160745,1.000930


In [336]:
# Preview the first rows of the feature table.
data_with_features.head()

Unnamed: 0_level_0,Open_pct_change,High_pct_change,Low_pct_change,Close_pct_change,DIPlus_pct_change,DIMinus_pct_change,ADX_pct_change,Target
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-12-26 00:30:00+00:00,-0.147505,-0.147505,-0.124193,-0.124193,-13.356215,9.737941,-9.021133,0.998317
2024-12-26 00:32:00+00:00,-0.025217,-0.025217,-0.066685,-0.066685,-7.161247,4.122427,-9.173742,0.999123
2024-12-26 00:34:00+00:00,0.019759,0.019759,0.042575,0.042575,0.588154,-6.612063,-10.625442,0.999407
2024-12-26 00:36:00+00:00,-0.072302,-0.072302,-0.066788,-0.066788,-7.206587,4.47109,-10.958695,1.000842
2024-12-26 00:38:00+00:00,-0.065815,-0.065815,-0.097077,-0.097077,-10.131006,5.582868,-4.885004,1.00035


In [337]:
# Work on a NaN-free copy so the feature table itself is left as it is.
tmp_df = data_with_features.copy().dropna()

In [338]:
# One flattened sample per sliding window of `input_length` consecutive rows.
# The feature values are extracted once, outside the comprehension: selecting
# `tmp_df[features]` again for every window repeats the same column selection
# len(tmp_df) times for no benefit.
feature_values = tmp_df[features].values
X = np.array([
    feature_values[i:i + input_length].flatten()
    for i in range(len(tmp_df) - input_length)
])
print(X)

[[-1.47504731e-01 -1.47504731e-01 -1.24193019e-01 ... -8.72092237e+00
   9.59874178e+00 -3.54375008e+00]
 [-2.52174801e-02 -2.52174801e-02 -6.66850004e-02 ... -2.20393782e-01
   6.12436265e-02 -7.26067940e+00]
 [ 1.97585441e-02  1.97585441e-02  4.25745233e-02 ...  4.34284961e+00
  -2.95468106e+00 -9.33780988e+00]
 ...
 [-2.25473944e-02 -2.25473944e-02  2.04414102e-03 ... -7.61988166e+00
   8.68199349e+00 -3.08532195e+00]
 [ 5.65309495e-03  1.34176770e-02  5.65309495e-03 ... -8.29968657e+00
   8.03810804e+00 -6.77267066e-01]
 [ 3.11613923e-02  3.32027980e-02  3.11613923e-02 ... -1.36511235e+00
   1.12215899e+00 -1.16074451e+00]]


In [339]:
# Display the feature table again before the modelling cell.
data_with_features

Unnamed: 0_level_0,Open_pct_change,High_pct_change,Low_pct_change,Close_pct_change,DIPlus_pct_change,DIMinus_pct_change,ADX_pct_change,Target
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-12-26 00:30:00+00:00,-0.147505,-0.147505,-0.124193,-0.124193,-13.356215,9.737941,-9.021133,0.998317
2024-12-26 00:32:00+00:00,-0.025217,-0.025217,-0.066685,-0.066685,-7.161247,4.122427,-9.173742,0.999123
2024-12-26 00:34:00+00:00,0.019759,0.019759,0.042575,0.042575,0.588154,-6.612063,-10.625442,0.999407
2024-12-26 00:36:00+00:00,-0.072302,-0.072302,-0.066788,-0.066788,-7.206587,4.471090,-10.958695,1.000842
2024-12-26 00:38:00+00:00,-0.065815,-0.065815,-0.097077,-0.097077,-10.131006,5.582868,-4.885004,1.000350
...,...,...,...,...,...,...,...,...
2025-01-25 11:18:00+00:00,-0.010133,-0.010133,0.007667,0.007667,-1.901819,-1.901819,-9.500457,0.999710
2025-01-25 11:20:00+00:00,-0.018900,-0.018900,-0.030874,-0.030874,-7.619882,8.681993,-3.085322,1.000054
2025-01-25 11:22:00+00:00,-0.013518,-0.013518,-0.034064,-0.034064,-8.299687,8.038108,-0.677267,1.000571
2025-01-25 11:24:00+00:00,-0.037798,-0.037798,-0.005276,-0.005276,-1.365112,1.122159,-1.160745,1.000930


In [340]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Work on a NaN-free copy of the feature table.
tmp_df = data_with_features.copy().dropna()

# Prepare input and target data.
# Sample i is the flattened block of rows i .. i+input_length-1, and it is
# paired with the Target of row i+input_length (the row right after the
# window). The value arrays are extracted once instead of once per window.
feature_values = tmp_df[features].values
target_values = tmp_df["Target"].values
n_samples = len(tmp_df) - input_length
if n_samples <= 0:
    raise ValueError(
        f"Need more than {input_length} rows to build one window, got {len(tmp_df)}"
    )

X = np.array([
    feature_values[i:i + input_length].flatten()
    for i in range(n_samples)
])
Y = target_values[input_length:]
print(input_length, target_length)
print(X.shape)
print(Y.shape)
print("---")

# Chronological split (no shuffling): the last 20% of the samples are the test set.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)

# Define and fit the KNN regressor.
knn = KNeighborsRegressor(n_neighbors=k)
knn.fit(X_train, y_train)

# Predict on the held-out samples and evaluate the model.
y_pred = knn.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

# Report the configuration together with its error.
print(input_length, target_length, k, mse)

50 5
(21378, 350)
(21378,)
---
50 5 50 1.3564973461757254e-05


In [341]:
# Compound the realised targets under three rules, each starting from 100:
#   para   - multiplied only on steps where the model predicts a rise (pred > 1)
#   karşı  - multiplied only on steps where the model predicts a fall (pred < 1)
#   sistem - multiplied on every step, regardless of the prediction
# NOTE(review): each target spans `target_length` rows while this loop advances
# one row at a time, so consecutive factors overlap when target_length > 1 --
# confirm that compounding them this way is intended.
para = karşı = sistem = 100

for pred, gt in zip(y_pred, y_test):
    sistem *= gt
    if pred > 1:
        para *= gt
    elif pred < 1:
        karşı *= gt

for balance in (para, karşı, sistem):
    print(balance)

77.18630284661161
127.76873142864096
98.61995998378549
