In [60]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# read csv file
csvPath = 'ADAUSDT_M15.csv'
df = pd.read_csv(csvPath)
df.head()

columns = df.columns.values
# high - low
if not 'Size' in columns:
    df['Size'] = df['High'] - df['Low']
# shift back
if not 'Previous_Close' in columns:
    df['Previous_Close'] = df['Close'].shift(-1)
# size * volume
if not 'VS_multipy' in columns:
    df['VS_multipy'] = df['Size'] * df['Volume']
    df = df[1:-1]
# true range
if not 'True_ranage' in columns:
    df['True_ranage'] = np.abs(df['Close'] - df['Open'])

# cacluate atr 18
if not 'atr_14' in columns:
    high_low = df['High'] - df['Low']
    high_cp = np.abs(df['High'] - df['Close'].shift())
    low_cp = np.abs(df['Low'] - df['Close'].shift())
    df_ = pd.concat([high_low, high_cp, low_cp], axis=1)
    true_range = np.max(df_, axis=1)
    average_true_range = true_range.rolling(14).mean()
    df['atr_14'] = average_true_range.tolist()
    df = df[14:]

# calculate atr 55
if not 'atr_55' in columns:
    high_low = df['High'] - df['Low']
    high_cp = np.abs(df['High'] - df['Close'].shift())
    low_cp = np.abs(df['Low'] - df['Close'].shift())
    df_ = pd.concat([high_low, high_cp, low_cp], axis=1)
    true_range = np.max(df_, axis=1)
    average_true_range = true_range.rolling(55).mean()
    df['atr_55'] = average_true_range.tolist()
    df = df[55:]

# candle colors
if not 'status' in columns:
    conditions = [(df['Close'] > df['Open']), (df['Close'] < df['Open']), (df['Close'] == df['Open'])]
    choices = [1, -1, 0] # up, down, zero
    df['status'] = np.select(condlist=conditions, choicelist=choices, default=0)
# candle types
if not 'candle_type' in columns:
    condistions = [
        (df['True_ranage'] < 0.8 * df['atr_14']), 
        ((df['True_ranage'] > 0.8 * df['atr_14']) & (df['True_ranage'] < 1.2 * df['atr_14'])), 
        ((df['True_ranage'] > 1.2 * df['atr_14']))]
    choices = [0, 1, 2] # 0 --> spining candles, 1 --> standard candles, 2 --> spike candles
    df['candle_type'] = np.select(condlist=condistions, choicelist=choices, default=1)

# atr_55 * previous close
if not 'AP_multipy' in columns:
    df['AP_multipy'] = df['atr_55'] * df['Previous_Close']
# atr_55 * Size
if not 'AS_multipy' in columns:
    df['AS_multipy'] = df['atr_55'] * df['Size']
# atr_55 * Volume
if not 'AV_multipy' in columns:
    df['AV_multipy'] = df['atr_55'] * df['Volume']
if not 'typeSize' in columns:
    df['typeSize'] = df[(df.True_ranage < 0.8 * df.atr_14)]['atr_14'] * 0.8
    df['typeSize'] = df[(df.True_ranage > 0.8 * df.atr_14) & df.True_ranage < 1.2 * df.atr_14]['atr_14']
    df['typeSize'] = df[(df.True_ranage > 1.2 * df.atr_14)]['atr_14'] * 1.2
# write to csv
df.to_csv(csvPath)
df = pd.read_csv(csvPath)

In [67]:
# train all date base
def modeling(dFrame: pd.DataFrame, candleType: str):
    dfTrain = dFrame.sample(frac=0.8, random_state=0)
    dfTest = dFrame.drop(dfTrain.index)
    reg = LinearRegression()

    xTrain = dfTrain[['Size', 'Volume', 'VS_multipy', 'AP_multipy', 'AS_multipy']].values
    yTrain = dfTrain['Previous_Close'].values
    xTest = dfTest[['Size', 'Volume', 'VS_multipy', 'AP_multipy', 'AS_multipy']].values
    yTest = dfTest['Previous_Close'].values
    reg.fit(xTrain, yTrain)
    yPredict = reg.predict(xTest)
    n = r2_score(yTest, yPredict)
    print('{}: {}'.format(candleType, n))

def spinig():
    global df
    df_ = df[(df.candle_type == 0)]
    modeling(df_, "Spining")

def standard():
    global df
    df_ = df[(df.candle_type == 1)]
    modeling(df_, 'Standard')

def spike():
    global df
    df_ = df[(df.candle_type == 2)]
    modeling(df_, 'Spike')

def standard_spining():
    global df
    df_ = df[(df.candle_type == 0) | (df.candle_type == 1)]
    modeling(df_, 'Standard & Spining')

spike()
standard()
spinig()
standard_spining()


Spike: 0.7583036751483617
Standard: 0.8535499977911221
Spining: 0.8402513029000225
Standard & Spining: 0.8392421716794853


In [65]:
df.drop('Unnamed: 0', axis=1)
df.drop('Unnamed: 0.6', axis=1)
df.drop('Unnamed: 0.5', axis=1)
df.drop('Unnamed: 0.4', axis=1)

Unnamed: 0.4,Unnamed: 0.6,Unnamed: 0.5,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Time,Open,High,Low,...,VS_multipy,atr_55,AP_multipy,AS_multipy,AV_multipy,True_ranage,atr_14,status,candle_type,typeSize
0,0,0,14,70,126,182,2018-04-19 00:30:00,0.267,0.270,0.267,...,1730.928,0.002473,0.000675,0.000007,1426.704291,0.003,0.003000,1,1,
1,1,1,15,71,127,183,2018-04-19 00:45:00,0.270,0.275,0.270,...,7011.265,0.002545,0.000690,0.000013,3569.371273,0.003,0.003071,1,1,
2,2,2,16,72,128,184,2018-04-19 01:00:00,0.273,0.273,0.266,...,5998.321,0.002636,0.000714,0.000018,2259.107909,0.002,0.003286,-1,0,
3,3,3,17,73,129,185,2018-04-19 01:15:00,0.271,0.272,0.270,...,1038.302,0.002636,0.000714,0.000005,1368.670818,0.000,0.003357,0,0,
4,4,4,18,74,130,186,2018-04-19 01:30:00,0.271,0.273,0.270,...,1411.998,0.002636,0.000722,0.000008,1240.846727,0.000,0.003500,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145892,145892,145892,145906,145962,146018,146074,2022-06-20 17:00:00,0.482,0.485,0.481,...,15283.264,0.005709,0.002769,0.000023,21813.385891,0.000,0.006143,0,0,
145893,145893,145893,145907,145963,146019,146075,2022-06-20 17:15:00,0.482,0.487,0.481,...,14042.856,0.005764,0.002790,0.000035,13489.652582,0.003,0.006286,1,0,
145894,145894,145894,145908,145964,146020,146076,2022-06-20 17:30:00,0.485,0.486,0.483,...,3808.587,0.005673,0.002751,0.000017,7201.691782,0.001,0.005857,-1,0,
145895,145895,145895,145909,145965,146021,146077,2022-06-20 17:45:00,0.484,0.486,0.483,...,2987.718,0.005655,0.002731,0.000017,5631.395745,0.001,0.005500,1,0,
