In [1]:
################# IMPORTS AND SETUP ##############################


from binance import Client
import pandas as pd
from supporting_functionsM2 import *
from API_KEYS2 import get_keys

# Build the Binance REST client from the credentials returned by API_KEYS2.get_keys()
KEY, SECRET = get_keys()
client = Client(api_key=KEY, api_secret=SECRET)

# Setup variables for creating the dataset
start = "1514761200" # unix seconds: 2017-12-31 23:00:00 UTC, i.e. the first candle of the download
periods = 3 # number of consecutive 30-day download windows (3 windows = approx. 90 days)
TICKER = "LTCUSDT"
options = ["high", "low", "close", "volume"] # BASE OPTIONS (feature columns built per candle)
EPOCHS=5 # number of consecutive hourly candles used as features for one labelled row
THRESHOLD=0.5 # Labelling threshold in percent: label = 1 if the PREDICTOR value of the next candle is > THRESHOLD, else 0
# Derived from TICKER so that changing the ticker keeps the column names consistent
PREDICTOR=f"{TICKER}:high"
TIME = f"{TICKER}:time"

In [2]:
################# DOWNLOADING RAW DATA ##########################################

LIMIT = 720 # 720h = 30 days
KLINE_COLUMNS = ["time", "open", "high", "low", "close", "volume"]


def _download_window(window_start, window_end):
    """Download the hourly candlesticks of TICKER between two unix timestamps (seconds).

    Only the first six fields of every kline are kept and returned as a
    DataFrame with the columns listed in KLINE_COLUMNS.
    """
    klines = client.get_historical_klines(
        TICKER,
        client.KLINE_INTERVAL_1HOUR,
        limit=LIMIT,
        start_str=unix_to_datetime_string(window_start, in_milliseconds=False),
        end_str=unix_to_datetime_string(window_end, in_milliseconds=False),
    )
    return pd.DataFrame(data=[row[0:6] for row in klines], columns=KLINE_COLUMNS)


############################################ DOWNLOADING DATA ######################################################################
# One request per 30-day window; the frames are collected in a list and
# concatenated once instead of growing a DataFrame inside the loop.
window_frames = []
start1 = start
end1 = next_30_days_unix_timestamp(start1)
for period_index in range(periods):
    if period_index > 0:
        # Moving the start and end interval to the next 30-day window
        start1 = next_30_days_unix_timestamp(start1)
        end1 = next_30_days_unix_timestamp(start1)
    window_frames.append(_download_window(start1, end1))

if window_frames:
    data = pd.concat(window_frames, ignore_index=True)
else:
    # periods < 1: nothing was requested, keep an empty frame with the expected columns
    data = pd.DataFrame(columns=KLINE_COLUMNS)

# Adjacent windows share their boundary timestamp, so the boundary candle can
# be returned by both requests; keep a single row per candle open time.
data = data.drop_duplicates(subset="time").reset_index(drop=True)

# converting all time values from unix to readable string, not important, just for visual purposes and fact checking
data["time"] = data["time"].apply(unix_to_datetime_string)

raw_data = data
display(raw_data)


Unnamed: 0,time,open,high,low,close,volume
0,2017-12-31 23:00:00,226.44000000,227.22000000,222.53000000,223.69000000,1157.31361000
1,2018-01-01 00:00:00,223.69000000,223.69000000,217.25000000,219.98000000,978.11778000
2,2018-01-01 01:00:00,219.25000000,220.50000000,212.74000000,212.75000000,1165.80333000
3,2018-01-01 02:00:00,212.75000000,216.99000000,212.75000000,216.32000000,736.41344000
4,2018-01-01 03:00:00,215.71000000,220.99000000,215.20000000,218.12000000,1005.10601000
...,...,...,...,...,...,...
2123,2018-03-31 18:00:00,120.39000000,120.90000000,118.99000000,119.24000000,2903.20039000
2124,2018-03-31 19:00:00,119.24000000,120.14000000,118.70000000,119.37000000,3769.47556000
2125,2018-03-31 20:00:00,119.37000000,119.55000000,116.26000000,117.00000000,8656.94043000
2126,2018-03-31 21:00:00,117.14000000,117.34000000,116.03000000,116.54000000,4864.31633000


In [3]:
########################################## PREPROCESSING DATA ####################################################################

# Column names of the processed dataset. The "open" feature (open vs. previous
# open) is currently not part of the dataset.
time_key = f"{TICKER}:time"
high_key = f"{TICKER}:high"
low_key = f"{TICKER}:low"
close_key = f"{TICKER}:close"
volume_key = f"{TICKER}:volume"
processed_keys = [time_key, high_key, low_key, close_key, volume_key]
processed_columns = {key: [] for key in processed_keys}

# The raw candle values are converted to float once, up front. The original
# zero-check compared the unconverted value with the int 0, which never matches
# when the prices are stored as strings, so the guard could not trigger.
candle_times = raw_data["time"].tolist()
opens = [float(value) for value in raw_data["open"]]
highs = [float(value) for value in raw_data["high"]]
lows = [float(value) for value in raw_data["low"]]
closes = [float(value) for value in raw_data["close"]]
volumes = [float(value) for value in raw_data["volume"]]

# Skipping the first hour: it is only used as the reference for the volume change of the second hour
for i in range(1, len(opens)):
    open_price = opens[i]

    # A zero open price cannot serve as the base of a percent difference
    if open_price == 0:
        continue

    processed_columns[time_key].append(candle_times[i]) # time is the same
    processed_columns[high_key].append(percent_difference(open_price, highs[i])) # percent diff between open and high
    processed_columns[low_key].append(percent_difference(open_price, lows[i])) # percent diff between open and low
    processed_columns[close_key].append(percent_difference(open_price, closes[i])) # percent diff between open and close
    # percent difference between the volume of the prior candlestick and the volume of the current one
    # NOTE(review): a prior volume of 0 is passed on unchanged - confirm how percent_difference handles a zero base
    processed_columns[volume_key].append(percent_difference(volumes[i - 1], volumes[i]))


processed_data = pd.DataFrame(data=processed_columns, columns=processed_keys)
display(processed_data)

Unnamed: 0,LTCUSDT:time,LTCUSDT:high,LTCUSDT:low,LTCUSDT:close,LTCUSDT:volume
0,2018-01-01 00:00:00,0.000000,-2.878984,-1.658545,-15.483775
1,2018-01-01 01:00:00,0.570125,-2.969213,-2.964652,19.188441
2,2018-01-01 02:00:00,1.992949,0.000000,1.678026,-36.832104
3,2018-01-01 03:00:00,2.447731,-0.236429,1.117241,36.486647
4,2018-01-01 04:00:00,2.911242,0.000000,2.672841,-13.130611
...,...,...,...,...,...
2122,2018-03-31 18:00:00,0.423623,-1.162887,-0.955229,-40.748274
2123,2018-03-31 19:00:00,0.754780,-0.452868,0.109024,29.838628
2124,2018-03-31 20:00:00,0.150792,-2.605345,-1.985423,129.659015
2125,2018-03-31 21:00:00,0.170736,-0.947584,-0.512208,-43.810214


In [4]:
########################### LABELING THE DATA ##################################

# Source columns of the processed dataset, one per base option
feature_columns = [f"{TICKER}:{option}" for option in options]

# Header of the labelled dataset: time, then every option for each of the
# EPOCHS candles in the window (suffix 0 = oldest candle), then the label
column_labels = (
    ["time"]
    + [f"{TICKER}:{option}{roundd}" for roundd in range(EPOCHS) for option in options]
    + ["Label"]
)

# Pull the needed columns out of the DataFrame once
time_values = processed_data[TIME].tolist()
target_values = processed_data[PREDICTOR].tolist()
feature_values = {name: processed_data[name].tolist() for name in feature_columns}

# One row per sliding window of EPOCHS consecutive candles; the candle right
# after the window decides the label, so the last EPOCHS candles start no window
data = []
for window_start in range(len(time_values) - EPOCHS):
    window_end = window_start + EPOCHS

    # Timestamp of the newest candle inside the window
    data_row = [time_values[window_end - 1]]

    # Features: candle by candle, option by option
    data_row.extend(
        feature_values[name][position]
        for position in range(window_start, window_end)
        for name in feature_columns
    )

    # Label: 1 if the predictor value of the following candle exceeds the threshold
    data_row.append(1 if target_values[window_end] > THRESHOLD else 0)

    data.append(data_row)


labelled_data_frame = pd.DataFrame(data, columns=column_labels)
display(labelled_data_frame)

Unnamed: 0,time,LTCUSDT:high0,LTCUSDT:low0,LTCUSDT:close0,LTCUSDT:volume0,LTCUSDT:high1,LTCUSDT:low1,LTCUSDT:close1,LTCUSDT:volume1,LTCUSDT:high2,...,LTCUSDT:volume2,LTCUSDT:high3,LTCUSDT:low3,LTCUSDT:close3,LTCUSDT:volume3,LTCUSDT:high4,LTCUSDT:low4,LTCUSDT:close4,LTCUSDT:volume4,Label
0,2018-01-01 04:00:00,0.000000,-2.878984,-1.658545,-15.483775,0.570125,-2.969213,-2.964652,19.188441,1.992949,...,-36.832104,2.447731,-0.236429,1.117241,36.486647,2.911242,0.000000,2.672841,-13.130611,1
1,2018-01-01 05:00:00,0.570125,-2.969213,-2.964652,19.188441,1.992949,0.000000,1.678026,-36.832104,2.447731,...,36.486647,2.911242,0.000000,2.672841,-13.130611,0.723343,-0.549205,0.107162,-31.348340,1
2,2018-01-01 06:00:00,1.992949,0.000000,1.678026,-36.832104,2.447731,-0.236429,1.117241,36.486647,2.911242,...,-13.130611,0.723343,-0.549205,0.107162,-31.348340,2.952984,-0.388081,2.551521,58.440409,0
3,2018-01-01 07:00:00,2.447731,-0.236429,1.117241,36.486647,2.911242,0.000000,2.672841,-13.130611,0.723343,...,-31.348340,2.952984,-0.388081,2.551521,58.440409,0.039146,-2.222609,-2.096473,-29.899582,1
4,2018-01-01 08:00:00,2.911242,0.000000,2.672841,-13.130611,0.723343,-0.549205,0.107162,-31.348340,2.952984,...,58.440409,0.039146,-2.222609,-2.096473,-29.899582,0.830631,-0.946120,-0.279838,51.699673,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2117,2018-03-31 17:00:00,0.008226,-1.842560,-1.743851,-36.189370,0.970224,-0.953496,0.869856,6.845180,0.041459,...,-17.404746,1.190278,-0.166472,0.815715,12.208238,0.346764,-1.147622,-0.470608,25.043170,0
2118,2018-03-31 18:00:00,0.970224,-0.953496,0.869856,6.845180,0.041459,-1.036484,-0.348259,-17.404746,1.190278,...,12.208238,0.346764,-1.147622,-0.470608,25.043170,0.423623,-1.162887,-0.955229,-40.748274,1
2119,2018-03-31 19:00:00,0.041459,-1.036484,-0.348259,-17.404746,1.190278,-0.166472,0.815715,12.208238,0.346764,...,25.043170,0.423623,-1.162887,-0.955229,-40.748274,0.754780,-0.452868,0.109024,29.838628,0
2120,2018-03-31 20:00:00,1.190278,-0.166472,0.815715,12.208238,0.346764,-1.147622,-0.470608,25.043170,0.423623,...,-40.748274,0.754780,-0.452868,0.109024,29.838628,0.150792,-2.605345,-1.985423,129.659015,0


# Model Training

This code block takes the first `TRAINING_SIZE` rows of the labelled dataset as the training set and trains a new model on it using the **AutoGluon Tabular** predictor.

In [5]:
################################ TRAINING NEW MODEL #######################################
from autogluon.tabular import TabularDataset, TabularPredictor

# Number of leading rows of the labelled dataset used for training
TRAINING_SIZE = 1000
LABEL = "Label"

# Columns used for training: every option of every candle in the window plus
# the label. Built from TICKER / options / EPOCHS so the list always matches
# the column names of the labelled dataset ("time" is left out on purpose).
columns_to_use = [f"{TICKER}:{option}{step}" for step in range(EPOCHS) for option in options] + [LABEL]

# defining training data
training_dataframe = labelled_data_frame.iloc[:TRAINING_SIZE]
train_data_frame2 = training_dataframe[columns_to_use]
train_tabular_dataset = TabularDataset(train_data_frame2)

# Training model -> TabularPredictor
# Alternative configurations that were tried:
# predictor = TabularPredictor(label=LABEL, eval_metric="balanced_accuracy", positive_class=1).fit(train_tabular_dataset, num_bag_folds=5, num_bag_sets=5, num_stack_levels=3)
# predictor = TabularPredictor(label=LABEL, eval_metric="accuracy").fit(train_tabular_dataset, presets="high_quality")
predictor = TabularPredictor(label=LABEL).fit(train_tabular_dataset)



  from .autonotebook import tqdm as notebook_tqdm
No path specified. Models will be saved in: "AutogluonModels\ag-20250211_213725"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.11
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          12
Memory Avail:       8.49 GB / 15.92 GB (53.3%)
Disk Space Avail:   172.04 GB / 475.69 GB (36.2%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prot

# Model Evaluation

This code block uses the remaining rows of the labelled dataset (everything after the first `TRAINING_SIZE` rows) as the validation dataset and evaluates the model using **AutoGluon's** built-in evaluation tools. In addition, the model is backtested on the validation set to measure its performance and calculate its "expected return" over the period.

In [6]:
#################### MODEL EVALUATION ################################

# Hold-out set: every labelled row after the training rows, restricted to the training columns
testing_dataframe = labelled_data_frame.iloc[TRAINING_SIZE:]
test_data_frame2 = testing_dataframe.loc[:, columns_to_use]
test_tabular_dataset = TabularDataset(test_data_frame2)

######## Predictions (the label column is removed from the input first)
test_features = test_tabular_dataset.drop(columns=[LABEL])
y_pred = predictor.predict(test_features)
display(y_pred)

#### Evaluation metrics on the hold-out set
eval_report = predictor.evaluate(test_tabular_dataset, detailed_report=True)
display(eval_report)

#### Feature importance on the hold-out set
feature_importance = predictor.feature_importance(test_tabular_dataset)
display(feature_importance)


1000    1
1001    1
1002    1
1003    1
1004    1
       ..
2117    1
2118    1
2119    0
2120    1
2121    1
Name: Label, Length: 1122, dtype: int64

{'accuracy': 0.6381461675579323,
 'balanced_accuracy': 0.5511870368044216,
 'mcc': 0.1412537632321912,
 'roc_auc': 0.6160539386264798,
 'f1': 0.7560096153846154,
 'precision': 0.6545265348595213,
 'recall': 0.8947368421052632,
 'confusion_matrix':     0    1
 0  87  332
 1  74  629,
 'classification_report': {'0': {'precision': 0.5403726708074534,
   'recall': 0.20763723150357996,
   'f1-score': 0.3,
   'support': 419.0},
  '1': {'precision': 0.6545265348595213,
   'recall': 0.8947368421052632,
   'f1-score': 0.7560096153846154,
   'support': 703.0},
  'accuracy': 0.6381461675579323,
  'macro avg': {'precision': 0.5974496028334874,
   'recall': 0.5511870368044216,
   'f1-score': 0.5280048076923077,
   'support': 1122.0},
  'weighted avg': {'precision': 0.6118968833106653,
   'recall': 0.6381461675579323,
   'f1-score': 0.5857172545591662,
   'support': 1122.0}}}

Computing feature importance via permutation shuffling for 20 features using 1122 rows with 5 shuffle sets...
	2.74s	= Expected runtime (0.55s per shuffle set)
	1.31s	= Actual runtime (Completed 5 of 5 shuffle sets)


Unnamed: 0,importance,stddev,p_value,n,p99_high,p99_low
LTCUSDT:high4,0.012478,0.004841,0.002248,5,0.022445,0.00251
LTCUSDT:close3,0.011586,0.005042,0.003399,5,0.021968,0.001205
LTCUSDT:close4,0.010517,0.003696,0.001564,5,0.018128,0.002906
LTCUSDT:low3,0.010339,0.004113,0.002464,5,0.018808,0.001869
LTCUSDT:high1,0.009091,0.002915,0.001112,5,0.015094,0.003088
LTCUSDT:high0,0.008556,0.004571,0.006927,5,0.017967,-0.000855
LTCUSDT:close1,0.0082,0.008488,0.04844,5,0.025677,-0.009277
LTCUSDT:volume2,0.007308,0.005721,0.023044,5,0.019088,-0.004471
LTCUSDT:volume0,0.006952,0.007224,0.048893,5,0.021827,-0.007923
LTCUSDT:low4,0.006952,0.001827,0.000523,5,0.010713,0.003191


In [19]:
##################### PROBABILITY ANALYSIS AND EXPECTED RETURNS
################# Probability analysis ###########################
import statistics as st

# Minimum required confidence (predicted probability of class 1) for executing a trade
PROB_THRESH = 0.55
# Multiplier applied to the percentage move of every simulated trade
LEVERAGE = 10
# Take-profit level in percent; tied to the labelling threshold instead of a separate literal
TAKE_PROFIT = THRESHOLD
# Number of hourly rows after which the running "month" is closed and recorded
HOURS_PER_MONTH = 730


def _mean_or_nan(values):
    """Return the arithmetic mean of `values`, or NaN when `values` is empty."""
    return st.mean(values) if values else float("nan")


y_prob = predictor.predict_proba(test_tabular_dataset.drop(columns=[LABEL]))
# display(y_prob)

# The newest candle of the *following* row is the candle the current row's
# label refers to, so its high/low/close are the realised outcome of a trade.
# The last row has no following row and is dropped explicitly (previously the
# failed lookup was swallowed by a blanket try/except).
last_step = EPOCHS - 1
trades = pd.DataFrame(
    {
        "prob": y_prob[1],
        "pred": y_pred,
        "actual": test_data_frame2[LABEL],
        "next_high": test_data_frame2[f"{TICKER}:high{last_step}"].shift(-1),
        "next_low": test_data_frame2[f"{TICKER}:low{last_step}"].shift(-1),
        "next_close": test_data_frame2[f"{TICKER}:close{last_step}"].shift(-1),
    }
).iloc[:-1]

counter = 0  # number of executed trades
correct = 0  # executed trades whose label was 1
predicted_high_list = []
predicted_low_list = []
predicted_close_list = []

balance = 100  # running balance over the whole test period

hour_count = 0
month_gain = []  # gain of every (possibly partial) month, relative to 100
month = 100  # running balance of the current month

print("------------------- TRADES: ------------------------------")
for row in trades.itertuples():
    if row.prob > PROB_THRESH:
        counter += 1

        if row.pred == 1 and row.actual == 1:
            correct += 1
            predicted_high_list.append(row.next_high)
            predicted_low_list.append(row.next_low)
            predicted_close_list.append(row.next_close)

        ### LOGIC FOR CALCULATING GAIN ###
        if row.next_high >= TAKE_PROFIT:
            # Take-profit reached: the trade is closed at the threshold
            factor = 1 + ((TAKE_PROFIT / 100) * LEVERAGE)
            print(f"{row.Index}. Gain +{TAKE_PROFIT * LEVERAGE:g}%")
        else:
            # Take-profit missed: the trade is closed at the candle close
            # NOTE(review): a leveraged loss of 100% or more makes this factor
            # non-positive; liquidation is not modelled here.
            factor = 1 + ((row.next_close / 100) * LEVERAGE)
            print(f"{row.Index}. Close {row.next_close * LEVERAGE:+}")

        balance *= factor
        month *= factor

    hour_count += 1
    if hour_count == HOURS_PER_MONTH:
        hour_count = 0
        month_gain.append(round(month - 100, 3))
        month = 100

# Record the trailing partial month, rounded like the full months; skipped
# when no hour has passed since the last full month was closed.
if hour_count:
    month_gain.append(round(month - 100, 3))

winrate = correct / counter if counter else float("nan")

print("------------------------------------------")
print(f"Correct: {correct}")
print(f"Counter: {counter}")
print(f"Winrate: {winrate}")

print(f"AVG High: {_mean_or_nan(predicted_high_list)}")
print(f"AVG Low: {_mean_or_nan(predicted_low_list)}")
print(f"AVG Close: {_mean_or_nan(predicted_close_list)}")
print()
print(f"Balance: {balance}")
print(f"Return: {balance - 100}%")
print()
print(f"Month List: {month_gain}")
print(f"Mean month gain: {_mean_or_nan(month_gain)}")

------------------- TRADES: ------------------------------
1020. Gain +5%
1023. Gain +5%
1025. Close +-5.730189914865812
1030. Gain +5%
1032. Gain +5%
1034. Close +-21.378460371634525
1036. Gain +5%
1038. Gain +5%
1039. Close +-6.181417413388241
1041. Close +-9.612243945686167
1043. Gain +5%
1047. Gain +5%
1048. Gain +5%
1053. Gain +5%
1054. Gain +5%
1055. Close +-28.353277370959074
1057. Close +-16.545274705512
1064. Gain +5%
1065. Gain +5%
1067. Close +-11.822037722227257
1070. Gain +5%
1079. Gain +5%
1085. Gain +5%
1120. Gain +5%
1121. Gain +5%
1123. Gain +5%
1125. Close +-5.556061572092171
1134. Gain +5%
1137. Close +-5.6313119560664955
1144. Close +-0.18601190476186774
1145. Gain +5%
1150. Gain +5%
1167. Gain +5%
1185. Gain +5%
1186. Close +-19.693654266958426
1188. Gain +5%
1190. Close +-12.764223962340981
1192. Gain +5%
1193. Close +-0.17841213202494222
1197. Gain +5%
1200. Gain +5%
1201. Gain +5%
1206. Gain +5%
1221. Gain +5%
1222. Gain +5%
1224. Gain +5%
1226. Gain +5%
1230. C